-
Notifications
You must be signed in to change notification settings - Fork 0
/
isChinese.js
134 lines (125 loc) · 3.19 KB
/
isChinese.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
import isString from './isString'
// Inclusive [start, end] code point ranges that count as Chinese ideographs.
// https://en.wikipedia.org/wiki/CJK_Unified_Ideographs
const CHINESE_IDEOGRAPH_RANGES = [
  // CJK Unified Ideographs
  [0x4e00, 0x9fff],
  // CJK Unified Ideographs Extension A - H
  [0x3400, 0x4dbf],
  [0x20000, 0x2a6df],
  [0x2a700, 0x2b73f],
  [0x2b740, 0x2b81f],
  [0x2b820, 0x2ceaf],
  [0x2ceb0, 0x2ebef],
  [0x30000, 0x3134f],
  [0x31350, 0x323af]
]

// Inclusive [start, end] code point ranges that count as Chinese punctuation.
const CHINESE_PUNCTUATION_RANGES = [
  // U+FF0C fullwidth comma
  [0xff0c, 0xff0c],
  // U+3002 ideographic full stop
  [0x3002, 0x3002],
  // U+00B7 middle dot
  [0x00b7, 0x00b7],
  // U+00D7 multiplication sign
  [0x00d7, 0x00d7],
  // U+2014 em dash
  [0x2014, 0x2014],
  // U+2018 left single quotation mark
  [0x2018, 0x2018],
  // U+2019 right single quotation mark
  [0x2019, 0x2019],
  // U+201C left double quotation mark
  [0x201c, 0x201c],
  // U+201D right double quotation mark
  [0x201d, 0x201d],
  // U+2026 horizontal ellipsis
  [0x2026, 0x2026],
  // U+3001 ideographic comma
  [0x3001, 0x3001],
  // U+300A left double angle bracket
  [0x300a, 0x300a],
  // U+300B right double angle bracket
  [0x300b, 0x300b],
  // U+300E left white corner bracket
  [0x300e, 0x300e],
  // U+300F right white corner bracket
  [0x300f, 0x300f],
  // U+3010 left black lenticular bracket
  [0x3010, 0x3010],
  // U+3011 right black lenticular bracket
  [0x3011, 0x3011],
  // U+FF01 fullwidth exclamation mark
  [0xff01, 0xff01],
  // U+FF08 fullwidth left parenthesis
  [0xff08, 0xff08],
  // U+FF09 fullwidth right parenthesis
  [0xff09, 0xff09],
  // U+FF1A fullwidth colon
  [0xff1a, 0xff1a],
  // U+FF1B fullwidth semicolon
  [0xff1b, 0xff1b],
  // U+FF1F fullwidth question mark
  [0xff1f, 0xff1f],
  // U+FF5E fullwidth tilde
  [0xff5e, 0xff5e],
  // CJK Compatibility Forms
  // https://en.wikipedia.org/wiki/CJK_Compatibility_Forms
  [0xfe30, 0xfe4f]
]

// Turns one [start, end] pair into a character-class item such as
// `\u{4e00}-\u{9fff}` (or just `\u{3002}` when start and end are equal).
const toCharClassItem = ([start, end]) => {
  const from = `\\u{${start.toString(16)}}`
  return start === end ? from : `${from}-\\u{${end.toString(16)}}`
}

// Builds an anchored, Unicode-aware pattern that matches only strings made up
// entirely (one or more code points) of characters inside the given ranges.
const rangesToRegExp = (ranges) => {
  return new RegExp(`^[${ranges.map(toCharClassItem).join('')}]+$`, 'u')
}

// Patterns are compiled lazily on first use and then reused: the range tables
// never change, so rebuilding the RegExp on every call is wasted work. The
// patterns carry no `g`/`y` flag, so sharing them between calls is stateless.
const CHINESE_PATTERN_CACHE = {
  withPunctuation: null,
  ideographsOnly: null
}

/**
 * Checks whether a string consists solely of Chinese characters.
 * ========================================================================
 * According to Wikipedia, Chinese characters cover the following:
 *
 * 1. Chinese Han characters (CJK Unified Ideographs)
 * 2. CJK Unified Ideographs Extension A - H
 * 3. CJK Compatibility Ideographs
 * 4. CJK Compatibility Ideographs Supplement
 * 5. Chinese punctuation marks
 * 6. Compatibility punctuation (CJK Compatibility Forms)
 *
 * Of these:
 *
 * CJK Compatibility Ideographs: [0xf900, 0xfaff]
 * (https://en.wikipedia.org/wiki/CJK_Compatibility_Ideographs) and
 * CJK Compatibility Ideographs Supplement: [0x2f800, 0x2fa1f]
 * (https://en.wikipedia.org/wiki/CJK_Compatibility_Ideographs_Supplement)
 *
 * only look like Han characters, so isChinese() does NOT treat them as
 * Chinese characters.
 *
 * Every code point of the string must fall inside the accepted ranges; the
 * empty string, and any string containing other characters (ASCII letters,
 * digits, whitespace, ...), yields false.
 * ========================================================================
 * @method isChinese
 * @since 1.2.0
 * @param {String} str - (required) the string to check
 * @param {Boolean} [includePunctuation] - (optional) whether Chinese
 * punctuation marks are accepted as well; default: true
 * @returns {boolean} - true when 'str' consists only of Chinese characters,
 * false otherwise (including when isString() rejects 'str')
 */
const isChinese = (str, includePunctuation = true) => {
  if (!isString(str)) {
    return false
  }

  // Any truthy value enables punctuation, matching the original ternary.
  const cacheKey = includePunctuation ? 'withPunctuation' : 'ideographsOnly'

  if (!CHINESE_PATTERN_CACHE[cacheKey]) {
    CHINESE_PATTERN_CACHE[cacheKey] = rangesToRegExp(
      includePunctuation
        ? CHINESE_IDEOGRAPH_RANGES.concat(CHINESE_PUNCTUATION_RANGES)
        : CHINESE_IDEOGRAPH_RANGES
    )
  }

  // No separate ASCII pre-check is needed: the pattern is anchored at both
  // ends, so a single character outside the ranges already fails the match.
  return CHINESE_PATTERN_CACHE[cacheKey].test(str)
}
export default isChinese