-
Notifications
You must be signed in to change notification settings - Fork 37
/
gpy.go
180 lines (152 loc) · 3.35 KB
/
gpy.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
// Copyright (c) 2016 mozillazg
// Copyright (c) 2017 go-ego
//
// All rights reserved.
// Use of this source code is governed by a MIT style
// license that can be found in the LICENSE file.
package gpy
import (
"strings"
"unicode"
"github.com/go-ego/gse"
)
// Package metadata.
const (
// Version is the gpy version string.
Version = "v0.40.0.133"
// License is the name of the license gpy is distributed under.
License = "MIT"
)
// GetVersion returns the value of the Version constant.
func GetVersion() string {
return Version
}
// args resolves the optional Args parameter accepted by the public API:
// it returns the first element of arg when one was supplied, and the value
// of NewArgs() otherwise. Any additional elements of arg are ignored.
func args(arg ...Args) Args {
	if len(arg) == 0 {
		return NewArgs()
	}
	return arg[0]
}
// IsChineseChar reports whether str contains at least one rune that belongs
// to the Unicode Han script. It returns false for the empty string and for
// strings without any Han rune.
func IsChineseChar(str string) bool {
	for _, c := range str {
		if !unicode.Is(unicode.Han, c) {
			continue
		}
		return true
	}
	return false
}
// Pinyin converts the Chinese characters in s to pinyin. It supports
// heteronym mode and input that mixes Chinese with English or other letters.
//
// s is split into segments with SplitTextToWords on a zero-value
// gse.Segmenter. Each segment that is exactly one Han rune is converted with
// SinglePinyin; if SinglePinyin yields no reading the segment is dropped.
// Every other non-empty segment is passed through unchanged as a one-element
// slice. Empty segments are skipped.
//
// The optional arg selects the conversion options; only its first element is
// used, and NewArgs() is used when it is omitted.
func Pinyin(s string, arg ...Args) [][]string {
	a := args(arg...)

	var seg gse.Segmenter
	words := seg.SplitTextToWords([]byte(s))

	pys := make([][]string, 0, len(words))
	for _, word := range words {
		text := string(word)
		runes := []rune(text)

		switch {
		case len(runes) == 0:
			// Nothing to convert; also avoids indexing runes[0] on an
			// empty segment, which would panic.
			continue
		case len(runes) == 1 && unicode.Is(unicode.Han, runes[0]):
			if py := SinglePinyin(runes[0], a); len(py) > 0 {
				pys = append(pys, py)
			}
		default:
			pys = append(pys, []string{text})
		}
	}
	return pys
}
// ToString flattens a pinyin result into a single string, using only the
// first reading (element 0) of every entry in p.
//
// Entries are concatenated in order. An entry whose first rune is a letter is
// preceded by a single space when it is at least the second entry of the
// current run. An entry whose first rune is not a letter is appended without
// a space and starts a new run if it is not the first entry of the run or if
// it begins with whitespace, so the entry following it gets no leading space.
//
// Entries that are empty slices or whose first reading is the empty string
// contribute nothing and are skipped instead of causing a panic.
func ToString(p [][]string) (s string) {
	var b strings.Builder

	// run counts the entries emitted since the start or the last reset.
	run := 0
	for _, entry := range p {
		if len(entry) == 0 || entry[0] == "" {
			continue
		}

		word := entry[0]
		first := []rune(word)[0]
		run++

		if run > 1 && unicode.IsLetter(first) {
			b.WriteByte(' ')
			b.WriteString(word)
			continue
		}

		if run > 1 || unicode.IsSpace(first) {
			run = 0
		}
		b.WriteString(word)
	}
	return b.String()
}
// Py converts s with Pinyin, forwarding the optional a unchanged, and
// returns the result flattened to a single string by ToString.
func Py(s string, a ...Args) string {
	return ToString(Pinyin(s, a...))
}
// SinglePinyin converts a single Chinese character, given as the rune r,
// to pinyin.
//
// The reading is looked up in PinyinDict first; when that yields an empty
// string, PinyinDictAdd is consulted instead. A found value is split on ","
// into its individual readings. When no entry is found, a.Fallback is called
// (the package-level Fallback is used if a.Fallback is nil).
//
// A non-empty result is cut down to its first reading unless a.Heteronym is
// set, and is then passed through applyStyle. An empty result is returned
// as is.
func SinglePinyin(r rune, a Args) []string {
	if a.Fallback == nil {
		a.Fallback = Fallback
	}

	key := int(r)
	readings, found := PinyinDict[key]
	if readings == "" {
		readings, found = PinyinDictAdd[key]
	}

	var out []string
	switch {
	case found:
		out = strings.Split(readings, ",")
	default:
		out = a.Fallback(r, a)
	}

	if len(out) == 0 {
		return out
	}
	if !a.Heteronym {
		out = out[:1]
	}
	return applyStyle(out, a)
}
// HanPinyin converts Chinese characters to pinyin and supports heteronym
// mode.
//
// Every rune of s is passed to SinglePinyin; runes for which it returns no
// reading are omitted from the result. The returned slice is never nil.
// Only the first element of the optional arg is used.
func HanPinyin(s string, arg ...Args) [][]string {
	opts := args(arg...)

	result := make([][]string, 0)
	for _, ch := range s {
		if readings := SinglePinyin(ch, opts); len(readings) != 0 {
			result = append(result, readings)
		}
	}
	return result
}
// LazyPinyin converts Chinese characters to pinyin. It differs from Pinyin
// in its return type and in that heteronym mode is not supported: Heteronym
// is forced to false and only the first reading of each character reported
// by HanPinyin is kept. The returned slice is never nil.
func LazyPinyin(s string, arg ...Args) []string {
	opts := args(arg...)
	opts.Heteronym = false

	readings := HanPinyin(s, opts)
	out := make([]string, 0, len(readings))
	for i := range readings {
		out = append(out, readings[i][0])
	}
	return out
}
// Slug joins the result of LazyPinyin(s, a) using a.Separator.
// Consider using https://github.com/mozillazg/go-slugify instead.
func Slug(s string, a Args) string {
	return strings.Join(LazyPinyin(s, a), a.Separator)
}
// Convert differs from Pinyin only in that a may be nil, in which case the
// value of NewArgs() is used.
func Convert(s string, a *Args) [][]string {
	if a != nil {
		return Pinyin(s, *a)
	}
	return Pinyin(s, NewArgs())
}
// LazyConvert differs from LazyPinyin only in that a may be nil, in which
// case the value of NewArgs() is used.
func LazyConvert(s string, a *Args) []string {
	if a != nil {
		return LazyPinyin(s, *a)
	}
	return LazyPinyin(s, NewArgs())
}