-
Notifications
You must be signed in to change notification settings - Fork 1
/
radicals.py
executable file
·186 lines (143 loc) · 4.96 KB
/
radicals.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
#!/usr/bin/python3
# -*- coding: utf-8 -*-
import sys
import string
from collections import namedtuple
# Lightweight record types shared by the loaders and the lookup tables.
# Kanji: a single character plus the list of radical glyphs it decomposes into.
Kanji = namedtuple('Kanji', ['character', 'radicals'])
# Radical: one row of the radical table -- numeric id, list of glyph variants,
# stroke count, list of readings, list of meanings and a free-form comment.
Radical = namedtuple(
    'Radical',
    ['id', 'characters', 'strokes', 'readings', 'meanings', 'comment'],
)
# Extra glyph variants for some radicals, keyed by the radical's primary
# character (the first column read by get_radicals()). get_radicals() appends
# these to a radical's character list when patch_alternatives is true, and the
# radchar2rad index also registers them as keys.
# NOTE(review): presumably these are variant forms that show up in the kanji
# decomposition data -- confirm against kradfile-u.
# NOTE(review): some lists contain entries that render identically; they may
# be distinct code points (e.g. compatibility ideographs), so do not
# de-duplicate or retype them without checking the actual code points.
alternatives = {
    '人': ['⺅', '𠆢'],
    '丨': ['|'],
    '丿': ['ノ'],
    '乙': ['⺄'],
    '小': ['⺍'],
    '巛': ['巜'],
    '心': ['㣺'],
    '水': ['氺'],
    '爪': ['爫', '爫'],
    '牛': ['⺧'],
    '玉': ['⺩'],
    '示': ['⺬'],
    '羊': ['⺶'],
    '聿': ['⺻'],
    '艸': ['䒑', '艹', '艹'],
    '西': ['襾'],
    '食': ['𩙿'],
}
def get_kanjis(path='kradfile-u'):
    """Load kanji-to-radical decompositions from a kradfile-u style file.

    Every data line is split on whitespace: the first field is the kanji,
    the second field is skipped (presumably the ``:`` separator used by
    kradfile-u -- confirm against the data file) and all remaining fields
    are the kanji's radicals. Lines starting with ``#`` and blank lines
    are ignored.

    Args:
        path: File to read. Defaults to ``kradfile-u`` relative to the
            current working directory.

    Returns:
        A list of ``Kanji`` tuples in file order.

    Raises:
        OSError: If the file cannot be opened.
    """
    kanjis = []
    # from https://raw.githubusercontent.com/jmettraux/kensaku/master/data/kradfile-u
    # Decode explicitly as UTF-8 (the data is expected to be UTF-8) so the
    # result does not depend on the locale's default encoding.
    with open(path, 'r', encoding='utf-8') as fp:
        for line in fp:
            if line.startswith('#'):
                continue
            fields = line.split()
            if not fields:
                # Blank line: there is no kanji field to read.
                continue
            kanjis.append(Kanji(fields[0], fields[2:]))
    return kanjis
def get_radicals(patch_alternatives=True, path='radicals.txt'):
    """Load the radical table from a tab-separated file.

    The columns are read positionally: primary character, optional second
    character, numeric id, stroke count, readings (separated by ``・``),
    meanings (separated by ``, ``) and a comment. Blank lines are skipped.

    Args:
        patch_alternatives: When true, glyphs listed in the module-level
            ``alternatives`` table for the primary character are appended
            to the radical's character list.
        path: File to read. Defaults to ``radicals.txt`` relative to the
            current working directory.

    Returns:
        A list of ``Radical`` tuples in file order.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If the id or stroke-count column is not an integer.
        IndexError: If a non-blank line has fewer than seven columns.
    """
    radicals = []
    # from https://raw.githubusercontent.com/mifunetoshiro/kanjium/master/data/source_files/radicals.txt
    # Decode explicitly as UTF-8 (the data is expected to be UTF-8) so the
    # result does not depend on the locale's default encoding.
    with open(path, 'r', encoding='utf-8') as fp:
        for line in fp:
            if not line.strip():
                # Blank line: no columns to parse.
                continue
            # Drop the line terminator up front so no column carries it.
            fields = line.rstrip('\n').split('\t')
            primary, variant = fields[0], fields[1]
            characters = [primary, variant] if variant else [primary]
            if patch_alternatives and primary in alternatives:
                characters.extend(alternatives[primary])
            radicals.append(Radical(
                int(fields[2]),
                characters,
                int(fields[3]),
                fields[4].split('・'),
                fields[5].split(', '),
                fields[6],
            ))
    return radicals
# Load both data files once at import time; everything below is derived from
# these two lists.
radicals = get_radicals()
kanjis = get_kanjis()

# meaning -> list of glyphs of the radical carrying that meaning. When two
# radicals share a meaning, the one appearing later in `radicals` wins.
mean2radical = {
    meaning: rad.characters
    for rad in radicals
    for meaning in rad.meanings
}

# glyph -> Radical, for every glyph of a radical and for every alternative
# listed for that glyph in `alternatives`. Later assignments overwrite
# earlier ones.
radchar2rad = {}
for rad in radicals:
    for glyph in rad.characters:
        for key in (glyph, *alternatives.get(glyph, [])):
            radchar2rad[key] = rad
def _get_uncovered_radicals():
    """Return the radical glyphs that no known radical accounts for.

    Collects every glyph appearing in the ``radicals`` field of any entry
    in ``kanjis`` and keeps those that are not keys of ``radchar2rad``.

    Returns:
        A set of glyph strings.
    """
    used_glyphs = {glyph for kanji in kanjis for glyph in kanji.radicals}
    return {glyph for glyph in used_glyphs if glyph not in radchar2rad}
def find_kanji_from_radical_meanings(meanings):
    """Return the kanji whose decomposition contains every requested radical.

    Each meaning is translated to its radical's list of glyph variants via
    ``mean2radical``. A kanji matches when, for every requested radical, at
    least one of that radical's variants appears in the kanji's radicals.

    Args:
        meanings: Iterable of radical meanings (keys of ``mean2radical``).

    Returns:
        A list of matching ``Kanji`` tuples in ``kanjis`` order. With no
        meanings there is nothing to filter on, so every kanji is returned.

    Raises:
        SystemExit: With exit status 1 when a meaning is unknown; the error
            message is written to stderr.
    """
    # convert english words to radical characters
    searched_rads = []
    for meaning in meanings:
        variants = mean2radical.get(meaning)
        if variants is None:
            # sys.exit with a string prints it to stderr and exits with
            # status 1, keeping diagnostics out of the result stream.
            sys.exit(f'Error: Radical with meaning {meaning} not found')
        searched_rads.append(variants)

    return [
        kanji
        for kanji in kanjis
        if all(
            any(variant in kanji.radicals for variant in variants)
            for variants in searched_rads
        )
    ]
def list_available_radicals():
    """Print one line per known radical: its first glyph, then its meanings.

    The meanings are joined with ``', '`` and separated from the glyph by a
    single space.
    """
    for rad in radicals:
        print(rad.characters[0], ', '.join(rad.meanings))
def list_kanji_radicals(kanji_char):
    """Print the radicals of one kanji together with their meanings.

    Only the first entry of ``kanjis`` whose character equals ``kanji_char``
    is used. One line is printed per radical: the glyph, a space and the
    radical's meanings joined with ``', '``. If no entry matches,
    ``Kanji not found`` is printed instead.

    Args:
        kanji_char: The kanji character to look up.
    """
    match = next((k for k in kanjis if k.character == kanji_char), None)
    if match is None:
        print('Kanji not found')
        return

    for rchar in match.radicals:
        # The decomposition data may use glyphs that have no entry in the
        # radical table; report those instead of crashing with KeyError.
        radical = radchar2rad.get(rchar)
        if radical is None:
            print(f'{rchar} (unknown radical)')
            continue
        meaningstring = ', '.join(radical.meanings)
        print(f'{rchar} {meaningstring}')
def search_kanji(meanings):
    """Print every kanji matching the given radical meanings.

    Matches come from ``find_kanji_from_radical_meanings``. Each output line
    is the kanji, a space, and the meanings of all of its radicals joined
    with ``', '``. Radicals without an entry in ``radchar2rad`` contribute
    nothing to the line.

    Args:
        meanings: Radical meanings to search for.
    """
    for found in find_kanji_from_radical_meanings(meanings):
        looked_up = (radchar2rad.get(glyph) for glyph in found.radicals)
        all_meanings = [
            meaning
            for rad in looked_up
            if rad
            for meaning in rad.meanings
        ]
        meaningstring = ', '.join(all_meanings)
        print(f'{found.character} {meaningstring}')
def list_possible_meanings(fish=True):
    """Print every meaning of every known radical, one per line.

    Args:
        fish: When true, each line is the meaning, a tab, and the radical's
            glyphs joined with ``/`` (presumably the format consumed by a
            fish shell completion script -- confirm against the completion
            file). When false, only the meaning is printed.
    """
    for rad in radicals:
        glyphs = '/'.join(rad.characters)
        for meaning in rad.meanings:
            line = f'{meaning}\t{glyphs}' if fish else f'{meaning}'
            print(line)
def main():
    """Dispatch on the command-line arguments.

    * ``--fish-completion`` anywhere in ``sys.argv``: print the meanings in
      the tab-separated form and stop.
    * no arguments: list all available radicals.
    * exactly one argument that is a single character other than an ASCII
      letter: treat it as a kanji and list its radicals.
    * anything else: treat the arguments as radical meanings and search.
    """
    if '--fish-completion' in sys.argv:
        list_possible_meanings(fish=True)
        return

    terms = sys.argv[1:]
    if not terms:
        list_available_radicals()
        return

    # A lone non-ASCII-letter character is taken to be a kanji to decompose.
    is_single_symbol = (
        len(terms) == 1
        and len(terms[0]) == 1
        and terms[0] not in string.ascii_letters
    )
    if is_single_symbol:
        list_kanji_radicals(terms[0])
    else:
        search_kanji(terms)
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()