-
-
Notifications
You must be signed in to change notification settings - Fork 4
/
Dict.ts
295 lines (238 loc) · 8.45 KB
/
Dict.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
import uniq_by from 'lodash/uniqBy'
import { fread } from 'MyFile'
import db from 'MyDB'
import { Db as MongoDB, Collection } from 'mongodb'
/**
 * Static facade over the dictionary data: an English collection, its
 * pronunciation collection, a Japanese collection, and an in-memory table of
 * Japanese grammar entries loaded from `grammars.json`.
 *
 * `init()` must be awaited before any of the search methods are used, because
 * it assigns the collection handles and the grammar table they read.
 */
export default class Dict {
    /** Handle to the `dict` database; assigned by `init()`. */
    static db: MongoDB

    /** English entries (collection `en`). */
    static en: Collection

    /** English pronunciation audio (collection `en_proncs`). */
    static en_proncs: Collection

    /** Japanese entries (collection `jp`). */
    static ja: Collection

    /** Grammar entries keyed by id, loaded from `grammars.json` by `init()`. */
    static grammars: { [id: string]: Grammar }

    /** Process-wide cache of image assets: asset key -> base64-encoded file content. */
    static assets_cache: { [key: string]: string } = { }

    /** Matches a grammar level query such as `N1` or `n5`, in either case. */
    static LEVEL_PATTERN = /^N[1-5]$/i

    /** Maximum number of entries returned by one database or grammar lookup. */
    static LIMIT = 100


    /** Binds the collection handles and loads the grammar table. */
    static async init () {
        this.db = db.client.db('dict')
        this.en = this.db.collection('en')
        this.en_proncs = this.db.collection('en_proncs')
        this.ja = this.db.collection('jp')
        // NOTE(review): this path inserts a '/' after global.ROOT while the asset path in
        // load_en_assets() does not; confirm whether global.ROOT ends with a separator.
        this.grammars = ( await import(`${global.ROOT}/Dict/grammars.json`) ).default
    }


    /**
     * Finds entries whose `index` field matches `pattern`, case-insensitively.
     *
     * @param collection Collection to query.
     * @param pattern Used verbatim as the body of a `$regex`; it is NOT escaped.
     *     NOTE(review): callers pass user input here, so regex metacharacters are
     *     interpreted by the database and a malformed pattern is rejected by it.
     * @param isStartsWith When true (the default) the pattern is anchored with `^`.
     * @returns At most `LIMIT` matching documents.
     */
    static async search_db (collection: Collection, pattern: string, isStartsWith = true): Promise<Item[]> {
        return await collection.find({
            index: {
                $regex: (isStartsWith ? '^' : '') + pattern,
                $options: 'i'
            }
        }, {
            limit: this.LIMIT,
            batchSize: this.LIMIT
        }).toArray()
    }


    /**
     * Looks up grammar entries, trying three strategies in order and using the
     * first one that yields anything:
     *
     * 1. by level, when `index` looks like `N1`..`N5` (either case);
     * 2. by category, when `index` is one of `GRAMMAR_CATEGOREIS`;
     * 3. by a case-insensitive regex match against each entry's `index`,
     *    capped at `LIMIT` results.
     *
     * Every returned entry is tagged in place with `type = 'JP_GRAMMAR'`.
     *
     * @param index Level, category name, or search pattern.
     * @returns The matching grammar entries (possibly empty).
     */
    static search_grammars (index: string) {
        const all_grammars = Object.values(this.grammars)

        let grammars: Grammar[] = [ ]

        // --- Browse by level
        if (this.LEVEL_PATTERN.test(index)) {
            // LEVEL_PATTERN accepts lower case, but levels are stored as 'N1'..'N5'.
            const level = index.toUpperCase()
            grammars = all_grammars.filter( grammar => grammar.level === level)
        }

        // --- Browse by category
        if (!grammars.length && this.GRAMMAR_CATEGOREIS.has(index))
            grammars = all_grammars.filter( grammar =>
                grammar.contents.some( content => content.situation === index)
            )

        // --- Free-text grammar search
        if (!grammars.length) {
            let pattern: RegExp
            try {
                pattern = new RegExp(index, 'i')
            } catch {
                // Not a valid regex (e.g. an unbalanced parenthesis): match it literally
                // instead of letting the SyntaxError escape.
                pattern = new RegExp(index.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'), 'i')
            }

            grammars = all_grammars
                .filter( grammar => pattern.test(grammar.index))
                .slice(0, this.LIMIT)
        }

        grammars.forEach( grammar => { grammar.type = 'JP_GRAMMAR' })

        return grammars
    }


    /**
     * Searches the dictionaries for `index`.
     *
     * A pure-ASCII query that is not a grammar level is treated as English and
     * looked up in the `en` collection. Anything else is treated as Japanese:
     * grammar entries come first, followed by entries from the `jp` collection.
     *
     * @param index Query string; surrounding whitespace is trimmed.
     * @param options.is_load_assets English only: when false, the raw entries are
     *     returned without images or pronunciations attached.
     *     NOTE(review): in that case `type` is not set on the entries either.
     * @returns The matching entries.
     */
    static async search (index = '', { is_load_assets = true } = { }) {
        index = index.trim()

        // eslint-disable-next-line no-control-regex
        const is_ascii = /^[\x00-\x7F]*$/.test(index)

        // ------------ English: Oxford Advanced Learner's English-Chinese Dictionary (OALD), 8th edition
        if (is_ascii && !this.LEVEL_PATTERN.test(index)) {
            const items: Item[] = await this.search_db(this.en, index)

            if (!is_load_assets) return items

            await Promise.all( items.map( item => this.load_en_assets(item, index)))

            return items
        }

        // ------------ Japanese: grammar entries and words

        // --- Grammar
        const grammars = this.search_grammars(index)

        // --- Kodansha Japanese-Chinese dictionary (JTS): prefix matches first, then substring matches
        const [prefix_matches, substring_matches] = await Promise.all([
            this.search_db(this.ja, index),
            this.search_db(this.ja, index, false)
        ])

        const candidates = uniq_by(
            [ ...prefix_matches, ...substring_matches ],
            item => item._id.toString()
        ).slice(0, this.LIMIT)

        const items = await Promise.all( candidates.map( item => this.resolve_ja_link(item)))

        // Several link entries may resolve to the same target, so de-duplicate again.
        return [ ...grammars, ...uniq_by(items, item => item._id.toString()) ]
    }


    /**
     * Tags an English entry as `EN_OALD` and fills `item.assets` with the images
     * referenced by its content and with its pronunciation audio, base64-encoded.
     *
     * @param item Entry to populate; mutated in place.
     * @param index The user's query, used only in log messages.
     */
    private static async load_en_assets (item: Item, index: string) {
        const ASSET_PATTERN = /.*src="\/(symbols|pic|thumb)\/(.+?)\.(.+?)"/i

        item.type = 'EN_OALD'

        // --- Resolve image assets from the links in the content
        const assets: { [key: string]: string } = item.assets = { }

        for (const line of item.content.split_lines()) {
            const matches = ASSET_PATTERN.exec(line)
            if (!matches) continue

            const asset_key = matches[1] + '/' + matches[2] + '.' + matches[3]

            // Already attached to this entry
            if (asset_key in assets) continue

            // Cache hit
            if (asset_key in this.assets_cache) {
                assets[asset_key] = this.assets_cache[asset_key]
                continue
            }

            try {
                const img_buf = await fread(global.ROOT + 'Dict/OALD/' + asset_key, { encoding: 'BINARY', print: false })
                this.assets_cache[asset_key] = assets[asset_key] = img_buf.toString('base64')
            } catch (error) {
                if (error.code !== 'ENOENT') throw error

                // A missing image must not abort the rest of this entry: log it and
                // carry on with the remaining images and the pronunciations.
                console.error('ENOENT:', index, asset_key)
            }
        }

        // --- Load pronunciations (American English) into assets
        await Promise.all(
            (item.proncs || [ ]).map( async pronc => {
                const pronc_doc = await this.en_proncs.findOne({ index: pronc })

                // findOne resolves to null when there is no such document.
                if (!pronc_doc || !pronc_doc.spx) {
                    console.error('PRONC NOT FOUND:', index, pronc)
                    return
                }

                // `new Buffer(pronc_doc.spx.value())` is the wrong way to read the audio;
                // use the underlying buffer instead.
                assets[`proncs/${pronc}.spx`] = pronc_doc.spx.buffer.toString('base64')
            })
        )
    }


    /**
     * Tags a Japanese entry as `JP_JTS`. If the first line of its content is a
     * `@@@LINK=<index>` redirect, the entry it points to is returned instead.
     *
     * @param item Entry from the `jp` collection.
     * @returns The resolved entry, or `item` itself when it is not a redirect or
     *     the redirect target does not exist.
     */
    private static async resolve_ja_link (item: Item): Promise<Item> {
        const LINK_PATTERN = '@@@LINK='

        const first_line = item.content.split_lines()[0]

        if (first_line.startsWith(LINK_PATTERN)) {
            const real_index = first_line.rm(LINK_PATTERN)
            const target = await this.ja.findOne({ index: real_index })

            if (target)
                item = target
            else
                // Dangling redirect: keep the link entry rather than failing the whole search.
                console.error('LINK TARGET NOT FOUND:', item.index, real_index)
        }

        item.type = 'JP_JTS'

        return item
    }


    /** Category names (the `situation` of a grammar content) that can be browsed by exact match. */
    static GRAMMAR_CATEGOREIS = new Set([
        '逆接条件',
        '並列',
        '時点・場面',
        '例示',
        '主張・断定',
        '決定',
        '経過・結末',
        '授受',
        '様子・状態',
        '.',
        '軽重の強調',
        '基準',
        '時間的前後',
        '関連・対応',
        '可能・難易',
        '対象',
        '原因・理由',
        '相関関係',
        '判断の立場',
        '非限定',
        '変化・不定',
        '程度',
        '対比',
        '逆接・譲歩',
        '無関係・除外',
        '進行',
        '付加',
        '手段・媒介',
        '其它',
        '敬意',
        '習慣・反復・職業・身分',
        '経験',
        '依頼',
        '意図的行為・動作の開始と終了',
        '前置き・和らげ',
        '限定',
        '限界',
        '心情の強調',
        '方向性',
        '順次',
        '継続',
        '付帯状態・非付帯状態',
        '傾向',
        '比況',
        '願望',
        '起点・終点',
        '範囲',
        '説明',
        '言い換え',
        '納得',
        '比較',
        '感嘆',
        '同意・確認',
        '目的',
        '反復',
        '命令',
        '条件',
        '禁止',
        '反実仮想',
        '勧誘',
        '判断',
        '助言',
        '話題',
        '部分否定',
        '不必要',
        '許可',
        '移動の状態',
        '引用',
        '受け身・使役受け身',
        '意志',
        '使役',
        '申し出',
        '強制',
        '否定',
        '強調',
        '伝聞',
        '名前の紹介',
        '評価の視点',
        '婉曲',
        '義務',
    ])
}
/**
 * A dictionary entry as handled by `Dict`: a document from one of the
 * collections, optionally decorated with a `type` tag and loaded assets.
 */
interface Item {
    /** Database id of the entry; `Dict.search` de-duplicates results on its string form. */
    _id?: any

    /** Headword; the field that search patterns are matched against. */
    index: string

    /** Entry body; scanned line by line for image links and `@@@LINK=` redirects. */
    content: string

    /** Pronunciation keys, each looked up by `index` in the pronunciation collection. */
    proncs: string[]

    /** Which dictionary the entry came from; set by `Dict` when results are prepared. */
    type?: 'EN_OALD' | 'JP_JTS' | 'JP_GRAMMAR'

    /** Loaded assets: asset key -> base64-encoded content. */
    assets?: Record<string, string>
}
/**
 * A Japanese grammar entry from the grammar table. It is an `Item` whose
 * `type`, once set, is always `'JP_GRAMMAR'`.
 */
interface Grammar extends Item {
    id: number

    /** Grammar pattern; free-text grammar searches are matched against this field. */
    index: string

    type?: 'JP_GRAMMAR'

    /** Grammar level */
    level?: 'N1' | 'N2' | 'N3' | 'N4' | 'N5' | '+收藏'

    range?: string

    /**
     * One element per usage of the pattern, in one of two shapes. Both carry
     * `situation`, which is the value compared against the category name when
     * browsing grammar by category.
     */
    contents: Array<
        | {
            situation: string
            explainations: {
                cn: string[]
            }
            notes: string
        }
        | {
            continuation: string
            meaning: string
            situation: string
            sentences: string[]
            notes: string
        }
    >
}