-
Notifications
You must be signed in to change notification settings - Fork 86
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: support npm search command like npmio (#513)
- [x] 找个合适的 eggjs es 插件,或者手撸个,看社区的几个版本都比较低
- [x] HTTP Server 新增 API `/-/v1/search?text=react&size=20&from=0&quality=0.65&popularity=0.98&maintenance=0.5`,第一版不一定能 qpm 都支持,先支持现有的下载量数据,即 popularity 的参考数据
- [x] 监听相关的 metadata 变更的 event,同步写入、删除 增量的 ES 数据,ES 有较强抗压能力,这块直接做成同步就好
- [x] 考虑可能同步也会丢部分数据(stream 不稳定时),HTTP Server 再追加一个手动同步 ES 的接口,传包名同步触发写 ES 即可
- [x] 提供全量一次性的初始化同步 ES 脚本
- [x] setting/mapping 参考 https://github.com/npms-io/npms-analyzer/blob/master/config/elasticsearch/npms.json5

---------

Co-authored-by: fengmk2 <fengmk2@gmail.com>
Co-authored-by: elrrrrrrr <elrrrrrrr@gmail.com>
- Loading branch information
1 parent
6e45ac5
commit 7f85848
Showing
19 changed files
with
1,797 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,94 @@ | ||
// TODO sync event | ||
/* eslint-disable @typescript-eslint/no-unused-vars */ | ||
import { EggAppConfig } from 'egg'; | ||
import { Event, Inject } from '@eggjs/tegg'; | ||
import { | ||
PACKAGE_UNPUBLISHED, | ||
PACKAGE_VERSION_ADDED, | ||
PACKAGE_VERSION_REMOVED, | ||
PACKAGE_TAG_ADDED, | ||
PACKAGE_TAG_CHANGED, | ||
PACKAGE_TAG_REMOVED, | ||
PACKAGE_MAINTAINER_CHANGED, | ||
PACKAGE_MAINTAINER_REMOVED, | ||
PACKAGE_META_CHANGED, | ||
} from './index'; | ||
|
||
import { PackageSearchService } from '../service/PackageSearchService'; | ||
|
||
/**
 * Shared base for the event subscribers in this file.
 *
 * Holds the injected search service and app config, and exposes a guarded
 * `syncPackage` helper that subclasses call from their `handle` methods.
 */
class SyncESPackage {
  @Inject()
  protected readonly packageSearchService: PackageSearchService;

  @Inject()
  protected readonly config: EggAppConfig;

  /**
   * Delegates to `PackageSearchService.syncPackage(fullname, true)` when
   * `config.cnpmcore.enableElasticsearch` is truthy; otherwise does nothing.
   *
   * @param fullname - package name passed through unchanged to the service
   */
  protected async syncPackage(fullname: string): Promise<void> {
    const searchEnabled = this.config.cnpmcore.enableElasticsearch;
    if (searchEnabled) {
      await this.packageSearchService.syncPackage(fullname, true);
    }
  }
}
|
||
/**
 * Subscriber for `PACKAGE_UNPUBLISHED`: removes the package through
 * `PackageSearchService.removePackage` instead of re-syncing it.
 */
@Event(PACKAGE_UNPUBLISHED)
export class PackageUnpublished extends SyncESPackage {
  /**
   * @param fullname - name of the unpublished package
   */
  async handle(fullname: string): Promise<void> {
    const { enableElasticsearch } = this.config.cnpmcore;
    if (enableElasticsearch) {
      await this.packageSearchService.removePackage(fullname);
    }
  }
}
|
||
/** Subscriber for `PACKAGE_VERSION_ADDED`: re-syncs the affected package. */
@Event(PACKAGE_VERSION_ADDED)
export class PackageVersionAdded extends SyncESPackage {
  /**
   * @param fullname - name of the package that gained a version
   */
  async handle(fullname: string): Promise<void> {
    await this.syncPackage(fullname);
  }
}
|
||
/** Subscriber for `PACKAGE_VERSION_REMOVED`: re-syncs the affected package. */
@Event(PACKAGE_VERSION_REMOVED)
export class PackageVersionRemoved extends SyncESPackage {
  /**
   * @param fullname - name of the package that lost a version
   */
  async handle(fullname: string): Promise<void> {
    await this.syncPackage(fullname);
  }
}
|
||
/** Subscriber for `PACKAGE_TAG_ADDED`: re-syncs the affected package. */
@Event(PACKAGE_TAG_ADDED)
export class PackageTagAdded extends SyncESPackage {
  /**
   * @param fullname - name of the package whose tag was added
   */
  async handle(fullname: string): Promise<void> {
    await this.syncPackage(fullname);
  }
}
|
||
/** Subscriber for `PACKAGE_TAG_CHANGED`: re-syncs the affected package. */
@Event(PACKAGE_TAG_CHANGED)
export class PackageTagChanged extends SyncESPackage {
  /**
   * @param fullname - name of the package whose tag changed
   */
  async handle(fullname: string): Promise<void> {
    await this.syncPackage(fullname);
  }
}
|
||
/** Subscriber for `PACKAGE_TAG_REMOVED`: re-syncs the affected package. */
@Event(PACKAGE_TAG_REMOVED)
export class PackageTagRemoved extends SyncESPackage {
  /**
   * @param fullname - name of the package whose tag was removed
   */
  async handle(fullname: string): Promise<void> {
    await this.syncPackage(fullname);
  }
}
|
||
/** Subscriber for `PACKAGE_MAINTAINER_CHANGED`: re-syncs the affected package. */
@Event(PACKAGE_MAINTAINER_CHANGED)
export class PackageMaintainerChanged extends SyncESPackage {
  /**
   * @param fullname - name of the package whose maintainers changed
   */
  async handle(fullname: string): Promise<void> {
    await this.syncPackage(fullname);
  }
}
|
||
/** Subscriber for `PACKAGE_MAINTAINER_REMOVED`: re-syncs the affected package. */
@Event(PACKAGE_MAINTAINER_REMOVED)
export class PackageMaintainerRemoved extends SyncESPackage {
  /**
   * @param fullname - name of the package that lost a maintainer
   */
  async handle(fullname: string): Promise<void> {
    await this.syncPackage(fullname);
  }
}
|
||
/** Subscriber for `PACKAGE_META_CHANGED`: re-syncs the affected package. */
@Event(PACKAGE_META_CHANGED)
export class PackageMetaChanged extends SyncESPackage {
  /**
   * @param fullname - name of the package whose metadata changed
   */
  async handle(fullname: string): Promise<void> {
    await this.syncPackage(fullname);
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,216 @@ | ||
import { AccessLevel, Inject, SingletonProto } from '@eggjs/tegg'; | ||
import type { estypes } from '@elastic/elasticsearch'; | ||
import dayjs from 'dayjs'; | ||
|
||
import { AbstractService } from '../../common/AbstractService'; | ||
import { formatAuthor, getScopeAndName } from '../../common/PackageUtil'; | ||
import { PackageManagerService } from './PackageManagerService'; | ||
import { SearchManifestType, SearchMappingType, SearchRepository } from '../../repository/SearchRepository'; | ||
import { PackageVersionDownloadRepository } from '../../repository/PackageVersionDownloadRepository'; | ||
import { PackageRepository } from '../../repository/PackageRepository'; | ||
|
||
|
||
/**
 * Builds package search documents, writes/removes them through
 * `SearchRepository`, and runs text searches against it.
 */
@SingletonProto({
  accessLevel: AccessLevel.PUBLIC,
})
export class PackageSearchService extends AbstractService {
  @Inject()
  private readonly packageManagerService: PackageManagerService;
  @Inject()
  private readonly searchRepository: SearchRepository;
  @Inject()
  private packageVersionDownloadRepository: PackageVersionDownloadRepository;
  @Inject()
  protected packageRepository: PackageRepository;

  /**
   * Builds the search document for one package and upserts it.
   *
   * @param fullname - package name; split into scope and name via `getScopeAndName`
   * @param isSync - forwarded to `PackageManagerService.listPackageFullManifests`
   * @returns the result of `SearchRepository.upsertPackage`, or `undefined`
   *   (after logging a warning) when the manifest or the package row is missing
   */
  async syncPackage(fullname: string, isSync = true) {
    const [ scope, name ] = getScopeAndName(fullname);
    const fullManifests = await this.packageManagerService.listPackageFullManifests(scope, name, isSync);

    if (!fullManifests.data) {
      this.logger.warn('[PackageSearchService.syncPackage] save package:%s not found', fullname);
      return;
    }

    const pkg = await this.packageRepository.findPackage(scope, name);
    if (!pkg) {
      this.logger.warn('[PackageSearchService.syncPackage] findPackage:%s not found', fullname);
      return;
    }

    // get last year download data
    const startDate = dayjs().subtract(1, 'year');
    const endDate = dayjs();

    const entities = await this.packageVersionDownloadRepository.query(pkg.packageId, startDate.toDate(), endDate.toDate());
    let downloadsAll = 0;
    for (const entity of entities) {
      // Each entity exposes per-day counters named d01 .. d31; sum the non-zero ones.
      for (let i = 1; i <= 31; i++) {
        const day = String(i).padStart(2, '0');
        const field = `d${day}`;
        const counter = entity[field];
        if (!counter) continue;
        downloadsAll += counter;
      }
    }

    const { data: manifest } = fullManifests;

    const latestVersion = manifest['dist-tags'].latest;
    const latestManifest = manifest.versions[latestVersion];

    const packageDoc: SearchMappingType = {
      name: manifest.name,
      version: latestVersion,
      _rev: manifest._rev,
      // Scope is stored without the leading '@'; packages without a scope use 'unscoped'.
      scope: scope ? scope.replace('@', '') : 'unscoped',
      keywords: manifest.keywords || [],
      versions: Object.keys(manifest.versions),
      description: manifest.description,
      license: manifest.license,
      maintainers: manifest.maintainers,
      author: formatAuthor(manifest.author),
      'dist-tags': manifest['dist-tags'],
      date: manifest.time[latestVersion],
      created: manifest.time.created,
      modified: manifest.time.modified,
      // owning registry; a keyword-style enumerated value
      _source_registry_name: manifest._source_registry_name,
      // publisher of the latest version
      _npmUser: latestManifest?._npmUser,
      // publish information of the latest version
      publish_time: latestManifest?.publish_time,
    };

    const document: SearchManifestType = {
      package: packageDoc,
      downloads: {
        all: downloadsAll,
      },
    };

    return await this.searchRepository.upsertPackage(document);
  }

  /**
   * Runs a `function_score` search for `text`.
   *
   * @param text - search text, used in every match clause and as a script parameter
   * @param from - offset of the first hit to return
   * @param size - maximum number of hits to return
   * @returns the `_source` of every hit (empty array when the response has no
   *   hits) and the total hit count (0 when the response carries no total)
   */
  async searchPackage(text: string, from: number, size: number): Promise<{ objects: (SearchManifestType | undefined)[], total: number }> {
    const matchQueries = this._buildMatchQueries(text);
    const scriptScore = this._buildScriptScore({
      text,
      scoreEffect: 0.25,
    });

    const res = await this.searchRepository.searchPackage({
      body: {
        size,
        from,
        query: {
          function_score: {
            // 'replace': the script result becomes the final score.
            boost_mode: 'replace',
            query: {
              bool: {
                should: matchQueries,
                minimum_should_match: matchQueries.length ? 1 : 0,
              },
            },
            script_score: scriptScore,
          },
        },
      },
    });
    const { hits, total } = res;
    return {
      // Always hand back an array, as the declared return type promises.
      objects: hits?.map(item => {
        return item._source;
      }) ?? [],
      // `total` may be a plain number, a `{ value }` object, or absent.
      total: typeof total === 'number' ? total : ((total as estypes.SearchTotalHits | undefined)?.value ?? 0),
    };
  }

  /**
   * Removes a package's search document.
   *
   * @param fullname - package name handed to `SearchRepository.removePackage`
   * @returns whatever `SearchRepository.removePackage` resolves to
   */
  async removePackage(fullname: string) {
    return await this.searchRepository.removePackage(fullname);
  }

  // https://github.com/npms-io/queries/blob/master/lib/search.js#L8C1-L78C2
  /**
   * Builds the four `multi_match` clauses used as `should` conditions:
   * standard, edge-ngram phrase, english stemmer and aggressive english stemmer.
   * Each clause targets name (^4), description and keywords (^2) sub-fields.
   *
   * @param text - search text placed into every clause's `query`
   */
  private _buildMatchQueries(text: string) {
    return [
      // Standard match using cross_fields
      {
        multi_match: {
          query: text,
          operator: 'and',
          fields: [
            'package.name.standard^4',
            'package.description.standard',
            'package.keywords.standard^2',
          ],
          type: 'cross_fields',
          boost: 6,
          tie_breaker: 0.5,
        },
      },

      // Partial match using edge-ngram
      {
        multi_match: {
          query: text,
          operator: 'and',
          fields: [
            'package.name.edge_ngram^4',
            'package.description.edge_ngram',
            'package.keywords.edge_ngram^2',
          ],
          type: 'phrase',
          slop: 3,
          boost: 3,
          tie_breaker: 0.5,
        },
      },

      // Normal term match with an english stemmer
      {
        multi_match: {
          query: text,
          operator: 'and',
          fields: [
            'package.name.english_docs^4',
            'package.description.english_docs',
            'package.keywords.english_docs^2',
          ],
          type: 'cross_fields',
          boost: 3,
          tie_breaker: 0.5,
        },
      },

      // Normal term match with a more aggressive english stemmer (not so important)
      {
        multi_match: {
          query: text,
          operator: 'and',
          fields: [
            'package.name.english_aggressive_docs^4',
            'package.description.english_aggressive_docs',
            'package.keywords.english_aggressive_docs^2',
          ],
          type: 'cross_fields',
          tie_breaker: 0.5,
        },
      },
    ];
  }

  /**
   * Builds the `script_score` body: an exact name match scores
   * `100000 + downloads`, anything else `_score * downloads ^ scoreEffect`.
   *
   * The search text and exponent are passed as script `params` and never
   * spliced into the script source. The text comes from the caller's query,
   * so interpolating it would let a crafted string alter the script, and would
   * make the script source differ for every distinct query.
   *
   * @param params.text - search text compared against `package.name.raw`;
   *   `undefined` is sent as an empty string
   * @param params.scoreEffect - exponent applied to the download count
   */
  private _buildScriptScore(params: { text: string | undefined, scoreEffect: number }) {
    // keep search simple, only download(popularity)
    const downloads = 'doc["downloads.all"].value';
    const source = `doc["package.name.raw"].value.equals(params.text) ? 100000 + ${downloads} : _score * Math.pow(${downloads}, params.scoreEffect)`;
    return {
      script: {
        source,
        params: {
          text: params.text || '',
          scoreEffect: params.scoreEffect,
        },
      },
    };
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.