-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
12 changed files
with
277 additions
and
6 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
# Ignore all files by default | ||
* | ||
|
||
# White list only the required files | ||
!config.json | ||
!v1 | ||
!swagger.json |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
# syntax=docker/dockerfile:1.2 | ||
FROM cnrsinist/ezs-python-server:py3.9-no16-1.0.11 | ||
|
||
USER root | ||
# Install all python dependencies | ||
RUN pip install \ | ||
spacy==3.6.1 \ | ||
xx-ent-wiki-sm@https://github.com/explosion/spacy-models/releases/download/xx_ent_wiki_sm-3.6.0/xx_ent_wiki_sm-3.6.0.tar.gz \ | ||
numpy==1.26.4 | ||
|
||
|
||
# Install all node dependencies | ||
# RUN npm install \ | ||
# @ezs/strings@1.0.3 | ||
|
||
WORKDIR /app/public | ||
# Declare files to copy in .dockerignore | ||
COPY --chown=daemon:daemon . /app/public/ | ||
RUN mv ./config.json /app && chmod a+w /app/config.json |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
# ws-person-ner@0.0.0 | ||
|
||
Extraction d'entités nommées de noms de personnes | ||
|
||
Permet d'extraire les entités nommées correspondant à des noms de personnes dans du texte |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
{ | ||
"environnement": { | ||
"EZS_TITLE": "Extraction d'entités nommées de noms de personnes", | ||
"EZS_DESCRIPTION": "Permet d'extraire les entités nommées correspondant à des noms de personnes dans du texte", | ||
"EZS_METRICS": true, | ||
"EZS_CONCURRENCY": 2, | ||
"EZS_CONTINUE_DELAY": 60, | ||
"EZS_NSHARDS": 32, | ||
"EZS_CACHE": true, | ||
"EZS_VERBOSE": false, | ||
"NODE_OPTIONS": "--max_old_space_size=1024", | ||
"NODE_ENV": "production" | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
# These examples can be used directly in VSCode, using HTTPYac extension (anweber.vscode-httpyac) | ||
# They are important, because used to generate the tests.hurl file. | ||
|
||
# Décommenter/commenter les lignes voulues pour tester localement | ||
@host=http://localhost:31976 | ||
# @host=https://person-ner.services.istex.fr | ||
|
||
### | ||
# @name v1Tagger | ||
# Description de la route | ||
POST {{host}}/v1/tagger?indent=true | ||
content-type: application/json | ||
[ | ||
{"value": "Python is widely used in data science. Bob R. uses it ; he works for the CNRS"}, | ||
{"value": "Jean Dupont assiste àu festival de Cannes sur la côte d'Azur."} | ||
] | ||
|
||
|
||
HTTP 200 | ||
[{ | ||
"value": { | ||
"PER": [ | ||
"Bob R." | ||
], | ||
"LOC": [], | ||
"ORG": [ | ||
"CNRS" | ||
], | ||
"MISC": [ | ||
"Python" | ||
] | ||
} | ||
}, | ||
{ | ||
"value": { | ||
"PER": [ | ||
"Jean Dupont" | ||
], | ||
"LOC": [ | ||
"côte d'Azur" | ||
], | ||
"ORG": [], | ||
"MISC": [ | ||
"festival de Cannes" | ||
] | ||
} | ||
}] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
{ | ||
"private": true, | ||
"name": "ws-person-ner", | ||
"version": "0.0.0", | ||
"description": "Extraction d'entités nommées de noms de personnes", | ||
"repository": { | ||
"type": "git", | ||
"url": "git+https://github.com/Inist-CNRS/web-services.git" | ||
}, | ||
"keywords": [ | ||
"ezmaster" | ||
], | ||
"author": "Léo Gaillard <leo.gaillard@inist.fr>", | ||
"license": "MIT", | ||
"bugs": { | ||
"url": "https://github.com/Inist-CNRS/web-services/issues" | ||
}, | ||
"homepage": "https://github.com/Inist-CNRS/web-services/#readme", | ||
"scripts": { | ||
"version:insert:readme": "sed -i \"s#\\(${npm_package_name}.\\)\\([\\.a-z0-9]\\+\\)#\\1${npm_package_version}#g\" README.md && git add README.md", | ||
"version:insert:swagger": "sed -i \"s/\\\"version\\\": \\\"[0-9]\\+.[0-9]\\+.[0-9]\\+\\\"/\\\"version\\\": \\\"${npm_package_version}\\\"/g\" swagger.json && git add swagger.json", | ||
"version:insert": "npm run version:insert:readme && npm run version:insert:swagger", | ||
"version:commit": "git commit -a -m \"release ${npm_package_name}@${npm_package_version}\"", | ||
"version:tag": "git tag \"${npm_package_name}@${npm_package_version}\" -m \"${npm_package_name}@${npm_package_version}\"", | ||
"version:push": "git push && git push --tags", | ||
"version": "npm run version:insert && npm run version:commit && npm run version:tag", | ||
"postversion": "npm run version:push", | ||
"build:dev": "docker build -t cnrsinist/${npm_package_name}:latest .", | ||
"start:dev": "npm run build:dev && docker run --name dev --rm --detach -p 31976:31976 cnrsinist/${npm_package_name}:latest", | ||
"stop:dev": "docker stop dev", | ||
"build": "docker build -t cnrsinist/${npm_package_name}:${npm_package_version} .", | ||
"start": "docker run --rm -p 31976:31976 cnrsinist/${npm_package_name}:${npm_package_version}", | ||
"publish": "docker push cnrsinist/${npm_package_name}:${npm_package_version}" | ||
}, | ||
"avoid-testing": false | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
{ | ||
"openapi": "3.0.0", | ||
"info": { | ||
"title": "person-ner - Extraction d'entités nommées de noms de personnes", | ||
"description": "Permet d'extraire les entités nommées correspondant à des noms de personnes dans du texte", | ||
"version": "0.0.0", | ||
"termsOfService": "https://services.istex.fr/", | ||
"contact": { | ||
"name": "Inist-CNRS", | ||
"url": "https://www.inist.fr/nous-contacter/" | ||
} | ||
}, | ||
"servers": [ | ||
{ | ||
"x-comment": "Will be automatically completed by the ezs server." | ||
}, | ||
{ | ||
"url": "http://vptdmservices.intra.inist.fr:49225/", | ||
"description": "Latest version for production", | ||
"x-profil": "Standard" | ||
} | ||
], | ||
"tags": [ | ||
{ | ||
"name": "person-ner", | ||
"description": "Extraction d'entités nommées de noms de personnes", | ||
"externalDocs": { | ||
"description": "Plus de documentation", | ||
"url": "https://github.com/inist-cnrs/web-services/tree/main/services/person-ner" | ||
} | ||
} | ||
] | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
POST {{host}}/v1/tagger?indent=true | ||
content-type: application/json | ||
[ | ||
{"value": "Python is widely used in data science. Bob R. uses it ; he works for the CNRS"}, | ||
{"value": "Jean Dupont assiste àu festival de Cannes sur la côte d'Azur."} | ||
] | ||
|
||
|
||
HTTP 200 | ||
[{ | ||
"value": { | ||
"PER": [ | ||
"Bob R." | ||
], | ||
"LOC": [], | ||
"ORG": [ | ||
"CNRS" | ||
], | ||
"MISC": [ | ||
"Python" | ||
] | ||
} | ||
}, | ||
{ | ||
"value": { | ||
"PER": [ | ||
"Jean Dupont" | ||
], | ||
"LOC": [ | ||
"côte d'Azur" | ||
], | ||
"ORG": [], | ||
"MISC": [ | ||
"festival de Cannes" | ||
] | ||
} | ||
}] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
# OpenAPI Documentation - JSON format (dot notation) | ||
mimeType = application/json | ||
|
||
post.description = Extraction d'entités nommées de noms de personnes | ||
post.responses.default.description = Permet d'extraire les entités nommées correspondant à des noms de personnes dans du texte | ||
post.responses.default.content.application/json.schema.$ref = #/components/schemas/JSONStream | ||
post.summary = Extraction d'entités nommées de noms de personnes | ||
post.requestBody.required = true | ||
post.requestBody.content.application/json.schema.$ref = #/components/schemas/JSONStream | ||
post.parameters.0.in = query | ||
post.parameters.0.name = indent | ||
post.parameters.0.schema.type = boolean | ||
post.parameters.0.description = Indent or not the JSON Result | ||
# Examples | ||
post.requestBody.content.application/json.example.0.value = Python is widely used in data science. Bob R. uses it ; he works for the CNRS | ||
post.requestBody.content.application/json.example.1.value = Jean Dupont assiste àu festival de Cannes sur la côte d'Azur. | ||
post.responses.default.content.application/json.example.0.value.PER.0 = Bob R. | ||
post.responses.default.content.application/json.example.0.value.LOC = [] | ||
post.responses.default.content.application/json.example.0.value.ORG.0 = CNRS | ||
post.responses.default.content.application/json.example.0.value.MISC.0 = Python | ||
post.responses.default.content.application/json.example.1.value.PER.0 = Jean Dupont | ||
post.responses.default.content.application/json.example.1.value.LOC.0 = côte d'Azur | ||
post.responses.default.content.application/json.example.1.value.ORG = [] | ||
post.responses.default.content.application/json.example.1.value.MISC.0 = festival de Cannes | ||
;' | ||
[use] | ||
plugin = @ezs/spawn | ||
plugin = @ezs/basics | ||
|
||
[JSONParse] | ||
separator = * | ||
|
||
[expand] | ||
path = value | ||
size = 10 | ||
|
||
[expand/exec] | ||
# command should be executable ! | ||
command = ./v1/tagger.py | ||
|
||
[dump] | ||
indent = env('indent', false) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
#!/usr/bin/env python3 | ||
# -*- coding: utf-8 -*- | ||
import spacy | ||
import json | ||
import sys | ||
|
||
# Buckets that are always present in the output, even when empty, so that
# consumers can rely on a stable shape.
ENTITY_LABELS = ("PER", "LOC", "ORG", "MISC")


def tag_record(record, nlp):
    """Replace ``record["value"]`` with the named entities found in it.

    Args:
        record: A decoded JSON object. Its ``"value"`` entry is the text to
            analyse; a missing entry is treated as an empty text.
        nlp: A callable taking a string and returning an object whose
            ``ents`` attribute is an iterable of items exposing ``label_``
            and ``text`` (in production, the loaded spaCy pipeline).

    Returns:
        The same ``record`` object, mutated so that ``"value"`` is a dict
        mapping each entity label to the list of matching entity texts, in
        order of appearance. The labels in ``ENTITY_LABELS`` are always
        present.
    """
    entities = {label: [] for label in ENTITY_LABELS}
    text = record.get("value", "")

    try:
        # Only strings are analysed; any other value yields an empty result.
        spans = nlp(text).ents if isinstance(text, str) else ()
    except Exception:
        # Deliberate best-effort: one record the pipeline cannot process
        # must not abort the whole stream, so it gets an empty result.
        spans = ()

    for span in spans:
        # setdefault keeps an unexpected label from raising KeyError and
        # killing the stream; it simply gets its own bucket.
        entities.setdefault(span.label_, []).append(span.text)

    record["value"] = entities
    return record


def main():
    """Read one JSON object per stdin line and write the tagged object to stdout."""
    nlp = spacy.load("xx_ent_wiki_sm")

    for raw in sys.stdin:
        if not raw.strip():
            # A blank line is not a record; skip it instead of crashing
            # in json.loads.
            continue
        tagged = tag_record(json.loads(raw), nlp)
        sys.stdout.write(json.dumps(tagged))
        sys.stdout.write("\n")


if __name__ == "__main__":
    main()