Skip to content

Commit

Permalink
feat(person-ner): create service
Browse files Browse the repository at this point in the history
  • Loading branch information
leogail committed Aug 6, 2024
1 parent b51c108 commit f12ff3c
Show file tree
Hide file tree
Showing 12 changed files with 277 additions and 6 deletions.
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@ All contributing instructions are in [CONTRIBUTING](CONTRIBUTING.md).
- [ark-tools](./services/ark-tools) [![Docker Pulls](https://img.shields.io/docker/pulls/cnrsinist/ws-ark-tools.svg)](https://hub.docker.com/r/cnrsinist/ws-ark-tools/)
- [astro-ner](./services/astro-ner) [![Docker Pulls](https://img.shields.io/docker/pulls/cnrsinist/ws-astro-ner.svg)](https://hub.docker.com/r/cnrsinist/ws-astro-ner/)
- [authors-tools](./services/authors-tools) [![Docker Pulls](https://img.shields.io/docker/pulls/cnrsinist/ws-authors-tools.svg)](https://hub.docker.com/r/cnrsinist/ws-authors-tools/)
- [base-line-python](./services/base-line-python) [![Docker Pulls](https://img.shields.io/docker/pulls/cnrsinist/ws-base-line-python.svg)](https://hub.docker.com/r/cnrsinist/ws-base-line-python/)
- [base-line](./services/base-line) [![Docker Pulls](https://img.shields.io/docker/pulls/cnrsinist/ws-base-line.svg)](https://hub.docker.com/r/cnrsinist/ws-base-line/)
- [base-line-python](./services/base-line-python) [![Docker Pulls](https://img.shields.io/docker/pulls/cnrsinist/ws-base-line-python.svg)](https://hub.docker.com/r/cnrsinist/ws-base-line-python/)
- [biblio-ref](./services/biblio-ref) [![Docker Pulls](https://img.shields.io/docker/pulls/cnrsinist/ws-biblio-ref.svg)](https://hub.docker.com/r/cnrsinist/ws-biblio-ref/)
- [biblio-tools](./services/biblio-tools) [![Docker Pulls](https://img.shields.io/docker/pulls/cnrsinist/ws-biblio-tools.svg)](https://hub.docker.com/r/cnrsinist/ws-biblio-tools/)
- [chem-ner](./services/chem-ner) [![Docker Pulls](https://img.shields.io/docker/pulls/cnrsinist/ws-chem-ner.svg)](https://hub.docker.com/r/cnrsinist/ws-chem-ner/)
Expand All @@ -42,6 +42,7 @@ All contributing instructions are in [CONTRIBUTING](CONTRIBUTING.md).
- [ner-tagger](./services/ner-tagger) [![Docker Pulls](https://img.shields.io/docker/pulls/cnrsinist/ws-ner-tagger.svg)](https://hub.docker.com/r/cnrsinist/ws-ner-tagger/)
- [nlp-tools2](./services/nlp-tools2) [![Docker Pulls](https://img.shields.io/docker/pulls/cnrsinist/ws-nlp-tools2.svg)](https://hub.docker.com/r/cnrsinist/ws-nlp-tools2/)
- [pdf-text](./services/pdf-text) [![Docker Pulls](https://img.shields.io/docker/pulls/cnrsinist/ws-pdf-text.svg)](https://hub.docker.com/r/cnrsinist/ws-pdf-text/)
- [person-ner](./services/person-ner) [![Docker Pulls](https://img.shields.io/docker/pulls/cnrsinist/ws-person-ner.svg)](https://hub.docker.com/r/cnrsinist/ws-person-ner/)
- [sciencemetrix-classification](./services/sciencemetrix-classification) [![Docker Pulls](https://img.shields.io/docker/pulls/cnrsinist/ws-sciencemetrix-classification.svg)](https://hub.docker.com/r/cnrsinist/ws-sciencemetrix-classification/)
- [terms-extraction](./services/terms-extraction) [![Docker Pulls](https://img.shields.io/docker/pulls/cnrsinist/ws-terms-extraction.svg)](https://hub.docker.com/r/cnrsinist/ws-terms-extraction/)
- [text-clustering](./services/text-clustering) [![Docker Pulls](https://img.shields.io/docker/pulls/cnrsinist/ws-text-clustering.svg)](https://hub.docker.com/r/cnrsinist/ws-text-clustering/)
10 changes: 5 additions & 5 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,8 @@
"services/ark-tools",
"services/astro-ner",
"services/authors-tools",
"services/base-line-python",
"services/base-line",
"services/base-line-python",
"services/biblio-ref",
"services/biblio-tools",
"services/chem-ner",
Expand All @@ -60,10 +60,10 @@
"services/ner-tagger",
"services/nlp-tools2",
"services/pdf-text",
"services/terms-extraction",
"services/text-clustering",
"services/person-ner",
"services/sciencemetrix-classification",
"services/terms-extraction"
"services/terms-extraction",
"services/text-clustering"
],
"dependencies": {
"@ezs/analytics": "2.3.2",
Expand All @@ -76,4 +76,4 @@
"devDependencies": {
"@types/node": "20.11.30"
}
}
}
7 changes: 7 additions & 0 deletions services/person-ner/.dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Ignore all files by default
*

# White list only the required files
!config.json
!v1
!swagger.json
19 changes: 19 additions & 0 deletions services/person-ner/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# syntax=docker/dockerfile:1.2
FROM cnrsinist/ezs-python-server:py3.9-no16-1.0.11

USER root
# Install all python dependencies.
# Every version is pinned so that rebuilding the image gives the same result;
# --no-cache-dir keeps pip's download/wheel cache out of the image layer.
RUN pip install --no-cache-dir \
    spacy==3.6.1 \
    xx-ent-wiki-sm@https://github.com/explosion/spacy-models/releases/download/xx_ent_wiki_sm-3.6.0/xx_ent_wiki_sm-3.6.0.tar.gz \
    numpy==1.26.4


# Install all node dependencies
# RUN npm install \
#     @ezs/strings@1.0.3

WORKDIR /app/public
# Declare files to copy in .dockerignore
COPY --chown=daemon:daemon . /app/public/
# config.json is moved out of the public directory, into /app, and made
# writable by every user.
RUN mv ./config.json /app && chmod a+w /app/config.json
5 changes: 5 additions & 0 deletions services/person-ner/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# ws-person-ner@0.0.0

Extraction d'entités nommées de noms de personnes

Permet d'extraire les entités nommées correspondant à des noms de personnes dans du texte
14 changes: 14 additions & 0 deletions services/person-ner/config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
{
"environnement": {
"EZS_TITLE": "Extraction d'entités nommées de noms de personnes",
"EZS_DESCRIPTION": "Permet d'extraire les entités nommées correspondant à des noms de personnes dans du texte",
"EZS_METRICS": true,
"EZS_CONCURRENCY": 2,
"EZS_CONTINUE_DELAY": 60,
"EZS_NSHARDS": 32,
"EZS_CACHE": true,
"EZS_VERBOSE": false,
"NODE_OPTIONS": "--max_old_space_size=1024",
"NODE_ENV": "production"
}
}
47 changes: 47 additions & 0 deletions services/person-ner/examples.http
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# These examples can be used directly in VSCode, using HTTPYac extension (anweber.vscode-httpyac)
# They are important, because used to generate the tests.hurl file.

# Décommenter/commenter les lignes voulues pour tester localement
@host=http://localhost:31976
# @host=https://person-ner.services.istex.fr

###
# @name v1Tagger
# Description de la route
POST {{host}}/v1/tagger?indent=true
content-type: application/json
[
{"value": "Python is widely used in data science. Bob R. uses it ; he works for the CNRS"},
{"value": "Jean Dupont assiste àu festival de Cannes sur la côte d'Azur."}
]


HTTP 200
[{
"value": {
"PER": [
"Bob R."
],
"LOC": [],
"ORG": [
"CNRS"
],
"MISC": [
"Python"
]
}
},
{
"value": {
"PER": [
"Jean Dupont"
],
"LOC": [
"côte d'Azur"
],
"ORG": [],
"MISC": [
"festival de Cannes"
]
}
}]
36 changes: 36 additions & 0 deletions services/person-ner/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
{
"private": true,
"name": "ws-person-ner",
"version": "0.0.0",
"description": "Extraction d'entités nommées de noms de personnes",
"repository": {
"type": "git",
"url": "git+https://github.com/Inist-CNRS/web-services.git"
},
"keywords": [
"ezmaster"
],
"author": "Léo Gaillard <leo.gaillard@inist.fr>",
"license": "MIT",
"bugs": {
"url": "https://github.com/Inist-CNRS/web-services/issues"
},
"homepage": "https://github.com/Inist-CNRS/web-services/#readme",
"scripts": {
"version:insert:readme": "sed -i \"s#\\(${npm_package_name}.\\)\\([\\.a-z0-9]\\+\\)#\\1${npm_package_version}#g\" README.md && git add README.md",
"version:insert:swagger": "sed -i \"s/\\\"version\\\": \\\"[0-9]\\+.[0-9]\\+.[0-9]\\+\\\"/\\\"version\\\": \\\"${npm_package_version}\\\"/g\" swagger.json && git add swagger.json",
"version:insert": "npm run version:insert:readme && npm run version:insert:swagger",
"version:commit": "git commit -a -m \"release ${npm_package_name}@${npm_package_version}\"",
"version:tag": "git tag \"${npm_package_name}@${npm_package_version}\" -m \"${npm_package_name}@${npm_package_version}\"",
"version:push": "git push && git push --tags",
"version": "npm run version:insert && npm run version:commit && npm run version:tag",
"postversion": "npm run version:push",
"build:dev": "docker build -t cnrsinist/${npm_package_name}:latest .",
"start:dev": "npm run build:dev && docker run --name dev --rm --detach -p 31976:31976 cnrsinist/${npm_package_name}:latest",
"stop:dev": "docker stop dev",
"build": "docker build -t cnrsinist/${npm_package_name}:${npm_package_version} .",
"start": "docker run --rm -p 31976:31976 cnrsinist/${npm_package_name}:${npm_package_version}",
"publish": "docker push cnrsinist/${npm_package_name}:${npm_package_version}"
},
"avoid-testing": false
}
33 changes: 33 additions & 0 deletions services/person-ner/swagger.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
{
"openapi": "3.0.0",
"info": {
"title": "person-ner - Extraction d'entités nommées de noms de personnes",
"description": "Permet d'extraire les entités nommées correspondant à des noms de personnes dans du texte",
"version": "0.0.0",
"termsOfService": "https://services.istex.fr/",
"contact": {
"name": "Inist-CNRS",
"url": "https://www.inist.fr/nous-contacter/"
}
},
"servers": [
{
"x-comment": "Will be automatically completed by the ezs server."
},
{
"url": "http://vptdmservices.intra.inist.fr:49225/",
"description": "Latest version for production",
"x-profil": "Standard"
}
],
"tags": [
{
"name": "person-ner",
"description": "Extraction d'entités nommées de noms de personnes",
"externalDocs": {
"description": "Plus de documentation",
"url": "https://github.com/inist-cnrs/web-services/tree/main/services/person-ner"
}
}
]
}
37 changes: 37 additions & 0 deletions services/person-ner/tests.hurl
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
POST {{host}}/v1/tagger?indent=true
content-type: application/json
[
{"value": "Python is widely used in data science. Bob R. uses it ; he works for the CNRS"},
{"value": "Jean Dupont assiste àu festival de Cannes sur la côte d'Azur."}
]


HTTP 200
[{
"value": {
"PER": [
"Bob R."
],
"LOC": [],
"ORG": [
"CNRS"
],
"MISC": [
"Python"
]
}
},
{
"value": {
"PER": [
"Jean Dupont"
],
"LOC": [
"côte d'Azur"
],
"ORG": [],
"MISC": [
"festival de Cannes"
]
}
}]
43 changes: 43 additions & 0 deletions services/person-ner/v1/tagger.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# OpenAPI Documentation - JSON format (dot notation)
mimeType = application/json

post.description = Extraction d'entités nommées de noms de personnes
post.responses.default.description = Permet d'extraire les entités nommées correspondant à des noms de personnes dans du texte
post.responses.default.content.application/json.schema.$ref = #/components/schemas/JSONStream
post.summary = Extraction d'entités nommées de noms de personnes
post.requestBody.required = true
post.requestBody.content.application/json.schema.$ref = #/components/schemas/JSONStream
post.parameters.0.in = query
post.parameters.0.name = indent
post.parameters.0.schema.type = boolean
post.parameters.0.description = Indent or not the JSON Result
# Examples
post.requestBody.content.application/json.example.0.value = Python is widely used in data science. Bob R. uses it ; he works for the CNRS
post.requestBody.content.application/json.example.1.value = Jean Dupont assiste àu festival de Cannes sur la côte d'Azur.
post.responses.default.content.application/json.example.0.value.PER.0 = Bob R.
post.responses.default.content.application/json.example.0.value.LOC = []
post.responses.default.content.application/json.example.0.value.ORG.0 = CNRS
post.responses.default.content.application/json.example.0.value.MISC.0 = Python
post.responses.default.content.application/json.example.1.value.PER.0 = Jean Dupont
post.responses.default.content.application/json.example.1.value.LOC.0 = côte d'Azur
post.responses.default.content.application/json.example.1.value.ORG = []
post.responses.default.content.application/json.example.1.value.MISC.0 = festival de Cannes
;'
# Processing pipeline of the /v1/tagger route.
# Declare the ezs plugins used by the statements below.
[use]
plugin = @ezs/spawn
plugin = @ezs/basics

# Parse the JSON request body.
# NOTE(review): with separator = *, each element of the top-level array is
# presumably emitted as one object - confirm against the @ezs/basics docs.
[JSONParse]
separator = *

# Run the nested [expand/...] statements on the "value" field of each object.
# NOTE(review): size = 10 presumably means objects are handed to the
# sub-pipeline in batches of 10 - confirm against the ezs [expand] docs.
[expand]
path = value
size = 10

# Delegate the work to the external Python script.
[expand/exec]
# command should be executable !
command = ./v1/tagger.py

# Serialize the result; indentation is driven by the "indent" value
# (false when it is not set).
[dump]
indent = env('indent', false)
29 changes: 29 additions & 0 deletions services/person-ner/v1/tagger.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import spacy
import json
import sys

# Load the multilingual spaCy NER pipeline once, at start-up, so that the same
# model instance is reused for every record read from stdin.
nlp = spacy.load("xx_ent_wiki_sm")

# Entity labels that are always present in the result, even when empty.
LABELS = ("PER", "LOC", "ORG", "MISC")

# Protocol: one JSON record per stdin line, one JSON record per stdout line.
# The "value" field of each record (the text to analyse) is replaced by a
# dict mapping every entity label to the list of entity texts found.
for line in sys.stdin:
    line = json.loads(line)
    if not isinstance(line, dict):
        # A bare JSON value (string, list, number...) has no "value" field;
        # wrap it so the record is still answered instead of stopping the
        # whole stream with a TypeError.
        line = {"value": line}

    res = {label: [] for label in LABELS}

    # A record without "value" is treated as empty text.
    value = line.get("value", "")

    try:
        ents = nlp(value).ents
    except Exception:
        # Best effort: input that spaCy cannot process (e.g. a non-string
        # value) yields an empty result rather than stopping the stream.
        ents = []

    for ent in ents:
        # setdefault keeps the script alive if the model ever returns a label
        # outside LABELS (a plain res[label] lookup would raise KeyError).
        res.setdefault(ent.label_, []).append(ent.text)

    line["value"] = res

    sys.stdout.write(json.dumps(line))
    sys.stdout.write("\n")

0 comments on commit f12ff3c

Please sign in to comment.