From 9fd0d295e106a561c9f0a7e6a1b71e17a513295a Mon Sep 17 00:00:00 2001 From: Luca Foppiano Date: Fri, 10 Sep 2021 09:43:05 +0900 Subject: [PATCH 1/9] add dockerfile --- Dockerfile | 121 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 121 insertions(+) create mode 100644 Dockerfile diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 00000000..b00b7f21 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,121 @@ +## Docker GROBID-quantities image using deep learning models and/or CRF models, and various python modules +## Borrowed from https://github.com/kermitt2/grobid/blob/master/Dockerfile.delft +## See https://grobid.readthedocs.io/en/latest/Grobid-docker/ + +## usage example with grobid: https://github.com/kermitt2/grobid/blob/master/Dockerfile.delft + +## docker build -t lfoppiano/grobid-quantities:0.7.0 --build-arg GROBID_VERSION=0.7.0 --file Dockerfile . + +## no GPU: +## docker run -t --rm --init -p 8072:8072 -p 8073:8073 -v config.yml:/opt/grobid/grobid-quantities:ro lfoppiano/grobid-quantities:0.7.0 + +## allocate all available GPUs (only Linux with proper nvidia driver installed on host machine): +## docker run --rm --gpus all --init -p 8072:8072 -p 8073:8073 -v grobid.yaml:/opt/grobid/grobid-home/config/grobid.yaml:ro lfoppiano/grobid-superconductors:0.3.0-SNAPSHOT + +# ------------------- +# build builder image +# ------------------- + +FROM openjdk:8u275-jdk as builder + +USER root + +RUN apt-get update && \ + apt-get -y --no-install-recommends install apt-utils libxml2 git + +RUN git clone https://github.com/kermitt2/grobid.git /opt/grobid-source && cd /opt/grobid-source && git checkout 0.7.0 +WORKDIR /opt/grobid-source +COPY gradle.properties . + +RUN git clone https://github.com/kermitt2/grobid-quantities.git ./grobid-quantities && cd grobid-quantities && git checkout 0.7.0 +WORKDIR /opt/grobid-source/grobid-quantities +COPY gradle.properties . 
+ +COPY gradle.properties ./grobid-quantities/ + +# Adjust config +RUN sed -i '/#Docker-ignore-log-start/,/#Docker-ignore-log-end/d' ./grobid-quantities/resources/config/config-docker.yml + +# Preparing models +RUN rm -rf /opt/grobid-source/grobid-home/models/* + +WORKDIR /opt/grobid-source/grobid-quantities +RUN ./gradlew clean assemble --no-daemon --info --stacktrace +#RUN ./gradlew installScibert --no-daemon --info --stacktrace && rm -f /opt/grobid-source/grobid-home/models/*.zip +RUN ./gradlew copyModels --no-daemon --info --stacktrace && rm -f /opt/grobid-source/grobid-home/models/*.tar.gz + + +WORKDIR /opt + +# ------------------- +# build runtime image +# ------------------- + +FROM grobid/grobid:0.7.0 + +# setting locale is likely useless but to be sure +ENV LANG C.UTF-8 + +# install JRE 8, python and other dependencies +RUN apt-get update && \ + apt-get -y --no-install-recommends install git wget +# apt-get -y remove python3.6 && \ +# DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends tzdata && \ +# apt-get -y --no-install-recommends install git python3.7 python3.7-venv python3.7-dev python3.7-distutil + +WORKDIR /opt/grobid + +RUN mkdir -p /opt/grobid/grobid-superconductors +COPY --from=builder /opt/grobid-source/grobid-home/models ./grobid-home/models +COPY --from=builder /opt/grobid-source/grobid-quantities/build/libs/* ./grobid-quantities/ +COPY --from=builder /opt/grobid-source/grobid-quantities/resources/config/config.yml ./grobid-quantities/ + +VOLUME ["/opt/grobid/grobid-home/tmp"] + +# Install requirements +WORKDIR /opt/grobid + +#RUN pip install git+https://github.com/lfoppiano/MaterialParser +#RUN pip install -e /opt/grobid/grobid-superconductors-tools/materialParser + +#RUN sed -i 's/pythonVirtualEnv:.*/pythonVirtualEnv: \/opt\/grobid\/venv/g' grobid-superconductors/config.yml +RUN sed -i 's/pythonVirtualEnv:.*/pythonVirtualEnv: /g' grobid-quantities/config.yml +RUN sed -i 's/grobidHome:.*/grobidHome: grobid-home/g' 
grobid-quantities/config.yml + +# JProfiler +#RUN wget https://download-gcdn.ej-technologies.com/jprofiler/jprofiler_linux_12_0_2.tar.gz -P /tmp/ && \ +# tar -xzf /tmp/jprofiler_linux_12_0_2.tar.gz -C /usr/local &&\ +# rm /tmp/jprofiler_linux_12_0_2.tar.gz + +EXPOSE 8072 8073 +#EXPOSE 8080 8081 + +#CMD ["java", "-agentlib:jdwp=transport=dt_socket,server=y,suspend=y,address=*:5005", "-jar", "grobid-superconductors/grobid-superconductors-0.2.1-SNAPSHOT-onejar.jar", "server", "grobid-superconductors/config.yml"] +#CMD ["java", "-agentpath:/usr/local/jprofiler12.0.2/bin/linux-x64/libjprofilerti.so=port=8849", "-jar", "grobid-superconductors/grobid-superconductors-0.2.1-SNAPSHOT-onejar.jar", "server", "grobid-superconductors/config.yml"] +CMD ["java", "-jar", "grobid-quantities/grobid-quantities-0.7.0-SNAPSHOT-onejar.jar", "server", "grobid-quantities/config.yml"] + +ARG GROBID_VERSION + + +LABEL \ + authors="Luca Foppiano, Patrice Lopez" \ + org.label-schema.name="grobid-quantities" \ + org.label-schema.description="Docker image for grobid-quantities service" \ + org.label-schema.url="https://github.com/kermitt2/grobid-quantities" \ + org.label-schema.version=${GROBID_VERSION} + + +## Docker tricks: + +# - remove all stopped containers +# > docker rm $(docker ps -a -q) + +# - remove all unused images +# > docker rmi $(docker images --filter "dangling=true" -q --no-trunc) + +# - remove all untagged images +# > docker rmi $(docker images | grep "^<none>" | awk "{print $3}") + +# - "Cannot connect to the Docker daemon. Is the docker daemon running on this host?" 
+# > docker-machine restart + From e48d17fa20cbdd568225e65714149ba3cc2fb4e9 Mon Sep 17 00:00:00 2001 From: Luca Foppiano Date: Fri, 10 Sep 2021 10:00:44 +0900 Subject: [PATCH 2/9] add start/termination of part to ignore --- resources/config/config.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/resources/config/config.yml b/resources/config/config.yml index 1cd10fee..14692885 100644 --- a/resources/config/config.yml +++ b/resources/config/config.yml @@ -77,6 +77,7 @@ logging: appenders: - type: console threshold: INFO +#Docker-ignore-log-start - type: file threshold: DEBUG logFormat: "%-6level [%d{HH:mm:ss.SSS}] [%t] %logger{5} - %X{code} %msg %n" @@ -85,4 +86,6 @@ logging: archivedFileCount: 7 timeZone: UTC maxFileSize: 10MB +#Docker-ignore-log-end timeZone: UTC + From 91411b3e17691afd6e8ca67f7a19ae13774f015d Mon Sep 17 00:00:00 2001 From: Luca Foppiano Date: Fri, 10 Sep 2021 10:33:49 +0900 Subject: [PATCH 3/9] update dockerfile --- Dockerfile | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/Dockerfile b/Dockerfile index b00b7f21..e61a865c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -31,10 +31,8 @@ RUN git clone https://github.com/kermitt2/grobid-quantities.git ./grobid-quantit WORKDIR /opt/grobid-source/grobid-quantities COPY gradle.properties . 
-COPY gradle.properties ./grobid-quantities/ - # Adjust config -RUN sed -i '/#Docker-ignore-log-start/,/#Docker-ignore-log-end/d' ./grobid-quantities/resources/config/config-docker.yml +RUN sed -i '/#Docker-ignore-log-start/,/#Docker-ignore-log-end/d' ./resources/config/config.yml # Preparing models RUN rm -rf /opt/grobid-source/grobid-home/models/* From 5e04215245113d061c8c559f21f95e99ee87aab5 Mon Sep 17 00:00:00 2001 From: Luca Foppiano Date: Fri, 10 Sep 2021 11:02:15 +0900 Subject: [PATCH 4/9] last changes --- Dockerfile | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/Dockerfile b/Dockerfile index e61a865c..6c8bfee8 100644 --- a/Dockerfile +++ b/Dockerfile @@ -63,7 +63,7 @@ RUN apt-get update && \ WORKDIR /opt/grobid -RUN mkdir -p /opt/grobid/grobid-superconductors +RUN mkdir -p /opt/grobid/grobid-quantities COPY --from=builder /opt/grobid-source/grobid-home/models ./grobid-home/models COPY --from=builder /opt/grobid-source/grobid-quantities/build/libs/* ./grobid-quantities/ COPY --from=builder /opt/grobid-source/grobid-quantities/resources/config/config.yml ./grobid-quantities/ @@ -73,10 +73,6 @@ VOLUME ["/opt/grobid/grobid-home/tmp"] # Install requirements WORKDIR /opt/grobid -#RUN pip install git+https://github.com/lfoppiano/MaterialParser -#RUN pip install -e /opt/grobid/grobid-superconductors-tools/materialParser - -#RUN sed -i 's/pythonVirtualEnv:.*/pythonVirtualEnv: \/opt\/grobid\/venv/g' grobid-superconductors/config.yml RUN sed -i 's/pythonVirtualEnv:.*/pythonVirtualEnv: /g' grobid-quantities/config.yml RUN sed -i 's/grobidHome:.*/grobidHome: grobid-home/g' grobid-quantities/config.yml @@ -85,12 +81,12 @@ RUN sed -i 's/grobidHome:.*/grobidHome: grobid-home/g' grobid-quantities/config. 
# tar -xzf /tmp/jprofiler_linux_12_0_2.tar.gz -C /usr/local &&\ # rm /tmp/jprofiler_linux_12_0_2.tar.gz -EXPOSE 8072 8073 +EXPOSE 8060 8061 #EXPOSE 8080 8081 #CMD ["java", "-agentlib:jdwp=transport=dt_socket,server=y,suspend=y,address=*:5005", "-jar", "grobid-superconductors/grobid-superconductors-0.2.1-SNAPSHOT-onejar.jar", "server", "grobid-superconductors/config.yml"] #CMD ["java", "-agentpath:/usr/local/jprofiler12.0.2/bin/linux-x64/libjprofilerti.so=port=8849", "-jar", "grobid-superconductors/grobid-superconductors-0.2.1-SNAPSHOT-onejar.jar", "server", "grobid-superconductors/config.yml"] -CMD ["java", "-jar", "grobid-quantities/grobid-quantities-0.7.0-SNAPSHOT-onejar.jar", "server", "grobid-quantities/config.yml"] +CMD ["java", "-jar", "grobid-quantities/grobid-quantities-0.7.0-onejar.jar", "server", "grobid-quantities/config.yml"] ARG GROBID_VERSION From 4dcbb46409419064711f5c1e3f1e482b0db5cb68 Mon Sep 17 00:00:00 2001 From: Luca Foppiano Date: Fri, 10 Sep 2021 11:25:08 +0900 Subject: [PATCH 5/9] add clearnlp --- Dockerfile | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/Dockerfile b/Dockerfile index 6c8bfee8..cdbc597d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -63,18 +63,19 @@ RUN apt-get update && \ WORKDIR /opt/grobid -RUN mkdir -p /opt/grobid/grobid-quantities +RUN mkdir -p /opt/grobid/grobid-quantities/resources/clearnlp COPY --from=builder /opt/grobid-source/grobid-home/models ./grobid-home/models COPY --from=builder /opt/grobid-source/grobid-quantities/build/libs/* ./grobid-quantities/ COPY --from=builder /opt/grobid-source/grobid-quantities/resources/config/config.yml ./grobid-quantities/ +COPY --from=builder /opt/grobid-source/grobid-quantities/resources/clearnlp/* ./grobid-quantities/resources/clearnlp VOLUME ["/opt/grobid/grobid-home/tmp"] # Install requirements -WORKDIR /opt/grobid +WORKDIR /opt/grobid/grobid-quantities -RUN sed -i 's/pythonVirtualEnv:.*/pythonVirtualEnv: /g' grobid-quantities/config.yml -RUN sed -i 
's/grobidHome:.*/grobidHome: grobid-home/g' grobid-quantities/config.yml +RUN sed -i 's/pythonVirtualEnv:.*/pythonVirtualEnv: /g' config.yml +RUN sed -i 's/grobidHome:.*/grobidHome: grobid-home/g' config.yml # JProfiler #RUN wget https://download-gcdn.ej-technologies.com/jprofiler/jprofiler_linux_12_0_2.tar.gz -P /tmp/ && \ From 00465cc0c3a3570d6e673ec22ed91ea825b41ed9 Mon Sep 17 00:00:00 2001 From: Luca Foppiano Date: Fri, 10 Sep 2021 14:24:50 +0900 Subject: [PATCH 6/9] last updates --- Dockerfile | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Dockerfile b/Dockerfile index cdbc597d..38b01a1f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -63,11 +63,12 @@ RUN apt-get update && \ WORKDIR /opt/grobid -RUN mkdir -p /opt/grobid/grobid-quantities/resources/clearnlp +RUN mkdir -p /opt/grobid/grobid-quantities/resources/clearnlp/models /opt/grobid/grobid-quantities/resources/clearnlp/config COPY --from=builder /opt/grobid-source/grobid-home/models ./grobid-home/models COPY --from=builder /opt/grobid-source/grobid-quantities/build/libs/* ./grobid-quantities/ COPY --from=builder /opt/grobid-source/grobid-quantities/resources/config/config.yml ./grobid-quantities/ -COPY --from=builder /opt/grobid-source/grobid-quantities/resources/clearnlp/* ./grobid-quantities/resources/clearnlp +COPY --from=builder /opt/grobid-source/grobid-quantities/resources/clearnlp/config/* ./grobid-quantities/resources/clearnlp/config +COPY --from=builder /opt/grobid-source/grobid-quantities/resources/clearnlp/models/* ./grobid-quantities/resources/clearnlp/models VOLUME ["/opt/grobid/grobid-home/tmp"] @@ -87,7 +88,7 @@ EXPOSE 8060 8061 #CMD ["java", "-agentlib:jdwp=transport=dt_socket,server=y,suspend=y,address=*:5005", "-jar", "grobid-superconductors/grobid-superconductors-0.2.1-SNAPSHOT-onejar.jar", "server", "grobid-superconductors/config.yml"] #CMD ["java", "-agentpath:/usr/local/jprofiler12.0.2/bin/linux-x64/libjprofilerti.so=port=8849", "-jar", 
"grobid-superconductors/grobid-superconductors-0.2.1-SNAPSHOT-onejar.jar", "server", "grobid-superconductors/config.yml"] -CMD ["java", "-jar", "grobid-quantities/grobid-quantities-0.7.0-onejar.jar", "server", "grobid-quantities/config.yml"] +CMD ["java", "-jar", "grobid-quantities-0.7.0-onejar.jar", "server", "config.yml"] ARG GROBID_VERSION From eae71d06df9f2d5be7c19b04239d3b28a59988b9 Mon Sep 17 00:00:00 2001 From: Luca Foppiano Date: Fri, 10 Sep 2021 14:29:48 +0900 Subject: [PATCH 7/9] add config-docker.yml --- resources/config/config-docker.yml | 91 ++++++++++++++++++++++++++++++ 1 file changed, 91 insertions(+) create mode 100644 resources/config/config-docker.yml diff --git a/resources/config/config-docker.yml b/resources/config/config-docker.yml new file mode 100644 index 00000000..9104116d --- /dev/null +++ b/resources/config/config-docker.yml @@ -0,0 +1,91 @@ +grobidHome: /opt/grobid/grobid-home + +corsAllowedOrigins: "*" +corsAllowedMethods: "OPTIONS,GET,PUT,POST,DELETE,HEAD" +corsAllowedHeaders: "X-Requested-With,Content-Type,Accept,Origin" + +# Limit the maximum number of requests +maxParallelRequests: 0 + +models: + - name: "quantities" + engine: "wapiti" + #engine: "delft" + wapiti: + # wapiti training parameters, they will be used at training time only + epsilon: 0.00001 + window: 30 + nbMaxIterations: 2000 + delft: + # deep learning parameters + architecture: "BidLSTM_CRF" + #architecture: "scibert" + useELMo: false + embeddings_name: "glove-840B" + + - name: "units" + engine: "wapiti" + #engine: "delft" + wapiti: + # wapiti training parameters, they will be used at training time only + epsilon: 0.00001 + window: 30 + nbMaxIterations: 2000 + delft: + # deep learning parameters + architecture: "BidLSTM_CRF" + #architecture: "scibert" + useELMo: false + embeddings_name: "glove-840B" + + - name: "values" + engine: "wapiti" + #engine: "delft" + wapiti: + # wapiti training parameters, they will be used at training time only + epsilon: 0.00001 + window: 
30 + nbMaxIterations: 2000 + delft: + # deep learning parameters + architecture: "BidLSTM_CRF" + #architecture: "scibert" + useELMo: false + embeddings_name: "glove-840B" + + +views: + .mustache: + cache: false + +server: + type: custom + idleTimeout: 120 seconds + applicationConnectors: + - type: http + port: 8060 + adminConnectors: + - type: http + port: 8061 + registerDefaultExceptionMappers: false + maxThreads: 2048 + maxQueuedRequests: 2048 + acceptQueueSize: 2048 + +logging: + level: INFO + appenders: + - type: console + threshold: INFO +#Docker-ignore-log-start + - type: file + threshold: DEBUG + logFormat: "%-6level [%d{HH:mm:ss.SSS}] [%t] %logger{5} - %X{code} %msg %n" + currentLogFilename: logs/grobid-quantities.log + archivedLogFilenamePattern: logs/grobid-quantities-%d{yyyy-MM-dd}-%i.log.gz + archivedFileCount: 7 + timeZone: UTC + maxFileSize: 10MB +#Docker-ignore-log-end +timeZone: UTC + From 2acf004ae3205932fbb2db6bdef13e13e49b1f5c Mon Sep 17 00:00:00 2001 From: Luca Foppiano Date: Fri, 10 Sep 2021 14:36:35 +0900 Subject: [PATCH 8/9] update documentation and docker file to support correctly a default configuration --- Dockerfile | 2 +- doc/gettingStarted.rst | 16 ++++++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 38b01a1f..37e669c4 100644 --- a/Dockerfile +++ b/Dockerfile @@ -76,7 +76,7 @@ VOLUME ["/opt/grobid/grobid-home/tmp"] WORKDIR /opt/grobid/grobid-quantities RUN sed -i 's/pythonVirtualEnv:.*/pythonVirtualEnv: /g' config.yml -RUN sed -i 's/grobidHome:.*/grobidHome: grobid-home/g' config.yml +RUN sed -i 's/grobidHome:.*/grobidHome: ../grobid-home/g' config.yml # JProfiler #RUN wget https://download-gcdn.ej-technologies.com/jprofiler/jprofiler_linux_12_0_2.tar.gz -P /tmp/ && \ diff --git a/doc/gettingStarted.rst b/doc/gettingStarted.rst index 62c8734c..af40ceef 100644 --- a/doc/gettingStarted.rst +++ b/doc/gettingStarted.rst @@ -10,6 +10,22 @@ Grobid-quantities requires *JDK 1.8 or greater* 
and Grobid to be installed. Install and build ~~~~~~~~~~~~~~~~~ +Docker containers +~~~~~~~~~~~~~~~~~ +The simplest way to run grobid-quantities is via Docker containers. +To run the container with the default configuration: +:: + docker run --rm --init -p 8060:8060 -p 8061:8061 lfoppiano/grobid-quantities:0.7.0 + +To run the container with a custom configuration, provide a configuration file through the ``-v`` parameter (the host path must be absolute). +The grobid-quantities repository already provides the file ``resources/config/config-docker.yml``, which contains the correct grobidHome and can be modified to best suit one's needs: +:: + docker run --rm --init -p 8060:8060 -p 8061:8061 -v $(pwd)/resources/config/config-docker.yml:/opt/grobid/grobid-quantities/config.yml:ro lfoppiano/grobid-quantities:0.7.0 + + +Local installation +~~~~~~~~~~~~~~~~~~~~~ + First install the latest development version of GROBID as explained by the `documentation `_. Grobid-quantities root directory needs to be placed as sibling sub-project inside Grobid directory: From 1fc97d3469622603bbe6c9c723981adc3d51aed3 Mon Sep 17 00:00:00 2001 From: Luca Foppiano Date: Fri, 10 Sep 2021 14:42:55 +0900 Subject: [PATCH 9/9] minor correction --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 37e669c4..5ae18db9 100644 --- a/Dockerfile +++ b/Dockerfile @@ -76,7 +76,7 @@ VOLUME ["/opt/grobid/grobid-home/tmp"] WORKDIR /opt/grobid/grobid-quantities RUN sed -i 's/pythonVirtualEnv:.*/pythonVirtualEnv: /g' config.yml -RUN sed -i 's/grobidHome:.*/grobidHome: ../grobid-home/g' config.yml +RUN sed -i 's/grobidHome:.*/grobidHome: ..\/grobid-home/g' config.yml # JProfiler #RUN wget https://download-gcdn.ej-technologies.com/jprofiler/jprofiler_linux_12_0_2.tar.gz -P /tmp/ && \