Skip to content

Commit

Permalink
Merge pull request #128 from kermitt2/feature/docker-image
Browse files Browse the repository at this point in the history
Add docker image
  • Loading branch information
lfoppiano authored Sep 10, 2021
2 parents 43ef2b2 + 1fc97d3 commit dd978fc
Show file tree
Hide file tree
Showing 4 changed files with 227 additions and 0 deletions.
117 changes: 117 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
## Docker GROBID-quantities image using deep learning models and/or CRF models, and various python modules
## Borrowed from https://github.com/kermitt2/grobid/blob/master/Dockerfile.delft
## See https://grobid.readthedocs.io/en/latest/Grobid-docker/

## usage example with grobid: https://github.com/kermitt2/grobid/blob/master/Dockerfile.delft

## docker build -t lfoppiano/grobid-quantities:0.7.0 --build-arg GROBID_VERSION=0.7.0 --file Dockerfile .

## no GPU:
## docker run -t --rm --init -p 8060:8060 -p 8061:8061 -v $(pwd)/config.yml:/opt/grobid/grobid-quantities/config.yml:ro lfoppiano/grobid-quantities:0.7.0

## allocate all available GPUs (only Linux with proper nvidia driver installed on host machine):
## docker run --rm --gpus all --init -p 8060:8060 -p 8061:8061 -v $(pwd)/grobid.yaml:/opt/grobid/grobid-home/config/grobid.yaml:ro lfoppiano/grobid-quantities:0.7.0

# -------------------
# build builder image
# -------------------

# Builder stage: checks out GROBID and grobid-quantities at revision 0.7.0,
# builds the grobid-quantities jar and prepares grobid-home/models.
# Nothing from this stage reaches the final image except what the runtime
# stage copies explicitly with COPY --from=builder.
FROM openjdk:8u275-jdk AS builder

USER root

# Build-time tools. The apt package index is removed in the same layer so it
# is not stored in the layer.
RUN apt-get update && \
    apt-get -y --no-install-recommends install \
        apt-utils \
        git \
        libxml2 && \
    rm -rf /var/lib/apt/lists/*

# GROBID sources, pinned to 0.7.0.
RUN git clone https://github.com/kermitt2/grobid.git /opt/grobid-source && \
    git -C /opt/grobid-source checkout 0.7.0
WORKDIR /opt/grobid-source
# Replaces the gradle.properties of the checkout with the one from the build context.
COPY gradle.properties .

# grobid-quantities is checked out as a sub-directory of the GROBID tree.
RUN git clone https://github.com/kermitt2/grobid-quantities.git ./grobid-quantities && \
    git -C ./grobid-quantities checkout 0.7.0
WORKDIR /opt/grobid-source/grobid-quantities
COPY gradle.properties .

# Adjust config: delete everything between the two Docker-ignore-log marker
# comments (inclusive) from the configuration that ships in the image.
RUN sed -i '/#Docker-ignore-log-start/,/#Docker-ignore-log-end/d' ./resources/config/config.yml

# Preparing models: empty the models directory of the GROBID checkout before
# the grobid-quantities build fills it.
RUN rm -rf /opt/grobid-source/grobid-home/models/*

RUN ./gradlew clean assemble --no-daemon --info --stacktrace
#RUN ./gradlew installScibert --no-daemon --info --stacktrace && rm -f /opt/grobid-source/grobid-home/models/*.zip
# copyModels presumably populates grobid-home/models (confirm in build.gradle);
# downloaded archives are removed in the same layer.
RUN ./gradlew copyModels --no-daemon --info --stacktrace && rm -f /opt/grobid-source/grobid-home/models/*.tar.gz


WORKDIR /opt

# -------------------
# build runtime image
# -------------------

# Runtime stage: starts from the GROBID 0.7.0 image and adds the
# grobid-quantities jar, its configuration, the clearnlp resources and the
# models produced by the builder stage.
FROM grobid/grobid:0.7.0

# setting locale is likely useless but to be sure
ENV LANG=C.UTF-8

# install git and wget (wget is only needed by the optional JProfiler step below);
# the apt package index is removed in the same layer to keep the image small
RUN apt-get update && \
    apt-get -y --no-install-recommends install \
        git \
        wget && \
    rm -rf /var/lib/apt/lists/*
# apt-get -y remove python3.6 && \
# DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends tzdata && \
# apt-get -y --no-install-recommends install git python3.7 python3.7-venv python3.7-dev python3.7-distutil

WORKDIR /opt/grobid

RUN mkdir -p /opt/grobid/grobid-quantities/resources/clearnlp/models /opt/grobid/grobid-quantities/resources/clearnlp/config
COPY --from=builder /opt/grobid-source/grobid-home/models ./grobid-home/models
# Wildcard sources can match several files, so every destination below is a
# directory and ends with "/".
COPY --from=builder /opt/grobid-source/grobid-quantities/build/libs/* ./grobid-quantities/
COPY --from=builder /opt/grobid-source/grobid-quantities/resources/config/config.yml ./grobid-quantities/
COPY --from=builder /opt/grobid-source/grobid-quantities/resources/clearnlp/config/* ./grobid-quantities/resources/clearnlp/config/
COPY --from=builder /opt/grobid-source/grobid-quantities/resources/clearnlp/models/* ./grobid-quantities/resources/clearnlp/models/

VOLUME ["/opt/grobid/grobid-home/tmp"]

# Install requirements
WORKDIR /opt/grobid/grobid-quantities

# Blank the pythonVirtualEnv setting and point grobidHome at the sibling
# grobid-home directory (relative to this WORKDIR).
RUN sed -i \
        -e 's/pythonVirtualEnv:.*/pythonVirtualEnv: /g' \
        -e 's/grobidHome:.*/grobidHome: ..\/grobid-home/g' \
        config.yml

# JProfiler
#RUN wget https://download-gcdn.ej-technologies.com/jprofiler/jprofiler_linux_12_0_2.tar.gz -P /tmp/ && \
# tar -xzf /tmp/jprofiler_linux_12_0_2.tar.gz -C /usr/local &&\
# rm /tmp/jprofiler_linux_12_0_2.tar.gz

# Application and admin connector ports
EXPOSE 8060 8061
#EXPOSE 8080 8081

# Debug variants (remote debugger / JProfiler agent); enable one instead of the default CMD:
#CMD ["java", "-agentlib:jdwp=transport=dt_socket,server=y,suspend=y,address=*:5005", "-jar", "grobid-quantities-0.7.0-onejar.jar", "server", "config.yml"]
#CMD ["java", "-agentpath:/usr/local/jprofiler12.0.2/bin/linux-x64/libjprofilerti.so=port=8849", "-jar", "grobid-quantities-0.7.0-onejar.jar", "server", "config.yml"]
# Both paths are relative to WORKDIR /opt/grobid/grobid-quantities.
CMD ["java", "-jar", "grobid-quantities-0.7.0-onejar.jar", "server", "config.yml"]

# Supplied with --build-arg GROBID_VERSION=...; only used for the version label below.
ARG GROBID_VERSION


LABEL \
    authors="Luca Foppiano, Patrice Lopez" \
    org.label-schema.name="grobid-quantities" \
    org.label-schema.description="Docker image for grobid-quantities service" \
    org.label-schema.url="https://github.com/kermitt2/grobid-quantities" \
    org.label-schema.version="${GROBID_VERSION}"


## Docker tricks:

# - remove all stopped containers
# > docker rm $(docker ps -a -q)

# - remove all unused images
# > docker rmi $(docker images --filter "dangling=true" -q --no-trunc)

# - remove all untagged images
# > docker rmi $(docker images | grep "^<none>" | awk '{print $3}')

# - "Cannot connect to the Docker daemon. Is the docker daemon running on this host?"
# > docker-machine restart

16 changes: 16 additions & 0 deletions doc/gettingStarted.rst
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,22 @@ Grobid-quantities requires *JDK 1.8 or greater* and Grobid to be installed.
Install and build
~~~~~~~~~~~~~~~~~

Docker containers
~~~~~~~~~~~~~~~~~
The simplest way to run grobid-quantities is via docker containers.
To run the container with the default configuration:
::
docker run --rm --init -p 8060:8060 -p 8061:8061 lfoppiano/grobid-quantities:0.7.0

To run the container with a custom configuration, mount a configuration file with the ``-v`` parameter.
The grobid-quantities repository already provides the file ``resources/config/config-docker.yml``, which contains the correct grobidHome and can be modified to best suit one's needs:
::
docker run --rm --init -p 8060:8060 -p 8061:8061 -v $(pwd)/resources/config/config-docker.yml:/opt/grobid/grobid-quantities/config.yml:ro lfoppiano/grobid-quantities:0.7.0


Local installation
~~~~~~~~~~~~~~~~~~~~~

First install the latest development version of GROBID as explained by the `documentation <http://grobid.readthedocs.org>`_.

Grobid-quantities root directory needs to be placed as sibling sub-project inside Grobid directory:
Expand Down
91 changes: 91 additions & 0 deletions resources/config/config-docker.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
# Absolute location of grobid-home inside the container; the Dockerfile copies
# the models to /opt/grobid/grobid-home/models.
grobidHome: /opt/grobid/grobid-home

# CORS settings of the service.
# NOTE(review): "*" presumably allows requests from any origin — confirm before exposing publicly.
corsAllowedOrigins: "*"
corsAllowedMethods: "OPTIONS,GET,PUT,POST,DELETE,HEAD"
corsAllowedHeaders: "X-Requested-With,Content-Type,Accept,Origin"

# Limit the maximum number of requests
# NOTE(review): what the value 0 means is not visible in this file — confirm against the service code.
maxParallelRequests: 0

# One entry per model: "quantities", "units" and "values".
# Each entry selects an engine ("wapiti" is active, "delft" is the commented-out
# alternative) and carries a settings section for each of the two engines.
# NOTE(review): the delft section is presumably only read when engine is "delft" — confirm.
models:
- name: "quantities"
engine: "wapiti"
#engine: "delft"
wapiti:
# wapiti training parameters, they will be used at training time only
epsilon: 0.00001
window: 30
nbMaxIterations: 2000
delft:
# deep learning parameters
architecture: "BidLSTM_CRF"
#architecture: "scibert"
useELMo: false
embeddings_name: "glove-840B"

# Same settings as the "quantities" entry above.
- name: "units"
engine: "wapiti"
#engine: "delft"
wapiti:
# wapiti training parameters, they will be used at training time only
epsilon: 0.00001
window: 30
nbMaxIterations: 2000
delft:
# deep learning parameters
architecture: "BidLSTM_CRF"
#architecture: "scibert"
useELMo: false
embeddings_name: "glove-840B"

# Same settings as the "quantities" entry above.
- name: "values"
engine: "wapiti"
#engine: "delft"
wapiti:
# wapiti training parameters, they will be used at training time only
epsilon: 0.00001
window: 30
nbMaxIterations: 2000
delft:
# deep learning parameters
architecture: "BidLSTM_CRF"
#architecture: "scibert"
useELMo: false
embeddings_name: "glove-840B"


# View settings for the .mustache renderer.
# NOTE(review): "cache: false" presumably disables template caching — confirm.
views:
.mustache:
cache: false

# HTTP server settings.
# The application connector (8060) and admin connector (8061) ports match the
# EXPOSE line of the Dockerfile and the -p mappings in the documentation;
# keep all three in sync when changing them.
server:
type: custom
idleTimeout: 120 seconds
applicationConnectors:
- type: http
port: 8060
adminConnectors:
- type: http
port: 8061
registerDefaultExceptionMappers: false
# Thread pool and queue limits
maxThreads: 2048
maxQueuedRequests: 2048
acceptQueueSize: 2048

# Logging: a console appender at INFO plus a file appender at DEBUG that writes
# to logs/grobid-quantities.log with daily/size-based archives.
# The file appender is wrapped in the two Docker-ignore-log marker comments.
# The Dockerfile deletes the marked section from resources/config/config.yml
# with sed; nothing visible here strips it from this file, so keep the marker
# lines exactly as they are.
logging:
level: INFO
appenders:
- type: console
threshold: INFO
#Docker-ignore-log-start
- type: file
threshold: DEBUG
logFormat: "%-6level [%d{HH:mm:ss.SSS}] [%t] %logger{5} - %X{code} %msg %n"
currentLogFilename: logs/grobid-quantities.log
archivedLogFilenamePattern: logs/grobid-quantities-%d{yyyy-MM-dd}-%i.log.gz
archivedFileCount: 7
timeZone: UTC
maxFileSize: 10MB
#Docker-ignore-log-end
timeZone: UTC

3 changes: 3 additions & 0 deletions resources/config/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ logging:
appenders:
- type: console
threshold: INFO
#Docker-ignore-log-start
- type: file
threshold: DEBUG
logFormat: "%-6level [%d{HH:mm:ss.SSS}] [%t] %logger{5} - %X{code} %msg %n"
Expand All @@ -85,4 +86,6 @@ logging:
archivedFileCount: 7
timeZone: UTC
maxFileSize: 10MB
#Docker-ignore-log-end
timeZone: UTC

0 comments on commit dd978fc

Please sign in to comment.