Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

group some modules into isolated venvs… #118

Merged
merged 8 commits into from
Jul 24, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -70,8 +70,6 @@ RUN echo "> $PREFIX/bin/activate" >> docker.sh
RUN echo "make deps-ubuntu" >> docker.sh
# build/install all tools of the requested modules:
RUN echo "make all" >> docker.sh
# post-install fixup against conflicting requirements
RUN echo "make fix-pip" >> docker.sh
# remove unneeded automatic deps and clear pkg cache
RUN echo "apt-get -y autoremove && apt-get clean" >> docker.sh
# remove source directories from image, unless using editable mode
Expand Down
136 changes: 116 additions & 20 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,15 @@ GIT_RECURSIVE = # --recursive
ALL_TESSERACT_MODELS = eng equ osd $(TESSERACT_MODELS)

# directory for virtual Python environment
# (but re-use if already active):
# (but re-use if already active); overriden
# to nested venv in recursive calls for modules
# that have known dependency clashes with others
VIRTUAL_ENV ?= $(CURDIR)/venv
bertsky marked this conversation as resolved.
Show resolved Hide resolved
SUB_VENV = $(VIRTUAL_ENV)/local/sub-venv

BIN := $(VIRTUAL_ENV)/bin
SHARE := $(VIRTUAL_ENV)/share
ACTIVATE_VENV := $(VIRTUAL_ENV)/bin/activate
BIN = $(VIRTUAL_ENV)/bin
SHARE = $(VIRTUAL_ENV)/share
ACTIVATE_VENV = $(VIRTUAL_ENV)/bin/activate

define WGET
$(if $(shell which wget),wget -nv -O $(1) $(2),$(if $(shell which curl),curl -L -o $(1) $(2),$(error "found no cmdline downloader (wget/curl)")))
Expand Down Expand Up @@ -55,7 +58,8 @@ endif
all: modules # add OCRD_EXECUTABLES at the end

clean: # add more prerequisites for clean below
$(RM) -r $(CURDIR)/venv
$(RM) -r $(SUB_VENV)/*
$(RM) -r $(CURDIR)/venv # deliberately not using VIRTUAL_ENV here

define HELP
cat <<"EOF"
Expand All @@ -70,10 +74,10 @@ Targets:
all: installs all executables of all modules
install-tesseract: download, build and install Tesseract (with required models)
install-tesseract-training: build and install Tesseract training tools
fix-pip: try to repair conflicting requirements
clean: removes the virtual environment directory, and clean-*
clean-tesseract: remove the build directory for tesseract
clean-olena: remove the build directory for ocrd_olena
deinit: clean, then deinit and rmdir all submodules
docker: (re)build a docker image including all executables
dockers: (re)build docker images for some pre-selected subsets of modules

Expand Down Expand Up @@ -109,11 +113,17 @@ $(OCRD_MODULES): always-update
touch $@; fi
endif

deinit: clean
.PHONY: deinit
deinit:
git submodule deinit --all # --force
git submodule status | while read stat dir ver; do rmdir $$dir; done

# Get Python modules.

$(ACTIVATE_VENV) $(VIRTUAL_ENV):
$(PYTHON) -m venv $(VIRTUAL_ENV)
. $(ACTIVATE_VENV) && $(PIP) install --upgrade $(PIP_OPTIONS_E) pip
. $(ACTIVATE_VENV) && $(PIP) install --upgrade $(PIP_OPTIONS_E) pip setuptools

.PHONY: wheel
wheel: $(BIN)/wheel
Expand Down Expand Up @@ -189,26 +199,44 @@ OCRD_COR_ASV_ANN += $(BIN)/cor-asv-ann-train
OCRD_COR_ASV_ANN += $(BIN)/cor-asv-ann-eval
OCRD_COR_ASV_ANN += $(BIN)/cor-asv-ann-repl
$(call multirule,$(OCRD_COR_ASV_ANN)): cor-asv-ann
ifeq (0,$(MAKELEVEL))
$(MAKE) -B -o $< $(notdir $(OCRD_COR_ASV_ANN))
$(call delegate_venv,$(OCRD_COR_ASV_ANN))
$(OCRD_COR_ASV_ANN): VIRTUAL_ENV := $(SUB_VENV)/headless-tf1
else
$(pip_install)
endif
endif

ifneq ($(findstring cor-asv-fst, $(OCRD_MODULES)),)
deps-ubuntu: cor-asv-fst
OCRD_EXECUTABLES += $(OCRD_COR_ASV_FST)
OCRD_COR_ASV_FST := $(BIN)/ocrd-cor-asv-fst-process
OCRD_COR_ASV_FST += $(BIN)/cor-asv-fst-train
$(call multirule,$(OCRD_COR_ASV_FST)): cor-asv-fst
ifeq (0,$(MAKELEVEL))
$(MAKE) -B -o $< $(notdir $(OCRD_COR_ASV_FST))
$(call delegate_venv,$(OCRD_COR_ASV_FST))
$(OCRD_COR_ASV_FST): VIRTUAL_ENV := $(SUB_VENV)/headless-tf1
else
$(MAKE) -C $< deps
$(pip_install)
endif
endif

ifneq ($(findstring ocrd_keraslm, $(OCRD_MODULES)),)
OCRD_EXECUTABLES += $(OCRD_KERASLM)
OCRD_KERASLM := $(BIN)/ocrd-keraslm-rate
OCRD_KERASLM += $(BIN)/keraslm-rate
$(call multirule,$(OCRD_KERASLM)): ocrd_keraslm
ifeq (0,$(MAKELEVEL))
$(MAKE) -B -o $< $(notdir $(OCRD_KERASLM))
$(call delegate_venv,$(OCRD_KERASLM))
$(OCRD_KERASLM): VIRTUAL_ENV := $(SUB_VENV)/headless-tf1
else
$(pip_install)
endif
endif

ifneq ($(findstring ocrd_im6convert, $(OCRD_MODULES)),)
deps-ubuntu: ocrd_im6convert
Expand Down Expand Up @@ -258,12 +286,22 @@ endif
ifneq ($(findstring ocrd_segment, $(OCRD_MODULES)),)
OCRD_EXECUTABLES += $(OCRD_SEGMENT)
OCRD_SEGMENT := $(BIN)/ocrd-segment-evaluate
OCRD_SEGMENT += $(BIN)/ocrd-segment-from-masks
kba marked this conversation as resolved.
Show resolved Hide resolved
OCRD_SEGMENT += $(BIN)/ocrd-segment-from-coco
OCRD_SEGMENT += $(BIN)/ocrd-segment-extract-lines
OCRD_SEGMENT += $(BIN)/ocrd-segment-extract-regions
OCRD_SEGMENT += $(BIN)/ocrd-segment-extract-pages
OCRD_SEGMENT += $(BIN)/ocrd-segment-replace-original
OCRD_SEGMENT += $(BIN)/ocrd-segment-repair
$(call multirule,$(OCRD_SEGMENT)): ocrd_segment
ifeq (0,$(MAKELEVEL))
kba marked this conversation as resolved.
Show resolved Hide resolved
$(MAKE) -B -o $< $(notdir $(OCRD_SEGMENT))
$(call delegate_venv,$(OCRD_SEGMENT))
$(OCRD_SEGMENT): VIRTUAL_ENV := $(SUB_VENV)/headless-tf1
else
$(pip_install)
endif
endif

ifneq ($(findstring ocrd_tesserocr, $(OCRD_MODULES)),)
OCRD_EXECUTABLES += $(OCRD_TESSEROCR)
Expand Down Expand Up @@ -319,16 +357,28 @@ ifneq ($(findstring ocrd_calamari, $(OCRD_MODULES)),)
OCRD_EXECUTABLES += $(OCRD_CALAMARI)
OCRD_CALAMARI := $(BIN)/ocrd-calamari-recognize
$(OCRD_CALAMARI): ocrd_calamari
ifeq (0,$(MAKELEVEL))
$(MAKE) -B -o $< $(notdir $(OCRD_CALAMARI))
$(call delegate_venv,$(OCRD_CALAMARI))
$(OCRD_CALAMARI): VIRTUAL_ENV := $(SUB_VENV)/headless-tf1
else
$(pip_install)
endif
endif

ifneq ($(findstring ocrd_pc_segmentation, $(OCRD_MODULES)),)
OCRD_EXECUTABLES += $(OCRD_PC_SEGMENTATION)
OCRD_PC_SEGMENTATION := $(BIN)/ocrd-pc-segmentation
$(OCRD_PC_SEGMENTATION): ocrd_pc_segmentation
ifeq (0,$(MAKELEVEL))
$(MAKE) -B -o $< $(notdir $(OCRD_PC_SEGMENTATION))
$(call delegate_venv,$(OCRD_PC_SEGMENTATION))
$(OCRD_PC_SEGMENTATION): VIRTUAL_ENV := $(SUB_VENV)/headless-tf21
else
. $(ACTIVATE_VENV) && $(MAKE) -C $< deps
$(pip_install)
endif
endif

ifneq ($(findstring ocrd_anybaseocr, $(OCRD_MODULES)),)
OCRD_EXECUTABLES += $(OCRD_ANYBASEOCR)
Expand All @@ -341,23 +391,41 @@ OCRD_ANYBASEOCR += $(BIN)/ocrd-anybaseocr-textline
OCRD_ANYBASEOCR += $(BIN)/ocrd-anybaseocr-layout-analysis
OCRD_ANYBASEOCR += $(BIN)/ocrd-anybaseocr-block-segmentation
$(call multirule,$(OCRD_ANYBASEOCR)): ocrd_anybaseocr
ifeq (0,$(MAKELEVEL))
$(MAKE) -B -o $< $(notdir $(OCRD_ANYBASEOCR))
$(call delegate_venv,$(OCRD_ANYBASEOCR))
$(OCRD_ANYBASEOCR): VIRTUAL_ENV := $(SUB_VENV)/headless-tf22
else
$(pip_install)
endif
endif

ifneq ($(findstring ocrd_typegroups_classifier, $(OCRD_MODULES)),)
OCRD_EXECUTABLES += $(OCRD_TYPECLASS)
OCRD_TYPECLASS := $(BIN)/ocrd-typegroups-classifier
OCRD_TYPECLASS += $(BIN)/typegroups-classifier
$(call multirule,$(OCRD_TYPECLASS)): ocrd_typegroups_classifier
ifeq (0,$(MAKELEVEL))
kba marked this conversation as resolved.
Show resolved Hide resolved
$(MAKE) -B -o $< $(notdir $(OCRD_TYPECLASS))
$(call delegate_venv,$(OCRD_TYPECLASS))
$(OCRD_TYPECLASS): VIRTUAL_ENV := $(SUB_VENV)/headless-torch14
else
$(pip_install)
endif
endif

ifneq ($(findstring sbb_textline_detector, $(OCRD_MODULES)),)
OCRD_EXECUTABLES += $(SBB_LINE_DETECTOR)
SBB_LINE_DETECTOR := $(BIN)/ocrd-sbb-line-detector
SBB_LINE_DETECTOR := $(BIN)/ocrd-sbb-textline-detector
$(SBB_LINE_DETECTOR): sbb_textline_detector
ifeq (0,$(MAKELEVEL))
$(MAKE) -B -o $< $(notdir $(SBB_LINE_DETECTOR))
$(call delegate_venv,$(SBB_LINE_DETECTOR))
$(SBB_LINE_DETECTOR): VIRTUAL_ENV := $(SUB_VENV)/headless-tf1
else
$(pip_install)
endif
endif

ifneq ($(findstring ocrd_repair_inconsistencies, $(OCRD_MODULES)),)
OCRD_EXECUTABLES += $(OCRD_REPAIR_INCONSISTENCIES)
Expand All @@ -384,6 +452,44 @@ define pip_install
. $(ACTIVATE_VENV) && cd $< && $(PIP) install --no-deps --force-reinstall $(PIP_OPTIONS) .
endef

# pattern for recursive make:
# $(executables...): module...
# ifeq (0,$(MAKELEVEL))
# $(MAKE) -B -o $< $(notdir $(executables...))
# $(call delegate_venv,$(executables...))
# $(executables...): VIRTUAL_ENV := $(SUB_VENV)/name
# else
# actual recipe...
# fi
# -- calls make with -B to ensure the nested recipe is run as well,
# but also with -o $< to avoid updating the submodule twice);
# overrides the venv path for nested make via target-specific var

# canned recipes after recursive make for
# modules in nested venvs:

# echo a shell script that relays to
# the (currently active) sub-venv
# (replacing the outer by the inner
# venv directory to ensure there
# is no infinite recursion when
# the sub-venv does not have the
# executable)
# TODO: variant for relay to Docker
define delegator
#!/bin/bash
. $(ACTIVATE_VENV) && $(BIN)/$(notdir $(1)) "$$@"
bertsky marked this conversation as resolved.
Show resolved Hide resolved
endef
# create shell scripts that relay to
# the (currently active) sub-venv
define delegate_venv
$(foreach executable,$(1),$(file >$(executable),$(call delegator,$(executable))))
chmod +x $(1)
endef

# export VIRTUAL_ENV etc to recursive make
.EXPORT_ALL_VARIABLES:

# avoid making these .PHONY so they do not have to be repeated:
# clstm tesserocr
$(SHARE)/%: % | $(ACTIVATE_VENV) $(SHARE)
Expand All @@ -397,18 +503,6 @@ $(SHARE):
$(OCRD_EXECUTABLES) $(BIN)/wheel: | $(ACTIVATE_VENV)
$(OCRD_EXECUTABLES): | $(BIN)/wheel

.PHONY: fix-pip
# temporary workaround for conflicting requirements between modules:
# - opencv-python instead of opencv-python-headless (which needs X11 libs)
# (pulled by ocrd_anybaseocr and segmentation-runner)
# - tensorflow>=2.0, tensorflow_gpu in another version
# - pillow==5.4.1 instead of >=6.2
fix-pip:
. $(ACTIVATE_VENV) && $(PIP) install --force-reinstall $(PIP_OPTIONS_E) \
opencv-python-headless \
"pillow>=7.1.0" \
$$($(PIP) list | grep tensorflow | sed -nE '/tensorflow +1[.]/s/ +/>=2.0/p;/tensorflow-gpu +2[.]/s/ +/~=1.15.3/p')

# At last, we know what all OCRD_EXECUTABLES are:
all: $(OCRD_EXECUTABLES)
show:
Expand Down Expand Up @@ -555,6 +649,8 @@ docker%: Dockerfile $(DOCKER_MODULES)
--build-arg PIP_OPTIONS="$(PIP_OPTIONS)" \
-t $(DOCKER_TAG):$(or $(*:-%=%),latest) .

docker: DOCKER_MODULES ?= $(OCRD_MODULES)
docker: docker-latest

# do not search for implicit rules here:
Makefile: ;
Expand Down
11 changes: 5 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ in the current shell environment via PATH and PYTHONHOME.)
* [<em>modules</em>](#modules)
* [<em>ocrd</em>](#ocrd)
* [<em>all</em>](#all)
* [<em>fix-pip</em>](#fix-pip)
* [<em>docker</em>](#docker)
* [<em>clean</em>](#clean)
* [<em>show</em>](#show)
Expand Down Expand Up @@ -134,10 +133,6 @@ Install only OCR-D/core and its CLI `ocrd` into the venv.

Install executables from all modules into the venv. (Depends on [_modules_](#modules) and [_ocrd_](#ocrd).)

#### _fix-pip_

Fix incompatible/inconsistent pip requirements between all modules

#### _docker_

(Re-)build a docker image for all modules/executables. (Depends on [_modules_](#modules).)
Expand Down Expand Up @@ -377,9 +372,13 @@ Modules may require mutually exclusive sets of dependent packages.
As long as we keep reinstalling the headless variant and no such package attempts GUI, we should be fine.
Custom build (as needed for ARM) under the _module_ `opencv-python` already creates the headless variant.

- PyTorch:
* `torch<1.0`
* `torch>=1.0`

- ...

_(Solved temporarily by post-installation `fix-pip`.)_
_(Solved by managing and delegating to different subsets of venvs.)_

### System requirements

Expand Down