Skip to content

Commit

Permalink
Merge pull request #16 from simonflueckiger/simonflueckiger-update-to…
Browse files Browse the repository at this point in the history
…-tesserocr-2.6.0

Update to tesserocr 2.6.0
  • Loading branch information
simonflueckiger authored Jun 13, 2023
2 parents d1d880a + 28115a6 commit dc19009
Show file tree
Hide file tree
Showing 3 changed files with 62 additions and 32 deletions.
51 changes: 37 additions & 14 deletions appveyor.yml
Original file line number Diff line number Diff line change
@@ -1,34 +1,37 @@
version: 2.5.2.{build}
version: 2.6.0.{build}
image: Visual Studio 2022

environment:
# https://github.com/microsoft/vcpkg/blob/master/ports/leptonica/vcpkg.json
VCPKG_COMMIT_ID: ddea2bb # Leptonica 1.83.1
PYTHON_VERSIONS: 36, 37, 38, 39, 310, 311
# 2023-01-18 conda-build currently not supported for python 3.11 https://github.com/conda/conda/issues/11170
CONDA_SKIP_PYTHON_VERSIONS: 311
# conda-build on python 3.11 is supported from conda >= v23.5.0, but the 32bit Miniconda is stuck at conda v4.x, so 3.11 is only skipped for 32bit
#CONDA_SKIP_PYTHON_VERSIONS_64: 311
CONDA_SKIP_PYTHON_VERSIONS_32: 311

matrix:
- job_name: Build Wheels - Tesseract 3.x
job_group: Build Wheels
VCPKG_COMMIT_ID: 1e9facc # Leptonica 1.82.0 (https://github.com/microsoft/vcpkg/blob/master/ports/leptonica/vcpkg.json)
TESSERACT_COMMIT_ID: 7e5f0d6 # Tesseract 3.05.02

- job_name: Build Conda Packages - Tesseract 3.x
job_group: Build Conda Packages
job_depends_on: Build Wheels - Tesseract 3.x

- job_name: Build Wheels - Tesseract 4.x
job_group: Build Wheels
VCPKG_COMMIT_ID: 1e9facc # Leptonica 1.82.0 (https://github.com/microsoft/vcpkg/blob/master/ports/leptonica/vcpkg.json)
TESSERACT_COMMIT_ID: f38e7a7 # Tesseract 4.1.3

- job_name: Build Conda Packages - Tesseract 4.x
job_group: Build Conda Packages
job_depends_on: Build Wheels - Tesseract 4.x

- job_name: Build Wheels - Tesseract 5.x
job_group: Build Wheels
VCPKG_COMMIT_ID: ddea2bb # Leptonica 1.83.1 (https://github.com/microsoft/vcpkg/blob/master/ports/leptonica/vcpkg.json)
TESSERACT_COMMIT_ID: 71af454 # Tesseract 5.3.1

- job_name: Build Conda Packages - Tesseract 5.x
job_group: Build Conda Packages
job_depends_on: Build Wheels - Tesseract 5.x
Expand Down Expand Up @@ -143,6 +146,9 @@ for:
# ------ Build tesserocr ----------------------------------------
cd tesserocr
# dumpbin.exe required for finding dependencies in setup.py
$env:Path = "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\bin;" + $env:Path
# for each {64bit and 32bit} x {Python Versions}
ForEach ($platform_x64 in @($true, $false)) {
Expand Down Expand Up @@ -214,7 +220,8 @@ for:

build_script:
- pwsh: |
$conda_skip_python_versions = $env:CONDA_SKIP_PYTHON_VERSIONS -split ", "
$conda_skip_python_versions_64 = $env:CONDA_SKIP_PYTHON_VERSIONS_64 -split ", "
$conda_skip_python_versions_32 = $env:CONDA_SKIP_PYTHON_VERSIONS_32 -split ", "
# create necessary directories
New-Item conda_build -ItemType Directory
Expand Down Expand Up @@ -248,15 +255,31 @@ for:
# (which would require closing and re-opening of powershell session)
# this is necessary for creating and activating new environments
# any Miniconda version installed on AppVeyor could be used as long as we update it
if ($platform_x64) {
(& "C:\Miniconda37-x64\Scripts\conda.exe" "shell.powershell" "hook") | Out-String | Invoke-Expression
if ($platform_x64) {
# download and install miniconda 64bit
$filename = "Miniconda3-py39_23.3.1-0-Windows-x86_64.exe"
Invoke-WebRequest https://repo.anaconda.com/miniconda/$filename -OutFile $filename
Start-Process -FilePath $filename -ArgumentList "/InstallationType=JustMe", "/RegisterPython=0", "/S", "/D=$env:UserProfile\Miniconda3-x64" -Wait -NoNewWindow
(& "$env:UserProfile\Miniconda3-x64\Scripts\conda.exe" "shell.powershell" "hook") | Out-String | Invoke-Expression
# we want the latest conda to build for the most recent python version
# (since more recent Miniconda versions like Miniconda310 are not yet installed on this AppVeyor image)
# conda >= 23.5.0 is required for conda-build on python 3.11 (only attainable with miniconda >= 3.8);
# "conda update -y -n base -c defaults conda" doesn't work if the installed version is too far behind, so the version is pinned explicitly
conda install -y -n base -c defaults conda=23.5.0
} else {
(& "C:\Miniconda37\Scripts\conda.exe" "shell.powershell" "hook") | Out-String | Invoke-Expression
# download and install miniconda 32bit
$filename = "Miniconda3-py39_4.12.0-Windows-x86.exe"
Invoke-WebRequest https://repo.anaconda.com/miniconda/$filename -OutFile $filename
Start-Process -FilePath $filename -ArgumentList "/InstallationType=JustMe", "/RegisterPython=0", "/S", "/D=$env:UserProfile\Miniconda3" -Wait -NoNewWindow
(& "$env:UserProfile\Miniconda3\Scripts\conda.exe" "shell.powershell" "hook") | Out-String | Invoke-Expression
# we want the latest conda to build for the most recent python version
# 32bit version is stuck at v4.x
conda update -y -n base -c defaults conda
}
# we want the latest conda to build for the most recent python version
# (since more recent Miniconda versions like Miniconda310 are not yet installed on this AppVeyor image)
conda update -y -n base -c defaults conda
$conda_skip_python_versions = if ($platform_x64) { $conda_skip_python_versions_64 } else { $conda_skip_python_versions_32 }
ForEach ($package in $packages) {
if ($conda_skip_python_versions.Contains($package.py_ver_str)) { Continue }
Expand Down
41 changes: 24 additions & 17 deletions res/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import shutil
import codecs
import re
import subprocess
from distutils.util import strtobool
import setuptools # needed for bdist_wheel
from distutils.core import setup
Expand Down Expand Up @@ -79,6 +80,27 @@ def version_to_int(version):
return int(version_str, 16)


def find_dll_dependencies_recursively(dll_path, search_paths):
    """Return the DLLs that ``dll_path`` depends on, directly or transitively.

    Each DLL is inspected with ``dumpbin.exe /dependents``. An imported DLL
    name is resolved against ``search_paths`` in order; the first directory
    containing a file of that name wins. Names that cannot be found in any
    search path (e.g. system DLLs) are ignored and not followed further.

    Every resolved DLL is scanned exactly once, so shared ("diamond")
    dependencies do not spawn repeated ``dumpbin`` processes and circular
    imports cannot cause unbounded recursion.

    Args:
        dll_path: Path of the DLL whose dependency closure is wanted.
        search_paths: Directories to look in for the imported DLLs.

    Returns:
        A list of unique full paths, in discovery order. ``dll_path`` itself
        is never part of the result.

    Raises:
        FileNotFoundError: If ``dumpbin.exe`` is not on ``PATH``.
        subprocess.CalledProcessError: If ``dumpbin.exe`` exits with a
            non-zero status; continuing would silently drop dependencies.
    """
    # dumpbin lists imported DLL names on their own line, indented by four
    # whitespace characters; accept any casing of the ".dll" suffix.
    import_pattern = re.compile(r'^\s{4}(\S*\.dll)$', re.MULTILINE | re.IGNORECASE)

    def _identity(path):
        # Normalised key so one file reached via different spellings of the
        # same path is only scanned once.
        return os.path.normcase(os.path.abspath(path))

    scanned = {_identity(dll_path)}
    dependencies = []
    pending = [dll_path]

    while pending:
        current = pending.pop()
        dumpbin = subprocess.run(
            ['dumpbin.exe', '/dependents', current],
            universal_newlines=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            check=True,
        )

        for dependency_name in import_pattern.findall(dumpbin.stdout):
            for search_path in search_paths:
                candidate = os.path.join(search_path, dependency_name)
                if not os.path.isfile(candidate):
                    continue
                key = _identity(candidate)
                if key not in scanned:
                    scanned.add(key)
                    dependencies.append(candidate)
                    pending.append(candidate)
                # first search path that contains the DLL wins
                break

    return dependencies


def find_libraries(library_stems, search_paths, extension):
library_paths = []

Expand Down Expand Up @@ -163,23 +185,8 @@ def __init__(self, name, sources, *args, **kw):
"leptonica"
]

# indentations with respect to dependency
runtime_libraries = [
"tesseract",
"leptonica",
"gif",
"jpeg",
"openjp2",
"png",
"zlib",
"tiff",
"lzma",
"webpmux",
"webp"
]


runtime_library_paths = find_libraries(runtime_libraries, [vcpkg_bin, tesseract_bin], "dll")
runtime_library_paths = find_libraries(["tesseract"], [tesseract_bin], "dll")
runtime_library_paths.extend(find_dll_dependencies_recursively(runtime_library_paths[0], [vcpkg_bin, tesseract_bin]))
_LOGGER.info("runtime libraries found:\n\t{}".format("\n\t".join(runtime_library_paths)))

build_dependency_paths = find_libraries(build_dependencies, [vcpkg_lib, tesseract_lib], "lib")
Expand Down
2 changes: 1 addition & 1 deletion tesserocr

0 comments on commit dc19009

Please sign in to comment.