Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update to tesserocr 2.6.0 #16

Merged
merged 1 commit into from
Jun 13, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 37 additions & 14 deletions appveyor.yml
Original file line number Diff line number Diff line change
@@ -1,34 +1,37 @@
version: 2.5.2.{build}
version: 2.6.0.{build}
image: Visual Studio 2022

environment:
# https://github.com/microsoft/vcpkg/blob/master/ports/leptonica/vcpkg.json
VCPKG_COMMIT_ID: ddea2bb # Leptonica 1.83.1
PYTHON_VERSIONS: 36, 37, 38, 39, 310, 311
# 2023-01-18 conda-build currently not supported for python 3.11 https://github.com/conda/conda/issues/11170
CONDA_SKIP_PYTHON_VERSIONS: 311
# they added support for conda >= v23.5.0 but 32bit version is stuck at v4.x
#CONDA_SKIP_PYTHON_VERSIONS_64: 311
CONDA_SKIP_PYTHON_VERSIONS_32: 311

matrix:
- job_name: Build Wheels - Tesseract 3.x
job_group: Build Wheels
VCPKG_COMMIT_ID: 1e9facc # Leptonica 1.82.0 (https://github.com/microsoft/vcpkg/blob/master/ports/leptonica/vcpkg.json)
TESSERACT_COMMIT_ID: 7e5f0d6 # Tesseract 3.05.02

- job_name: Build Conda Packages - Tesseract 3.x
job_group: Build Conda Packages
job_depends_on: Build Wheels - Tesseract 3.x

- job_name: Build Wheels - Tesseract 4.x
job_group: Build Wheels
VCPKG_COMMIT_ID: 1e9facc # Leptonica 1.82.0 (https://github.com/microsoft/vcpkg/blob/master/ports/leptonica/vcpkg.json)
TESSERACT_COMMIT_ID: f38e7a7 # Tesseract 4.1.3

- job_name: Build Conda Packages - Tesseract 4.x
job_group: Build Conda Packages
job_depends_on: Build Wheels - Tesseract 4.x

- job_name: Build Wheels - Tesseract 5.x
job_group: Build Wheels
VCPKG_COMMIT_ID: ddea2bb # Leptonica 1.83.1 (https://github.com/microsoft/vcpkg/blob/master/ports/leptonica/vcpkg.json)
TESSERACT_COMMIT_ID: 71af454 # Tesseract 5.3.1

- job_name: Build Conda Packages - Tesseract 5.x
job_group: Build Conda Packages
job_depends_on: Build Wheels - Tesseract 5.x
Expand Down Expand Up @@ -143,6 +146,9 @@ for:
# ------ Build tesserocr ----------------------------------------

cd tesserocr

# dumpbin.exe required for finding dependencies in setup.py
$env:Path = "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\bin;" + $env:Path

# for each {64bit and 32bit} x {Python Versions}
ForEach ($platform_x64 in @($true, $false)) {
Expand Down Expand Up @@ -214,7 +220,8 @@ for:

build_script:
- pwsh: |
$conda_skip_python_versions = $env:CONDA_SKIP_PYTHON_VERSIONS -split ", "
$conda_skip_python_versions_64 = $env:CONDA_SKIP_PYTHON_VERSIONS_64 -split ", "
$conda_skip_python_versions_32 = $env:CONDA_SKIP_PYTHON_VERSIONS_32 -split ", "

# create necessary directories
New-Item conda_build -ItemType Directory
Expand Down Expand Up @@ -248,15 +255,31 @@ for:
# (which would require closing and re-opening of powershell session)
# this is necessary for creating and activating new environments
# any Miniconda version installed on AppVeyor could be used as long as we update it
if ($platform_x64) {
(& "C:\Miniconda37-x64\Scripts\conda.exe" "shell.powershell" "hook") | Out-String | Invoke-Expression
if ($platform_x64) {
# download and install miniconda 64bit
$filename = "Miniconda3-py39_23.3.1-0-Windows-x86_64.exe"
Invoke-WebRequest https://repo.anaconda.com/miniconda/$filename -OutFile $filename
Start-Process -FilePath $filename -ArgumentList "/InstallationType=JustMe", "/RegisterPython=0", "/S", "/D=$env:UserProfile\Miniconda3-x64" -Wait -NoNewWindow
(& "$env:UserProfile\Miniconda3-x64\Scripts\conda.exe" "shell.powershell" "hook") | Out-String | Invoke-Expression

# we want the latest conda to build for the most recent python version
# (since more recent Miniconda versions like Miniconda310 are not yet installed on this AppVeyor image)
# conda >= 23.5.0 is required for conda-build on python 3.11 (only attainable with miniconda >= 3.8);
# "conda update -y -n base -c defaults conda" doesn't work if the installed version is too far behind
conda install -y -n base -c defaults conda=23.5.0
} else {
(& "C:\Miniconda37\Scripts\conda.exe" "shell.powershell" "hook") | Out-String | Invoke-Expression
# download and install miniconda 32bit
$filename = "Miniconda3-py39_4.12.0-Windows-x86.exe"
Invoke-WebRequest https://repo.anaconda.com/miniconda/$filename -OutFile $filename
Start-Process -FilePath $filename -ArgumentList "/InstallationType=JustMe", "/RegisterPython=0", "/S", "/D=$env:UserProfile\Miniconda3" -Wait -NoNewWindow
(& "$env:UserProfile\Miniconda3\Scripts\conda.exe" "shell.powershell" "hook") | Out-String | Invoke-Expression

# we want the latest conda to build for the most recent python version
# 32bit version is stuck at v4.x
conda update -y -n base -c defaults conda
}

# we want the latest conda to build for the most recent python version
# (since more recent Miniconda versions like Miniconda310 are not yet installed on this AppVeyor image)
conda update -y -n base -c defaults conda
$conda_skip_python_versions = if ($platform_x64) { $conda_skip_python_versions_64 } else { $conda_skip_python_versions_32 }

ForEach ($package in $packages) {
if ($conda_skip_python_versions.Contains($package.py_ver_str)) { Continue }
Expand Down
41 changes: 24 additions & 17 deletions res/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import shutil
import codecs
import re
import subprocess
from distutils.util import strtobool
import setuptools # needed for bdist_wheel
from distutils.core import setup
Expand Down Expand Up @@ -79,6 +80,27 @@ def version_to_int(version):
return int(version_str, 16)


def find_dll_dependencies_recursively(dll_path, search_paths):
    """Collect the DLLs that ``dll_path`` depends on, directly or transitively.

    Each DLL is inspected with ``dumpbin.exe /dependents`` (which must be
    reachable through ``PATH``). Every reported dependency name is looked up
    in ``search_paths``; the first directory containing a file of that name
    wins. Dependencies that are not found in any search path (e.g. system
    DLLs) are ignored and not followed further.

    Args:
        dll_path: Path of the DLL whose dependencies should be resolved.
        search_paths: Directories to search for dependency DLLs, in priority
            order.

    Returns:
        A list of unique full paths of all located dependencies. ``dll_path``
        itself is never part of the result.

    Raises:
        OSError: If ``dumpbin.exe`` cannot be started (for example
            ``FileNotFoundError`` when it is not on ``PATH``).
    """
    def _key(path):
        # Normalise so the same file is recognised regardless of spelling
        # (case-insensitive on Windows, identity elsewhere).
        return os.path.normcase(os.path.abspath(path))

    # Every DLL is scanned at most once. This keeps the number of dumpbin
    # invocations linear for shared (diamond) dependencies and guarantees
    # termination even if DLLs reference each other circularly.
    visited = {_key(dll_path)}
    dependencies = []
    pending = [dll_path]

    while pending:
        current = pending.pop()
        dumpbin = subprocess.run(
            ['dumpbin.exe', '/dependents', current],
            universal_newlines=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
        # dumpbin lists each dependency on its own line, indented by four
        # spaces. Import tables may spell the extension in any case
        # ("foo.dll", "FOO.DLL"), so match case-insensitively.
        dependency_names = re.findall(
            r'^\s{4}(\S*\.dll)$', dumpbin.stdout, re.MULTILINE | re.IGNORECASE
        )

        for dependency_name in dependency_names:
            for search_path in search_paths:
                candidate = os.path.join(search_path, dependency_name)
                if not os.path.isfile(candidate):
                    continue
                candidate_key = _key(candidate)
                if candidate_key not in visited:
                    visited.add(candidate_key)
                    dependencies.append(candidate)
                    pending.append(candidate)
                # first search path that contains the file wins
                break

    return dependencies


def find_libraries(library_stems, search_paths, extension):
library_paths = []

Expand Down Expand Up @@ -163,23 +185,8 @@ def __init__(self, name, sources, *args, **kw):
"leptonica"
]

# indentations with respect to dependency
runtime_libraries = [
"tesseract",
"leptonica",
"gif",
"jpeg",
"openjp2",
"png",
"zlib",
"tiff",
"lzma",
"webpmux",
"webp"
]


runtime_library_paths = find_libraries(runtime_libraries, [vcpkg_bin, tesseract_bin], "dll")
runtime_library_paths = find_libraries(["tesseract"], [tesseract_bin], "dll")
runtime_library_paths.extend(find_dll_dependencies_recursively(runtime_library_paths[0], [vcpkg_bin, tesseract_bin]))
_LOGGER.info("runtime libraries found:\n\t{}".format("\n\t".join(runtime_library_paths)))

build_dependency_paths = find_libraries(build_dependencies, [vcpkg_lib, tesseract_lib], "lib")
Expand Down
2 changes: 1 addition & 1 deletion tesserocr