From 15894ab1b504a7540861025bba138371ed3fdd9f Mon Sep 17 00:00:00 2001
From: Alex
Date: Thu, 25 Apr 2024 14:24:58 +0200
Subject: [PATCH 1/2] Windows specific prefix check (magick)

---
Review notes (this section is commentary, not part of the commit):
  * Purpose: pick the ImageMagick launcher per platform inside to_text();
    "magick convert" on Windows, plain "convert" elsewhere.
  * The Windows test now uses platform.system() == "Windows". The earlier
    platform.platform().startswith("win32") could never be true, because
    platform.platform() returns a descriptive string, not sys.platform.
  * The literal "convert" list element is removed from the argv list; it
    previously produced ["convert", "convert", ...] on non-Windows hosts.
  * The "# convert the (multi-page) pdf ..." comment is kept as context.
  * Line whitespace was reconstructed from a collapsed copy of this patch;
    verify context lines against the target tree before applying. The
    post-image blob id on the "index" line predates these edits.

 src/invoice2data/input/tesseract.py | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/src/invoice2data/input/tesseract.py b/src/invoice2data/input/tesseract.py
index 7bea12dd..b5fbef01 100644
--- a/src/invoice2data/input/tesseract.py
+++ b/src/invoice2data/input/tesseract.py
@@ -1,5 +1,5 @@
 # -*- coding: utf-8 -*-
-
+import platform
 import shutil
 import tempfile
 import mimetypes
@@ -32,17 +32,22 @@ def to_text(path: str, area_details: dict = None):
     """

     # Check for dependencies. Needs Tesseract and Imagemagick installed.
+    # ImageMagick is launched as "magick convert" on Windows and as plain
+    # "convert" everywhere else; the first element is the executable name.
+    if platform.system() == "Windows":
+        convert_command = ["magick", "convert"]
+    else:
+        convert_command = ["convert"]
     if not shutil.which("tesseract"):
         raise EnvironmentError("tesseract not installed.")
-    if not shutil.which("convert"):
+    if not shutil.which(convert_command[0]):
         raise EnvironmentError("imagemagick not installed.")

     language = get_languages()
     logger.debug("tesseract language arg is, %s", language)
     timeout = 180

     # convert the (multi-page) pdf file to a 300dpi png
-    convert = [
-        "convert",
+    convert = convert_command + [
         "-units",
         "PixelsPerInch",

From 8eef4187d59f5c2905e1be41d1cd6c2760cfdb9a Mon Sep 17 00:00:00 2001
From: alexm96 <38464076+alexm96@users.noreply.github.com>
Date: Thu, 25 Apr 2024 14:33:38 +0200
Subject: [PATCH 2/2] testing runners

---
Review notes (this section is commentary, not part of the commit):
  * Purpose: add windows-latest to the test matrix and install the system
    dependencies per runner OS.
  * NOTE(review): the trigger is narrowed from every push / pull request
    to the "testing" branch only. That looks like a leftover from trying
    out the runners; confirm it is intended before merging.
  * YAML indentation below was reconstructed from a collapsed copy of
    this patch (the indentation of the "steps" list items in particular
    is a guess); verify against the target tree before applying.

 .github/workflows/main.yml | 23 ++++++++++++++++++++---
 1 file changed, 20 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index fb492843..0824fe36 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -1,6 +1,11 @@
 name: Test
-
-on: [push, pull_request]
+on:
+  push:
+    branches:
+      - testing
+  pull_request:
+    branches:
+      - testing

 jobs:
   test:
@@ -9,24 +14,36 @@ jobs:
     strategy:
       matrix:
         python-version: [3.7, 3.8, 3.9, 3.10.x, 3.11]
-        os: [ubuntu-latest]
+        os: [ubuntu-latest, windows-latest]

     steps:
       - uses: actions/checkout@v2
+
       - name: Set up Python ${{ matrix.python-version }}
         uses: actions/setup-python@v1
         with:
           python-version: ${{ matrix.python-version }}
+
       - name: Install system dependencies (Linux)
+        if: runner.os == 'Linux'
         run: |
           sudo apt update
           sudo apt install tesseract-ocr poppler-utils imagemagick ghostscript
           pip install -U ocrmypdf
+
+      - name: Install system dependencies (Windows)
+        if: runner.os == 'Windows'
+        run: |
+          choco install tesseract poppler imagemagick ghostscript
+          pip install -U ocrmypdf
+
       - name: Install testing dependencies
         run: |
           pip install -U wheel pip
           pip install -U ".[test]"
+
       - name: Lint with flake8
         run: flake8
+
       - name: Test with pytest
         run: pytest