From e426dbdbdd63538d1485a2313b00b19e8e0192be Mon Sep 17 00:00:00 2001 From: Greg Kempe Date: Thu, 23 May 2024 13:48:41 +0200 Subject: [PATCH 1/2] Install pdfjs-to-text into .env/bin so that it's available as an executable --- pyproject.toml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 2eb13a7f8..1bde4000d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -70,3 +70,6 @@ dependencies = [ "sentry-sdk>=1.16.0", "whitenoise>=6.0.0", ] + +[tool.setuptools.data-files] +"bin" = ["bin/pdfjs-to-text"] From 6deb29084f746570aeb3ebd268e77a06bb936c8e Mon Sep 17 00:00:00 2001 From: Greg Kempe Date: Thu, 23 May 2024 13:52:03 +0200 Subject: [PATCH 2/2] Make the path to pdfjs-to-text configurable --- peachjam/helpers.py | 4 ++-- peachjam/settings.py | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/peachjam/helpers.py b/peachjam/helpers.py index 3ecad8501..bad027915 100644 --- a/peachjam/helpers.py +++ b/peachjam/helpers.py @@ -1,4 +1,3 @@ -import os import string import subprocess import tempfile @@ -6,6 +5,7 @@ from functools import wraps import martor.utils +from django.conf import settings from django.utils.translation import get_language_from_request from languages_plus.models import Language @@ -46,7 +46,7 @@ def pdfjs_to_text(fname): """Extract text from fname using pdfjs-compatible script.""" with tempfile.NamedTemporaryFile(suffix=".txt") as outf: cmd = [ - os.path.join(os.path.dirname(__file__), "..", "bin", "pdfjs-to-text"), + settings.PEACHJAM["PDFJS_TO_TEXT"], fname, outf.name, ] diff --git a/peachjam/settings.py b/peachjam/settings.py index 6b9e3131d..861c50fcf 100644 --- a/peachjam/settings.py +++ b/peachjam/settings.py @@ -130,6 +130,7 @@ "SEARCH_JURISDICTION_FILTER": False, "MULTIPLE_JURISDICTIONS": False, "MULTIPLE_LOCALITIES": False, + "PDFJS_TO_TEXT": "bin/pdfjs-to-text", } PEACHJAM["ES_INDEX"] = os.environ.get("ES_INDEX", slugify(PEACHJAM["APP_NAME"]))