From a5fea4f09b25747cfbcdb6c166ba2c1f2a7665da Mon Sep 17 00:00:00 2001 From: Balearica Date: Mon, 2 Sep 2024 10:04:26 -0700 Subject: [PATCH] Fixed extractText bug from previous commit --- js/clear.js | 2 +- js/containers/app.js | 11 +++++++++++ scribe.js | 2 +- 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/js/clear.js b/js/clear.js index 6c43fb4..deda927 100644 --- a/js/clear.js +++ b/js/clear.js @@ -13,7 +13,7 @@ import { ImageCache } from './containers/imageContainer.js'; import { replaceObjectProperties } from './utils/miscUtils.js'; export function clearData() { - inputData.pageCount = 0; + inputData.clear(); replaceObjectProperties(ocrAll, { active: [] }); replaceObjectProperties(ocrAllRaw, { active: [] }); layoutRegions.pages.length = 0; diff --git a/js/containers/app.js b/js/containers/app.js index e227a73..87158f9 100644 --- a/js/containers/app.js +++ b/js/containers/app.js @@ -80,4 +80,15 @@ export class inputData { static defaultDownloadFileName = ''; static pageCount = 0; + + static clear = () => { + inputData.xmlMode.length = 0; + inputData.pdfMode = false; + inputData.imageMode = false; + inputData.resumeMode = false; + inputData.evalMode = false; + inputData.inputFileNames = []; + inputData.defaultDownloadFileName = ''; + inputData.pageCount = 0; + }; } diff --git a/scribe.js b/scribe.js index 879548b..2ca4dce 100644 --- a/scribe.js +++ b/scribe.js @@ -79,11 +79,11 @@ const init = async (params) => { * @param {boolean} [options.skipRecPDFTextOCR=false] - If the input is an image-native PDF with existing OCR layer, skip recognition and return the existing text. */ const extractText = async (files, langs = ['eng'], outputFormat = 'txt', options = {}) => { - if (!inputData.xmlMode[0] && !inputData.imageMode && !inputData.pdfMode) throw new Error('No relevant files to process.'); const skipRecPDFTextNative = options?.skipRecPDFTextNative ?? true; const skipRecPDFTextOCR = options?.skipRecPDFTextOCR ?? false; init({ ocr: true, font: true }); await importFiles(files, { extractPDFTextNative: skipRecPDFTextNative, extractPDFTextOCR: skipRecPDFTextOCR }); + if (!inputData.xmlMode[0] && !inputData.imageMode && !inputData.pdfMode) throw new Error('No relevant files to process.'); const skipRecPDF = inputData.pdfMode && (ImageCache.pdfType === 'text' && skipRecPDFTextNative || ImageCache.pdfType === 'ocr' && skipRecPDFTextOCR); const skipRecOCR = inputData.xmlMode[0] && !inputData.imageMode && !inputData.pdfMode; if (!skipRecPDF && !skipRecOCR) await recognize({ langs });