Fixed extractText bug from previous commit

scribeocr · Sep 2, 2024 · a5fea4f
1 parent 0705925
commit a5fea4f
Show file tree

Hide file tree

Showing 3 changed files with 13 additions and 2 deletions.
diff --git a/js/clear.js b/js/clear.js
@@ -13,7 +13,7 @@ import { ImageCache } from './containers/imageContainer.js';
 import { replaceObjectProperties } from './utils/miscUtils.js';
 
 export function clearData() {
-  inputData.pageCount = 0;
+  inputData.clear();
   replaceObjectProperties(ocrAll, { active: [] });
   replaceObjectProperties(ocrAllRaw, { active: [] });
   layoutRegions.pages.length = 0;

diff --git a/js/containers/app.js b/js/containers/app.js
@@ -80,4 +80,15 @@ export class inputData {
   static defaultDownloadFileName = '';
 
   static pageCount = 0;
+
+  static clear = () => {
+    inputData.xmlMode.length = 0;
+    inputData.pdfMode = false;
+    inputData.imageMode = false;
+    inputData.resumeMode = false;
+    inputData.evalMode = false;
+    inputData.inputFileNames = [];
+    inputData.defaultDownloadFileName = '';
+    inputData.pageCount = 0;
+  };
 }
diff --git a/scribe.js b/scribe.js
@@ -79,11 +79,11 @@ const init = async (params) => {
  * @param {boolean} [options.skipRecPDFTextOCR=false] - If the input is an image-native PDF with existing OCR layer, skip recognition and return the existing text.
  */
 const extractText = async (files, langs = ['eng'], outputFormat = 'txt', options = {}) => {
-  if (!inputData.xmlMode[0] && !inputData.imageMode && !inputData.pdfMode) throw new Error('No relevant files to process.');
   const skipRecPDFTextNative = options?.skipRecPDFTextNative ?? true;
   const skipRecPDFTextOCR = options?.skipRecPDFTextOCR ?? false;
   init({ ocr: true, font: true });
   await importFiles(files, { extractPDFTextNative: skipRecPDFTextNative, extractPDFTextOCR: skipRecPDFTextOCR });
+  if (!inputData.xmlMode[0] && !inputData.imageMode && !inputData.pdfMode) throw new Error('No relevant files to process.');
   const skipRecPDF = inputData.pdfMode && (ImageCache.pdfType === 'text' && skipRecPDFTextNative || ImageCache.pdfType === 'ocr' && skipRecPDFTextOCR);
   const skipRecOCR = inputData.xmlMode[0] && !inputData.imageMode && !inputData.pdfMode;
   if (!skipRecPDF && !skipRecOCR) await recognize({ langs });