Skip to content

Commit

Permalink
Fixed extractText bug from previous commit
Browse files Browse the repository at this point in the history
  • Loading branch information
Balearica committed Sep 2, 2024
1 parent 0705925 commit a5fea4f
Show file tree
Hide file tree
Showing 3 changed files with 13 additions and 2 deletions.
2 changes: 1 addition & 1 deletion js/clear.js
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ import { ImageCache } from './containers/imageContainer.js';
import { replaceObjectProperties } from './utils/miscUtils.js';

export function clearData() {
inputData.pageCount = 0;
inputData.clear();
replaceObjectProperties(ocrAll, { active: [] });
replaceObjectProperties(ocrAllRaw, { active: [] });
layoutRegions.pages.length = 0;
Expand Down
11 changes: 11 additions & 0 deletions js/containers/app.js
Original file line number Diff line number Diff line change
Expand Up @@ -80,4 +80,15 @@ export class inputData {
static defaultDownloadFileName = '';

static pageCount = 0;

static clear = () => {
inputData.xmlMode.length = 0;
inputData.pdfMode = false;
inputData.imageMode = false;
inputData.resumeMode = false;
inputData.evalMode = false;
inputData.inputFileNames = [];
inputData.defaultDownloadFileName = '';
inputData.pageCount = 0;
};
}
2 changes: 1 addition & 1 deletion scribe.js
Original file line number Diff line number Diff line change
Expand Up @@ -79,11 +79,11 @@ const init = async (params) => {
* @param {boolean} [options.skipRecPDFTextOCR=false] - If the input is an image-native PDF with an existing OCR layer, skip recognition and return the existing text.
*/
const extractText = async (files, langs = ['eng'], outputFormat = 'txt', options = {}) => {
if (!inputData.xmlMode[0] && !inputData.imageMode && !inputData.pdfMode) throw new Error('No relevant files to process.');
const skipRecPDFTextNative = options?.skipRecPDFTextNative ?? true;
const skipRecPDFTextOCR = options?.skipRecPDFTextOCR ?? false;
init({ ocr: true, font: true });
await importFiles(files, { extractPDFTextNative: skipRecPDFTextNative, extractPDFTextOCR: skipRecPDFTextOCR });
if (!inputData.xmlMode[0] && !inputData.imageMode && !inputData.pdfMode) throw new Error('No relevant files to process.');
const skipRecPDF = inputData.pdfMode && (ImageCache.pdfType === 'text' && skipRecPDFTextNative || ImageCache.pdfType === 'ocr' && skipRecPDFTextOCR);
const skipRecOCR = inputData.xmlMode[0] && !inputData.imageMode && !inputData.pdfMode;
if (!skipRecPDF && !skipRecOCR) await recognize({ langs });
Expand Down

0 comments on commit a5fea4f

Please sign in to comment.