-
Notifications
You must be signed in to change notification settings - Fork 0
/
run_scribejs.js
34 lines (27 loc) · 1.09 KB
/
run_scribejs.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
import fs from 'fs';
import path from 'path';
import scribe from 'scribe.js-ocr';
// Directory scanned for input images (.png, .jpeg, .jpg); a relative path, so it
// is resolved against the current working directory.
const imgDir = 'img';
// Directory the generated .hocr files are written to; a relative, nested path
// that is also resolved against the current working directory.
const outputDir = 'results/scribejs';
/**
 * Runs every PNG/JPEG image found directly inside `imgDir` through
 * `scribe.extractText` and writes the returned text to
 * `<outputDir>/<image name without extension>.hocr`.
 *
 * Images are processed one at a time, in the order `fs.readdirSync` returns
 * them. The extension match is case-insensitive.
 *
 * @returns {Promise<void>} Resolves once every image has been processed.
 * @throws Rejects with the first file-system or OCR error encountered;
 *   remaining images are not processed.
 */
async function processImages() {
  const imageExtensions = new Set(['.png', '.jpeg', '.jpg']);
  const files = fs
    .readdirSync(imgDir)
    .filter((file) => imageExtensions.has(path.extname(file).toLowerCase()));

  // `outputDir` is a nested path, so missing parent directories must be created
  // as well. With `recursive: true` this is also a no-op when the directory
  // already exists, which makes a separate existence check unnecessary.
  fs.mkdirSync(outputDir, { recursive: true });

  for (const file of files) {
    const filePath = path.join(imgDir, file);
    const outputFilePath = path.join(outputDir, `${path.basename(file, path.extname(file))}.hocr`);
    console.log(`Processing ${file}`);

    // This is intentionally killing the worker with each image so that all results are reproducible and not impacted by previous images.
    // This is inefficient and should be avoided in production code.
    let hocr;
    try {
      hocr = await scribe.extractText([filePath], ['eng'], 'hocr');
    } finally {
      // Terminate even when extraction fails so a failed image does not leave
      // the worker running.
      await scribe.terminate();
    }

    fs.writeFileSync(outputFilePath, hocr);
  }
}
// Entry point: run the batch. On failure, log the error and mark the process as
// failed so shells and CI see a non-zero exit status instead of a silent success.
processImages().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});