-
Notifications
You must be signed in to change notification settings - Fork 0
/
index.js
118 lines (88 loc) · 3.07 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
// Cross-checks page images against the <pb> (page break) markers found in the
// XML texts and accumulates a plain-text report in `result`.
//
// Usage: node index.js <imageMainFolder> <textMainFolder>
const fs = require('fs');
const Path = require('path');
const glob = require('glob');
const naturalSort = require('javascript-natural-sort');

const [, , imageMainArg, textMainArg] = process.argv;
if (!imageMainArg || !textMainArg) {
  // Fail with a usage hint instead of a TypeError on `undefined.replace`.
  console.error('Usage: node index.js <imageMainFolder> <textMainFolder>');
  process.exit(1);
}

// Drop one trailing slash so the routes built below do not contain "//".
const imageMainFolder = imageMainArg.replace(/\/$/, '');
const textMainFolder = textMainArg.replace(/\/$/, '');

// The report text; written to disk after every folder has been processed.
let result = '';
// How many page ids have been written on the current report line (wraps at 6).
let resultCount = 0;
// Both values are carried over from one file/folder to the next, so a text
// without its own <division>/<vol> tag inherits the most recent ones.
let divisionName = '';
let volumeInfo = '';

const imageFolders = fs.readdirSync(imageMainFolder, 'utf8').sort(naturalSort);

imageFolders.forEach(imageFolder => {
  const folderRoute = `${imageMainFolder}/${imageFolder}`;
  // Plain files sitting next to the image folders are ignored.
  if (fs.lstatSync(folderRoute).isFile()) {
    return;
  }

  // Base names (without ".jpg") of every image in this folder whose route
  // matches the <digits>-<digits>-<digits><a|b|c|d>.jpg pattern. Entries are
  // deleted as page breaks claim them; whatever is left has no page break.
  const imageFileNames = {};
  glob.sync(`${folderRoute}/**/*.jpg`)
    .filter(route => /\d+-\d+-\d+[abcd]\.jpg/.test(route))
    .forEach(route => {
      imageFileNames[Path.basename(route, '.jpg')] = true;
    });

  // The text folder name is the image folder name minus a trailing "-<digits>".
  const textFolder = imageFolder.replace(/-\d+$/, '');
  const textRoutes = glob.sync(`${textMainFolder}/${textFolder}/**/*.xml`).sort(naturalSort);

  let pbs = [];
  textRoutes.forEach(textRoute => {
    const text = fs.readFileSync(textRoute, 'utf8');
    divisionName = getDivisionName(text) || divisionName;
    volumeInfo = getVolumeInfo(text, divisionName) || volumeInfo;
    // Cut the text in front of every "<pb" so each chunk starts with its own
    // tag; the first chunk is whatever precedes the first "<pb" and is dropped.
    const pbsSubset = text.replace(/<pb/g, 'delim~!@#$%<pb').split('delim~!@#$%');
    pbs = pbs.concat(pbsSubset.slice(1));
  });

  result += `${volumeInfo}\n\n============================================================\n\n`;
  // Every volume starts on a fresh line, so a partly filled line left over
  // from the previous folder must not influence the wrapping here.
  resultCount = 0;

  pbs.forEach(pb => {
    const pbIdMatch = /<pb id="(.+?)"/.exec(pb);
    if (!pbIdMatch) {
      // A <pb> whose tag does not start with id="..." cannot be matched to an
      // image; report it and continue instead of aborting the whole run.
      console.warn(`Skipping a <pb> tag without a leading id attribute (text folder: ${textFolder})`);
      return;
    }
    const pbId = pbIdMatch[1];
    const hasImage = imageFileNames[pbId];
    // What remains after removing tags and ASCII letters/digits; an empty
    // remainder is treated as "this page has no text".
    const pbHasText = pb.replace(/<[^>]+?>/g, '')
      .replace(/[a-zA-Z0-9]+/g, '')
      .trim();

    if (!hasImage) {
      // Close a partly filled id line before the standalone message.
      if (resultCount > 0) {
        result += '\n\n';
      }
      result += `${pbId} 目前圖檔版本的這一頁缺圖。\n\n`;
      resultCount = 0;
    } else if (!pbHasText) {
      if (resultCount > 0) {
        result += '\n\n';
      }
      result += `${pbId} 目前圖檔版本的這一頁可能沒有文字;或目前圖檔版本的這一頁缺圖,所以暫時用空白的圖檔代替。\n\n`;
      resultCount = 0;
    } else {
      // Regular page: list the id, six ids per line.
      result += `${pbId} , `;
      resultCount++;
      if (6 === resultCount) {
        result += '\n\n';
        resultCount = 0;
      }
    }
    delete imageFileNames[pbId];
  });

  result += '\n\n============================================================\n\n';
  checkImageHasNoPb(imageFileNames);
});
/**
 * Reads the value of `tw="..."` from the first `<division` tag in `text`
 * that contains one.
 *
 * @param {string} text - XML source to scan.
 * @returns {string|null} The captured attribute value, or null when no
 *   `<division` tag with a `tw="..."` attribute is found (previously a
 *   `<division` tag lacking that attribute raised a TypeError).
 */
function getDivisionName(text) {
  // A single exec covers both "no <division tag" and "tag without tw".
  const match = /<division[^>]+?tw="(.+?)"/.exec(text);
  return match ? match[1] : null;
}
/**
 * Builds the volume heading line from the `<vol` tag in `text`.
 *
 * The volume number is taken from `<vol n="..."` (with `n` as the first
 * attribute) and the name from a `bo="..."` attribute inside a `<vol` tag.
 *
 * @param {string} text - XML source to scan.
 * @param {string} divisionName - Division name to embed in the heading.
 * @returns {string|null} The heading `第 <n> 函, <divisionName> <bo>`, or null
 *   when either attribute cannot be found (previously a `<vol` tag missing
 *   one of them raised a TypeError).
 */
function getVolumeInfo(text, divisionName) {
  const volumeMatch = /<vol n="(.+?)"/.exec(text);
  const boMatch = /<vol[^>]+?bo="(.+?)"/.exec(text);
  if (!volumeMatch || !boMatch) {
    // Returning null lets the caller keep the previously known volume info.
    return null;
  }
  return `第 ${volumeMatch[1]} 函, ${divisionName} ${boMatch[1]}`;
}
/**
 * Logs the keys of `leftImageFileNames` to the console as images that have
 * no page-break text. Logs nothing when the object has no keys.
 *
 * @param {Object} leftImageFileNames - Object whose own enumerable keys are
 *   the image names to report.
 * @returns {void}
 */
function checkImageHasNoPb(leftImageFileNames) {
  const orphanNames = Object.keys(leftImageFileNames);
  if (orphanNames.length === 0) {
    return;
  }
  console.log(`These image has no pb text: ${orphanNames.join(', ')}`);
}
// Write the accumulated report to result.txt in the current working directory.
fs.writeFileSync('./result.txt', result, { encoding: 'utf8' });