/**
 * Expansion table for single-code-point Latin ligatures and digraphs.
 * Keys are written as `\u` escapes on purpose: if the literal characters are
 * used, any Unicode normalization or re-encoding of this source file can
 * collapse a key into its own expansion, silently turning the replacement
 * into a no-op.
 */
const LIGATURE_EXPANSIONS = Object.freeze({
  '\u0132': 'IJ', // LATIN CAPITAL LIGATURE IJ
  '\u0133': 'ij', // LATIN SMALL LIGATURE IJ
  '\u0149': '\u02BCn', // LATIN SMALL LETTER N PRECEDED BY APOSTROPHE
  '\u01F1': 'DZ', // LATIN CAPITAL LETTER DZ
  '\u01F2': 'Dz', // LATIN CAPITAL LETTER D WITH SMALL LETTER Z
  '\u01F3': 'dz', // LATIN SMALL LETTER DZ
  '\u01C4': 'D\u017D', // LATIN CAPITAL LETTER DZ WITH CARON
  '\u01C5': 'D\u017E', // LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON
  '\u01C6': 'd\u017E', // LATIN SMALL LETTER DZ WITH CARON
  '\u01C7': 'LJ', // LATIN CAPITAL LETTER LJ
  '\u01C8': 'Lj', // LATIN CAPITAL LETTER L WITH SMALL LETTER J
  '\u01C9': 'lj', // LATIN SMALL LETTER LJ
  '\u01CA': 'NJ', // LATIN CAPITAL LETTER NJ
  '\u01CB': 'Nj', // LATIN CAPITAL LETTER N WITH SMALL LETTER J
  '\u01CC': 'nj', // LATIN SMALL LETTER NJ
  '\uFB00': 'ff', // LATIN SMALL LIGATURE FF
  '\uFB01': 'fi', // LATIN SMALL LIGATURE FI
  '\uFB02': 'fl', // LATIN SMALL LIGATURE FL
  '\uFB03': 'ffi', // LATIN SMALL LIGATURE FFI
  '\uFB04': 'ffl', // LATIN SMALL LIGATURE FFL
  '\uFB05': '\u017Ft', // LATIN SMALL LIGATURE LONG S T
  '\uFB06': 'st', // LATIN SMALL LIGATURE ST
});

// Character class covering exactly the keys of `LIGATURE_EXPANSIONS`.
const LIGATURE_PATTERN = /[\u0132\u0133\u0149\u01C4-\u01CC\u01F1-\u01F3\uFB00-\uFB06]/g;

/**
 * Replaces each ligature/digraph code point listed in `LIGATURE_EXPANSIONS`
 * with its multi-character expansion, in a single pass over the string.
 * All other characters are returned unchanged.
 *
 * @param {string} text
 * @returns {string} `text` with the listed ligatures expanded.
 */
function replaceLigatures(text) {
  return text.replace(LIGATURE_PATTERN, (ligature) => LIGATURE_EXPANSIONS[ligature]);
}