Skip to content

Commit

Permalink
Merge pull request #39 from munach/30-improve-highlight-extraction
Browse files Browse the repository at this point in the history
30 improve highlight extraction
  • Loading branch information
floxdeveloper authored Feb 9, 2025
2 parents 10cc5a6 + 05d4c97 commit 16a9eeb
Show file tree
Hide file tree
Showing 2 changed files with 112 additions and 12 deletions.
7 changes: 4 additions & 3 deletions src/extractHighlight.ts
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,9 @@ export function extractHighlight(annot: any, items: any) {
quad[0].y
);
const res = searchQuad(minx, maxx, miny, maxy, items);
if (txt.substring(txt.length - 1) != "-") {
return txt + " " + res; // concatenate lines by 'blank'
// If the text accumulated so far is non-empty and does not end with a hyphen, join the lines with a single space.
if (txt != "" && txt.substring(txt.length - 1) != "-") {
return txt + " " + res;
} else if (
txt.substring(txt.length - 2).toLowerCase() ==
txt.substring(txt.length - 2) && // end by lowercase-
Expand All @@ -78,7 +79,7 @@ export function extractHighlight(annot: any, items: any) {
// and start with lowercase
return txt.substring(0, txt.length - 1) + res; // remove hyphen
} else {
return txt + res; // keep hyphon
return txt + res; // keep the hyphen; or, if the previous text is empty, return the result unchanged
}
}, "");
return highlight;
Expand Down
117 changes: 108 additions & 9 deletions test/extractHighlight.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,18 +12,117 @@ beforeEach(() => {
jest.clearAllMocks();
});

describe('extractHighlight', () => {
describe('extractHighlight - simple text', () => {
const items = [
{ str: 'Hello', transform: [0, 0, 0, 0, 10, 10], width: 50 },
{ str: 'World', transform: [0, 0, 0, 0, 60, 10], width: 50 },
{ str: 'diese', transform: [12.000000267999969, 0, 0, 12.000000267999969, 71.50000108483317, 715.2499987979169], width: 28.68748864068716 },
{ str: '(S. 1)', transform: [12.000000267999969, 0, 0, 12.000000267999969, 52.00000064933322, 685.2499981279169], width: 29.33788865521276 },
{ str: 'Word,', transform: [12.000000267999969, 0, 0, 12.000000267999969, 71.50000108483317, 655.2499974579171], width: 31.78710370991189 },
{ str: '(S. 1)', transform: [12.000000267999969, 0, 0, 12.000000267999969, 52.00000064933322, 625.2499967879171], width: 29.33788865521276 },
{ str: 'Lesen', transform: [12.000000267999969, 0, 0, 12.000000267999969, 71.50000108483317, 595.2499961179171], width: 32.69529673019486 },
{ str: '(S. 1)', transform: [12.000000267999969, 0, 0, 12.000000267999969, 52.00000064933322, 565.2499954479173], width: 29.33788865521276 },
];
const annot = {
quadPoints: [
[{ x: 10, y: 10 }, { x: 60, y: 10 }, { x: 60, y: 20 }, { x: 10, y: 20 }],
],
};
test('should extract highlighted text', () => {
const annot = {
quadPoints: [70.636, 634.118, 81.304, 634.118, 70.636, 622.742, 81.304, 622.742],
};
const result = extractHighlight(annot, items);
expect(result).toBe('Hello World');
expect(result).toBe('1)');
});
test('should extract highlighted text', () => {
const annot = {
quadPoints: [71.5, 603.974, 104.188, 603.974, 71.5, 595.118, 104.188, 595.118],
};
const result = extractHighlight(annot, items);
expect(result).toBe('Lesen');
});
test('should extract highlighted text', () => {
const annot = {
quadPoints: [52, 694.118, 81.304, 694.118, 52, 682.742, 81.304, 682.742],
};
const result = extractHighlight(annot, items);
expect(result).toBe('(S. 1)');
});

test('should extract highlighted text over multiple lines', () => {
const annot = {
quadPoints: [93.508, 723.974, 100.180, 723.974, 93.508, 715.118, 100.180, 715.118, 52.000, 694.118, 63.988, 694.118, 52.000, 682.742, 63.988, 682.742],

};
const result = extractHighlight(annot, items);
expect(result).toBe('e (S');
});

test('should extract highlighted letter', () => {
const annot = {
quadPoints: [71.5, 663.974, 82.816, 663.974, 71.5, 653.558, 82.816, 653.558],
};
const result = extractHighlight(annot, items);
expect(result).toBe('W');
});

test('should extract highlighted letter', () => {
const annot = {
quadPoints: [82.609, 663.974, 89.281, 663.974, 82.609, 653.558, 89.281, 653.558],
};
const result = extractHighlight(annot, items);
expect(result).toBe('o');
});

test('should extract highlighted letter', () => {
const annot = {
quadPoints: [89.281, 663.974, 93.445, 663.974, 89.281, 653.558, 93.445, 653.558],
};
const result = extractHighlight(annot, items);
expect(result).toBe('r');
});


test('should extract highlighted letter', () => {
const annot = {
quadPoints: [93.277, 663.974, 99.949, 663.974, 93.277, 653.558, 99.949, 653.558],
};
const result = extractHighlight(annot, items);
expect(result).toBe('d');
});


test('should extract highlighted letter', () => {
const annot = {
quadPoints: [99.949, 663.974, 103.273, 663.974, 99.949, 653.558, 103.273, 653.558],
};
const result = extractHighlight(annot, items);
expect(result).toBe(',');
});

test('should extract underlined word', () => {
const annot = {
quadPoints: [71.5, 603.974, 104.188, 603.974, 71.5, 594.118, 104.188, 594.118],
};
const result = extractHighlight(annot, items);
expect(result).toBe('Lesen');
});

test('should extract underlined letter', () => {
const annot = {
quadPoints: [55.996, 634.118, 63.988, 634.118, 55.996, 621.742, 63.988, 621.742],
};
const result = extractHighlight(annot, items);
expect(result).toBe('S');
});

test('should extract swiggled letter', () => {
const annot = {
quadPoints: [71.5, 663.974, 82.816, 663.974, 71.5, 652.558, 82.816, 652.558],
};
const result = extractHighlight(annot, items);
expect(result).toBe('W');
});

test('should extract squiggled word', () => {
const annot = {
quadPoints: [71.5, 723.974, 87.508, 723.974, 71.5, 714.118, 87.508, 714.118],
};
const result = extractHighlight(annot, items);
expect(result).toBe('die');
});
});

0 comments on commit 16a9eeb

Please sign in to comment.