Skip to content

Commit

Permalink
Merge pull request #1 from prezly/fix/care-1352-pasting-from-word-missing-spaces
Browse files Browse the repository at this point in the history

[CARE-1352] Do not consider whitespace-only elements empty
  • Loading branch information
e1himself authored Apr 4, 2023
2 parents 29079fa + d4d48ba commit 04bb2ef
Show file tree
Hide file tree
Showing 4 changed files with 30 additions and 1 deletion.
21 changes: 21 additions & 0 deletions src/__tests__/input/whitespaces-4.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="content-type" content="text/html; charset=utf-8"/>
<title></title>
<meta name="generator" content="LibreOffice 7.4.6.2 (Linux)"/>
<style type="text/css">
@page { size: 8.5in 11in; margin: 0.79in }
p { line-height: 115%; text-align: left; orphans: 2; widows: 2; margin-bottom: 0.1in; direction: ltr; background: transparent }
p.western { font-size: 11pt; so-language: en-US }
p.cjk { font-size: 11pt; so-language: en-US }
p.ctl { font-size: 11pt }
a:visited { color: #800080; text-decoration: underline }
a:link { color: #0000ff; text-decoration: underline }
</style>
</head>
<body lang="nl-BE" link="#0000ff" vlink="#800080" dir="ltr"><p align="justify" style="line-height: 100%; margin-left: 0.5in; margin-bottom: 0in">
<font color="#003768"> <font face="Calibri, serif"><font face="Calibri, serif"><span lang="en-GB"><b>euros</b></span></font></font><font color="#000000"><font face="Calibri, serif"><font size="3" style="font-size: 12pt"><span lang="en-GB">
</span></font></font></font><font color="#000000"><font face="Calibri, serif"><span lang="en-GB">in</span></font></font></font></p>
</body>
</html>
1 change: 1 addition & 0 deletions src/__tests__/output/whitespaces-4.html
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
<body><p align="justify" style="line-height: 100%; margin-left: 0.5in; margin-bottom: 0in"><span color="#003768"> <font face="Calibri, serif"><font face="Calibri, serif"><span lang="en-GB"><b>euros</b></span></font></font><font color="#000000"><font face="Calibri, serif"><font size="3" style="font-size: 12pt"><span lang="en-GB"> </span></font></font></font><font color="#000000"><font face="Calibri, serif"><span lang="en-GB">in</span></font></font></span></p></body>
7 changes: 7 additions & 0 deletions src/cleanDocx.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -45,4 +45,11 @@ describe('cleanDocx', () => {
const result = cleanDocx(html, MOCK_RTF);
expect(result).toBe(expected);
});

// Regression test for CARE-1352: the input fixture contains a <span> whose only
// content is whitespace, and the expected fixture still contains that span.
it('Preserves newline-only spans', () => {
const html = readTestFile('input/whitespaces-4.html');
const expected = readTestFile('output/whitespaces-4.html');
const result = cleanDocx(html, MOCK_RTF);
// The cleaned markup must match the expected fixture exactly.
expect(result).toBe(expected);
});
});
2 changes: 1 addition & 1 deletion src/lib/cleanEmptyElements.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ import { traverseElements } from './traverseElements';
// Node names that isEmpty() never reports as empty, regardless of their content.
const ALLOWED_EMPTY_ELEMENTS = ['BR', 'IMG'];

/**
 * Reports whether an element should be treated as empty, based on its
 * node name and its `innerHTML`.
 *
 * NOTE(review): this hunk is rendered without +/- markers. Its header reports
 * "1 addition & 1 deletion", so the first `return` below is presumably the
 * removed line and the second `return` its replacement — confirm against the
 * actual file before relying on either.
 */
function isEmpty(element: Element): boolean {
// Presumably the deleted line: whitespace-only innerHTML is trimmed to '' and so counts as empty.
return !ALLOWED_EMPTY_ELEMENTS.includes(element.nodeName) && !element.innerHTML.trim();
// Presumably the added line: only a zero-length innerHTML counts as empty, so whitespace-only elements are kept.
return !ALLOWED_EMPTY_ELEMENTS.includes(element.nodeName) && !element.innerHTML;
}

function removeIfEmpty(element: Element): void {
Expand Down

0 comments on commit 04bb2ef

Please sign in to comment.