-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
6416dfa
commit 3546f53
Showing
14 changed files
with
536 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
128 changes: 128 additions & 0 deletions
128
packages/block-tools/src/HtmlDeserializer/rules/office-online.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,128 @@ | ||
import {BLOCK_DEFAULT_STYLE, DEFAULT_BLOCK, DEFAULT_SPAN} from '../../constants' | ||
import {DeserializerRule} from '../../types' | ||
import {isElement, tagName} from '../helpers' | ||
import {spanRule} from './span' | ||
|
||
/**
 * Parses an ARIA-style level attribute into a positive integer.
 *
 * Returns `undefined` when the attribute is missing, empty, not numeric
 * (`Number.parseInt` yields `NaN`) or smaller than 1, so callers never build
 * values such as `hNaN` or a list level of `NaN`.
 */
function parseLevel(raw: string | null): number | undefined {
  if (!raw) {
    return undefined
  }

  const level = Number.parseInt(raw, 10)

  return Number.isInteger(level) && level >= 1 ? level : undefined
}

/**
 * Creates the deserializer rules that handle markup shaped like the Office
 * Online fixtures: `<p role="heading" aria-level="N">` headings,
 * `<li data-aria-level="N">` list items and `<span class="TextRun">` text.
 *
 * Each rule returns `undefined` when it does not handle the given node.
 */
export function createOfficeOnlineRules(): Array<DeserializerRule> {
  return [
    {
      // Headings: <p role="heading" aria-level="N"> becomes a block styled `hN`
      deserialize(el, next) {
        if (!isElement(el) || tagName(el) !== 'p') {
          return undefined
        }

        if (el.getAttribute('role') !== 'heading') {
          return undefined
        }

        const level = parseLevel(el.getAttribute('aria-level'))

        // A missing or malformed aria-level leaves the paragraph to other rules.
        if (level === undefined) {
          return undefined
        }

        return {
          ...DEFAULT_BLOCK,
          style: `h${level}`,
          children: next(el.childNodes),
        }
      },
    },
    {
      // Lists: <li data-aria-level="N"> inside <ul>/<ol> becomes a list block
      deserialize(el, next) {
        if (!isElement(el) || tagName(el) !== 'li') {
          return undefined
        }

        const level = parseLevel(el.getAttribute('data-aria-level'))

        // The kind of list is taken from the direct parent element.
        const parentTag = tagName(el.parentElement)
        const listItem =
          parentTag === 'ul'
            ? 'bullet'
            : parentTag === 'ol'
              ? 'number'
              : undefined

        if (listItem === undefined || level === undefined) {
          return undefined
        }

        return {
          ...DEFAULT_BLOCK,
          listItem,
          level,
          children: next(el.childNodes),
          style: BLOCK_DEFAULT_STYLE,
        }
      },
    },
    {
      // Spans: <span class="TextRun"> becomes a span with marks read from the
      // inline style attribute
      deserialize(el) {
        if (!isElement(el) || tagName(el) !== 'span') {
          return undefined
        }

        if (el.classList.contains('EOP')) {
          // NOTE(review): this returns the same value as the fall-through at
          // the end of the rule. Presumably the intent is to ignore
          // end-of-paragraph marker spans; confirm how the deserializer
          // treats `undefined`.
          return undefined
        }

        if (!el.classList.contains('TextRun')) {
          return undefined
        }

        const marks: Array<string> = []
        const style = el.getAttribute('style')

        if (style) {
          if (/font-style\s*:\s*italic/.test(style)) {
            marks.push('em')
          }

          if (/font-weight\s*:\s*bold/.test(style)) {
            marks.push('strong')
          }

          // Underlined text runs directly inside an <a> do not get the
          // `underline` mark.
          if (
            /text-decoration\s*:\s*underline/.test(style) &&
            tagName(el.parentNode) !== 'a'
          ) {
            marks.push('underline')
          }
        }

        // Collapse every run of two or more whitespace characters to one space.
        const text = (el.textContent ?? '').replace(/\s\s+/g, ' ')

        return {
          ...DEFAULT_SPAN,
          marks,
          text,
        }
      },
    },
  ]
}
|
||
/**
 * Tells whether an element carries one of the Office Online text-run classes.
 *
 * @param element - The element whose class list is inspected.
 * @returns `true` when the class list contains `TextRun` or `NormalTextRun`.
 */
function isOfficeOnlineElement(element: Element): boolean {
  const {classList} = element

  return classList.contains('TextRun') || classList.contains('NormalTextRun')
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
import {DEFAULT_SPAN} from '../../constants' | ||
import {DeserializerRule} from '../../types' | ||
import {isElement, tagName} from '../helpers' | ||
|
||
/**
 * Deserializer rule for a `<span>` whose only child is a single text node.
 *
 * Produces a span with no marks whose text is the text node's content with
 * every run of two or more whitespace characters collapsed to one space.
 * Returns `undefined` for any other node.
 */
export const spanRule: DeserializerRule = {
  deserialize: (node) => {
    if (!isElement(node) || tagName(node) !== 'span') {
      return undefined
    }

    const {childNodes} = node
    // Read the child once so the `#text` check and the text read use the
    // same, narrowed node.
    const soleChild = childNodes.length === 1 ? childNodes[0] : undefined

    if (soleChild === undefined || soleChild.nodeName !== '#text') {
      return undefined
    }

    return {
      ...DEFAULT_SPAN,
      marks: [],
      text: (soleChild.textContent ?? '').replace(/\s\s+/g, ' '),
    }
  },
}
1 change: 1 addition & 0 deletions
1
packages/block-tools/test/html-to-blocks/office-online-headings.html
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
<meta charset='utf-8'><div class="OutlineElement Ltr SCXW104514979 BCX0" style="-webkit-user-drag: none; -webkit-tap-highlight-color: transparent; margin: 0px; padding: 0px; user-select: text; clear: both; cursor: text; overflow: visible; position: relative; direction: ltr; color: rgb(0, 0, 0); font-family: "Segoe UI", "Segoe UI Web", Arial, Verdana, sans-serif; font-size: 12px; font-style: normal; font-variant-ligatures: normal; font-variant-caps: normal; font-weight: 400; letter-spacing: normal; orphans: 2; text-align: start; text-indent: 0px; text-transform: none; widows: 2; word-spacing: 0px; -webkit-text-stroke-width: 0px; white-space: normal; background-color: rgb(255, 255, 255); text-decoration-thickness: initial; text-decoration-style: initial; text-decoration-color: initial;"><p class="Paragraph SCXW104514979 BCX0" role="heading" aria-level="1" xml:lang="EN-US" lang="EN-US" paraid="1941180722" paraeid="{aa52cbe1-de22-42de-8a6b-3bf858441b21}{162}" style="-webkit-user-drag: none; -webkit-tap-highlight-color: transparent; margin: 24px 0px 5.33333px; padding: 0px; user-select: text; overflow-wrap: break-word; white-space: pre-wrap; font-weight: normal; font-style: normal; vertical-align: baseline; font-kerning: none; background-color: transparent; color: rgb(15, 71, 97); text-align: left; text-indent: 0px;"><span data-contrast="none" xml:lang="EN-US" lang="EN-US" class="TextRun SCXW104514979 BCX0" style="-webkit-user-drag: none; -webkit-tap-highlight-color: transparent; margin: 0px; padding: 0px; user-select: text; font-variant-ligatures: none !important; color: rgb(15, 71, 97); font-size: 20pt; font-style: normal; line-height: 36.0375px; font-family: "Aptos Display", "Aptos Display_EmbeddedFont", "Aptos Display_MSFontService", sans-serif;"><span class="NormalTextRun SCXW104514979 BCX0" data-ccp-parastyle="heading 1" style="-webkit-user-drag: none; -webkit-tap-highlight-color: transparent; margin: 0px; padding: 0px; user-select: text;">Headin</span><span 
class="NormalTextRun SCXW104514979 BCX0" data-ccp-parastyle="heading 1" style="-webkit-user-drag: none; -webkit-tap-highlight-color: transparent; margin: 0px; padding: 0px; user-select: text;">g 1</span></span><span class="EOP SCXW104514979 BCX0" data-ccp-props="{"134245418":true,"134245529":true,"335559738":360,"335559739":80}" style="-webkit-user-drag: none; -webkit-tap-highlight-color: transparent; margin: 0px; padding: 0px; user-select: text; font-size: 20pt; line-height: 36.0375px; font-family: "Aptos Display", "Aptos Display_EmbeddedFont", "Aptos Display_MSFontService", sans-serif; color: rgb(15, 71, 97);"> </span></p></div><div class="OutlineElement Ltr SCXW104514979 BCX0" style="-webkit-user-drag: none; -webkit-tap-highlight-color: transparent; margin: 0px; padding: 0px; user-select: text; clear: both; cursor: text; overflow: visible; position: relative; direction: ltr; color: rgb(0, 0, 0); font-family: "Segoe UI", "Segoe UI Web", Arial, Verdana, sans-serif; font-size: 12px; font-style: normal; font-variant-ligatures: normal; font-variant-caps: normal; font-weight: 400; letter-spacing: normal; orphans: 2; text-align: start; text-indent: 0px; text-transform: none; widows: 2; word-spacing: 0px; -webkit-text-stroke-width: 0px; white-space: normal; background-color: rgb(255, 255, 255); text-decoration-thickness: initial; text-decoration-style: initial; text-decoration-color: initial;"><p class="Paragraph SCXW104514979 BCX0" role="heading" aria-level="2" xml:lang="EN-US" lang="EN-US" paraid="2032552764" paraeid="{6b2bb504-0a9f-47cd-a86d-c572a25d51fa}{100}" style="-webkit-user-drag: none; -webkit-tap-highlight-color: transparent; margin: 10.6667px 0px 5.33333px; padding: 0px; user-select: text; overflow-wrap: break-word; white-space: pre-wrap; font-weight: normal; font-style: normal; vertical-align: baseline; font-kerning: none; background-color: transparent; color: rgb(15, 71, 97); text-align: left; text-indent: 0px;"><span data-contrast="none" xml:lang="EN-US" 
lang="EN-US" class="TextRun SCXW104514979 BCX0" style="-webkit-user-drag: none; -webkit-tap-highlight-color: transparent; margin: 0px; padding: 0px; user-select: text; font-variant-ligatures: none !important; color: rgb(15, 71, 97); font-size: 16pt; font-style: normal; line-height: 27.9px; font-family: "Aptos Display", "Aptos Display_EmbeddedFont", "Aptos Display_MSFontService", sans-serif;"><span class="NormalTextRun SCXW104514979 BCX0" data-ccp-parastyle="heading 2" style="-webkit-user-drag: none; -webkit-tap-highlight-color: transparent; margin: 0px; padding: 0px; user-select: text;">Heading 2</span></span><span class="EOP SCXW104514979 BCX0" data-ccp-props="{"134245418":true,"134245529":true,"335559738":160,"335559739":80}" style="-webkit-user-drag: none; -webkit-tap-highlight-color: transparent; margin: 0px; padding: 0px; user-select: text; font-size: 16pt; line-height: 27.9px; font-family: "Aptos Display", "Aptos Display_EmbeddedFont", "Aptos Display_MSFontService", sans-serif; color: rgb(15, 71, 97);"> </span></p></div><div class="OutlineElement Ltr SCXW104514979 BCX0" style="-webkit-user-drag: none; -webkit-tap-highlight-color: transparent; margin: 0px; padding: 0px; user-select: text; clear: both; cursor: text; overflow: visible; position: relative; direction: ltr; color: rgb(0, 0, 0); font-family: "Segoe UI", "Segoe UI Web", Arial, Verdana, sans-serif; font-size: 12px; font-style: normal; font-variant-ligatures: normal; font-variant-caps: normal; font-weight: 400; letter-spacing: normal; orphans: 2; text-align: start; text-indent: 0px; text-transform: none; widows: 2; word-spacing: 0px; -webkit-text-stroke-width: 0px; white-space: normal; background-color: rgb(255, 255, 255); text-decoration-thickness: initial; text-decoration-style: initial; text-decoration-color: initial;"><p class="Paragraph SCXW104514979 BCX0" role="heading" aria-level="3" xml:lang="EN-US" lang="EN-US" paraid="428206664" paraeid="{6b2bb504-0a9f-47cd-a86d-c572a25d51fa}{176}" 
style="-webkit-user-drag: none; -webkit-tap-highlight-color: transparent; margin: 10.6667px 0px 5.33333px; padding: 0px; user-select: text; overflow-wrap: break-word; white-space: pre-wrap; font-weight: normal; font-style: normal; vertical-align: baseline; font-kerning: none; background-color: transparent; color: rgb(15, 71, 97); text-align: left; text-indent: 0px;"><span data-contrast="none" xml:lang="EN-US" lang="EN-US" class="TextRun SCXW104514979 BCX0" style="-webkit-user-drag: none; -webkit-tap-highlight-color: transparent; margin: 0px; padding: 0px; user-select: text; font-variant-ligatures: none !important; color: rgb(15, 71, 97); font-size: 14pt; font-style: normal; line-height: 25.575px; font-family: Aptos, Aptos_EmbeddedFont, Aptos_MSFontService, sans-serif;"><span class="NormalTextRun SCXW104514979 BCX0" data-ccp-parastyle="heading 3" style="-webkit-user-drag: none; -webkit-tap-highlight-color: transparent; margin: 0px; padding: 0px; user-select: text;">Heading 3</span></span><span class="EOP SCXW104514979 BCX0" data-ccp-props="{"134245418":true,"134245529":true,"335559738":160,"335559739":80}" style="-webkit-user-drag: none; -webkit-tap-highlight-color: transparent; margin: 0px; padding: 0px; user-select: text; font-size: 14pt; line-height: 25.575px; font-family: Aptos, Aptos_EmbeddedFont, Aptos_MSFontService, sans-serif; color: rgb(15, 71, 97);"> </span></p></div><div class="OutlineElement Ltr SCXW104514979 BCX0" style="-webkit-user-drag: none; -webkit-tap-highlight-color: transparent; margin: 0px; padding: 0px; user-select: text; clear: both; cursor: text; overflow: visible; position: relative; direction: ltr; color: rgb(0, 0, 0); font-family: "Segoe UI", "Segoe UI Web", Arial, Verdana, sans-serif; font-size: 12px; font-style: normal; font-variant-ligatures: normal; font-variant-caps: normal; font-weight: 400; letter-spacing: normal; orphans: 2; text-align: start; text-indent: 0px; text-transform: none; widows: 2; word-spacing: 0px; 
-webkit-text-stroke-width: 0px; white-space: normal; background-color: rgb(255, 255, 255); text-decoration-thickness: initial; text-decoration-style: initial; text-decoration-color: initial;"><p class="Paragraph SCXW104514979 BCX0" role="heading" aria-level="4" xml:lang="EN-US" lang="EN-US" paraid="915197819" paraeid="{6b2bb504-0a9f-47cd-a86d-c572a25d51fa}{236}" style="-webkit-user-drag: none; -webkit-tap-highlight-color: transparent; margin: 5.33333px 0px 2.66667px; padding: 0px; user-select: text; overflow-wrap: break-word; white-space: pre-wrap; font-weight: normal; font-style: italic; vertical-align: baseline; font-kerning: none; background-color: transparent; color: rgb(15, 71, 97); text-align: left; text-indent: 0px;"><span data-contrast="none" xml:lang="EN-US" lang="EN-US" class="TextRun SCXW104514979 BCX0" style="-webkit-user-drag: none; -webkit-tap-highlight-color: transparent; margin: 0px; padding: 0px; user-select: text; font-variant-ligatures: none !important; color: rgb(15, 71, 97); font-size: 12pt; font-style: normal; line-height: 20.925px; font-family: Aptos, Aptos_EmbeddedFont, Aptos_MSFontService, sans-serif;"><span class="NormalTextRun SCXW104514979 BCX0" data-ccp-parastyle="heading 4" style="-webkit-user-drag: none; -webkit-tap-highlight-color: transparent; margin: 0px; padding: 0px; user-select: text;">Heading</span><span class="NormalTextRun SCXW104514979 BCX0" data-ccp-parastyle="heading 4" style="-webkit-user-drag: none; -webkit-tap-highlight-color: transparent; margin: 0px; padding: 0px; user-select: text;"> 4</span></span><span class="EOP SCXW104514979 BCX0" data-ccp-props="{"134245418":true,"134245529":true,"335559738":80,"335559739":40}" style="-webkit-user-drag: none; -webkit-tap-highlight-color: transparent; margin: 0px; padding: 0px; user-select: text; font-size: 12pt; line-height: 20.925px; font-family: Aptos, Aptos_EmbeddedFont, Aptos_MSFontService, sans-serif; color: rgb(15, 71, 97);"> </span></p></div> |
67 changes: 67 additions & 0 deletions
67
packages/block-tools/test/html-to-blocks/office-online-headings.test.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
import fs from 'node:fs' | ||
import path from 'node:path' | ||
import {JSDOM} from 'jsdom' | ||
import {expect, test} from 'vitest' | ||
import {htmlToBlocks} from '../../src' | ||
import defaultSchema from '../fixtures/defaultSchema' | ||
import {createTestKeyGenerator} from '../test-key-generator' | ||
|
||
// Block content type of the `body` field on the `blogPost` type in the
// fixture schema.
const blockContentType = defaultSchema
  .get('blogPost')
  .fields.find((field: {name: string}) => field.name === 'body').type

// Office Online clipboard markup containing headings at levels 1 through 4.
const html = fs
  .readFileSync(path.resolve(__dirname, 'office-online-headings.html'))
  .toString()

const keyGenerator = createTestKeyGenerator()

// Each `<p role="heading" aria-level="N">` in the fixture should become one
// block styled `hN` whose single span holds the heading text.
test(htmlToBlocks.name, () => {
  const blocks = htmlToBlocks(html, blockContentType, {
    // Named `rawHtml` so the callback parameter does not shadow the fixture.
    parseHtml: (rawHtml) => new JSDOM(rawHtml).window.document,
    keyGenerator,
  })

  expect(blocks).toMatchObject([
    {
      _type: 'block',
      children: [
        {
          _type: 'span',
          text: 'Heading 1',
        },
      ],
      style: 'h1',
    },
    {
      _type: 'block',
      children: [
        {
          _type: 'span',
          text: 'Heading 2',
        },
      ],
      style: 'h2',
    },
    {
      _type: 'block',
      children: [
        {
          _type: 'span',
          text: 'Heading 3',
        },
      ],
      style: 'h3',
    },
    {
      _type: 'block',
      children: [
        {
          _type: 'span',
          text: 'Heading 4',
        },
      ],
      style: 'h4',
    },
  ])
})
1 change: 1 addition & 0 deletions
1
packages/block-tools/test/html-to-blocks/office-online-link.html
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
<meta charset='utf-8'><a class="Hyperlink SCXW204275500 BCX0" href="https://example.com/" target="_blank" rel="noreferrer noopener" style="-webkit-user-drag: none; -webkit-tap-highlight-color: transparent; margin: 0px; padding: 0px; user-select: text; cursor: text; font-family: "Segoe UI", "Segoe UI Web", Arial, Verdana, sans-serif; font-size: 12px; font-style: normal; font-variant-ligatures: normal; font-variant-caps: normal; font-weight: 400; letter-spacing: normal; orphans: 2; text-align: left; text-indent: 0px; text-transform: none; widows: 2; word-spacing: 0px; -webkit-text-stroke-width: 0px; white-space: pre-wrap; background-color: rgb(255, 255, 255); text-decoration: none; color: inherit;"><span data-contrast="none" xml:lang="EN-US" lang="EN-US" class="TextRun Underlined SCXW204275500 BCX0" style="-webkit-user-drag: none; -webkit-tap-highlight-color: transparent; margin: 0px; padding: 0px; user-select: text; font-variant-ligatures: none !important; color: rgb(70, 120, 134); font-size: 12pt; text-decoration: underline; line-height: 20.925px; font-family: Aptos, Aptos_EmbeddedFont, Aptos_MSFontService, sans-serif; font-weight: normal;"><span class="NormalTextRun SCXW204275500 BCX0" data-ccp-charstyle="Hyperlink" style="-webkit-user-drag: none; -webkit-tap-highlight-color: transparent; margin: 0px; padding: 0px; user-select: text;">a link</span></span></a><span data-contrast="auto" xml:lang="EN-US" lang="EN-US" class="TextRun EmptyTextRun SCXW204275500 BCX0" style="-webkit-user-drag: none; -webkit-tap-highlight-color: transparent; margin: 0px; padding: 0px; user-select: text; font-variant-ligatures: none !important; color: rgb(0, 0, 0); font-style: normal; font-variant-caps: normal; font-weight: normal; letter-spacing: normal; orphans: 2; text-align: left; text-indent: 0px; text-transform: none; widows: 2; word-spacing: 0px; -webkit-text-stroke-width: 0px; white-space: pre-wrap; background-color: rgb(255, 255, 255); text-decoration-thickness: initial; 
text-decoration-style: initial; text-decoration-color: initial; font-size: 12pt; line-height: 20.925px; font-family: Aptos, Aptos_EmbeddedFont, Aptos_MSFontService, sans-serif;"></span><span class="EOP SCXW204275500 BCX0" data-ccp-props="{}" style="-webkit-user-drag: none; -webkit-tap-highlight-color: transparent; margin: 0px; padding: 0px; user-select: text; color: rgb(0, 0, 0); font-style: normal; font-variant-ligatures: normal; font-variant-caps: normal; font-weight: 400; letter-spacing: normal; orphans: 2; text-align: left; text-indent: 0px; text-transform: none; widows: 2; word-spacing: 0px; -webkit-text-stroke-width: 0px; white-space: pre-wrap; background-color: rgb(255, 255, 255); text-decoration-thickness: initial; text-decoration-style: initial; text-decoration-color: initial; font-size: 12pt; line-height: 20.925px; font-family: Aptos, Aptos_EmbeddedFont, Aptos_MSFontService, sans-serif;"> </span> |
Oops, something went wrong.