diff --git a/packages/block-tools/src/HtmlDeserializer/index.ts b/packages/block-tools/src/HtmlDeserializer/index.ts index f29e01e5..9a5cc0d6 100644 --- a/packages/block-tools/src/HtmlDeserializer/index.ts +++ b/packages/block-tools/src/HtmlDeserializer/index.ts @@ -80,11 +80,14 @@ export default class HtmlDeserializer { const fragment = parseHtml(html) const children = Array.from(fragment.childNodes) as HTMLElement[] // Ensure that there are no blocks within blocks, and trim whitespace - const blocks = trimWhitespace( - flattenNestedBlocks( - ensureRootIsBlocks(this.deserializeElements(children)), - ), - ) + const deserializedElements = this.deserializeElements(children) + // console.log('deserializedElements', deserializedElements) + const rawBlocks = ensureRootIsBlocks(deserializedElements) + // console.log('rawBlocks', rawBlocks) + const flattenedBlocks = flattenNestedBlocks(rawBlocks) + // console.log('flattenedBlocks', flattenedBlocks) + const blocks = trimWhitespace(flattenedBlocks) + // console.log('blocks', blocks) if (this._markDefs.length > 0) { blocks diff --git a/packages/block-tools/src/HtmlDeserializer/rules/index.ts b/packages/block-tools/src/HtmlDeserializer/rules/index.ts index 9e3fe992..b7eee738 100644 --- a/packages/block-tools/src/HtmlDeserializer/rules/index.ts +++ b/packages/block-tools/src/HtmlDeserializer/rules/index.ts @@ -3,6 +3,7 @@ import type {BlockEnabledFeatures, DeserializerRule} from '../../types' import createGDocsRules from './gdocs' import createHTMLRules from './html' import createNotionRules from './notion' +import {createOfficeOnlineRules} from './office-online' import createWordRules from './word' export function createRules( @@ -13,6 +14,7 @@ export function createRules( ...createWordRules(), ...createNotionRules(blockContentType), ...createGDocsRules(blockContentType, options), + ...createOfficeOnlineRules(), ...createHTMLRules(blockContentType, options), ] } diff --git 
a/packages/block-tools/src/HtmlDeserializer/rules/office-online.ts b/packages/block-tools/src/HtmlDeserializer/rules/office-online.ts
new file mode 100644
index 00000000..5308842e
--- /dev/null
+++ b/packages/block-tools/src/HtmlDeserializer/rules/office-online.ts
@@ -0,0 +1,150 @@
+import {BLOCK_DEFAULT_STYLE, DEFAULT_BLOCK, DEFAULT_SPAN} from '../../constants'
+import type {DeserializerRule} from '../../types'
+import {isElement, tagName} from '../helpers'
+import {spanRule} from './span'
+
+/** Deepest heading level that is mapped to a block style (`h1` through `h6`). */
+const MAX_HEADING_LEVEL = 6
+
+/**
+ * Parses the value of an ARIA level attribute.
+ *
+ * @param raw - The raw attribute value, or `null` when the attribute is absent.
+ * @returns The level as a positive integer, or `undefined` when the value is
+ *   missing, empty, or does not start with a positive integer.
+ */
+function parseLevel(raw: string | null): number | undefined {
+  if (!raw) {
+    return undefined
+  }
+
+  const level = Number.parseInt(raw, 10)
+
+  // `parseInt` yields NaN for non-numeric input; never let that leak into a
+  // style name (`hNaN`) or a list level.
+  return Number.isInteger(level) && level >= 1 ? level : undefined
+}
+
+/**
+ * Creates the deserializer rules for three kinds of markup: paragraphs with
+ * `role="heading"`, list items carrying `data-aria-level`, and `TextRun`
+ * spans.
+ *
+ * Every rule returns `undefined` for nodes it does not recognize.
+ *
+ * @returns The heading, list item and span rules, in that order.
+ */
+export function createOfficeOnlineRules(): Array<DeserializerRule> {
+  return [
+    {
+      // Headings: `<p role="heading" aria-level="N">` becomes an `hN` block
+      deserialize(el, next) {
+        if (!isElement(el) || tagName(el) !== 'p') {
+          return undefined
+        }
+
+        if (el.getAttribute('role') !== 'heading') {
+          return undefined
+        }
+
+        const level = parseLevel(el.getAttribute('aria-level'))
+
+        if (level === undefined || level > MAX_HEADING_LEVEL) {
+          return undefined
+        }
+
+        return {
+          ...DEFAULT_BLOCK,
+          style: `h${level}`,
+          children: next(el.childNodes),
+        }
+      },
+    },
+    {
+      // List items: `<li data-aria-level="N">` directly inside `<ul>`/`<ol>`
+      deserialize(el, next) {
+        if (!isElement(el) || tagName(el) !== 'li') {
+          return undefined
+        }
+
+        const level = parseLevel(el.getAttribute('data-aria-level'))
+        const parent = el.parentElement
+        const parentTag = parent ? tagName(parent) : undefined
+        const listItem =
+          parentTag === 'ul'
+            ? 'bullet'
+            : parentTag === 'ol'
+              ? 'number'
+              : undefined
+
+        if (listItem === undefined || level === undefined) {
+          return undefined
+        }
+
+        return {
+          ...DEFAULT_BLOCK,
+          listItem,
+          level,
+          children: next(el.childNodes),
+          style: BLOCK_DEFAULT_STYLE,
+        }
+      },
+    },
+    {
+      // Spans: `<span class="TextRun">`, decorators read from the inline style
+      deserialize(el) {
+        if (!isElement(el) || tagName(el) !== 'span') {
+          return undefined
+        }
+
+        // `EOP` spans never become text, even when they are also `TextRun`s
+        if (el.classList.contains('EOP') || !el.classList.contains('TextRun')) {
+          return undefined
+        }
+
+        const marks: Array<string> = []
+        const style = el.getAttribute('style')
+
+        if (style) {
+          if (/font-style\s*:\s*italic/.test(style)) {
+            marks.push('em')
+          }
+
+          if (/font-weight\s*:\s*bold/.test(style)) {
+            marks.push('strong')
+          }
+
+          // Underlined runs directly inside an anchor get no underline mark
+          if (
+            /text-decoration\s*:\s*underline/.test(style) &&
+            tagName(el.parentNode) !== 'a'
+          ) {
+            marks.push('underline')
+          }
+        }
+
+        // Collapse each run of two or more whitespace characters into one space
+        const text = (el.textContent ?? '').replace(/\s\s+/g, ' ')
+
+        return {
+          ...DEFAULT_SPAN,
+          marks,
+          text,
+        }
+      },
+    },
+  ]
+}
+
+/**
+ * Tells whether an element carries the `TextRun` or `NormalTextRun` class.
+ *
+ * NOTE(review): nothing in this file calls this helper; wire it into the span
+ * rule or drop it.
+ */
+function isOfficeOnlineElement(element: Element) {
+  return (
+    element.classList.contains('TextRun') ||
+    element.classList.contains('NormalTextRun')
+  )
+}
diff --git a/packages/block-tools/src/HtmlDeserializer/rules/span.ts b/packages/block-tools/src/HtmlDeserializer/rules/span.ts
new file mode 100644
index 00000000..06e87dde
--- /dev/null
+++ b/packages/block-tools/src/HtmlDeserializer/rules/span.ts
@@ -0,0 +1,27 @@
+import {DEFAULT_SPAN} from '../../constants'
+import type {DeserializerRule} from '../../types'
+import {isElement, tagName} from '../helpers'
+
+/**
+ * Turns a `<span>` whose only child is a text node into an unmarked span,
+ * collapsing runs of whitespace into a single space. Returns `undefined` for
+ * every other node.
+ */
+export const spanRule: DeserializerRule = {
+  deserialize: (node) => {
+    if (
+      isElement(node) &&
+      tagName(node) === 'span' &&
+      node.childNodes.length === 1 &&
+      node.childNodes[0].nodeName === '#text'
+    ) {
+      return {
+        ...DEFAULT_SPAN,
+        marks: [],
+        text: (node.childNodes[0].textContent ?? '').replace(/\s\s+/g, ' '),
+      }
+    }
+
+    return undefined
+  },
+}
diff --git a/packages/block-tools/test/html-to-blocks/office-online-headings.html b/packages/block-tools/test/html-to-blocks/office-online-headings.html
new file mode 100644
index 00000000..ec032fe6
--- /dev/null
+++ b/packages/block-tools/test/html-to-blocks/office-online-headings.html
@@ -0,0 +1 @@
+

Heading 1 

Heading 2 

Heading 3 

Heading 4 

diff --git a/packages/block-tools/test/html-to-blocks/office-online-headings.test.ts b/packages/block-tools/test/html-to-blocks/office-online-headings.test.ts new file mode 100644 index 00000000..ca1500a4 --- /dev/null +++ b/packages/block-tools/test/html-to-blocks/office-online-headings.test.ts @@ -0,0 +1,67 @@ +import fs from 'node:fs' +import path from 'node:path' +import {JSDOM} from 'jsdom' +import {expect, test} from 'vitest' +import {htmlToBlocks} from '../../src' +import defaultSchema from '../fixtures/defaultSchema' +import {createTestKeyGenerator} from '../test-key-generator' + +/* Type of the `body` field on the `blogPost` type of the default test schema. */ const blockContentType = defaultSchema + .get('blogPost') + .fields.find((field: any) => field.name === 'body').type + +/* Fixture HTML, read from office-online-headings.html in this test's directory. */ const html = fs + .readFileSync(path.resolve(__dirname, 'office-online-headings.html')) + .toString() + +const keyGenerator = createTestKeyGenerator() + +/* Partial match: four blocks styled h1 through h4, each holding one span with the heading text. */ test(htmlToBlocks.name, () => { + expect( + htmlToBlocks(html, blockContentType, { + parseHtml: (html) => new JSDOM(html).window.document, + keyGenerator, + }), + ).toMatchObject([ + { + _type: 'block', + children: [ + { + _type: 'span', + text: 'Heading 1', + }, + ], + style: 'h1', + }, + { + _type: 'block', + children: [ + { + _type: 'span', + text: 'Heading 2', + }, + ], + style: 'h2', + }, + { + _type: 'block', + children: [ + { + _type: 'span', + text: 'Heading 3', + }, + ], + style: 'h3', + }, + { + _type: 'block', + children: [ + { + _type: 'span', + text: 'Heading 4', + }, + ], + style: 'h4', + }, + ]) +}) diff --git a/packages/block-tools/test/html-to-blocks/office-online-link.html b/packages/block-tools/test/html-to-blocks/office-online-link.html new file mode 100644 index 00000000..3ae1118c --- /dev/null +++ b/packages/block-tools/test/html-to-blocks/office-online-link.html @@ -0,0 +1 @@ +a link diff --git a/packages/block-tools/test/html-to-blocks/office-online-link.test.ts b/packages/block-tools/test/html-to-blocks/office-online-link.test.ts new file mode 100644 index 00000000..e13b8314 --- 
/dev/null +++ b/packages/block-tools/test/html-to-blocks/office-online-link.test.ts @@ -0,0 +1,46 @@ +import fs from 'node:fs' +import path from 'node:path' +import {JSDOM} from 'jsdom' +import {expect, test} from 'vitest' +import {htmlToBlocks} from '../../src' +import defaultSchema from '../fixtures/defaultSchema' +import {createTestKeyGenerator} from '../test-key-generator' + +/* Type of the `body` field on the `blogPost` type of the default test schema. */ const blockContentType = defaultSchema + .get('blogPost') + .fields.find((field: any) => field.name === 'body').type + +/* Fixture HTML, read from office-online-link.html in this test's directory. */ const html = fs + .readFileSync(path.resolve(__dirname, 'office-online-link.html')) + .toString() + +const keyGenerator = createTestKeyGenerator() + +/* Partial match: one block whose only span is marked with the key of a link markDef for example.com. */ test(htmlToBlocks.name, () => { + expect( + htmlToBlocks(html, blockContentType, { + parseHtml: (html) => new JSDOM(html).window.document, + keyGenerator, + }), + ).toMatchObject([ + { + _key: 'randomKey1', + _type: 'block', + children: [ + { + _key: 'randomKey2', + _type: 'span', + text: 'a link', + marks: ['randomKey0'], + }, + ], + markDefs: [ + { + _key: 'randomKey0', + _type: 'link', + href: 'https://example.com/', + }, + ], + }, + ]) +}) diff --git a/packages/block-tools/test/html-to-blocks/office-online-nested-mixed-list.html b/packages/block-tools/test/html-to-blocks/office-online-nested-mixed-list.html new file mode 100644 index 00000000..50d41fa2 --- /dev/null +++ b/packages/block-tools/test/html-to-blocks/office-online-nested-mixed-list.html @@ -0,0 +1 @@ +
  1. foo 

  • bar 

  1. baz 

  1. fizz 

  • buzz 

diff --git a/packages/block-tools/test/html-to-blocks/office-online-nested-mixed-list.test.ts b/packages/block-tools/test/html-to-blocks/office-online-nested-mixed-list.test.ts new file mode 100644 index 00000000..6064c240 --- /dev/null +++ b/packages/block-tools/test/html-to-blocks/office-online-nested-mixed-list.test.ts @@ -0,0 +1,82 @@ +import fs from 'node:fs' +import path from 'node:path' +import {JSDOM} from 'jsdom' +import {expect, test} from 'vitest' +import {htmlToBlocks} from '../../src' +import defaultSchema from '../fixtures/defaultSchema' +import {createTestKeyGenerator} from '../test-key-generator' + +/* Type of the `body` field on the `blogPost` type of the default test schema. */ const blockContentType = defaultSchema + .get('blogPost') + .fields.find((field: any) => field.name === 'body').type + +/* Fixture HTML, read from office-online-nested-mixed-list.html in this test's directory. */ const html = fs + .readFileSync(path.resolve(__dirname, 'office-online-nested-mixed-list.html')) + .toString() + +const keyGenerator = createTestKeyGenerator() + +/* Partial match: five list blocks — number/1, bullet/2, number/3, number/1, bullet/2 — in document order. */ test(htmlToBlocks.name, () => { + expect( + htmlToBlocks(html, blockContentType, { + parseHtml: (html) => new JSDOM(html).window.document, + keyGenerator, + }), + ).toMatchObject([ + { + _type: 'block', + children: [ + { + _type: 'span', + text: 'foo', + }, + ], + level: 1, + listItem: 'number', + }, + { + _type: 'block', + children: [ + { + _type: 'span', + text: 'bar', + }, + ], + level: 2, + listItem: 'bullet', + }, + { + _type: 'block', + children: [ + { + _type: 'span', + text: 'baz', + }, + ], + level: 3, + listItem: 'number', + }, + { + _type: 'block', + children: [ + { + _type: 'span', + text: 'fizz', + }, + ], + level: 1, + listItem: 'number', + }, + { + _type: 'block', + children: [ + { + _type: 'span', + text: 'buzz', + }, + ], + level: 2, + listItem: 'bullet', + }, + ]) +}) diff --git a/packages/block-tools/test/html-to-blocks/office-online-ordered-list.html b/packages/block-tools/test/html-to-blocks/office-online-ordered-list.html new file mode 100644 index 00000000..dde2477b --- /dev/null +++ 
b/packages/block-tools/test/html-to-blocks/office-online-ordered-list.html @@ -0,0 +1 @@ +
  1. foo 

  1. bar 

  1. baz 

  1. buzz 

diff --git a/packages/block-tools/test/html-to-blocks/office-online-ordered-list.test.ts b/packages/block-tools/test/html-to-blocks/office-online-ordered-list.test.ts new file mode 100644 index 00000000..bb8bd93d --- /dev/null +++ b/packages/block-tools/test/html-to-blocks/office-online-ordered-list.test.ts @@ -0,0 +1,113 @@ +import fs from 'node:fs' +import path from 'node:path' +import {JSDOM} from 'jsdom' +import {expect, test} from 'vitest' +import {htmlToBlocks} from '../../src' +import defaultSchema from '../fixtures/defaultSchema' +import {createTestKeyGenerator} from '../test-key-generator' + +/* Type of the `body` field on the `blogPost` type of the default test schema. */ const blockContentType = defaultSchema + .get('blogPost') + .fields.find((field: any) => field.name === 'body').type + +/* Fixture HTML, read from office-online-ordered-list.html in this test's directory. */ const html = fs + .readFileSync(path.resolve(__dirname, 'office-online-ordered-list.html')) + .toString() + +const keyGenerator = createTestKeyGenerator() + +/* Exact match (toEqual): five numbered list blocks at levels 1, 2, 2, 1, 1; `bar` is strong, `baz` is em, `fizz` carries a link mark. */ test(htmlToBlocks.name, () => { + expect( + htmlToBlocks(html, blockContentType, { + parseHtml: (html) => new JSDOM(html).window.document, + keyGenerator, + }), + ).toEqual([ + { + _type: 'block', + _key: 'randomKey1', + children: [ + { + _type: 'span', + _key: 'randomKey2', + marks: [], + text: 'foo', + }, + ], + level: 1, + listItem: 'number', + markDefs: [], + style: 'normal', + }, + { + _type: 'block', + _key: 'randomKey3', + children: [ + { + _type: 'span', + _key: 'randomKey4', + marks: ['strong'], + text: 'bar', + }, + ], + level: 2, + listItem: 'number', + markDefs: [], + style: 'normal', + }, + { + _type: 'block', + _key: 'randomKey5', + children: [ + { + _type: 'span', + _key: 'randomKey6', + marks: ['em'], + text: 'baz', + }, + ], + level: 2, + listItem: 'number', + markDefs: [], + style: 'normal', + }, + { + _type: 'block', + _key: 'randomKey7', + children: [ + { + _type: 'span', + _key: 'randomKey8', + marks: ['randomKey0'], + text: 'fizz', + }, + ], + level: 1, + listItem: 'number', + markDefs: [ + { + _key: 'randomKey0', + _type: 'link', + href: 'https://example.com/', + }, + ], + 
style: 'normal', + }, + { + _type: 'block', + _key: 'randomKey9', + children: [ + { + _type: 'span', + _key: 'randomKey10', + marks: [], + text: 'buzz', + }, + ], + level: 1, + listItem: 'number', + markDefs: [], + style: 'normal', + }, + ]) +}) diff --git a/packages/block-tools/test/html-to-blocks/office-online-simple-decorators.html b/packages/block-tools/test/html-to-blocks/office-online-simple-decorators.html new file mode 100644 index 00000000..69e913f7 --- /dev/null +++ b/packages/block-tools/test/html-to-blocks/office-online-simple-decorators.html @@ -0,0 +1 @@ +bold italic underline  diff --git a/packages/block-tools/test/html-to-blocks/office-online-simple-decorators.test.ts b/packages/block-tools/test/html-to-blocks/office-online-simple-decorators.test.ts new file mode 100644 index 00000000..73aba5aa --- /dev/null +++ b/packages/block-tools/test/html-to-blocks/office-online-simple-decorators.test.ts @@ -0,0 +1,63 @@ +import fs from 'node:fs' +import path from 'node:path' +import {JSDOM} from 'jsdom' +import {expect, test} from 'vitest' +import {htmlToBlocks} from '../../src' +import defaultSchema from '../fixtures/defaultSchema' +import {createTestKeyGenerator} from '../test-key-generator' + +/* Type of the `body` field on the `blogPost` type of the default test schema. */ const blockContentType = defaultSchema + .get('blogPost') + .fields.find((field: any) => field.name === 'body').type + +/* Fixture HTML, read from office-online-simple-decorators.html in this test's directory. */ const html = fs + .readFileSync(path.resolve(__dirname, 'office-online-simple-decorators.html')) + .toString() + +const keyGenerator = createTestKeyGenerator() + +/* Partial match: one block with strong, em and underline spans separated by unmarked single-space spans. */ test(htmlToBlocks.name, () => { + expect( + htmlToBlocks(html, blockContentType, { + parseHtml: (html) => new JSDOM(html).window.document, + keyGenerator, + }), + ).toMatchObject([ + { + _key: 'randomKey0', + _type: 'block', + children: [ + { + _key: 'randomKey1', + _type: 'span', + text: 'bold', + marks: ['strong'], + }, + { + _key: 'randomKey2', + _type: 'span', + text: ' ', + marks: [], + }, + { + _key: 'randomKey3', + _type: 'span', + text: 'italic', + marks: ['em'], + }, + { + _key: 
'randomKey4', + _type: 'span', + text: ' ', + marks: [], + }, + { + _key: 'randomKey5', + _type: 'span', + text: 'underline', + marks: ['underline'], + }, + ], + }, + ]) +})