diff --git a/packages/block-tools/src/HtmlDeserializer/index.ts b/packages/block-tools/src/HtmlDeserializer/index.ts
index f29e01e5..9a5cc0d6 100644
--- a/packages/block-tools/src/HtmlDeserializer/index.ts
+++ b/packages/block-tools/src/HtmlDeserializer/index.ts
@@ -80,11 +80,14 @@ export default class HtmlDeserializer {
const fragment = parseHtml(html)
const children = Array.from(fragment.childNodes) as HTMLElement[]
// Ensure that there are no blocks within blocks, and trim whitespace
- const blocks = trimWhitespace(
- flattenNestedBlocks(
- ensureRootIsBlocks(this.deserializeElements(children)),
- ),
- )
+ const deserializedElements = this.deserializeElements(children)
+ // console.log('deserializedElements', deserializedElements)
+ const rawBlocks = ensureRootIsBlocks(deserializedElements)
+ // console.log('rawBlocks', rawBlocks)
+ const flattenedBlocks = flattenNestedBlocks(rawBlocks)
+ // console.log('flattenedBlocks', flattenedBlocks)
+ const blocks = trimWhitespace(flattenedBlocks)
+ // console.log('blocks', blocks)
if (this._markDefs.length > 0) {
blocks
diff --git a/packages/block-tools/src/HtmlDeserializer/rules/index.ts b/packages/block-tools/src/HtmlDeserializer/rules/index.ts
index 9e3fe992..b7eee738 100644
--- a/packages/block-tools/src/HtmlDeserializer/rules/index.ts
+++ b/packages/block-tools/src/HtmlDeserializer/rules/index.ts
@@ -3,6 +3,7 @@ import type {BlockEnabledFeatures, DeserializerRule} from '../../types'
import createGDocsRules from './gdocs'
import createHTMLRules from './html'
import createNotionRules from './notion'
+import {createOfficeOnlineRules} from './office-online'
import createWordRules from './word'
export function createRules(
@@ -13,6 +14,7 @@ export function createRules(
...createWordRules(),
...createNotionRules(blockContentType),
...createGDocsRules(blockContentType, options),
+ ...createOfficeOnlineRules(),
...createHTMLRules(blockContentType, options),
]
}
diff --git a/packages/block-tools/src/HtmlDeserializer/rules/office-online.ts b/packages/block-tools/src/HtmlDeserializer/rules/office-online.ts
new file mode 100644
index 00000000..5308842e
--- /dev/null
+++ b/packages/block-tools/src/HtmlDeserializer/rules/office-online.ts
@@ -0,0 +1,128 @@
+import {BLOCK_DEFAULT_STYLE, DEFAULT_BLOCK, DEFAULT_SPAN} from '../../constants'
+import {DeserializerRule} from '../../types'
+import {isElement, tagName} from '../helpers'
+import {spanRule} from './span'
+
+/**
+ * Parses an ARIA level attribute value into a positive integer.
+ *
+ * @param raw - attribute value as returned by `getAttribute`
+ * @returns the level, or `undefined` when absent, non-numeric or below 1
+ */
+function parseAriaLevel(raw: string | null): number | undefined {
+  const level = raw === null ? Number.NaN : Number.parseInt(raw, 10)
+  return Number.isNaN(level) || level < 1 ? undefined : level
+}
+
+/**
+ * Builds the deserializer rules for HTML produced by Office Online.
+ * Each rule returns `undefined` for nodes it does not recognise.
+ *
+ * @returns the rules for headings, list items and text-run spans, in that order
+ */
+export function createOfficeOnlineRules(): Array<DeserializerRule> {
+  return [
+    {
+      // Headings: `<p role="heading" aria-level="N">` becomes an `hN` block
+      deserialize(el, next) {
+        if (!isElement(el) || tagName(el) !== 'p') {
+          return undefined
+        }
+
+        // A missing or non-numeric aria-level must not yield an `hNaN` style
+        const level = parseAriaLevel(el.getAttribute('aria-level'))
+        if (el.getAttribute('role') !== 'heading' || level === undefined) {
+          return undefined
+        }
+
+        return {
+          ...DEFAULT_BLOCK,
+          style: `h${level}`,
+          children: next(el.childNodes),
+        }
+      },
+    },
+    {
+      // Lists: `<li data-aria-level="N">` becomes a list block at level N
+      deserialize(el, next) {
+        if (!isElement(el) || tagName(el) !== 'li') {
+          return undefined
+        }
+
+        // The parent list tag decides between bullet and numbered items
+        const parentTag = tagName(el.parentElement)
+        const listItem =
+          parentTag === 'ul'
+            ? 'bullet'
+            : parentTag === 'ol'
+              ? 'number'
+              : undefined
+
+        // Skip items outside <ul>/<ol> or without a usable nesting level,
+        // so a non-numeric attribute never produces `level: NaN`
+        const level = parseAriaLevel(el.getAttribute('data-aria-level'))
+        if (listItem === undefined || level === undefined) {
+          return undefined
+        }
+
+        return {
+          ...DEFAULT_BLOCK,
+          listItem,
+          level,
+          children: next(el.childNodes),
+          style: BLOCK_DEFAULT_STYLE,
+        }
+      },
+    },
+    {
+      // Spans: text wrapped in `<span class="TextRun">` becomes a span with
+      // decorators derived from the inline style attribute
+      deserialize(el) {
+        if (!isElement(el) || tagName(el) !== 'span') {
+          return undefined
+        }
+
+        // `EOP` spans (presumably end-of-paragraph markers) are never
+        // converted here, even when they also carry `TextRun`
+        const classes = el.classList
+        if (classes.contains('EOP') || !classes.contains('TextRun')) {
+          return undefined
+        }
+
+        const marks: Array<string> = []
+        const style = el.getAttribute('style') ?? ''
+
+        // Translate inline CSS declarations into decorator marks
+        if (/font-style\s*:\s*italic/.test(style)) {
+          marks.push('em')
+        }
+
+        if (/font-weight\s*:\s*bold/.test(style)) {
+          marks.push('strong')
+        }
+
+        // Underlined runs sitting directly inside a link get no underline mark
+        if (
+          /text-decoration\s*:\s*underline/.test(style) &&
+          tagName(el.parentNode) !== 'a'
+        ) {
+          marks.push('underline')
+        }
+
+        return {
+          ...DEFAULT_SPAN,
+          marks,
+          // Runs of two or more whitespace characters collapse to one space
+          text: (el.textContent ?? '').replace(/\s\s+/g, ' '),
+        }
+      },
+    },
+  ]
+}
+
+/** Reports whether `element` has the `TextRun` or `NormalTextRun` class. */
+function isOfficeOnlineElement(element: Element) {
+  const {classList} = element
+  const runClasses = ['TextRun', 'NormalTextRun']
+  return runClasses.some((name) => classList.contains(name))
+}
diff --git a/packages/block-tools/src/HtmlDeserializer/rules/span.ts b/packages/block-tools/src/HtmlDeserializer/rules/span.ts
new file mode 100644
index 00000000..06e87dde
--- /dev/null
+++ b/packages/block-tools/src/HtmlDeserializer/rules/span.ts
@@ -0,0 +1,22 @@
+import {DEFAULT_SPAN} from '../../constants'
+import {DeserializerRule} from '../../types'
+import {isElement, tagName} from '../helpers'
+
+/** Matches a `<span>` holding exactly one text node and emits a mark-free span. */
+export const spanRule: DeserializerRule = {
+  deserialize: (node, next) => {
+    // Anything that is not a <span> element is left for other rules
+    if (!isElement(node) || tagName(node) !== 'span') {
+      return undefined
+    }
+
+    // Only handle spans whose sole child is a text node
+    const kids = node.childNodes
+    if (kids.length !== 1 || kids[0].nodeName !== '#text') {
+      return undefined
+    }
+    // Runs of two or more whitespace characters become a single space
+    const text = (kids[0].textContent ?? '').replace(/\s\s+/g, ' ')
+    return {...DEFAULT_SPAN, marks: [], text}
+  },
+}
diff --git a/packages/block-tools/test/html-to-blocks/office-online-headings.html b/packages/block-tools/test/html-to-blocks/office-online-headings.html
new file mode 100644
index 00000000..ec032fe6
--- /dev/null
+++ b/packages/block-tools/test/html-to-blocks/office-online-headings.html
@@ -0,0 +1 @@
+
diff --git a/packages/block-tools/test/html-to-blocks/office-online-headings.test.ts b/packages/block-tools/test/html-to-blocks/office-online-headings.test.ts
new file mode 100644
index 00000000..ca1500a4
--- /dev/null
+++ b/packages/block-tools/test/html-to-blocks/office-online-headings.test.ts
@@ -0,0 +1,67 @@
+import fs from 'node:fs'
+import path from 'node:path'
+import {JSDOM} from 'jsdom'
+import {expect, test} from 'vitest'
+import {htmlToBlocks} from '../../src'
+import defaultSchema from '../fixtures/defaultSchema'
+import {createTestKeyGenerator} from '../test-key-generator'
+// Office Online headings: the fixture is expected to yield blocks styled h1 to h4
+const blockContentType = defaultSchema
+ .get('blogPost')
+ .fields.find((field: any) => field.name === 'body').type
+// Fixture HTML, read from the file next to this test
+const html = fs
+ .readFileSync(path.resolve(__dirname, 'office-online-headings.html'))
+ .toString()
+// Key generator from the shared test helper, handed to htmlToBlocks below
+const keyGenerator = createTestKeyGenerator()
+// toMatchObject is a subset match, so only the listed properties are asserted
+test(htmlToBlocks.name, () => {
+ expect(
+ htmlToBlocks(html, blockContentType, {
+ parseHtml: (html) => new JSDOM(html).window.document,
+ keyGenerator,
+ }),
+ ).toMatchObject([
+ {
+ _type: 'block',
+ children: [
+ {
+ _type: 'span',
+ text: 'Heading 1',
+ },
+ ],
+ style: 'h1',
+ },
+ {
+ _type: 'block',
+ children: [
+ {
+ _type: 'span',
+ text: 'Heading 2',
+ },
+ ],
+ style: 'h2',
+ },
+ {
+ _type: 'block',
+ children: [
+ {
+ _type: 'span',
+ text: 'Heading 3',
+ },
+ ],
+ style: 'h3',
+ },
+ {
+ _type: 'block',
+ children: [
+ {
+ _type: 'span',
+ text: 'Heading 4',
+ },
+ ],
+ style: 'h4',
+ },
+ ])
+})
diff --git a/packages/block-tools/test/html-to-blocks/office-online-link.html b/packages/block-tools/test/html-to-blocks/office-online-link.html
new file mode 100644
index 00000000..3ae1118c
--- /dev/null
+++ b/packages/block-tools/test/html-to-blocks/office-online-link.html
@@ -0,0 +1 @@
+a link
diff --git a/packages/block-tools/test/html-to-blocks/office-online-link.test.ts b/packages/block-tools/test/html-to-blocks/office-online-link.test.ts
new file mode 100644
index 00000000..e13b8314
--- /dev/null
+++ b/packages/block-tools/test/html-to-blocks/office-online-link.test.ts
@@ -0,0 +1,46 @@
+import fs from 'node:fs'
+import path from 'node:path'
+import {JSDOM} from 'jsdom'
+import {expect, test} from 'vitest'
+import {htmlToBlocks} from '../../src'
+import defaultSchema from '../fixtures/defaultSchema'
+import {createTestKeyGenerator} from '../test-key-generator'
+// Office Online link: the fixture is expected to yield one block with a link annotation
+const blockContentType = defaultSchema
+ .get('blogPost')
+ .fields.find((field: any) => field.name === 'body').type
+// Fixture HTML, read from the file next to this test
+const html = fs
+ .readFileSync(path.resolve(__dirname, 'office-online-link.html'))
+ .toString()
+// Key generator from the shared test helper; the expectations rely on its 'randomKeyN' keys
+const keyGenerator = createTestKeyGenerator()
+// toMatchObject is a subset match, so only the listed properties are asserted
+test(htmlToBlocks.name, () => {
+ expect(
+ htmlToBlocks(html, blockContentType, {
+ parseHtml: (html) => new JSDOM(html).window.document,
+ keyGenerator,
+ }),
+ ).toMatchObject([
+ {
+ _key: 'randomKey1',
+ _type: 'block',
+ children: [
+ {
+ _key: 'randomKey2',
+ _type: 'span',
+ text: 'a link',
+ marks: ['randomKey0'],
+ },
+ ],
+ markDefs: [
+ {
+ _key: 'randomKey0',
+ _type: 'link',
+ href: 'https://example.com/',
+ },
+ ],
+ },
+ ])
+})
diff --git a/packages/block-tools/test/html-to-blocks/office-online-nested-mixed-list.html b/packages/block-tools/test/html-to-blocks/office-online-nested-mixed-list.html
new file mode 100644
index 00000000..50d41fa2
--- /dev/null
+++ b/packages/block-tools/test/html-to-blocks/office-online-nested-mixed-list.html
@@ -0,0 +1 @@
+
diff --git a/packages/block-tools/test/html-to-blocks/office-online-nested-mixed-list.test.ts b/packages/block-tools/test/html-to-blocks/office-online-nested-mixed-list.test.ts
new file mode 100644
index 00000000..6064c240
--- /dev/null
+++ b/packages/block-tools/test/html-to-blocks/office-online-nested-mixed-list.test.ts
@@ -0,0 +1,82 @@
+import fs from 'node:fs'
+import path from 'node:path'
+import {JSDOM} from 'jsdom'
+import {expect, test} from 'vitest'
+import {htmlToBlocks} from '../../src'
+import defaultSchema from '../fixtures/defaultSchema'
+import {createTestKeyGenerator} from '../test-key-generator'
+// Office Online nested list mixing numbered and bullet items across levels 1 to 3
+const blockContentType = defaultSchema
+ .get('blogPost')
+ .fields.find((field: any) => field.name === 'body').type
+// Fixture HTML, read from the file next to this test
+const html = fs
+ .readFileSync(path.resolve(__dirname, 'office-online-nested-mixed-list.html'))
+ .toString()
+// Key generator from the shared test helper, handed to htmlToBlocks below
+const keyGenerator = createTestKeyGenerator()
+// toMatchObject is a subset match, so only the listed properties are asserted
+test(htmlToBlocks.name, () => {
+ expect(
+ htmlToBlocks(html, blockContentType, {
+ parseHtml: (html) => new JSDOM(html).window.document,
+ keyGenerator,
+ }),
+ ).toMatchObject([
+ {
+ _type: 'block',
+ children: [
+ {
+ _type: 'span',
+ text: 'foo',
+ },
+ ],
+ level: 1,
+ listItem: 'number',
+ },
+ {
+ _type: 'block',
+ children: [
+ {
+ _type: 'span',
+ text: 'bar',
+ },
+ ],
+ level: 2,
+ listItem: 'bullet',
+ },
+ {
+ _type: 'block',
+ children: [
+ {
+ _type: 'span',
+ text: 'baz',
+ },
+ ],
+ level: 3,
+ listItem: 'number',
+ },
+ {
+ _type: 'block',
+ children: [
+ {
+ _type: 'span',
+ text: 'fizz',
+ },
+ ],
+ level: 1,
+ listItem: 'number',
+ },
+ {
+ _type: 'block',
+ children: [
+ {
+ _type: 'span',
+ text: 'buzz',
+ },
+ ],
+ level: 2,
+ listItem: 'bullet',
+ },
+ ])
+})
diff --git a/packages/block-tools/test/html-to-blocks/office-online-ordered-list.html b/packages/block-tools/test/html-to-blocks/office-online-ordered-list.html
new file mode 100644
index 00000000..dde2477b
--- /dev/null
+++ b/packages/block-tools/test/html-to-blocks/office-online-ordered-list.html
@@ -0,0 +1 @@
+
diff --git a/packages/block-tools/test/html-to-blocks/office-online-ordered-list.test.ts b/packages/block-tools/test/html-to-blocks/office-online-ordered-list.test.ts
new file mode 100644
index 00000000..bb8bd93d
--- /dev/null
+++ b/packages/block-tools/test/html-to-blocks/office-online-ordered-list.test.ts
@@ -0,0 +1,113 @@
+import fs from 'node:fs'
+import path from 'node:path'
+import {JSDOM} from 'jsdom'
+import {expect, test} from 'vitest'
+import {htmlToBlocks} from '../../src'
+import defaultSchema from '../fixtures/defaultSchema'
+import {createTestKeyGenerator} from '../test-key-generator'
+// Office Online ordered list with nested items, bold/italic runs and a link
+const blockContentType = defaultSchema
+ .get('blogPost')
+ .fields.find((field: any) => field.name === 'body').type
+// Fixture HTML, read from the file next to this test
+const html = fs
+ .readFileSync(path.resolve(__dirname, 'office-online-ordered-list.html'))
+ .toString()
+// Key generator from the shared test helper; the expectations rely on its 'randomKeyN' keys
+const keyGenerator = createTestKeyGenerator()
+// toEqual compares the complete structure, including keys, marks and markDefs
+test(htmlToBlocks.name, () => {
+ expect(
+ htmlToBlocks(html, blockContentType, {
+ parseHtml: (html) => new JSDOM(html).window.document,
+ keyGenerator,
+ }),
+ ).toEqual([
+ {
+ _type: 'block',
+ _key: 'randomKey1',
+ children: [
+ {
+ _type: 'span',
+ _key: 'randomKey2',
+ marks: [],
+ text: 'foo',
+ },
+ ],
+ level: 1,
+ listItem: 'number',
+ markDefs: [],
+ style: 'normal',
+ },
+ {
+ _type: 'block',
+ _key: 'randomKey3',
+ children: [
+ {
+ _type: 'span',
+ _key: 'randomKey4',
+ marks: ['strong'],
+ text: 'bar',
+ },
+ ],
+ level: 2,
+ listItem: 'number',
+ markDefs: [],
+ style: 'normal',
+ },
+ {
+ _type: 'block',
+ _key: 'randomKey5',
+ children: [
+ {
+ _type: 'span',
+ _key: 'randomKey6',
+ marks: ['em'],
+ text: 'baz',
+ },
+ ],
+ level: 2,
+ listItem: 'number',
+ markDefs: [],
+ style: 'normal',
+ },
+ {
+ _type: 'block',
+ _key: 'randomKey7',
+ children: [
+ {
+ _type: 'span',
+ _key: 'randomKey8',
+ marks: ['randomKey0'],
+ text: 'fizz',
+ },
+ ],
+ level: 1,
+ listItem: 'number',
+ markDefs: [
+ {
+ _key: 'randomKey0',
+ _type: 'link',
+ href: 'https://example.com/',
+ },
+ ],
+ style: 'normal',
+ },
+ {
+ _type: 'block',
+ _key: 'randomKey9',
+ children: [
+ {
+ _type: 'span',
+ _key: 'randomKey10',
+ marks: [],
+ text: 'buzz',
+ },
+ ],
+ level: 1,
+ listItem: 'number',
+ markDefs: [],
+ style: 'normal',
+ },
+ ])
+})
diff --git a/packages/block-tools/test/html-to-blocks/office-online-simple-decorators.html b/packages/block-tools/test/html-to-blocks/office-online-simple-decorators.html
new file mode 100644
index 00000000..69e913f7
--- /dev/null
+++ b/packages/block-tools/test/html-to-blocks/office-online-simple-decorators.html
@@ -0,0 +1 @@
+bold italic underline
diff --git a/packages/block-tools/test/html-to-blocks/office-online-simple-decorators.test.ts b/packages/block-tools/test/html-to-blocks/office-online-simple-decorators.test.ts
new file mode 100644
index 00000000..73aba5aa
--- /dev/null
+++ b/packages/block-tools/test/html-to-blocks/office-online-simple-decorators.test.ts
@@ -0,0 +1,63 @@
+import fs from 'node:fs'
+import path from 'node:path'
+import {JSDOM} from 'jsdom'
+import {expect, test} from 'vitest'
+import {htmlToBlocks} from '../../src'
+import defaultSchema from '../fixtures/defaultSchema'
+import {createTestKeyGenerator} from '../test-key-generator'
+// Office Online decorators: bold, italic and underline runs inside a single block
+const blockContentType = defaultSchema
+ .get('blogPost')
+ .fields.find((field: any) => field.name === 'body').type
+// Fixture HTML, read from the file next to this test
+const html = fs
+ .readFileSync(path.resolve(__dirname, 'office-online-simple-decorators.html'))
+ .toString()
+// Key generator from the shared test helper; the expectations rely on its 'randomKeyN' keys
+const keyGenerator = createTestKeyGenerator()
+// toMatchObject is a subset match, so only the listed properties are asserted
+test(htmlToBlocks.name, () => {
+ expect(
+ htmlToBlocks(html, blockContentType, {
+ parseHtml: (html) => new JSDOM(html).window.document,
+ keyGenerator,
+ }),
+ ).toMatchObject([
+ {
+ _key: 'randomKey0',
+ _type: 'block',
+ children: [
+ {
+ _key: 'randomKey1',
+ _type: 'span',
+ text: 'bold',
+ marks: ['strong'],
+ },
+ {
+ _key: 'randomKey2',
+ _type: 'span',
+ text: ' ',
+ marks: [],
+ },
+ {
+ _key: 'randomKey3',
+ _type: 'span',
+ text: 'italic',
+ marks: ['em'],
+ },
+ {
+ _key: 'randomKey4',
+ _type: 'span',
+ text: ' ',
+ marks: [],
+ },
+ {
+ _key: 'randomKey5',
+ _type: 'span',
+ text: 'underline',
+ marks: ['underline'],
+ },
+ ],
+ },
+ ])
+})