From 244d6db3b8c7336894bef57ccf89904f07497201 Mon Sep 17 00:00:00 2001
From: alunturner <56027671+alunturner@users.noreply.github.com>
Date: Fri, 30 Jun 2023 09:09:39 +0100
Subject: [PATCH] Update web text parsing (#736)
* Improve parsing of markdown when building a model
---
platforms/web/lib/conversion.test.ts | 376 +++++++++++++++++++++++++--
platforms/web/lib/conversion.ts | 129 +++++++--
2 files changed, 465 insertions(+), 40 deletions(-)
diff --git a/platforms/web/lib/conversion.test.ts b/platforms/web/lib/conversion.test.ts
index e470862db..c5dd2a290 100644
--- a/platforms/web/lib/conversion.test.ts
+++ b/platforms/web/lib/conversion.test.ts
@@ -17,8 +17,8 @@ limitations under the License.
import {
richToPlain,
plainToRich,
- plainToMarkdown,
markdownToPlain,
+ plainTextInnerHtmlToMarkdown,
} from './conversion';
describe('Rich text <=> plain text', () => {
@@ -27,7 +27,6 @@ describe('Rich text <=> plain text', () => {
{ rich: 'plain', plain: 'plain' },
{ rich: 'bold', plain: '__bold__' },
{ rich: 'italic', plain: '*italic*' },
- { rich: 'underline', plain: 'underline' },
{ rich: 'strike', plain: '~~strike~~' },
];
const mappedTestCases = testCases.map(({ rich, plain }) => [rich, plain]);
@@ -43,43 +42,47 @@ describe('Rich text <=> plain text', () => {
},
);
- it('converts linebreaks for display rich => plain', async () => {
- const richText = 'multi
line';
- const convertedPlainText = await richToPlain(richText);
- const expectedPlainText = `multi\nline`;
+ it('converts underline case rich => plain', async () => {
+ // This is the html representation of underlining
+ const rich = 'underline';
+ // When we convert the plain text, we expect the output to be the `rich` string - it
+ // is then set as `.innerText` in element web so that handles html escaping entities
+ const expectedPlainText = 'underline';
+
+ const convertedPlainText = await richToPlain(rich);
expect(convertedPlainText).toBe(expectedPlainText);
});
- it('converts linebreaks for display plain => rich', async () => {
- const plainText = 'multi\nline';
- const convertedRichText = await plainToRich(plainText, false);
- const expectedRichText = 'multi
line';
+ it('converts underline case plain => rich', async () => {
+ // When the above is typed by a user in the plain text editor, the innerHTML
+ // will look like this
+ const plain = '<u>underline</u>';
+ const expectedRichText = 'underline';
+
+ const convertedRichText = await plainToRich(plain, false);
expect(convertedRichText).toBe(expectedRichText);
});
-});
-describe('Plain text <=> markdown', () => {
- it('converts single linebreak for plain => markdown', () => {
- const plain = 'multi\nline';
- const convertedMarkdown = plainToMarkdown(plain);
- const expectedMarkdown = `multi
line`;
+ it('converts linebreaks for display rich => plain', async () => {
+ const richText = 'multi
line';
+ const convertedPlainText = await richToPlain(richText);
+ const expectedPlainText = `multi\nline`;
- expect(convertedMarkdown).toBe(expectedMarkdown);
+ expect(convertedPlainText).toBe(expectedPlainText);
});
- it('converts multiple linebreak for plain => markdown', () => {
- // nb for correct display, there will be one br tag less
- // than \n at the end
- const plain = 'multiple\nline\n\nbreaks\n\n\n';
- const convertedMarkdown = plainToMarkdown(plain);
- const expectedMarkdown =
- 'multiple
line
breaks
';
+ it('converts blockquotes from plain => rich', async () => {
+ const plainText = '> I am a quote';
+ const convertedRichText = await plainToRich(plainText, false);
+ const expectedRichText = '
'; - expect(convertedMarkdown).toBe(expectedMarkdown); + expect(convertedRichText).toBe(expectedRichText); }); +}); +describe('markdownToPlain', () => { it('converts single linebreak for markdown => plain', () => { const markdown = 'multi\\\nline'; const convertedPlainText = markdownToPlain(markdown); @@ -158,3 +161,326 @@ describe('Mentions', () => { ); }); }); + +// Although a bit clunky, all of these tests simulate a plain text composer by creating a content editable +// div, appending children to it and then reading the composer's innerHTML. This way we can ensure that we +// are giving the conversion function decent input for the tests. +describe('plainTextInnerHtmlToMarkdown', () => { + let mockComposer: HTMLDivElement; + + function createMentionElement(identifier = ''): HTMLAnchorElement { + const mention = document.createElement('a'); + mention.appendChild(document.createTextNode(`inner text${identifier}`)); + mention.setAttribute('href', `testHref${identifier}`); + mention.setAttribute('data-mention-type', `testType${identifier}`); + mention.setAttribute('style', `testStyle${identifier}`); + mention.setAttribute('contenteditable', 'false'); + return mention; + } + + function createPlaceholderDiv(): HTMLDivElement { + const div = document.createElement('div'); + const br = document.createElement('br'); + div.appendChild(br); + return div; + } + + beforeEach(() => { + mockComposer = document.createElement('div'); + mockComposer.setAttribute('contenteditable', 'true'); + }); + + it('handles two lines of text, second line empty, with newline char', () => { + const textNode = document.createTextNode('firstline\n\n'); + mockComposer.appendChild(textNode); + + const expected = 'firstline\n'; + expect(plainTextInnerHtmlToMarkdown(mockComposer.innerHTML)).toBe( + expected, + ); + }); + + it('handles two lines of text, second line empty, with placeholder div', () => { + const textNode = document.createTextNode('firstline'); + + mockComposer.append(textNode, createPlaceholderDiv()); + + const expected = 'firstline\n'; + expect(plainTextInnerHtmlToMarkdown(mockComposer.innerHTML)).toBe( + expected, + ); + }); + + it('handles consecutive newlines between text lines for newline chars', () => { + const textNode = document.createTextNode('first\n\n\n\nlast'); + mockComposer.appendChild(textNode); + + const expected = 'first\n\n\n\nlast'; + expect(plainTextInnerHtmlToMarkdown(mockComposer.innerHTML)).toBe( + expected, + ); + }); + + it('handles consecutive newlines between text lines for placeholder divs', () => { + const firstText = document.createTextNode('first'); + + // after the placeholders have started, text can only be inserted inside divs + const lastDiv = document.createElement('div'); + const lastText = document.createTextNode('last'); + lastDiv.appendChild(lastText); + + const children = [ + firstText, + createPlaceholderDiv(), + createPlaceholderDiv(), + createPlaceholderDiv(), + lastDiv, + ]; + + mockComposer.append(...children); + + const expected = 'first\n\n\n\nlast'; + expect(plainTextInnerHtmlToMarkdown(mockComposer.innerHTML)).toBe( + expected, + ); + }); + + it('handles divs with a line break', () => { + const innerDiv = document.createElement('div'); + const innerBreak = document.createElement('br'); + innerDiv.appendChild(innerBreak); + mockComposer.appendChild(innerDiv); + + const expected = '\n'; + expect(plainTextInnerHtmlToMarkdown(mockComposer.innerHTML)).toBe( + expected, + ); + }); + + it('handles divs with text content', () => { + const innerDiv = document.createElement('div'); + innerDiv.appendChild(document.createTextNode('some text')); + mockComposer.appendChild(innerDiv); + + const expected = 'some text'; + expect(plainTextInnerHtmlToMarkdown(mockComposer.innerHTML)).toBe( + expected, + ); + }); + + it('handles multiple divs with text content', () => { + const firstInnerDiv = document.createElement('div'); + const secondInnerDiv = document.createElement('div'); + firstInnerDiv.appendChild(document.createTextNode('some text')); + secondInnerDiv.appendChild(document.createTextNode('some more text')); + + mockComposer.append(firstInnerDiv, secondInnerDiv); + + const expected = 'some text\nsome more text'; + expect(plainTextInnerHtmlToMarkdown(mockComposer.innerHTML)).toBe( + expected, + ); + }); + + it('handles div following plain text node', () => { + const firstTextNode = 'textnode text'; + const secondDiv = document.createElement('div'); + secondDiv.appendChild(document.createTextNode('some more text')); + + mockComposer.append(firstTextNode, secondDiv); + + const expected = 'textnode text\nsome more text'; + expect(plainTextInnerHtmlToMarkdown(mockComposer.innerHTML)).toBe( + expected, + ); + }); + + it('handles multiple adjacent text nodes at top level', () => { + // this is how chrome structures the child nodes + const strings = [ + 'first string', + '\n', + 'second string', + '\n', + 'third string', + ]; + strings.forEach((s) => + mockComposer.appendChild(document.createTextNode(s)), + ); + + const expected = 'first string\nsecond string\nthird string'; + + expect(plainTextInnerHtmlToMarkdown(mockComposer.innerHTML)).toBe( + expected, + ); + }); + + it('handles multiple adjacent text nodes in nested div', () => { + const innerDiv = document.createElement('div'); + // this is how chrome structures the child nodes + const strings = [ + 'first string', + '\n', + 'second string', + '\n', + 'third string', + ]; + strings.forEach((s) => + innerDiv.appendChild(document.createTextNode(s)), + ); + mockComposer.appendChild(innerDiv); + + const expected = 'first string\nsecond string\nthird string'; + expect(plainTextInnerHtmlToMarkdown(mockComposer.innerHTML)).toBe( + expected, + ); + }); + + it('handles a mention at the top level', () => { + const mention = createMentionElement(); + mockComposer.appendChild(mention); + + // eslint-disable-next-line max-len + const expected = `inner text`; + expect(plainTextInnerHtmlToMarkdown(mockComposer.innerHTML)).toBe( + expected, + ); + }); + + it('handles a mention at the top level inline with textnodes', () => { + const mention = createMentionElement(); + + mockComposer.appendChild(document.createTextNode('preceding ')); + mockComposer.appendChild(mention); + mockComposer.appendChild(document.createTextNode(' following')); + + // eslint-disable-next-line max-len + const expected = `preceding inner text following`; + expect(plainTextInnerHtmlToMarkdown(mockComposer.innerHTML)).toBe( + expected, + ); + }); + + it('handles a nested mention', () => { + const innerDiv = document.createElement('div'); + const mention = createMentionElement(); + innerDiv.appendChild(mention); + mockComposer.appendChild(innerDiv); + + // eslint-disable-next-line max-len + const expected = `inner text`; + expect(plainTextInnerHtmlToMarkdown(mockComposer.innerHTML)).toBe( + expected, + ); + }); + + it('handles a nested mention with nested text nodes', () => { + const innerDiv = document.createElement('div'); + const mention = createMentionElement(); + + innerDiv.appendChild(document.createTextNode('preceding ')); + innerDiv.appendChild(mention); + innerDiv.appendChild(document.createTextNode(' following')); + mockComposer.appendChild(innerDiv); + + // eslint-disable-next-line max-len + const expected = `preceding inner text following`; + expect(plainTextInnerHtmlToMarkdown(mockComposer.innerHTML)).toBe( + expected, + ); + }); + + it('handles a nested mention next to top level text node', () => { + const innerDiv = document.createElement('div'); + const mention = createMentionElement(); + + mockComposer.appendChild(document.createTextNode('preceding')); + innerDiv.appendChild(mention); + mockComposer.appendChild(innerDiv); + + // eslint-disable-next-line max-len + const expected = `preceding\ninner text`; + expect(plainTextInnerHtmlToMarkdown(mockComposer.innerHTML)).toBe( + expected, + ); + }); + + it('handles adjacent top level mentions', () => { + ['1', '2', '3'].forEach((id) => { + const mention = createMentionElement(id); + mockComposer.appendChild(mention); + }); + + // eslint-disable-next-line max-len + const expected = `inner text1inner text2inner text3`; + expect(plainTextInnerHtmlToMarkdown(mockComposer.innerHTML)).toBe( + expected, + ); + }); + + it('handles adjacent nested mentions', () => { + const innerDiv = document.createElement('div'); + ['1', '2', '3'].forEach((id) => { + const mention = createMentionElement(id); + innerDiv.appendChild(mention); + }); + mockComposer.appendChild(innerDiv); + + // eslint-disable-next-line max-len + const expected = `inner text1inner text2inner text3`; + expect(plainTextInnerHtmlToMarkdown(mockComposer.innerHTML)).toBe( + expected, + ); + }); + + it('handles adjacent top level and nested mentions', () => { + const topLevelMention = createMentionElement('1'); + const nestedMention = createMentionElement('2'); + + const innerDiv = document.createElement('div'); + innerDiv.appendChild(nestedMention); + + mockComposer.append(topLevelMention, innerDiv); + + // eslint-disable-next-line max-len + const expected = `inner text1\ninner text2`; + expect(plainTextInnerHtmlToMarkdown(mockComposer.innerHTML)).toBe( + expected, + ); + }); + + it('outputs to console.debug for unexpected node types', () => { + const debugSpy = vi + .spyOn(console, 'debug') + .mockImplementation(() => {}); + + const unorderedList = document.createElement('ul'); + const listItem = document.createElement('li'); + const textNode = document.createTextNode('list text'); + + listItem.appendChild(textNode); + unorderedList.appendChild(listItem); + mockComposer.appendChild(unorderedList); + + // check that it pulls the inner text out correctly, despite the unexpected tags + const expected = `list text`; + expect(plainTextInnerHtmlToMarkdown(mockComposer.innerHTML)).toBe( + expected, + ); + + // check that it has also called console.debug with some useful output + expect(debugSpy.mock.calls).toMatchInlineSnapshot(` + [ + [ + "Converting unexpected node type UL", + ], + [ + "Converting unexpected node type LI", + ], + ] + `); + + // reset the spy + debugSpy.mockRestore(); + }); +}); diff --git a/platforms/web/lib/conversion.ts b/platforms/web/lib/conversion.ts index 2f4becba8..408afd2a6 100644 --- a/platforms/web/lib/conversion.ts +++ b/platforms/web/lib/conversion.ts @@ -21,18 +21,7 @@ import { } from '../generated/wysiwyg.js'; import { initOnce } from './useComposerModel.js'; -// In plain text, due to cursor positioning, ending at a linebreak will -// include an extra \n, so trim that off if required. -// We replace the remaining \n with valid markdown before -// parsing by MarkdownHTMLParser::to_html. -export const plainToMarkdown = (plainText: string) => { - let markdown = plainText; - if (markdown.endsWith('\n')) { - // manually remove the final linebreak - markdown = markdown.slice(0, -1); - } - return markdown.replaceAll(/\n/g, 'I am a quote