From 4de2fb4a538e6eec6ac016142e351b880283beea Mon Sep 17 00:00:00 2001
From: Felix Haase
Date: Fri, 1 Mar 2024 22:57:13 +0100
Subject: [PATCH] feat(format): support blocks editor format

This implements basic support for the blocks format of the blocks rich
text editor.

fix #401
---
Review notes (ignored by git am):
- blockToHtml returns an empty list for an empty list instead of undefined.
- A pre element without a code child no longer throws in htmlToBlock.
- cacheImages skips malformed entries; the cache itself is still unbounded.
- Hunk line counts and the diffstat were recomputed for the reviewed code;
  the index hashes are those of the originally submitted patch.

 plugin/admin/src/utils/translatableFields.js |   1 +
 plugin/package.json                          |   1 +
 plugin/server/config/index.js                |   2 +-
 plugin/server/services/format.js             | 227 +++++++++++++++++++
 providers/deepl/lib/index.js                 |  16 +-
 providers/libretranslate/lib/index.js        |  17 +-
 6 files changed, 249 insertions(+), 15 deletions(-)

diff --git a/plugin/admin/src/utils/translatableFields.js b/plugin/admin/src/utils/translatableFields.js
index 5971a786..6bd8ce22 100644
--- a/plugin/admin/src/utils/translatableFields.js
+++ b/plugin/admin/src/utils/translatableFields.js
@@ -6,6 +6,7 @@ const TRANSLATABLE_FIELDS = [
   'string',
   'text',
   'relation',
+  'blocks',
 ]
 
 export default TRANSLATABLE_FIELDS
diff --git a/plugin/package.json b/plugin/package.json
index dc63d4ad..1c2cfbca 100644
--- a/plugin/package.json
+++ b/plugin/package.json
@@ -29,6 +29,7 @@
   "dependencies": {
     "@strapi/helper-plugin": "^4.15.0",
     "axios": "^1.6.7",
+    "blocks-html-renderer": "^1.0.5",
     "bottleneck": "^2.19.5",
     "jsdom": "^24.0.0",
     "showdown": "^2.1.0"
diff --git a/plugin/server/config/index.js b/plugin/server/config/index.js
index 9b75ecd0..f21537f8 100644
--- a/plugin/server/config/index.js
+++ b/plugin/server/config/index.js
@@ -41,7 +41,7 @@ module.exports = {
           }
           if (
             field.format &&
-            !['plain', 'markdown', 'html'].includes(field.format)
+            !['plain', 'markdown', 'html', 'jsonb'].includes(field.format)
           ) {
             throw new Error(
               `unhandled format ${field.format} for translated field ${field.type}`
diff --git a/plugin/server/services/format.js b/plugin/server/services/format.js
index aa9d055f..ceaa6d6b 100644
--- a/plugin/server/services/format.js
+++ b/plugin/server/services/format.js
@@ -2,6 +2,8 @@
 const showdown = require('showdown')
 const jsdom = require('jsdom')
 
+const renderBlock = require('blocks-html-renderer').renderBlock
+
 const dom = new jsdom.JSDOM()
 
 const showdownConverter = new showdown.Converter({
@@ -9,6 +11,8 @@ const showdownConverter = new showdown.Converter({
   strikethrough: true,
 })
 
+const blocksImageCache = new Map()
+
 function markdownToHtml(singleText) {
   return showdownConverter.makeHtml(singleText)
 }
@@ -20,6 +24,189 @@ function htmlToMarkdown(singleText) {
     .trim()
 }
 
+/**
+ * Remember every image block so that its media object can be reused when
+ * the translated HTML is converted back to blocks.
+ *
+ * NOTE(review): blocksImageCache is module-level and never pruned, so it
+ * grows with the number of distinct image URLs that get translated.
+ *
+ * @param {Array} blocks a list of blocks or a list of lists of blocks
+ */
+function cacheImages(blocks) {
+  for (const block of blocks.flat(2)) {
+    // skip malformed entries instead of aborting the whole translation
+    if (block && block.type === 'image' && block.image) {
+      blocksImageCache.set(block.image.url, block.image)
+    }
+  }
+}
+
+// maps the tag name of an inline HTML element to the blocks text modifier
+const INLINE_FORMATTINGS = {
+  STRONG: 'bold',
+  EM: 'italic',
+  U: 'underline',
+  S: 'strikethrough',
+  CODE: 'code',
+}
+
+/**
+ * Collect the text modifiers of a chain of nested inline elements, e.g.
+ * <strong><em>text</em></strong> yields ['italic', 'bold'].
+ *
+ * @param {ChildNode} childNode
+ * @returns {Array<string>} modifier names, innermost first
+ * @throws {Error} if a node on the chain has more than one child node
+ */
+function collectFormattings(childNode) {
+  if (childNode.nodeName === '#text' || childNode.childNodes.length === 0) {
+    return []
+  }
+  if (childNode.childNodes.length > 1) {
+    throw new Error('collectFormattings expects an element with a single child')
+  }
+  const formattings = collectFormattings(childNode.childNodes[0])
+  const formatting = INLINE_FORMATTINGS[childNode.tagName]
+  if (formatting) {
+    formattings.push(formatting)
+  }
+  return formattings
+}
+
+/**
+ * Convert the child nodes of an HTML element to inline nodes of the blocks
+ * format (text and link nodes).
+ *
+ * @param {HTMLElement} element
+ * @returns {Array<object>} at least one node; an element without child nodes
+ * yields a single text node with the element's text content
+ */
+function convertInlineElementToBlocks(element) {
+  const elements = []
+  for (const child of element.childNodes) {
+    if (child.tagName === 'A') {
+      elements.push({
+        type: 'link',
+        url: child.href,
+        children: convertInlineElementToBlocks(child),
+      })
+      continue
+    }
+    try {
+      const textNode = {
+        type: 'text',
+        text: child.textContent,
+      }
+      for (const formatting of collectFormattings(child)) {
+        textNode[formatting] = true
+      }
+      elements.push(textNode)
+    } catch (error) {
+      strapi.log.error(
+        `Error while converting inline element ${element.outerHTML} to blocks, falling back to no formatting`,
+        error
+      )
+      elements.push({
+        type: 'text',
+        text: child.textContent,
+      })
+    }
+  }
+  if (elements.length === 0) {
+    elements.push({
+      type: 'text',
+      text: element.textContent,
+    })
+  }
+  return elements
+}
+
+/**
+ * Convert translated HTML back to a list of blocks. Only the top-level
+ * elements p, h1-h6, ul, ol, blockquote, pre, img and a are converted, any
+ * other top-level element is skipped.
+ *
+ * @param {string} html
+ * @returns {Array<object>}
+ */
+function convertHtmlToBlock(html) {
+  const root = dom.window.document.createElement('div')
+  root.innerHTML = html
+
+  const blocks = []
+
+  for (const child of root.children) {
+    if (child.tagName === 'P') {
+      blocks.push({
+        type: 'paragraph',
+        children: convertInlineElementToBlocks(child),
+      })
+    }
+    if (/^H[1-6]$/.test(child.tagName)) {
+      const level = parseInt(child.tagName[1], 10)
+      blocks.push({
+        type: 'heading',
+        level,
+        children: convertInlineElementToBlocks(child),
+      })
+    }
+    if (/^[UO]L$/.test(child.tagName)) {
+      const listItems = Array.from(child.children).map((li) => ({
+        type: 'list-item',
+        children: convertInlineElementToBlocks(li),
+      }))
+      blocks.push({
+        type: 'list',
+        format: child.tagName === 'UL' ? 'unordered' : 'ordered',
+        children: listItems,
+      })
+    }
+    if (child.tagName === 'BLOCKQUOTE') {
+      blocks.push({
+        type: 'quote',
+        children: convertInlineElementToBlocks(child),
+      })
+    }
+    if (child.tagName === 'PRE') {
+      // pre normally wraps a code element; fall back to the pre element
+      // itself in case the translated HTML no longer contains one
+      const code = child.querySelector('code') || child
+      blocks.push({
+        type: 'code',
+        children: [
+          {
+            type: 'text',
+            text: code.textContent,
+          },
+        ],
+      })
+    }
+    if (child.tagName === 'IMG') {
+      // reuse the media object cached by cacheImages if there is one
+      const image = blocksImageCache.has(child.src)
+        ? blocksImageCache.get(child.src)
+        : {
+            url: child.src,
+            alt: child.alt,
+          }
+      blocks.push({
+        type: 'image',
+        image,
+        children: convertInlineElementToBlocks(child),
+      })
+    }
+    if (child.tagName === 'A') {
+      blocks.push({
+        type: 'link',
+        url: child.href,
+        children: convertInlineElementToBlocks(child),
+      })
+    }
+  }
+  return blocks
+}
+
 module.exports = () => ({
   markdownToHtml(text) {
     if (Array.isArray(text)) {
@@ -33,4 +220,44 @@ module.exports = () => ({
     }
     return htmlToMarkdown(text)
   },
+  /**
+   * Render blocks to HTML using renderBlock of blocks-html-renderer.
+   *
+   * @param {Array} block a list of blocks or a list of lists of blocks
+   * @returns the rendered HTML for a list of blocks, a list of rendered HTML
+   * for a list of lists of blocks or an empty list for an empty list
+   * @throws {Error} if block is not an array
+   */
+  blockToHtml(block) {
+    if (!Array.isArray(block)) {
+      throw new Error(
+        'blockToHtml expects an array of blocks or a single block. Got ' +
+          typeof block
+      )
+    }
+    if (block.length === 0) {
+      // nothing to render; returning undefined here would be passed on by
+      // the providers as a text to translate
+      return []
+    }
+    cacheImages(block)
+    if (!block[0].type) {
+      // a first entry without a type means a list of lists of blocks
+      return block.map((blocks) => renderBlock(blocks))
+    }
+    return renderBlock(block)
+  },
+  /**
+   * Convert HTML back to blocks.
+   *
+   * @param {string|Array<string>} html
+   * @returns {Array} a list of blocks for a single HTML string, a list of
+   * lists of blocks for a list of HTML strings
+   */
+  htmlToBlock(html) {
+    if (Array.isArray(html)) {
+      return html.map(convertHtmlToBlock)
+    }
+    return convertHtmlToBlock(html)
+  },
 })
diff --git a/providers/deepl/lib/index.js b/providers/deepl/lib/index.js
index 093a1f80..50e048c1 100644
--- a/providers/deepl/lib/index.js
+++ b/providers/deepl/lib/index.js
@@ -47,7 +47,7 @@ module.exports = {
     return {
       /**
        * @param {{
-       *  text:string|string[],
+       *  text:string|string[]|any[],
        *  sourceLocale: string,
        *  targetLocale: string,
        *  priority: number,
@@ -68,12 +68,15 @@ module.exports = {
 
         const tagHandling = format === 'plain' ? undefined : 'html'
 
-        let textArray = Array.isArray(text) ? text : [text]
-
-        if (format === 'markdown') {
-          textArray = formatService.markdownToHtml(textArray)
+        let input = text
+        if (format === 'jsonb') {
+          input = formatService.blockToHtml(input)
+        } else if (format === 'markdown') {
+          input = formatService.markdownToHtml(input)
         }
 
+        const textArray = Array.isArray(input) ? input : [input]
+
         const { chunks, reduceFunction } = chunksService.split(textArray, {
           maxLength: DEEPL_API_MAX_TEXTS,
           maxByteSize: DEEPL_API_ROUGH_MAX_REQUEST_SIZE,
@@ -98,7 +101,10 @@ module.exports = {
             })
           )
         )
 
+        if (format === 'jsonb') {
+          return formatService.htmlToBlock(result)
+        }
         if (format === 'markdown') {
           return formatService.htmlToMarkdown(result)
         }
diff --git a/providers/libretranslate/lib/index.js b/providers/libretranslate/lib/index.js
index ff22bf8e..35dd763c 100644
--- a/providers/libretranslate/lib/index.js
+++ b/providers/libretranslate/lib/index.js
@@ -49,7 +49,7 @@ module.exports = {
     return {
       /**
        * @param {{
-       *  text:string|string[],
+       *  text:string|string[]|any[],
        *  sourceLocale: string,
        *  targetLocale: string,
        *  priority: number,
@@ -73,21 +73,20 @@ module.exports = {
         const chunksService = getService('chunks')
         const formatService = getService('format')
 
-        let textArray = Array.isArray(text) ? text : [text]
-
-        if (format === 'markdown') {
-          textArray = formatService.markdownToHtml(textArray)
+        let input = text
+        if (format === 'jsonb') {
+          input = formatService.blockToHtml(input)
+        } else if (format === 'markdown') {
+          input = formatService.markdownToHtml(input)
         }
 
+        const textArray = Array.isArray(input) ? input : [input]
+
         const { chunks, reduceFunction } = chunksService.split(textArray, {
           maxLength: maxTexts === -1 ? Number.MAX_VALUE : maxTexts,
           maxByteSize: maxCharacters === -1 ? Number.MAX_VALUE : maxCharacters,
         })
 
-        if (format === 'markdown') {
-          textArray = formatService.markdownToHtml(textArray)
-        }
-
         const result = reduceFunction(
           await Promise.all(
             chunks.map(async (texts) => {