diff --git a/plugin/admin/src/utils/translatableFields.js b/plugin/admin/src/utils/translatableFields.js index 5971a786..6bd8ce22 100644 --- a/plugin/admin/src/utils/translatableFields.js +++ b/plugin/admin/src/utils/translatableFields.js @@ -6,6 +6,7 @@ const TRANSLATABLE_FIELDS = [ 'string', 'text', 'relation', + 'blocks', ] export default TRANSLATABLE_FIELDS diff --git a/plugin/package.json b/plugin/package.json index bb59b123..c024a87c 100644 --- a/plugin/package.json +++ b/plugin/package.json @@ -29,6 +29,7 @@ "dependencies": { "@strapi/helper-plugin": "^4.15.0", "axios": "^1.7.4", + "blocks-html-renderer": "^1.0.5", "bottleneck": "^2.19.5", "jsdom": "^25.0.0", "showdown": "^2.1.0" diff --git a/plugin/server/config/index.js b/plugin/server/config/index.js index 4d7907a1..5390292e 100644 --- a/plugin/server/config/index.js +++ b/plugin/server/config/index.js @@ -45,7 +45,7 @@ module.exports = { } if ( field.format && - !['plain', 'markdown', 'html'].includes(field.format) + !['plain', 'markdown', 'html', 'jsonb'].includes(field.format) ) { throw new Error( `unhandled format ${field.format} for translated field ${field.type}` diff --git a/plugin/server/services/format.js b/plugin/server/services/format.js index aa9d055f..ceaa6d6b 100644 --- a/plugin/server/services/format.js +++ b/plugin/server/services/format.js @@ -2,6 +2,8 @@ const showdown = require('showdown') const jsdom = require('jsdom') +const renderBlock = require('blocks-html-renderer').renderBlock + const dom = new jsdom.JSDOM() const showdownConverter = new showdown.Converter({ @@ -9,6 +11,8 @@ const showdownConverter = new showdown.Converter({ strikethrough: true, }) +const blocksImageCache = new Map() + function markdownToHtml(singleText) { return showdownConverter.makeHtml(singleText) } @@ -20,6 +24,166 @@ function htmlToMarkdown(singleText) { .trim() } +/** + * + * @param {Array} blocks + */ +function cacheImages(blocks) { + for (const block of blocks.flat(2)) { + if (block.type === 'image') { + blocksImageCache.set(block.image.url, block.image) + } + } +} + +/** + * + * @param {ChildNode} childNode + * @returns {Array} + */ +function collectFormattings(childNode) { + if (childNode.nodeName === '#text' || childNode.childNodes.length === 0) { + return [] + } + if (childNode.childNodes.length > 1) { + throw new Error('collectFormattings expects an element with a single child') + } + const formattings = collectFormattings(childNode.childNodes[0]) + if (childNode.tagName === 'STRONG') { + formattings.push('bold') + } + if (childNode.tagName === 'EM') { + formattings.push('italic') + } + if (childNode.tagName === 'U') { + formattings.push('underline') + } + if (childNode.tagName === 'S') { + formattings.push('strikethrough') + } + if (childNode.tagName === 'CODE') { + formattings.push('code') + } + return formattings +} + +/** + * + * @param {HTMLElement} element + * @returns + */ +function convertInlineElementToBlocks(element) { + const elements = [] + for (const child of element.childNodes) { + if (child.tagName === 'A') { + elements.push({ + type: 'link', + url: child.href, + children: convertInlineElementToBlocks(child), + }) + continue + } + try { + const formattings = collectFormattings(child) + const element = { + type: 'text', + text: child.textContent, + } + for (const formatting of formattings) { + element[formatting] = true + } + elements.push(element) + } catch (error) { + strapi.log.error(`Error while converting inline element ${element.outerHTML} to blocks, falling back to no formatting`, error) + elements.push({ + type: 'text', + text: child.textContent, + }) + } + } + if (elements.length === 0) { + elements.push({ + type: 'text', + text: element.textContent, + }) + } + return elements +} + + +function convertHtmlToBlock(html) { + const root = dom.window.document.createElement('div') + root.innerHTML = html + + const blocks = [] + + for (const child of root.children) { + if (child.tagName === 'P') { + blocks.push({ + type: 'paragraph', + children: convertInlineElementToBlocks(child), + }) + } + if (/^H[1-6]$/.test(child.tagName)) { + const level = parseInt(child.tagName[1], 10) + blocks.push({ + type: 'heading', + level, + children: convertInlineElementToBlocks(child), + }) + } + if (/^[UO]L$/.test(child.tagName)) { + const listItems = Array.from(child.children).map(li => ({ + type: 'list-item', + children: convertInlineElementToBlocks(li), + })) + blocks.push({ + type: 'list', + format: child.tagName === 'UL' ? 'unordered' : 'ordered', + children: listItems, + }) + } + if (child.tagName === 'BLOCKQUOTE') { + blocks.push({ + type: 'quote', + children: convertInlineElementToBlocks(child), + }) + } + if (child.tagName === 'PRE') { + // pre also has a code child + const code = child.querySelector('code') + blocks.push({ + type: 'code', + children: [ + { + type: 'text', + text: code.textContent, + } + ] + }) + } + if (child.tagName === "IMG") { + const image = blocksImageCache.has(child.src) ? blocksImageCache.get(child.src) : { + url: child.src, + alt: child.alt, + } + blocks.push({ + type: 'image', + image, + children: convertInlineElementToBlocks(child), + }) + } + if (child.tagName === "A") { + blocks.push({ + type: 'link', + url: child.href, + children: convertInlineElementToBlocks(child), + }) + } + } + return blocks +} + module.exports = () => ({ markdownToHtml(text) { if (Array.isArray(text)) { @@ -33,4 +197,37 @@ module.exports = () => ({ } return htmlToMarkdown(text) }, + blockToHtml(block) { + if (!Array.isArray(block)) { + throw new Error('blockToHtml expects an array of blocks or a single block. Got ' + typeof block) + } + cacheImages(block) + if (block.length > 0 ) { + if (!block[0].type) { + return block.map(renderBlock) + } + return renderBlock(block) + } + }, + htmlToBlock(html) { + if (Array.isArray(html)) { + return html.map(convertHtmlToBlock) + } + return convertHtmlToBlock(html) + // return html.map(h => [ + // { + // type: 'paragraph', + // children: [ + // { + // type: 'text', + // text: "The following HTML has been translated. It's not yet possible to convert it back to the original block structure.", + // } + // ] + // }, + // { + // type: 'code', + // children: h.split('\n').map(text => ({ type: 'text', text })), + // } + // ]) + }, }) diff --git a/providers/deepl/lib/index.js b/providers/deepl/lib/index.js index 093a1f80..50e048c1 100644 --- a/providers/deepl/lib/index.js +++ b/providers/deepl/lib/index.js @@ -47,7 +47,7 @@ module.exports = { return { /** * @param {{ - * text:string|string[], + * text:string|string[]|any[], * sourceLocale: string, * targetLocale: string, * priority: number, @@ -68,12 +68,15 @@ module.exports = { const tagHandling = format === 'plain' ? undefined : 'html' - let textArray = Array.isArray(text) ? text : [text] - - if (format === 'markdown') { - textArray = formatService.markdownToHtml(textArray) + let input = text + if (format === 'jsonb') { + input = formatService.blockToHtml(input) + } else if (format === 'markdown') { + input = formatService.markdownToHtml(input) } + let textArray = Array.isArray(input) ? input : [input] + const { chunks, reduceFunction } = chunksService.split(textArray, { maxLength: DEEPL_API_MAX_TEXTS, maxByteSize: DEEPL_API_ROUGH_MAX_REQUEST_SIZE, @@ -98,7 +101,10 @@ module.exports = { }) ) ) - + + if (format === 'jsonb') { + return formatService.htmlToBlock(result) + } if (format === 'markdown') { return formatService.htmlToMarkdown(result) } diff --git a/providers/libretranslate/lib/index.js b/providers/libretranslate/lib/index.js index ff22bf8e..35dd763c 100644 --- a/providers/libretranslate/lib/index.js +++ b/providers/libretranslate/lib/index.js @@ -49,7 +49,7 @@ module.exports = { return { /** * @param {{ - * text:string|string[], + * text:string|string[]|any[], * sourceLocale: string, * targetLocale: string, * priority: number, @@ -73,21 +73,20 @@ module.exports = { const chunksService = getService('chunks') const formatService = getService('format') - let textArray = Array.isArray(text) ? text : [text] - - if (format === 'markdown') { - textArray = formatService.markdownToHtml(textArray) + let input = text + if (format === 'jsonb') { + input = formatService.blockToHtml(input) + } else if (format === 'markdown') { + input = formatService.markdownToHtml(input) } + const textArray = Array.isArray(input) ? input : [input] + const { chunks, reduceFunction } = chunksService.split(textArray, { maxLength: maxTexts === -1 ? Number.MAX_VALUE : maxTexts, maxByteSize: maxCharacters === -1 ? Number.MAX_VALUE : maxCharacters, }) - if (format === 'markdown') { - textArray = formatService.markdownToHtml(textArray) - } - const result = reduceFunction( await Promise.all( chunks.map(async (texts) => { diff --git a/yarn.lock b/yarn.lock index 044e8c90..44889988 100644 --- a/yarn.lock +++ b/yarn.lock @@ -5369,6 +5369,11 @@ blob-util@^2.0.2: resolved "https://registry.yarnpkg.com/blob-util/-/blob-util-2.0.2.tgz#3b4e3c281111bb7f11128518006cdc60b403a1eb" integrity sha512-T7JQa+zsXXEa6/8ZhHcQEW1UFfVM49Ts65uBkFL6fz2QmrElqmbajIDJvuA0tEhRe5eIjpV9ZF+0RfZR9voJFQ== +blocks-html-renderer@^1.0.5: + version "1.0.5" + resolved "https://registry.yarnpkg.com/blocks-html-renderer/-/blocks-html-renderer-1.0.5.tgz#c4882b9a4cd6f8a80f31d7960f9ff11dacb4f6cc" + integrity sha512-KT3GqV+xsYIgKZX2vJ7/1zEu1eiidGvzZH+Sk4eGBZ4Be/x+pXo/LJRc5a21IzbCsz68wfQd9+GqUCTM3s0NRg== + blork@^9.3.0: version "9.3.0" resolved "https://registry.yarnpkg.com/blork/-/blork-9.3.0.tgz#6c0b4fbb6b754998ae5460c26463d95c635e4a35"