/**
 * Helpers for wrapping text to a width that only the caller knows how to measure.
 *
 * Provenance: reconstructed from the whitespace-collapsed patch
 *   c41df420d773971f3a366f69279bee1ca901e031 "Add splitText"
 *   (Sidharth Vinod, Fri, 9 Jun 2023 11:06:45 +0530)
 * which creates `packages/mermaid/src/rendering-util/splitText.ts`, adds
 * `splitText.spec.ts` (vitest) next to it, and bumps `tsconfig.json`
 * `lib` from "ES2021" to "ES2022". The spec's cases are kept as `@example`s
 * on the functions below.
 *
 * Review changes relative to the patch:
 *  - removed the leftover `console.error` debug logging;
 *  - `splitLineToFitWidth` no longer drops words / word pieces or recurses forever;
 *  - `splitWordToFitWidth` keeps splitting after an oversize grapheme and never
 *    returns an empty trailing piece;
 *  - `splitLineToFitWidthLoop` no longer mutates the caller's `words` array.
 */

/** Caller-supplied predicate: returns `true` when `text` fits on a single line. */
export type CheckFitFunction = (text: string) => boolean;

/**
 * Minimal structural view of `Intl.Segmenter`. Declared locally so that feature
 * detection type-checks regardless of which `lib` the project compiles with.
 */
interface GraphemeSegmenter {
  segment(input: string): Iterable<{ segment: string }>;
}
type GraphemeSegmenterConstructor = new () => GraphemeSegmenter;

/**
 * Splits a string into graphemes if `Intl.Segmenter` is available, otherwise
 * into code points.
 *
 * @param text - The text to split.
 * @returns One entry per grapheme (or code point); `[]` for the empty string.
 * @example
 * splitTextToChars('');   // []
 * splitTextToChars('ok'); // ['o', 'k']
 * // With Intl.Segmenter, '🏳️‍⚧️🏳️‍🌈👩🏾‍❤️‍👨🏻' yields three entries, one per emoji sequence.
 */
export function splitTextToChars(text: string): string[] {
  // Treat the segmenter as optional: older runtimes do not ship it.
  const Segmenter =
    typeof Intl === 'undefined'
      ? undefined
      : (Intl as { Segmenter?: GraphemeSegmenterConstructor }).Segmenter;
  if (Segmenter) {
    return Array.from(new Segmenter().segment(text), (part) => part.segment);
  }
  return [...text];
}

/**
 * Finds where the longest fitting run of graphemes starting at `start` ends.
 *
 * @returns The exclusive end index. It is always greater than `start` when
 * `start` is in range: a single grapheme that does not fit cannot be split any
 * further, so it is accepted on its own.
 */
function fittingPrefixEnd(
  graphemes: readonly string[],
  start: number,
  checkFit: CheckFitFunction
): number {
  let prefix = '';
  let end = start;
  while (end < graphemes.length) {
    const candidate = prefix + graphemes[end];
    if (!checkFit(candidate)) {
      break;
    }
    prefix = candidate;
    end++;
  }
  // Guarantee progress for an oversize first grapheme (and stay at `start` when out of range).
  return Math.max(end, Math.min(start + 1, graphemes.length));
}

/**
 * Splits a single word into as many pieces as needed so that each piece fits.
 *
 * @param checkFit - Predicate deciding whether a candidate piece fits.
 * @param word - The word to split.
 * @returns The pieces in order; `[]` for an empty word. A grapheme that does
 * not fit even on its own becomes a piece by itself.
 * @example
 * // checkFit accepts up to 3 graphemes
 * splitWordToFitWidth(checkFit, 'hello'); // ['hel', 'lo']
 */
export function splitWordToFitWidth(checkFit: CheckFitFunction, word: string): string[] {
  const graphemes = splitTextToChars(word);
  const pieces: string[] = [];
  let start = 0;
  while (start < graphemes.length) {
    const end = fittingPrefixEnd(graphemes, start, checkFit);
    pieces.push(graphemes.slice(start, end).join(''));
    start = end;
  }
  return pieces;
}

/**
 * Splits a word once: the longest fitting prefix and whatever is left.
 *
 * @param checkFit - Predicate deciding whether a candidate prefix fits.
 * @param word - The word to split.
 * @returns `[prefix, rest]`. `rest` is `''` when the whole word fits, and both
 * are `''` for an empty word. If even the first grapheme does not fit, it is
 * returned as the prefix unchanged.
 */
export function splitWordToFitWidth2(checkFit: CheckFitFunction, word: string): [string, string] {
  const graphemes = splitTextToChars(word);
  const end = fittingPrefixEnd(graphemes, 0, checkFit);
  return [graphemes.slice(0, end).join(''), graphemes.slice(end).join('')];
}

/**
 * Accumulator-style variant of {@link splitLineToFitWidthLoop}.
 *
 * @param words - Words still to be laid out, in order.
 * @param checkFit - Predicate deciding whether a candidate line fits.
 * @param lines - Lines produced so far; new lines are appended to this array.
 * @param popped - Words deferred by a previous step; they are laid out after `words`.
 * @returns `lines` (the same array instance), with the new lines appended.
 */
export function splitLineToFitWidth(
  words: string[],
  checkFit: CheckFitFunction,
  lines: string[] = [],
  popped: string[] = []
): string[] {
  // Delegating keeps both entry points in agreement and avoids the unbounded
  // recursion of the word-popping approach.
  lines.push(...splitLineToFitWidthLoop([...words, ...popped], checkFit));
  return lines;
}

/**
 * Greedily packs words (joined by a single space) into lines that fit. A word
 * that does not fit on an empty line is split by graphemes; its remainder is
 * then allowed to share a line with the following words.
 *
 * @param words - Words in order; the array is not modified.
 * @param checkFit - Predicate deciding whether a candidate line fits.
 * @returns The wrapped lines; `[]` when `words` is empty.
 * @example
 * // checkFit accepts up to `width` graphemes; words = str.split(' ')
 * // ''               width 1  -> ['']
 * // 'hello world'    width 5  -> ['hello', 'world']
 * // 'hello world'    width 7  -> ['hello', 'world']
 * // 'hello world'    width 20 -> ['hello world']
 * // 'hello world'    width 3  -> ['hel', 'lo', 'wor', 'ld']
 * // 'hello 12 world' width 4  -> ['hell', 'o 12', 'worl', 'd']
 * // '🏳️‍⚧️🏳️‍🌈👩🏾‍❤️‍👨🏻'            width 1  -> ['🏳️‍⚧️', '🏳️‍🌈', '👩🏾‍❤️‍👨🏻']
 * // 'Flag 🏳️‍⚧️ this 🏳️‍🌈' width 6  -> ['Flag 🏳️‍⚧️', 'this 🏳️‍🌈']
 */
export function splitLineToFitWidthLoop(words: string[], checkFit: CheckFitFunction): string[] {
  // Work on a reversed copy used as a stack: the next word is on top, and
  // pushing a word back is O(1) without touching the caller's array.
  const pending = [...words].reverse();
  const lines: string[] = [];
  // `undefined` means "no word on the current line yet"; '' is a legitimate
  // line made of one empty word, so the two must stay distinguishable.
  let line: string | undefined;

  for (let word = pending.pop(); word !== undefined; word = pending.pop()) {
    const candidate = line === undefined ? word : `${line} ${word}`;
    if (checkFit(candidate)) {
      line = candidate;
    } else if (line !== undefined) {
      // The line is full: emit it and retry this word on a fresh line.
      lines.push(line);
      line = undefined;
      pending.push(word);
    } else {
      // The word alone is too wide: emit what fits and queue the remainder.
      const [head, rest] = splitWordToFitWidth2(checkFit, word);
      lines.push(head);
      if (rest) {
        pending.push(rest);
      }
    }
  }

  if (line !== undefined) {
    lines.push(line);
  }
  return lines;
}