Add splitText

Ronid1 · Jun 9, 2023 · c41df42 · c41df42
1 parent ac488dd
commit c41df42
Show file tree

Hide file tree

Showing 3 changed files with 173 additions and 1 deletion.
diff --git a/packages/mermaid/src/rendering-util/splitText.spec.ts b/packages/mermaid/src/rendering-util/splitText.spec.ts
@@ -0,0 +1,37 @@
import { describe, expect, it } from 'vitest';
import { splitLineToFitWidthLoop, splitTextToChars, type CheckFitFunction } from './splitText.js';

/** An input string paired with the graphemes it is expected to split into. */
interface GraphemeCase {
  str: string;
  split: string[];
}

/** An input string, the maximum line width in graphemes, and the expected lines. */
interface LineCase {
  str: string;
  width: number;
  split: string[];
}

const graphemeCases: GraphemeCase[] = [
  { str: '', split: [] },
  { str: '🏳️‍⚧️🏳️‍🌈👩🏾‍❤️‍👨🏻', split: ['🏳️‍⚧️', '🏳️‍🌈', '👩🏾‍❤️‍👨🏻'] },
  { str: 'ok', split: ['o', 'k'] },
];

const lineCases: LineCase[] = [
  // empty string
  { str: '', width: 1, split: [''] },
  // Width >= Individual words
  { str: 'hello world', width: 5, split: ['hello', 'world'] },
  { str: 'hello world', width: 7, split: ['hello', 'world'] },
  // width > full line
  { str: 'hello world', width: 20, split: ['hello world'] },
  // width < individual word
  { str: 'hello world', width: 3, split: ['hel', 'lo', 'wor', 'ld'] },
  { str: 'hello 12 world', width: 4, split: ['hell', 'o 12', 'worl', 'd'] },
  { str: '🏳️‍⚧️🏳️‍🌈👩🏾‍❤️‍👨🏻', width: 1, split: ['🏳️‍⚧️', '🏳️‍🌈', '👩🏾‍❤️‍👨🏻'] },
  { str: 'Flag 🏳️‍⚧️ this 🏳️‍🌈', width: 6, split: ['Flag 🏳️‍⚧️', 'this 🏳️‍🌈'] },
];

describe('splitText', () => {
  it.each(graphemeCases)('should split $str into graphemes', ({ str, split }) => {
    const graphemes = splitTextToChars(str);
    expect(graphemes).toEqual(split);
  });
});

describe('split lines', () => {
  it.each(lineCases)('should split $str into lines of $width characters', (testCase) => {
    // A line "fits" when it holds at most `width` graphemes.
    const fitsWidth: CheckFitFunction = (text: string) =>
      splitTextToChars(text).length <= testCase.width;
    const words = testCase.str.split(' ');
    expect(splitLineToFitWidthLoop(words, fitsWidth)).toEqual(testCase.split);
  });
});
diff --git a/packages/mermaid/src/rendering-util/splitText.ts b/packages/mermaid/src/rendering-util/splitText.ts
@@ -0,0 +1,135 @@
/**
 * Predicate that reports whether a piece of text fits into the available width.
 * Returns `true` when `text` fits.
 */
export type CheckFitFunction = (text: string) => boolean;

/**
 * Splits a string into graphemes if `Intl.Segmenter` is available, otherwise into
 * the code points produced by iterating the string.
 *
 * @param text - The text to split.
 * @returns The individual graphemes (or code points), in order. Empty for an empty string.
 */
export function splitTextToChars(text: string): string[] {
  if (Intl.Segmenter) {
    return [...new Intl.Segmenter().segment(text)].map((s) => s.segment);
  }
  return [...text];
}

/**
 * Counts how many leading entries of `characters` belong to the first segment.
 *
 * Characters are appended one at a time for as long as the accumulated text passes
 * `checkFit`. A first character that does not fit on its own cannot be split any
 * further, so it is taken anyway; this guarantees callers always make progress.
 */
function countFittingCharacters(checkFit: CheckFitFunction, characters: readonly string[]): number {
  let fitted = '';
  let count = 0;
  for (const character of characters) {
    const candidate = fitted + character;
    if (!checkFit(candidate)) {
      break;
    }
    fitted = candidate;
    count++;
  }
  return count === 0 && characters.length > 0 ? 1 : count;
}

/**
 * Breaks a single word into consecutive segments that each pass `checkFit`.
 *
 * A grapheme that does not fit on its own is emitted as its own segment, and the
 * remainder of the word is still split normally.
 *
 * @param checkFit - Predicate deciding whether a candidate segment fits.
 * @param word - The word to break up.
 * @returns Every segment of the word, in order. Empty for an empty word.
 */
export function splitWordToFitWidth(checkFit: CheckFitFunction, word: string): string[] {
  const segments: string[] = [];
  let remaining = splitTextToChars(word);
  while (remaining.length > 0) {
    const count = countFittingCharacters(checkFit, remaining);
    segments.push(remaining.slice(0, count).join(''));
    remaining = remaining.slice(count);
  }
  return segments;
}

/**
 * Splits a word once: into the leading part that fits and whatever is left over.
 *
 * @param checkFit - Predicate deciding whether a candidate segment fits.
 * @param word - The word to split.
 * @returns A `[fitting, rest]` pair. `rest` is `''` when the whole word fits, and
 *   both entries are `''` for an empty word. If even the first grapheme does not
 *   fit, it is returned alone as `fitting`.
 */
export function splitWordToFitWidth2(checkFit: CheckFitFunction, word: string): [string, string] {
  const characters = splitTextToChars(word);
  const count = countFittingCharacters(checkFit, characters);
  return [characters.slice(0, count).join(''), characters.slice(count).join('')];
}

/**
 * Wraps words into lines that pass `checkFit`, appending the result to `lines`.
 *
 * This produces the same wrapping as {@link splitLineToFitWidthLoop}; the extra
 * parameters are kept for compatibility with the original recursive signature.
 *
 * @param words - Words to wrap, in order.
 * @param checkFit - Predicate deciding whether a candidate line fits.
 * @param lines - Accumulator that receives the new lines; it is mutated and returned.
 * @param popped - Words that follow `words` and still need to be wrapped.
 * @returns `lines`, with the wrapped lines appended.
 */
export function splitLineToFitWidth(
  words: string[],
  checkFit: CheckFitFunction,
  lines: string[] = [],
  popped: string[] = []
): string[] {
  for (const line of splitLineToFitWidthLoop([...words, ...popped], checkFit)) {
    lines.push(line);
  }
  return lines;
}

/**
 * Greedily wraps words into lines that pass `checkFit`.
 *
 * Words are joined with single spaces for as long as the line still fits. A word
 * that does not fit on an empty line is broken with {@link splitWordToFitWidth2},
 * and its remainder is treated as the next word. The input array is not modified.
 *
 * @param words - Words to wrap, in order.
 * @param checkFit - Predicate deciding whether a candidate line fits.
 * @returns The wrapped lines. Empty when `words` is empty.
 */
export function splitLineToFitWidthLoop(words: string[], checkFit: CheckFitFunction): string[] {
  const lines: string[] = [];
  // Reversed copy used as a stack: the next word is always on top, and putting a
  // word back is a cheap push instead of an unshift on the caller's array.
  const pending = [...words].reverse();
  let currentLine: string[] = [];

  for (let word = pending.pop(); word !== undefined; word = pending.pop()) {
    if (checkFit([...currentLine, word].join(' '))) {
      currentLine.push(word);
    } else if (currentLine.length > 0) {
      // The line is full: flush it and retry this word on a fresh line.
      lines.push(currentLine.join(' '));
      currentLine = [];
      pending.push(word);
    } else {
      // The word alone is too long for a line, so break it up.
      const [head, rest] = splitWordToFitWidth2(checkFit, word);
      lines.push(head);
      if (rest) {
        pending.push(rest);
      }
    }
  }

  if (currentLine.length > 0) {
    lines.push(currentLine.join(' '));
  }
  return lines;
}
diff --git a/tsconfig.json b/tsconfig.json
@@ -14,7 +14,7 @@
     "target": "ES6" /* Set the JavaScript language version for emitted JavaScript and include compatible library declarations. */,
     "lib": [
       "DOM",
-      "ES2021"
+      "ES2022"
     ] /* Specify a set of bundled library declaration files that describe the target runtime environment. */,
     // "jsx": "preserve",                                /* Specify what JSX code is generated. */
     // "experimentalDecorators": true,                   /* Enable experimental support for TC39 stage 2 draft decorators. */