From a4f3a0a4a846d0101477d4217a09bf2b812ccd44 Mon Sep 17 00:00:00 2001 From: Rolf Heij Date: Tue, 10 Dec 2024 11:24:54 +0100 Subject: [PATCH 1/7] Add first stab at punctuation inventory --- .../contributions/localizedStrings.json | 1 + .../contributions/menus.json | 6 + .../contributions/localizedStrings.json | 15 +- .../contributions/projectSettings.json | 12 + .../src/platform-scripture/jest.config.ts | 1 + .../punctuation-inventory.component.tsx | 252 ++++++++++++++++++ .../src/inventory.web-view.tsx | 6 + extensions/src/platform-scripture/src/main.ts | 49 +++- .../src/types/platform-scripture.d.ts | 8 + package-lock.json | 12 + package.json | 1 + 11 files changed, 353 insertions(+), 10 deletions(-) create mode 100644 extensions/src/platform-scripture/src/checks/inventories/punctuation-inventory.component.tsx diff --git a/extensions/src/platform-scripture-editor/contributions/localizedStrings.json b/extensions/src/platform-scripture-editor/contributions/localizedStrings.json index 94803085bd..e61a6b0a48 100644 --- a/extensions/src/platform-scripture-editor/contributions/localizedStrings.json +++ b/extensions/src/platform-scripture-editor/contributions/localizedStrings.json @@ -13,6 +13,7 @@ "%webView_platformScriptureEditor_charactersInventory%": "Inventory: Characters...", "%webView_platformScriptureEditor_repeatedWordsInventory%": "Inventory: Repeated Words...", "%webView_platformScriptureEditor_markersInventory%": "Inventory: Markers...", + "%webView_platformScriptureEditor_punctuationInventory%": "Inventory: Punctuation...", "%webView_platformScriptureEditor_configureChecks%": "Configure Checks...", "%webView_platformScriptureEditor_showCheckResults%": "Show Check Results...", "%webView_platformScriptureEditor_publisherInfo%": "Publisher Info", diff --git a/extensions/src/platform-scripture-editor/contributions/menus.json b/extensions/src/platform-scripture-editor/contributions/menus.json index bb7fa686ff..482bfb4b5e 100644 --- a/extensions/src/platform-scripture-editor/contributions/menus.json +++ b/extensions/src/platform-scripture-editor/contributions/menus.json @@ -90,6 +90,12 @@ "order": 3, "command": "platformScripture.openMarkersInventory" }, + { + "label": "%webView_platformScriptureEditor_punctuationInventory%", + "group": "platformScriptureEditor.inventory", + "order": 4, + "command": "platformScripture.openPunctuationInventory" + }, { "label": "%webView_platformScriptureEditor_configureChecks%", "group": "platformScriptureEditor.checks", diff --git a/extensions/src/platform-scripture/contributions/localizedStrings.json b/extensions/src/platform-scripture/contributions/localizedStrings.json index e6a5ff5ad2..cdfeff3572 100644 --- a/extensions/src/platform-scripture/contributions/localizedStrings.json +++ b/extensions/src/platform-scripture/contributions/localizedStrings.json @@ -24,11 +24,16 @@ "%project_settings_platformScripture_validMarkers_description%": "List of markers that are accepted in this project.", "%project_settings_platformScripture_invalidMarkers_label%": "Invalid Markers", "%project_settings_platformScripture_invalidMarkers_description%": "List of markers that are not accepted in this project.", + "%project_settings_platformScripture_validPunctuation_label%": "Valid Punctuation", + "%project_settings_platformScripture_validPunctuation_description%": "List of punctuation characters that are accepted in this project.", + "%project_settings_platformScripture_invalidPunctuation_label%": "Invalid Punctuation", + "%project_settings_platformScripture_invalidPunctuation_description%": "List of punctuation characters that are not accepted in this project.", "%webView_platformScripture_tools%": "Tools", "%webView_platformScripture_showCheckResults%": "Show Check Results...", "%webView_characterInventory_title%": "Character Inventory: {projectName}", - "%webView_repeatedWordsInventory_title%": "Repeated Words Inventory: {projectName}", "%webView_markersInventory_title%": "Markers Inventory: {projectName}", + "%webView_punctuationInventory_title%": "Punctuation Inventory: {projectName}", + "%webView_repeatedWordsInventory_title%": "Repeated Words Inventory: {projectName}", "%webView_configureChecks_title%": "Configure Checks: {projectName}", "%webView_configureChecks_checks%": "Checks", "%webView_configureChecks_loadingChecks%": "Loading checks", @@ -50,14 +55,16 @@ "%webView_inventory_scope_verse%": "Current verse", "%webView_inventory_filter_text%": "Filter text...", "%webView_inventory_show_additional_items%": "Show Additional Items", - "%webView_inventory_table_header_repeated_words%": "Repeated Word", "%webView_inventory_table_header_character%": "Character", + "%webView_inventory_table_header_context%": "Context", + "%webView_inventory_table_header_count%": "Count", "%webView_inventory_table_header_marker%": "Marker", "%webView_inventory_table_header_preceding_marker%": "Preceding Marker", + "%webView_inventory_table_header_punctuation%": "Punctuation", + "%webView_inventory_table_header_repeated_words%": "Repeated Word", + "%webView_inventory_table_header_status%": "Status", "%webView_inventory_table_header_style_name%": "Style Name", "%webView_inventory_table_header_unicode_value%": "Unicode Value", - "%webView_inventory_table_header_count%": "Count", - "%webView_inventory_table_header_status%": "Status", "%webView_inventory_show_preceding_marker%": "Show Preceding Marker", "%webView_inventory_unknown_marker%": "Unknown Marker", "%webView_inventory_occurrences_table_header_reference%": "Reference", diff --git a/extensions/src/platform-scripture/contributions/projectSettings.json b/extensions/src/platform-scripture/contributions/projectSettings.json index 220051cbea..a2b944d0e4 100644 --- a/extensions/src/platform-scripture/contributions/projectSettings.json +++ b/extensions/src/platform-scripture/contributions/projectSettings.json @@ -49,6 +49,18 @@ "description": "%project_settings_platformScripture_invalidMarkers_description%", "default": "", "includeProjectInterfaces": ["Paratext", "Scripture"] + }, + "platformScripture.validPunctuation": { + "label": "%project_settings_platformScripture_validPunctuation_label%", + "description": "%project_settings_platformScripture_validPunctuation_description%", + "default": "", + "includeProjectInterfaces": ["Paratext", "Scripture"] + }, + "platformScripture.invalidPunctuation": { + "label": "%project_settings_platformScripture_invalidPunctuation_label%", + "description": "%project_settings_platformScripture_invalidPunctuation_description%", + "default": "", + "includeProjectInterfaces": ["Paratext", "Scripture"] } } } diff --git a/extensions/src/platform-scripture/jest.config.ts b/extensions/src/platform-scripture/jest.config.ts index 786ba1fdde..35b70302e9 100644 --- a/extensions/src/platform-scripture/jest.config.ts +++ b/extensions/src/platform-scripture/jest.config.ts @@ -6,6 +6,7 @@ module.exports = { transform: { '\\.(ts|tsx|js|jsx)$': 'ts-jest', }, + transformIgnorePatterns: ['node_modules/(?!@thetypefounders/unicode-classifier/.*)'], moduleNameMapper: { '^@platform-scripture/(.*)$': '$1', }, diff --git a/extensions/src/platform-scripture/src/checks/inventories/punctuation-inventory.component.tsx b/extensions/src/platform-scripture/src/checks/inventories/punctuation-inventory.component.tsx new file mode 100644 index 0000000000..1ff98f47c8 --- /dev/null +++ b/extensions/src/platform-scripture/src/checks/inventories/punctuation-inventory.component.tsx @@ -0,0 +1,252 @@ +import { + deepEqual, + LanguageStrings, + LocalizeKey, + ScriptureReference, + substring, +} from 'platform-bible-utils'; +import { + Button, + ColumnDef, + Inventory, + InventoryItemOccurrence, + InventoryTableData, + Scope, + getBookNumFromId, + getLinesFromUSFM, + getNumberFromUSFM, + getStatusForItem, + inventoryCountColumn, + inventoryItemColumn, + inventoryStatusColumn, +} from 'platform-bible-react'; +import { useLocalizedStrings } from '@papi/frontend/react'; +import { useMemo } from 'react'; + +const PUNCTUATION_INVENTORY_STRING_KEYS: LocalizeKey[] = [ + '%webView_inventory_table_header_count%', + '%webView_inventory_table_header_context%', + '%webView_inventory_table_header_punctuation%', + '%webView_inventory_table_header_status%', + '%webView_inventory_table_header_unicode_value%', +]; + +type PunctuationContext = 'Word Initial' | 'Word Medial' | 'Word Final' | 'Isolated' | 'Unknown'; + +const extractPunctuation = ( + text: string | undefined, + scriptureRef: ScriptureReference, + approvedItems: string[], + unapprovedItems: string[], +): InventoryTableData[] => { + if (!text) return []; + + const tableData: InventoryTableData[] = []; + + let currentBook: number | undefined = scriptureRef.bookNum; + let currentChapter: number | undefined = scriptureRef.chapterNum; + let currentVerse: number | undefined = scriptureRef.verseNum; + + // Matches all punctuation characters + const punctuationRegex: RegExp = /[\p{P}]/gu; + + let punctuationContext: PunctuationContext = 'Unknown'; + + const lines = getLinesFromUSFM(text); + + lines.forEach((line: string) => { + if (line.startsWith('\\id')) { + currentBook = getBookNumFromId(line); + currentChapter = 0; + currentVerse = 0; + } + if (line.startsWith('\\c')) { + currentChapter = getNumberFromUSFM(line); + currentVerse = 0; + } + if (line.startsWith('\\v')) { + currentVerse = getNumberFromUSFM(line); + if (currentChapter === 0) { + currentChapter = scriptureRef.chapterNum; + } + } + + let match: RegExpExecArray | undefined = punctuationRegex.exec(line) ?? undefined; + while (match) { + // For this code to work correctly we need our regular expression to match a single marker + // on each per match + if (match.length > 1) + throw new Error('Multiple punctuation characters found in a single match'); + + const item = match[0]; + [precedingMarker] = match; + const itemIndex = match.index; + const existingItem = tableData.find((tableEntry) => deepEqual(tableEntry.items, items)); + const newReference: InventoryItemOccurrence = { + reference: { + bookNum: currentBook !== undefined ? currentBook : -1, + chapterNum: currentChapter !== undefined ? currentChapter : -1, + verseNum: currentVerse !== undefined ? currentVerse : -1, + }, + text: substring(line, Math.max(0, itemIndex - 25), Math.min(itemIndex + 25, line.length)), + }; + if (existingItem) { + existingItem.count += 1; + existingItem.occurrences.push(newReference); + } else { + const newItem: InventoryTableData = { + items, + count: 1, + status: getStatusForItem(items[0], approvedItems, unapprovedItems), + occurrences: [newReference], + }; + tableData.push(newItem); + } + + match = punctuationRegex.exec(line) ?? undefined; + } + }); + + return tableData; +}; + +/** + * Function that constructs the column for the inventory component + * + * @param itemLabel Localized label for the item column (e.g. 'Character', 'Repeated Word', etc.) + * @param unicodeValueLabel Localized label for the Unicode Value column + * @param countLabel Localized label for the count column + * @param statusLabel Localized label for the status column + * @param approvedItems Array of approved items, typically as defined in `Settings.xml` + * @param onApprovedItemsChange Callback function that stores the updated list of approved items + * @param unapprovedItems Array of unapproved items, typically as defined in `Settings.xml` + * @param onUnapprovedItemsChange Callback function that stores the updated list of unapproved items + * @returns An array of columns that can be passed into the inventory component + */ +const createColumns = ( + itemLabel: string, + unicodeValueLabel: string, + contextLabel: string, + countLabel: string, + statusLabel: string, + approvedItems: string[], + onApprovedItemsChange: (items: string[]) => void, + unapprovedItems: string[], + onUnapprovedItemsChange: (items: string[]) => void, +): ColumnDef[] => [ + inventoryItemColumn(itemLabel), + { + accessorKey: 'unicodeValue', + header: () => , + cell: ({ row }) => { + const item: string = row.getValue('item'); + return item.charCodeAt(0).toString(16).toUpperCase().padStart(4, '0'); + }, + }, + inventoryStatusColumn( + statusLabel, + approvedItems, + onApprovedItemsChange, + unapprovedItems, + onUnapprovedItemsChange, + ), + inventoryCountColumn(countLabel), + { + accessorKey: 'context', + header: () => , + cell: () => { + return 'TBD'; + }, + }, +]; + +type PunctuationInventoryProps = { + scriptureReference: ScriptureReference; + setScriptureReference: (scriptureReference: ScriptureReference) => void; + localizedStrings: LanguageStrings; + approvedItems: string[]; + onApprovedItemsChange: (items: string[]) => void; + unapprovedItems: string[]; + onUnapprovedItemsChange: (items: string[]) => void; + text: string | undefined; + scope: Scope; + onScopeChange: (scope: Scope) => void; +}; + +function PunctuationInventory({ + scriptureReference, + setScriptureReference, + localizedStrings, + approvedItems, + onApprovedItemsChange, + unapprovedItems, + onUnapprovedItemsChange, + text, + scope, + onScopeChange, +}: PunctuationInventoryProps) { + const [punctuationInventoryStrings] = useLocalizedStrings(PUNCTUATION_INVENTORY_STRING_KEYS); + const itemLabel = useMemo( + () => punctuationInventoryStrings['%webView_inventory_table_header_punctuation%'], + [punctuationInventoryStrings], + ); + const unicodeValueLabel = useMemo( + () => punctuationInventoryStrings['%webView_inventory_table_header_unicode_value%'], + [punctuationInventoryStrings], + ); + const contextLabel = useMemo( + () => punctuationInventoryStrings['%webView_inventory_table_header_context%'], + [punctuationInventoryStrings], + ); + const countLabel = useMemo( + () => punctuationInventoryStrings['%webView_inventory_table_header_count%'], + [punctuationInventoryStrings], + ); + const statusLabel = useMemo( + () => punctuationInventoryStrings['%webView_inventory_table_header_status%'], + [punctuationInventoryStrings], + ); + + const columns = useMemo( + () => + createColumns( + itemLabel, + unicodeValueLabel, + contextLabel, + countLabel, + statusLabel, + approvedItems, + onApprovedItemsChange, + unapprovedItems, + onUnapprovedItemsChange, + ), + [ + itemLabel, + unicodeValueLabel, + contextLabel, + countLabel, + statusLabel, + approvedItems, + onApprovedItemsChange, + unapprovedItems, + onUnapprovedItemsChange, + ], + ); + + return ( + + ); +} + +export default PunctuationInventory; diff --git a/extensions/src/platform-scripture/src/inventory.web-view.tsx b/extensions/src/platform-scripture/src/inventory.web-view.tsx index b333f6c824..e51378cc1c 100644 --- a/extensions/src/platform-scripture/src/inventory.web-view.tsx +++ b/extensions/src/platform-scripture/src/inventory.web-view.tsx @@ -9,6 +9,7 @@ import papi from '@papi/frontend'; import CharacterInventory from './checks/inventories/character-inventory.component'; import RepeatedWordsInventory from './checks/inventories/repeated-words-inventory.component'; import MarkerInventory from './checks/inventories/marker-inventory.component'; +import PunctuationInventory from './checks/inventories/punctuation-inventory.component'; /** * Get scripture text for the provided scope and reference for the specified projectId @@ -78,6 +79,11 @@ global.webViewComponent = function InventoryWebView({ validItemsSetting = 'platformScripture.validMarkers'; invalidItemsSetting = 'platformScripture.invalidMarkers'; break; + case 'platformScripture.punctuationInventory': + InventoryVariant = PunctuationInventory; + validItemsSetting = 'platformScripture.validPunctuation'; + invalidItemsSetting = 'platformScripture.invalidPunctuation'; + break; default: throw new Error(`${webViewType} is not a valid inventory type`); } diff --git a/extensions/src/platform-scripture/src/main.ts b/extensions/src/platform-scripture/src/main.ts index a3978dc866..56a7bd2d08 100644 --- a/extensions/src/platform-scripture/src/main.ts +++ b/extensions/src/platform-scripture/src/main.ts @@ -19,6 +19,7 @@ import CheckResultsWebViewProvider, { const characterInventoryWebViewType = 'platformScripture.characterInventory'; const repeatedWordsInventoryWebViewType = 'platformScripture.repeatedWordsInventory'; const markersInventoryWebViewType = 'platformScripture.markersInventory'; +const punctuationInventoryWebViewType = 'platformScripture.punctuationInventory'; // #region Project Setting Validators @@ -47,6 +48,11 @@ const markersValidator: ProjectSettingValidator< 'platformScripture.validMarkers' | 'platformScripture.invalidMarkers' > = async (newValue) => typeof newValue === 'string'; +// A marker can be any string value +const punctuationValidator: ProjectSettingValidator< + 'platformScripture.validPunctuation' | 'platformScripture.invalidPunctuation' +> = async (newValue) => typeof newValue === 'string'; + // #endregion async function openPlatformCharactersInventory( @@ -67,6 +73,12 @@ async function openPlatformMarkersInventory( return openInventory(webViewId, markersInventoryWebViewType); } +async function openPlatformPunctuationInventory( + webViewId: string | undefined, +): Promise { + return openInventory(webViewId, punctuationInventoryWebViewType); +} + async function openInventory( webViewId: string | undefined, webViewType: string, @@ -157,6 +169,10 @@ export async function activate(context: ExecutionActivationContext) { '%webView_markersInventory_title%', markersInventoryWebViewType, ); + const punctuationInventoryWebViewProvider = new InventoryWebViewProvider( + '%webView_punctuationInventory_title%', + punctuationInventoryWebViewType, + ); const checkResultsWebViewProvider = new CheckResultsWebViewProvider(); const configureChecksWebViewProvider = new ConfigureChecksWebViewProvider( '%webView_configureChecks_title%', @@ -198,7 +214,7 @@ export async function activate(context: ExecutionActivationContext) { 'platformScripture.openCharactersInventory', openPlatformCharactersInventory, ); - const characterInventoryWebViewProviderPromise = papi.webViewProviders.register( + const characterInventoryWebViewProviderPromise = papi.webViewProviders.registerWebViewProvider( characterInventoryWebViewType, characterInventoryWebViewProvider, ); @@ -214,10 +230,11 @@ export async function activate(context: ExecutionActivationContext) { 'platformScripture.openRepeatedWordsInventory', openPlatformRepeatedWordsInventory, ); - const repeatableWordsInventoryWebViewProviderPromise = papi.webViewProviders.register( - repeatedWordsInventoryWebViewType, - repeatedWordsInventoryWebViewProvider, - ); + const repeatableWordsInventoryWebViewProviderPromise = + papi.webViewProviders.registerWebViewProvider( + repeatedWordsInventoryWebViewType, + repeatedWordsInventoryWebViewProvider, + ); const validMarkersPromise = papi.projectSettings.registerValidator( 'platformScripture.validMarkers', markersValidator, @@ -230,10 +247,26 @@ export async function activate(context: ExecutionActivationContext) { 'platformScripture.openMarkersInventory', openPlatformMarkersInventory, ); - const markersInventoryWebViewProviderPromise = papi.webViewProviders.register( + const markersInventoryWebViewProviderPromise = papi.webViewProviders.registerWebViewProvider( markersInventoryWebViewType, markersInventoryWebViewProvider, ); + const validPunctuationPromise = papi.projectSettings.registerValidator( + 'platformScripture.validPunctuation', + punctuationValidator, + ); + const invalidPunctuationPromise = papi.projectSettings.registerValidator( + 'platformScripture.invalidPunctuation', + punctuationValidator, + ); + const openPunctuationInventoryPromise = papi.commands.registerCommand( + 'platformScripture.openPunctuationInventory', + openPlatformPunctuationInventory, + ); + const punctuationInventoryWebViewProviderPromise = papi.webViewProviders.registerWebViewProvider( + punctuationInventoryWebViewType, + punctuationInventoryWebViewProvider, + ); const configureChecksPromise = papi.commands.registerCommand( 'platformScripture.openConfigureChecks', configureChecks, @@ -272,6 +305,10 @@ export async function activate(context: ExecutionActivationContext) { await invalidMarkersPromise, await openMarkersInventoryPromise, await markersInventoryWebViewProviderPromise, + await validPunctuationPromise, + await invalidPunctuationPromise, + await openPunctuationInventoryPromise, + await punctuationInventoryWebViewProviderPromise, await configureChecksPromise, await configureChecksWebViewProviderPromise, await showCheckResultsPromise, diff --git a/extensions/src/platform-scripture/src/types/platform-scripture.d.ts b/extensions/src/platform-scripture/src/types/platform-scripture.d.ts index 3b72b135dc..47430eda7d 100644 --- a/extensions/src/platform-scripture/src/types/platform-scripture.d.ts +++ b/extensions/src/platform-scripture/src/types/platform-scripture.d.ts @@ -850,6 +850,10 @@ declare module 'papi-shared-types' { projectId?: string | undefined, ) => Promise; + 'platformScripture.openPunctuationInventory': ( + projectId?: string | undefined, + ) => Promise; + 'platformScripture.openConfigureChecks': ( projectId?: string | undefined, ) => Promise; @@ -900,5 +904,9 @@ declare module 'papi-shared-types' { 'platformScripture.validMarkers': string; 'platformScripture.invalidMarkers': string; + + 'platformScripture.validPunctuation': string; + + 'platformScripture.invalidPunctuation': string; } } diff --git a/package-lock.json b/package-lock.json index 91b00293b5..99182cc668 100644 --- a/package-lock.json +++ b/package-lock.json @@ -17,6 +17,7 @@ "@mui/icons-material": "^5.15.10", "@mui/material": "^5.15.10", "@sillsdev/scripture": "^2.0.2", + "@thetypefounders/unicode-classifier": "^1.1.0", "ajv": "^8.17.1", "chalk": "^4.1.2", "chokidar": "^3.6.0", @@ -11743,6 +11744,12 @@ "@testing-library/dom": ">=7.21.4" } }, + "node_modules/@thetypefounders/unicode-classifier": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/@thetypefounders/unicode-classifier/-/unicode-classifier-1.1.0.tgz", + "integrity": "sha512-rFnGSjmHvRKZmwEyJh/7iSekphnVsHWMhHgXQqzdlGepTjrrtHGz34L0m/i94KI1FL8Cj4OscvPKNXvwIc+fFQ==", + "license": "Apache-2.0" + }, "node_modules/@tootallnate/once": { "version": "2.0.0", "dev": true, @@ -39238,6 +39245,11 @@ "dev": true, "requires": {} }, + "@thetypefounders/unicode-classifier": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/@thetypefounders/unicode-classifier/-/unicode-classifier-1.1.0.tgz", + "integrity": "sha512-rFnGSjmHvRKZmwEyJh/7iSekphnVsHWMhHgXQqzdlGepTjrrtHGz34L0m/i94KI1FL8Cj4OscvPKNXvwIc+fFQ==" + }, "@tootallnate/once": { "version": "2.0.0", "dev": true diff --git a/package.json b/package.json index bebddf06d1..9c3454ce5a 100644 --- a/package.json +++ b/package.json @@ -109,6 +109,7 @@ "@mui/icons-material": "^5.15.10", "@mui/material": "^5.15.10", "@sillsdev/scripture": "^2.0.2", + "@thetypefounders/unicode-classifier": "^1.1.0", "ajv": "^8.17.1", "chalk": "^4.1.2", "chokidar": "^3.6.0", From b10a6eb361692c9ecebaf4048e1c774f22c6c84f Mon Sep 17 00:00:00 2001 From: Rolf Heij Date: Wed, 11 Dec 2024 13:13:18 +0100 Subject: [PATCH 2/7] Single punctuation works --- .../contributions/localizedStrings.json | 4 + .../punctuation-inventory.component.tsx | 139 ++++++++++++++---- 2 files changed, 117 insertions(+), 26 deletions(-) diff --git a/extensions/src/platform-scripture/contributions/localizedStrings.json b/extensions/src/platform-scripture/contributions/localizedStrings.json index cdfeff3572..c01e09702c 100644 --- a/extensions/src/platform-scripture/contributions/localizedStrings.json +++ b/extensions/src/platform-scripture/contributions/localizedStrings.json @@ -65,6 +65,10 @@ "%webView_inventory_table_header_status%": "Status", "%webView_inventory_table_header_style_name%": "Style Name", "%webView_inventory_table_header_unicode_value%": "Unicode Value", + "%webView_inventory_table_punctuation_context_isolated%": "Isolated", + "%webView_inventory_table_punctuation_context_wordInitial%": "Word Initial", + "%webView_inventory_table_punctuation_context_wordFinal%": "Word Final", + "%webView_inventory_table_punctuation_context_wordMedial%": "Word Medial", "%webView_inventory_show_preceding_marker%": "Show Preceding Marker", "%webView_inventory_unknown_marker%": "Unknown Marker", "%webView_inventory_occurrences_table_header_reference%": "Reference", diff --git a/extensions/src/platform-scripture/src/checks/inventories/punctuation-inventory.component.tsx b/extensions/src/platform-scripture/src/checks/inventories/punctuation-inventory.component.tsx index 1ff98f47c8..362801d103 100644 --- a/extensions/src/platform-scripture/src/checks/inventories/punctuation-inventory.component.tsx +++ b/extensions/src/platform-scripture/src/checks/inventories/punctuation-inventory.component.tsx @@ -1,10 +1,4 @@ -import { - deepEqual, - LanguageStrings, - LocalizeKey, - ScriptureReference, - substring, -} from 'platform-bible-utils'; +import { LanguageStrings, LocalizeKey, ScriptureReference, substring } from 'platform-bible-utils'; import { Button, ColumnDef, @@ -29,10 +23,12 @@ const PUNCTUATION_INVENTORY_STRING_KEYS: LocalizeKey[] = [ '%webView_inventory_table_header_punctuation%', '%webView_inventory_table_header_status%', '%webView_inventory_table_header_unicode_value%', + '%webView_inventory_table_punctuation_context_isolated%', + '%webView_inventory_table_punctuation_context_wordInitial%', + '%webView_inventory_table_punctuation_context_wordFinal%', + '%webView_inventory_table_punctuation_context_wordMedial%', ]; -type PunctuationContext = 'Word Initial' | 'Word Medial' | 'Word Final' | 'Isolated' | 'Unknown'; - const extractPunctuation = ( text: string | undefined, scriptureRef: ScriptureReference, @@ -50,8 +46,6 @@ const extractPunctuation = ( // Matches all punctuation characters const punctuationRegex: RegExp = /[\p{P}]/gu; - let punctuationContext: PunctuationContext = 'Unknown'; - const lines = getLinesFromUSFM(text); lines.forEach((line: string) => { @@ -73,15 +67,51 @@ const extractPunctuation = ( let match: RegExpExecArray | undefined = punctuationRegex.exec(line) ?? undefined; while (match) { - // For this code to work correctly we need our regular expression to match a single marker - // on each per match + // For this code to work correctly we need our regular expression to match a single + // punctuation character per match if (match.length > 1) throw new Error('Multiple punctuation characters found in a single match'); - const item = match[0]; - [precedingMarker] = match; + const punctuation = match[0]; + const { index } = match; + + let prefix = ''; + let suffix = ''; + + // Check if preceding character is whitespace or not + if (index === 0) { + prefix = '_'; + } else { + for (let i = index - 1; i >= 0; i--) { + const precedingChar = line[i]; + if (/\s/.test(precedingChar)) { + prefix = '_'; + break; + } else if (!/[\p{P}]/u.test(precedingChar)) { + break; + } + } + } + + // Check if following character is whitespace or not + if (index === line.length - 1) { + suffix = '_'; + } else { + for (let i = index + 1; i < line.length; i++) { + const followingChar = line[i]; + if (/\s/.test(followingChar)) { + suffix = '_'; + break; + } else if (!/[\p{P}]/u.test(followingChar)) { + break; + } + } + } + + const item = `${prefix}${punctuation}${suffix}`; + const itemIndex = match.index; - const existingItem = tableData.find((tableEntry) => deepEqual(tableEntry.items, items)); + const existingItem = tableData.find((tableEntry) => tableEntry.items[0] === item); const newReference: InventoryItemOccurrence = { reference: { bookNum: currentBook !== undefined ? currentBook : -1, @@ -95,9 +125,9 @@ const extractPunctuation = ( existingItem.occurrences.push(newReference); } else { const newItem: InventoryTableData = { - items, + items: [item], count: 1, - status: getStatusForItem(items[0], approvedItems, unapprovedItems), + status: getStatusForItem(item, approvedItems, unapprovedItems), occurrences: [newReference], }; tableData.push(newItem); @@ -110,6 +140,28 @@ const extractPunctuation = ( return tableData; }; +function getPunctuationContext( + item: string, + isolatedLabel: string, + wordInitialLabel: string, + wordFinalLabel: string, + wordMedialLabel: string, +): string { + if (item.startsWith('_')) { + if (item.endsWith('_')) { + return isolatedLabel; + } + + return wordInitialLabel; + } + + if (item.endsWith('_')) { + return wordFinalLabel; + } + + return wordMedialLabel; +} + /** * Function that constructs the column for the inventory component * @@ -129,6 +181,10 @@ const createColumns = ( contextLabel: string, countLabel: string, statusLabel: string, + isolatedLabel: string, + wordInitialLabel: string, + wordFinalLabel: string, + wordMedialLabel: string, approvedItems: string[], onApprovedItemsChange: (items: string[]) => void, unapprovedItems: string[], @@ -143,6 +199,21 @@ const createColumns = ( return item.charCodeAt(0).toString(16).toUpperCase().padStart(4, '0'); }, }, + inventoryCountColumn(countLabel), + { + accessorKey: 'context', + header: () => , + cell: ({ row }) => { + const item: string = row.getValue('item'); + return getPunctuationContext( + item, + isolatedLabel, + wordInitialLabel, + wordFinalLabel, + wordMedialLabel, + ); + }, + }, inventoryStatusColumn( statusLabel, approvedItems, @@ -150,14 +221,6 @@ const createColumns = ( unapprovedItems, onUnapprovedItemsChange, ), - inventoryCountColumn(countLabel), - { - accessorKey: 'context', - header: () => , - cell: () => { - return 'TBD'; - }, - }, ]; type PunctuationInventoryProps = { @@ -206,6 +269,22 @@ function PunctuationInventory({ () => punctuationInventoryStrings['%webView_inventory_table_header_status%'], [punctuationInventoryStrings], ); + const isolatedLabel = useMemo( + () => punctuationInventoryStrings['%webView_inventory_table_punctuation_context_isolated%'], + [punctuationInventoryStrings], + ); + const wordInitialLabel = useMemo( + () => punctuationInventoryStrings['%webView_inventory_table_punctuation_context_wordInitial%'], + [punctuationInventoryStrings], + ); + const wordFinalLabel = useMemo( + () => punctuationInventoryStrings['%webView_inventory_table_punctuation_context_wordFinal%'], + [punctuationInventoryStrings], + ); + const wordMedialLabel = useMemo( + () => punctuationInventoryStrings['%webView_inventory_table_punctuation_context_wordMedial%'], + [punctuationInventoryStrings], + ); const columns = useMemo( () => @@ -215,6 +294,10 @@ function PunctuationInventory({ contextLabel, countLabel, statusLabel, + isolatedLabel, + wordInitialLabel, + wordFinalLabel, + wordMedialLabel, approvedItems, onApprovedItemsChange, unapprovedItems, @@ -226,6 +309,10 @@ function PunctuationInventory({ contextLabel, countLabel, statusLabel, + isolatedLabel, + wordInitialLabel, + wordFinalLabel, + wordMedialLabel, approvedItems, onApprovedItemsChange, unapprovedItems, From 793bab3109e772ccb8c45eb11a4a96e347ea6590 Mon Sep 17 00:00:00 2001 From: Rolf Heij Date: Wed, 11 Dec 2024 14:36:46 +0100 Subject: [PATCH 3/7] Add 'show sequences' --- .../contributions/localizedStrings.json | 2 + .../punctuation-inventory.component.tsx | 229 ++++++++++-------- 2 files changed, 134 insertions(+), 97 deletions(-) diff --git a/extensions/src/platform-scripture/contributions/localizedStrings.json b/extensions/src/platform-scripture/contributions/localizedStrings.json index c01e09702c..5d47cc2d27 100644 --- a/extensions/src/platform-scripture/contributions/localizedStrings.json +++ b/extensions/src/platform-scripture/contributions/localizedStrings.json @@ -69,6 +69,8 @@ "%webView_inventory_table_punctuation_context_wordInitial%": "Word Initial", "%webView_inventory_table_punctuation_context_wordFinal%": "Word Final", "%webView_inventory_table_punctuation_context_wordMedial%": "Word Medial", + "%webView_inventory_table_punctuation_showSequences%": "Show Sequences", + "%webView_inventory_table_punctuation_showSinglePunctuationCharacter%": "Show Single Punctuation Character", "%webView_inventory_show_preceding_marker%": "Show Preceding Marker", "%webView_inventory_unknown_marker%": "Unknown Marker", "%webView_inventory_occurrences_table_header_reference%": "Reference", diff --git a/extensions/src/platform-scripture/src/checks/inventories/punctuation-inventory.component.tsx b/extensions/src/platform-scripture/src/checks/inventories/punctuation-inventory.component.tsx index 362801d103..2e29f53cff 100644 --- a/extensions/src/platform-scripture/src/checks/inventories/punctuation-inventory.component.tsx +++ b/extensions/src/platform-scripture/src/checks/inventories/punctuation-inventory.component.tsx @@ -15,7 +15,7 @@ import { inventoryStatusColumn, } from 'platform-bible-react'; import { useLocalizedStrings } from '@papi/frontend/react'; -import { useMemo } from 'react'; +import { useMemo, useState } from 'react'; const PUNCTUATION_INVENTORY_STRING_KEYS: LocalizeKey[] = [ '%webView_inventory_table_header_count%', @@ -27,117 +27,128 @@ const PUNCTUATION_INVENTORY_STRING_KEYS: LocalizeKey[] = [ '%webView_inventory_table_punctuation_context_wordInitial%', '%webView_inventory_table_punctuation_context_wordFinal%', '%webView_inventory_table_punctuation_context_wordMedial%', + '%webView_inventory_table_punctuation_showSequences%', + '%webView_inventory_table_punctuation_showSinglePunctuationCharacter%', ]; const extractPunctuation = ( + showSequences: boolean, +): (( text: string | undefined, scriptureRef: ScriptureReference, approvedItems: string[], unapprovedItems: string[], -): InventoryTableData[] => { - if (!text) return []; +) => InventoryTableData[]) => { + return ( + text: string | undefined, + scriptureRef: ScriptureReference, + approvedItems: string[], + unapprovedItems: string[], + ) => { + if (!text) return []; - const tableData: InventoryTableData[] = []; + const tableData: InventoryTableData[] = []; - let currentBook: number | undefined = scriptureRef.bookNum; - let currentChapter: number | undefined = scriptureRef.chapterNum; - let currentVerse: number | undefined = scriptureRef.verseNum; + let currentBook: number | undefined = scriptureRef.bookNum; + let currentChapter: number | undefined = scriptureRef.chapterNum; + let currentVerse: number | undefined = scriptureRef.verseNum; - // Matches all punctuation characters - const punctuationRegex: RegExp = /[\p{P}]/gu; + // Matches all punctuation characters + const punctuationRegex: RegExp = showSequences ? /[\p{P}]+/gu : /[\p{P}]/gu; - const lines = getLinesFromUSFM(text); + const lines = getLinesFromUSFM(text); - lines.forEach((line: string) => { - if (line.startsWith('\\id')) { - currentBook = getBookNumFromId(line); - currentChapter = 0; - currentVerse = 0; - } - if (line.startsWith('\\c')) { - currentChapter = getNumberFromUSFM(line); - currentVerse = 0; - } - if (line.startsWith('\\v')) { - currentVerse = getNumberFromUSFM(line); - if (currentChapter === 0) { - currentChapter = scriptureRef.chapterNum; + lines.forEach((line: string) => { + if (line.startsWith('\\id')) { + currentBook = getBookNumFromId(line); + currentChapter = 0; + currentVerse = 0; + } + if (line.startsWith('\\c')) { + currentChapter = getNumberFromUSFM(line); + currentVerse = 0; + } + if (line.startsWith('\\v')) { + currentVerse = getNumberFromUSFM(line); + if (currentChapter === 0) { + currentChapter = scriptureRef.chapterNum; + } } - } - let match: RegExpExecArray | undefined = punctuationRegex.exec(line) ?? undefined; - while (match) { - // For this code to work correctly we need our regular expression to match a single - // punctuation character per match - if (match.length > 1) - throw new Error('Multiple punctuation characters found in a single match'); + let match: RegExpExecArray | undefined = punctuationRegex.exec(line) ?? undefined; + while (match) { + // For this code to work correctly we need our regular expression to match a single + // punctuation character per match + if (match.length > 1) + throw new Error('Multiple punctuation characters found in a single match'); - const punctuation = match[0]; - const { index } = match; + const punctuation = match[0]; + const { index } = match; - let prefix = ''; - let suffix = ''; + let prefix = ''; + let suffix = ''; - // Check if preceding character is whitespace or not - if (index === 0) { - prefix = '_'; - } else { - for (let i = index - 1; i >= 0; i--) { - const precedingChar = line[i]; - if (/\s/.test(precedingChar)) { - prefix = '_'; - break; - } else if (!/[\p{P}]/u.test(precedingChar)) { - break; + // Check if preceding character is whitespace or not + if (index === 0) { + prefix = '_'; + } else { + for (let i = index - 1; i >= 0; i--) { + const precedingChar = line[i]; + if (/\s/.test(precedingChar)) { + prefix = '_'; + break; + } else if (!/[\p{P}]/u.test(precedingChar)) { + break; + } } } - } - // Check if following character is whitespace or not - if (index === line.length - 1) { - suffix = '_'; - } else { - for (let i = index + 1; i < line.length; i++) { - const followingChar = line[i]; - if (/\s/.test(followingChar)) { - suffix = '_'; - break; - } else if (!/[\p{P}]/u.test(followingChar)) { - break; + // Check if following character is whitespace or not + if (index === line.length - punctuation.length) { + suffix = '_'; + } else { + for (let i = index + punctuation.length; i < line.length; i++) { + const followingChar = line[i]; + if (/\s/.test(followingChar)) { + suffix = '_'; + break; + } else if (!/[\p{P}]/u.test(followingChar)) { + break; + } } } - } - const item = `${prefix}${punctuation}${suffix}`; + const item = `${prefix}${punctuation}${suffix}`; - const itemIndex = match.index; - const existingItem = tableData.find((tableEntry) => tableEntry.items[0] === item); - const newReference: InventoryItemOccurrence = { - reference: { - bookNum: currentBook !== undefined ? currentBook : -1, - chapterNum: currentChapter !== undefined ? currentChapter : -1, - verseNum: currentVerse !== undefined ? currentVerse : -1, - }, - text: substring(line, Math.max(0, itemIndex - 25), Math.min(itemIndex + 25, line.length)), - }; - if (existingItem) { - existingItem.count += 1; - existingItem.occurrences.push(newReference); - } else { - const newItem: InventoryTableData = { - items: [item], - count: 1, - status: getStatusForItem(item, approvedItems, unapprovedItems), - occurrences: [newReference], + const itemIndex = match.index; + const existingItem = tableData.find((tableEntry) => tableEntry.items[0] === item); + const newReference: InventoryItemOccurrence = { + reference: { + bookNum: currentBook !== undefined ? currentBook : -1, + chapterNum: currentChapter !== undefined ? currentChapter : -1, + verseNum: currentVerse !== undefined ? currentVerse : -1, + }, + text: substring(line, Math.max(0, itemIndex - 25), Math.min(itemIndex + 25, line.length)), }; - tableData.push(newItem); - } + if (existingItem) { + existingItem.count += 1; + existingItem.occurrences.push(newReference); + } else { + const newItem: InventoryTableData = { + items: [item], + count: 1, + status: getStatusForItem(item, approvedItems, unapprovedItems), + occurrences: [newReference], + }; + tableData.push(newItem); + } - match = punctuationRegex.exec(line) ?? undefined; - } - }); + match = punctuationRegex.exec(line) ?? undefined; + } + }); - return tableData; + return tableData; + }; }; function getPunctuationContext( @@ -285,6 +296,19 @@ function PunctuationInventory({ () => punctuationInventoryStrings['%webView_inventory_table_punctuation_context_wordMedial%'], [punctuationInventoryStrings], ); + const showSequencesLabel = useMemo( + () => punctuationInventoryStrings['%webView_inventory_table_punctuation_showSequences%'], + [punctuationInventoryStrings], + ); + const showSinglePunctuationCharacterLabel = useMemo( + () => + punctuationInventoryStrings[ + '%webView_inventory_table_punctuation_showSinglePunctuationCharacter%' + ], + [punctuationInventoryStrings], + ); + + const [showSequences, setShowSequences] = useState(false); const columns = useMemo( () => @@ -321,18 +345,29 @@ function PunctuationInventory({ ); return ( - +
+ + +
); } From 5dbb1e9d971dbd64ebb6b577adde298bb63ed002 Mon Sep 17 00:00:00 2001 From: Rolf Heij Date: Wed, 11 Dec 2024 14:46:29 +0100 Subject: [PATCH 4/7] Hide context column when viewing sequences --- .../punctuation-inventory.component.tsx | 55 ++++++++++++------- 1 file changed, 34 insertions(+), 21 deletions(-) diff --git a/extensions/src/platform-scripture/src/checks/inventories/punctuation-inventory.component.tsx b/extensions/src/platform-scripture/src/checks/inventories/punctuation-inventory.component.tsx index 2e29f53cff..873ed7ee5f 100644 --- a/extensions/src/platform-scripture/src/checks/inventories/punctuation-inventory.component.tsx +++ b/extensions/src/platform-scripture/src/checks/inventories/punctuation-inventory.component.tsx @@ -180,10 +180,16 @@ function getPunctuationContext( * @param unicodeValueLabel Localized label for the Unicode Value column * @param countLabel Localized label for the count column * @param statusLabel Localized label for the status column + * @param isolatedLabel Localized label for the context when punctuation appears in isolation + * @param wordInitialLabel Localized label for the context when punctuation appears word initial + * @param wordFinalLabel Localized label for the context when punctuation appears word final + * @param wordMedialLabel Localized label for the context when punctuation appears word medial * @param approvedItems Array of approved items, typically as defined in `Settings.xml` * @param onApprovedItemsChange Callback function that stores the updated list of approved items * @param unapprovedItems Array of unapproved items, typically as defined in `Settings.xml` * @param onUnapprovedItemsChange Callback function that stores the updated list of unapproved items + * @param showSequences True if inventory shows sequences of punctuation. False if it only considers + * single punctuation characters * @returns An array of columns that can be passed into the inventory component */ const createColumns = ( @@ -200,18 +206,9 @@ const createColumns = ( onApprovedItemsChange: (items: string[]) => void, unapprovedItems: string[], onUnapprovedItemsChange: (items: string[]) => void, -): ColumnDef[] => [ - inventoryItemColumn(itemLabel), - { - accessorKey: 'unicodeValue', - header: () => , - cell: ({ row }) => { - const item: string = row.getValue('item'); - return item.charCodeAt(0).toString(16).toUpperCase().padStart(4, '0'); - }, - }, - inventoryCountColumn(countLabel), - { + showSequences: boolean, +): ColumnDef[] => { + const contextColumn: ColumnDef = { accessorKey: 'context', header: () => , cell: ({ row }) => { @@ -224,15 +221,29 @@ const createColumns = ( wordMedialLabel, ); }, - }, - inventoryStatusColumn( - statusLabel, - approvedItems, - onApprovedItemsChange, - unapprovedItems, - onUnapprovedItemsChange, - ), -]; + }; + + return [ + inventoryItemColumn(itemLabel), + { + accessorKey: 'unicodeValue', + header: () => , + cell: ({ row }) => { + const item: string = row.getValue('item'); + return item.charCodeAt(0).toString(16).toUpperCase().padStart(4, '0'); + }, + }, + inventoryCountColumn(countLabel), + ...(showSequences ? [] : [contextColumn]), + inventoryStatusColumn( + statusLabel, + approvedItems, + onApprovedItemsChange, + unapprovedItems, + onUnapprovedItemsChange, + ), + ]; +}; type PunctuationInventoryProps = { scriptureReference: ScriptureReference; @@ -326,6 +337,7 @@ function PunctuationInventory({ onApprovedItemsChange, unapprovedItems, onUnapprovedItemsChange, + showSequences, ), [ itemLabel, @@ -341,6 +353,7 @@ function PunctuationInventory({ onApprovedItemsChange, unapprovedItems, onUnapprovedItemsChange, + showSequences, ], ); From 66368abb63cdc871f6e2783683cdc33458e2772e Mon Sep 17 00:00:00 2001 From: Rolf Heij Date: Mon, 16 Dec 2024 11:40:51 +0100 Subject: [PATCH 5/7] Remove package --- extensions/src/platform-scripture/jest.config.ts | 1 - package-lock.json | 12 ------------ package.json | 1 - 3 files changed, 14 deletions(-) diff --git a/extensions/src/platform-scripture/jest.config.ts b/extensions/src/platform-scripture/jest.config.ts index 35b70302e9..786ba1fdde 100644 --- a/extensions/src/platform-scripture/jest.config.ts +++ b/extensions/src/platform-scripture/jest.config.ts @@ -6,7 +6,6 @@ module.exports = { transform: { '\\.(ts|tsx|js|jsx)$': 'ts-jest', }, - transformIgnorePatterns: ['node_modules/(?!@thetypefounders/unicode-classifier/.*)'], moduleNameMapper: { '^@platform-scripture/(.*)$': '$1', }, diff --git a/package-lock.json b/package-lock.json index ec9eb68487..4c7f134c5c 100644 --- a/package-lock.json +++ b/package-lock.json @@ -17,7 +17,6 @@ "@mui/icons-material": "^5.15.10", "@mui/material": "^5.15.10", "@sillsdev/scripture": "^2.0.2", - "@thetypefounders/unicode-classifier": "^1.1.0", "ajv": "^8.17.1", "chalk": "^4.1.2", "chokidar": "^3.6.0", @@ -12049,12 +12048,6 @@ "@testing-library/dom": ">=7.21.4" } }, - "node_modules/@thetypefounders/unicode-classifier": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/@thetypefounders/unicode-classifier/-/unicode-classifier-1.1.0.tgz", - "integrity": "sha512-rFnGSjmHvRKZmwEyJh/7iSekphnVsHWMhHgXQqzdlGepTjrrtHGz34L0m/i94KI1FL8Cj4OscvPKNXvwIc+fFQ==", - "license": "Apache-2.0" - }, "node_modules/@tootallnate/once": { "version": "2.0.0", "dev": true, @@ -39680,11 +39673,6 @@ "dev": true, "requires": {} }, - "@thetypefounders/unicode-classifier": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/@thetypefounders/unicode-classifier/-/unicode-classifier-1.1.0.tgz", - "integrity": "sha512-rFnGSjmHvRKZmwEyJh/7iSekphnVsHWMhHgXQqzdlGepTjrrtHGz34L0m/i94KI1FL8Cj4OscvPKNXvwIc+fFQ==" - }, "@tootallnate/once": { "version": "2.0.0", "dev": true diff --git a/package.json b/package.json index 9c3454ce5a..bebddf06d1 100644 --- a/package.json +++ b/package.json @@ -109,7 +109,6 @@ "@mui/icons-material": "^5.15.10", "@mui/material": "^5.15.10", "@sillsdev/scripture": "^2.0.2", - "@thetypefounders/unicode-classifier": "^1.1.0", "ajv": "^8.17.1", "chalk": "^4.1.2", "chokidar": "^3.6.0", From 7899334e6a649fa3803512036d68ef0c96a6a231 Mon Sep 17 00:00:00 2001 From: Rolf Heij Date: Tue, 17 Dec 2024 10:44:07 +0100 Subject: [PATCH 6/7] Commented out logic --- .../punctuation-inventory.component.tsx | 372 +++++++++--------- 1 file changed, 189 insertions(+), 183 deletions(-) diff --git a/extensions/src/platform-scripture/src/checks/inventories/punctuation-inventory.component.tsx b/extensions/src/platform-scripture/src/checks/inventories/punctuation-inventory.component.tsx index 873ed7ee5f..d6f37d3282 100644 --- a/extensions/src/platform-scripture/src/checks/inventories/punctuation-inventory.component.tsx +++ b/extensions/src/platform-scripture/src/checks/inventories/punctuation-inventory.component.tsx @@ -1,15 +1,16 @@ -import { LanguageStrings, LocalizeKey, ScriptureReference, substring } from 'platform-bible-utils'; +// import { LanguageStrings, LocalizeKey, ScriptureReference, substring } from 'platform-bible-utils'; +import { LanguageStrings, LocalizeKey, ScriptureReference } from 'platform-bible-utils'; import { Button, ColumnDef, Inventory, - InventoryItemOccurrence, + // InventoryItemOccurrence, InventoryTableData, Scope, - getBookNumFromId, - getLinesFromUSFM, - getNumberFromUSFM, - getStatusForItem, + // getBookNumFromId, + // getLinesFromUSFM, + // getNumberFromUSFM, + // getStatusForItem, inventoryCountColumn, inventoryItemColumn, inventoryStatusColumn, @@ -31,147 +32,151 @@ const PUNCTUATION_INVENTORY_STRING_KEYS: LocalizeKey[] = [ '%webView_inventory_table_punctuation_showSinglePunctuationCharacter%', ]; -const extractPunctuation = ( - showSequences: boolean, -): (( - text: string | undefined, - scriptureRef: ScriptureReference, - approvedItems: string[], - unapprovedItems: string[], -) => InventoryTableData[]) => { - return ( - text: string | undefined, - scriptureRef: ScriptureReference, - approvedItems: string[], - unapprovedItems: string[], - ) => { - if (!text) return []; +// Logic like this is probably not needed anymore after +// https://github.com/paranext/paranext-core/issues/1384 +// is fixed. I'll leave it in here for now, just in case - const tableData: InventoryTableData[] = []; +// const extractPunctuation = ( +// showSequences: boolean, +// ): (( +// text: string | undefined, +// scriptureRef: ScriptureReference, +// approvedItems: string[], +// unapprovedItems: string[], +// ) => InventoryTableData[]) => { +// return ( +// text: string | undefined, +// scriptureRef: ScriptureReference, +// approvedItems: string[], +// unapprovedItems: string[], +// ) => { +// if (!text) return []; - let currentBook: number | undefined = scriptureRef.bookNum; - let currentChapter: number | undefined = scriptureRef.chapterNum; - let currentVerse: number | undefined = scriptureRef.verseNum; +// const tableData: InventoryTableData[] = []; - // Matches all punctuation characters - const punctuationRegex: RegExp = showSequences ? /[\p{P}]+/gu : /[\p{P}]/gu; +// let currentBook: number | undefined = scriptureRef.bookNum; +// let currentChapter: number | undefined = scriptureRef.chapterNum; +// let currentVerse: number | undefined = scriptureRef.verseNum; - const lines = getLinesFromUSFM(text); +// // Matches all punctuation characters +// const punctuationRegex: RegExp = showSequences ? /[\p{P}]+/gu : /[\p{P}]/gu; - lines.forEach((line: string) => { - if (line.startsWith('\\id')) { - currentBook = getBookNumFromId(line); - currentChapter = 0; - currentVerse = 0; - } - if (line.startsWith('\\c')) { - currentChapter = getNumberFromUSFM(line); - currentVerse = 0; - } - if (line.startsWith('\\v')) { - currentVerse = getNumberFromUSFM(line); - if (currentChapter === 0) { - currentChapter = scriptureRef.chapterNum; - } - } +// const lines = getLinesFromUSFM(text); - let match: RegExpExecArray | undefined = punctuationRegex.exec(line) ?? undefined; - while (match) { - // For this code to work correctly we need our regular expression to match a single - // punctuation character per match - if (match.length > 1) - throw new Error('Multiple punctuation characters found in a single match'); +// lines.forEach((line: string) => { +// if (line.startsWith('\\id')) { +// currentBook = getBookNumFromId(line); +// currentChapter = 0; +// currentVerse = 0; +// } +// if (line.startsWith('\\c')) { +// currentChapter = getNumberFromUSFM(line); +// currentVerse = 0; +// } +// if (line.startsWith('\\v')) { +// currentVerse = getNumberFromUSFM(line); +// if (currentChapter === 0) { +// currentChapter = scriptureRef.chapterNum; +// } +// } - const punctuation = match[0]; - const { index } = match; +// let match: RegExpExecArray | undefined = punctuationRegex.exec(line) ?? undefined; +// while (match) { +// // For this code to work correctly we need our regular expression to match a single +// // punctuation character per match +// if (match.length > 1) +// throw new Error('Multiple punctuation characters found in a single match'); - let prefix = ''; - let suffix = ''; +// const punctuation = match[0]; +// const { index } = match; - // Check if preceding character is whitespace or not - if (index === 0) { - prefix = '_'; - } else { - for (let i = index - 1; i >= 0; i--) { - const precedingChar = line[i]; - if (/\s/.test(precedingChar)) { - prefix = '_'; - break; - } else if (!/[\p{P}]/u.test(precedingChar)) { - break; - } - } - } +// let prefix = ''; +// let suffix = ''; - // Check if following character is whitespace or not - if (index === line.length - punctuation.length) { - suffix = '_'; - } else { - for (let i = index + punctuation.length; i < line.length; i++) { - const followingChar = line[i]; - if (/\s/.test(followingChar)) { - suffix = '_'; - break; - } else if (!/[\p{P}]/u.test(followingChar)) { - break; - } - } - } +// // Check if preceding character is whitespace or not +// if (index === 0) { +// prefix = '_'; +// } else { +// for (let i = index - 1; i >= 0; i--) { +// const precedingChar = line[i]; +// if (/\s/.test(precedingChar)) { +// prefix = '_'; +// break; +// } else if (!/[\p{P}]/u.test(precedingChar)) { +// break; +// } +// } +// } - const item = `${prefix}${punctuation}${suffix}`; +// // Check if following character is whitespace or not +// if (index === line.length - punctuation.length) { +// suffix = '_'; +// } else { +// for (let i = index + punctuation.length; i < line.length; i++) { +// const followingChar = line[i]; +// if (/\s/.test(followingChar)) { +// suffix = '_'; +// break; +// } else if (!/[\p{P}]/u.test(followingChar)) { +// break; +// } +// } +// } - const itemIndex = match.index; - const existingItem = tableData.find((tableEntry) => tableEntry.items[0] === item); - const newReference: InventoryItemOccurrence = { - reference: { - bookNum: currentBook !== undefined ? currentBook : -1, - chapterNum: currentChapter !== undefined ? currentChapter : -1, - verseNum: currentVerse !== undefined ? currentVerse : -1, - }, - text: substring(line, Math.max(0, itemIndex - 25), Math.min(itemIndex + 25, line.length)), - }; - if (existingItem) { - existingItem.count += 1; - existingItem.occurrences.push(newReference); - } else { - const newItem: InventoryTableData = { - items: [item], - count: 1, - status: getStatusForItem(item, approvedItems, unapprovedItems), - occurrences: [newReference], - }; - tableData.push(newItem); - } +// const item = `${prefix}${punctuation}${suffix}`; - match = punctuationRegex.exec(line) ?? undefined; - } - }); +// const itemIndex = match.index; +// const existingItem = tableData.find((tableEntry) => tableEntry.items[0] === item); +// const newReference: InventoryItemOccurrence = { +// reference: { +// bookNum: currentBook !== undefined ? currentBook : -1, +// chapterNum: currentChapter !== undefined ? currentChapter : -1, +// verseNum: currentVerse !== undefined ? currentVerse : -1, +// }, +// text: substring(line, Math.max(0, itemIndex - 25), Math.min(itemIndex + 25, line.length)), +// }; +// if (existingItem) { +// existingItem.count += 1; +// existingItem.occurrences.push(newReference); +// } else { +// const newItem: InventoryTableData = { +// items: [item], +// count: 1, +// status: getStatusForItem(item, approvedItems, unapprovedItems), +// occurrences: [newReference], +// }; +// tableData.push(newItem); +// } - return tableData; - }; -}; +// match = punctuationRegex.exec(line) ?? undefined; +// } +// }); -function getPunctuationContext( - item: string, - isolatedLabel: string, - wordInitialLabel: string, - wordFinalLabel: string, - wordMedialLabel: string, -): string { - if (item.startsWith('_')) { - if (item.endsWith('_')) { - return isolatedLabel; - } +// return tableData; +// }; +// }; - return wordInitialLabel; - } +// function getPunctuationContext( +// item: string, +// isolatedLabel: string, +// wordInitialLabel: string, +// wordFinalLabel: string, +// wordMedialLabel: string, +// ): string { +// if (item.startsWith('_')) { +// if (item.endsWith('_')) { +// return isolatedLabel; +// } - if (item.endsWith('_')) { - return wordFinalLabel; - } +// return wordInitialLabel; +// } - return wordMedialLabel; -} +// if (item.endsWith('_')) { +// return wordFinalLabel; +// } + +// return wordMedialLabel; +// } /** * Function that constructs the column for the inventory component @@ -195,33 +200,33 @@ function getPunctuationContext( const createColumns = ( itemLabel: string, unicodeValueLabel: string, - contextLabel: string, + // contextLabel: string, countLabel: string, statusLabel: string, - isolatedLabel: string, - wordInitialLabel: string, - wordFinalLabel: string, - wordMedialLabel: string, + // isolatedLabel: string, + // wordInitialLabel: string, + // wordFinalLabel: string, + // wordMedialLabel: string, approvedItems: string[], onApprovedItemsChange: (items: string[]) => void, unapprovedItems: string[], onUnapprovedItemsChange: (items: string[]) => void, - showSequences: boolean, + // showSequences: boolean, ): ColumnDef[] => { - const contextColumn: ColumnDef = { - accessorKey: 'context', - header: () => , - cell: ({ row }) => { - const item: string = row.getValue('item'); - return getPunctuationContext( - item, - isolatedLabel, - wordInitialLabel, - wordFinalLabel, - wordMedialLabel, - ); - }, - }; + // const contextColumn: ColumnDef = { + // accessorKey: 'context', + // header: () => , + // cell: ({ row }) => { + // const item: string = row.getValue('item'); + // return getPunctuationContext( + // item, + // isolatedLabel, + // wordInitialLabel, + // wordFinalLabel, + // wordMedialLabel, + // ); + // }, + // }; return [ inventoryItemColumn(itemLabel), @@ -234,7 +239,7 @@ const createColumns = ( }, }, inventoryCountColumn(countLabel), - ...(showSequences ? [] : [contextColumn]), + // ...(showSequences ? [] : [contextColumn]), inventoryStatusColumn( statusLabel, approvedItems, @@ -279,10 +284,10 @@ function PunctuationInventory({ () => punctuationInventoryStrings['%webView_inventory_table_header_unicode_value%'], [punctuationInventoryStrings], ); - const contextLabel = useMemo( - () => punctuationInventoryStrings['%webView_inventory_table_header_context%'], - [punctuationInventoryStrings], - ); + // const contextLabel = useMemo( + // () => punctuationInventoryStrings['%webView_inventory_table_header_context%'], + // [punctuationInventoryStrings], + // ); const countLabel = useMemo( () => punctuationInventoryStrings['%webView_inventory_table_header_count%'], [punctuationInventoryStrings], @@ -291,22 +296,22 @@ function PunctuationInventory({ () => punctuationInventoryStrings['%webView_inventory_table_header_status%'], [punctuationInventoryStrings], ); - const isolatedLabel = useMemo( - () => punctuationInventoryStrings['%webView_inventory_table_punctuation_context_isolated%'], - [punctuationInventoryStrings], - ); - const wordInitialLabel = useMemo( - () => punctuationInventoryStrings['%webView_inventory_table_punctuation_context_wordInitial%'], - [punctuationInventoryStrings], - ); - const wordFinalLabel = useMemo( - () => punctuationInventoryStrings['%webView_inventory_table_punctuation_context_wordFinal%'], - [punctuationInventoryStrings], - ); - const wordMedialLabel = useMemo( - () => punctuationInventoryStrings['%webView_inventory_table_punctuation_context_wordMedial%'], - [punctuationInventoryStrings], - ); + // const isolatedLabel = useMemo( + // () => punctuationInventoryStrings['%webView_inventory_table_punctuation_context_isolated%'], + // [punctuationInventoryStrings], + // ); + // const wordInitialLabel = useMemo( + // () => punctuationInventoryStrings['%webView_inventory_table_punctuation_context_wordInitial%'], + // [punctuationInventoryStrings], + // ); + // const wordFinalLabel = useMemo( + // () => punctuationInventoryStrings['%webView_inventory_table_punctuation_context_wordFinal%'], + // [punctuationInventoryStrings], + // ); + // const wordMedialLabel = useMemo( + // () => punctuationInventoryStrings['%webView_inventory_table_punctuation_context_wordMedial%'], + // [punctuationInventoryStrings], + // ); const showSequencesLabel = useMemo( () => punctuationInventoryStrings['%webView_inventory_table_punctuation_showSequences%'], [punctuationInventoryStrings], @@ -326,34 +331,34 @@ function PunctuationInventory({ createColumns( itemLabel, unicodeValueLabel, - contextLabel, + // contextLabel, countLabel, statusLabel, - isolatedLabel, - wordInitialLabel, - wordFinalLabel, - wordMedialLabel, + // isolatedLabel, + // wordInitialLabel, + // wordFinalLabel, + // wordMedialLabel, approvedItems, onApprovedItemsChange, unapprovedItems, onUnapprovedItemsChange, - showSequences, + // showSequences, ), [ itemLabel, unicodeValueLabel, - contextLabel, + // contextLabel, countLabel, statusLabel, - isolatedLabel, - wordInitialLabel, - wordFinalLabel, - wordMedialLabel, + // isolatedLabel, + // wordInitialLabel, + // wordFinalLabel, + // wordMedialLabel, approvedItems, onApprovedItemsChange, unapprovedItems, onUnapprovedItemsChange, - showSequences, + // showSequences, ], ); @@ -372,7 +377,8 @@ function PunctuationInventory({ scriptureReference={scriptureReference} setScriptureReference={setScriptureReference} localizedStrings={localizedStrings} - extractItems={extractPunctuation(showSequences)} + // extractItems={extractPunctuation(showSequences)} + extractItems={/./g} approvedItems={approvedItems} unapprovedItems={unapprovedItems} text={text} From da3533a506d601b479f3f99a9533040b04fb9561 Mon Sep 17 00:00:00 2001 From: Rolf Heij Date: Tue, 17 Dec 2024 10:51:50 +0100 Subject: [PATCH 7/7] Make regex somewhat useful --- .../checks/inventories/punctuation-inventory.component.tsx | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/extensions/src/platform-scripture/src/checks/inventories/punctuation-inventory.component.tsx b/extensions/src/platform-scripture/src/checks/inventories/punctuation-inventory.component.tsx index d6f37d3282..97fcfd1365 100644 --- a/extensions/src/platform-scripture/src/checks/inventories/punctuation-inventory.component.tsx +++ b/extensions/src/platform-scripture/src/checks/inventories/punctuation-inventory.component.tsx @@ -32,6 +32,8 @@ const PUNCTUATION_INVENTORY_STRING_KEYS: LocalizeKey[] = [ '%webView_inventory_table_punctuation_showSinglePunctuationCharacter%', ]; +const punctuationRegex: RegExp = /[\p{P}]/gu; + // Logic like this is probably not needed anymore after // https://github.com/paranext/paranext-core/issues/1384 // is fixed. I'll leave it in here for now, just in case @@ -378,7 +380,7 @@ function PunctuationInventory({ setScriptureReference={setScriptureReference} localizedStrings={localizedStrings} // extractItems={extractPunctuation(showSequences)} - extractItems={/./g} + extractItems={punctuationRegex} approvedItems={approvedItems} unapprovedItems={unapprovedItems} text={text}