diff --git a/lib/core/custom/index.ts b/lib/core/custom/index.ts index b0adaeb..11b0dff 100644 --- a/lib/core/custom/index.ts +++ b/lib/core/custom/index.ts @@ -1,13 +1,34 @@ import { ACNormal, PatternsNormal } from '@/common/ac'; import { getStringLength } from '@/common/utils'; +import DICT1 from '@/data/dict1'; let customDict: { [key: string]: string } = {}; +let customMultipleDict: string[] = []; +let customPolyphonicDict: string[] = []; + +type CustomHandleType = 'add' | 'replace'; + +type CustomDictType = 'pinyin' | 'multiple' | 'polyphonic'; + +interface CustomPinyinOptions { + /** + * @description: multiple 对于 customPinyin 补充词汇的处理 + */ + multiple?: CustomHandleType; + /** + * @description: polyphonic 对于 customPinyin 补充词汇的处理 + */ + polyphonic?: CustomHandleType; +} /** * @description: 用户自定义拼音 * @param {{ [key: string]: string }} config 用户自定义的拼音映射(支持汉字、词语、句子的映射),若匹配到该映射,优先将汉字转换为该映射 + * @param {CustomPinyinOptions} options multiple/polyphonic 对于 customPinyin 补充词汇的处理 */ -export function customPinyin(config: { [key: string]: string } = {}) { - customDict = {}; +export function customPinyin( + config: { [key: string]: string } = {}, + options?: CustomPinyinOptions +) { const keys = Object.keys(config).sort( (key1, key2) => getStringLength(key2) - getStringLength(key1) ); @@ -20,15 +41,75 @@ export function customPinyin(config: { [key: string]: string } = {}) { priority: 999 + getStringLength(key), length: key.length, })); - ACNormal.reset(); - ACNormal.buildTrie([...PatternsNormal, ...customPatterns]); + ACNormal.buildTrie(customPatterns); ACNormal.buildFailPointer(); + // add words for multiple and polyphonic + if (options?.multiple) { + addCustomConfigToDict(config, customMultipleDict, options.multiple); + } + if (options?.polyphonic) { + addCustomConfigToDict(config, customPolyphonicDict, options.polyphonic); + } +} + +function addCustomConfigToDict( + config: { [key: string]: string }, + dict: string[], + handleType: CustomHandleType +) { + for (let key in config) { + const pinyins = config[key]; + key.split('').forEach((word, index) => { + const pinyin = pinyins.split(' ')?.[index] || ''; + const wordCode = word.charCodeAt(0); + if (handleType === 'replace') { + // 直接覆盖原词典 + dict[wordCode] = pinyin; + } else if (handleType === 'add') { + // 补充至原词典 + dict[wordCode] = dict[wordCode] || DICT1[wordCode]; + if (!dict[wordCode].split(' ').includes(pinyin)) { + dict[wordCode] += ` ${pinyin}`; + dict[wordCode] = dict[wordCode].trim(); + } + } + }); + } } export const getCustomDict = () => { return customDict; }; +export const getCustomMultpileDict = () => { + return customMultipleDict; +}; + +export const getCustomPolyphonicDict = () => { + return customPolyphonicDict; +}; + +export function clearCustomDict(dict: CustomDictType | CustomDictType[]) { + if (!dict) { + console.error('The parameter of clearCustomDict is not correct.'); + return; + } + if (dict === 'pinyin' || dict.indexOf?.('pinyin') !== -1) { + Object.keys(customDict).forEach(function (key) { + delete customDict[key]; + }); + ACNormal.reset(); + ACNormal.buildTrie([...PatternsNormal]); + ACNormal.buildFailPointer(); + } + if (dict === 'multiple' || dict.indexOf?.('multiple') !== -1) { + customMultipleDict.length = 0; + } + if (dict === 'polyphonic' || dict.indexOf?.('polyphonic') !== -1) { + customPolyphonicDict.length = 0; + } +} + export function hasCustomConfig() { return !!Object.keys(customDict).length; } diff --git a/lib/core/pinyin/handle.ts b/lib/core/pinyin/handle.ts index 4eb1d61..9dc879b 100644 --- a/lib/core/pinyin/handle.ts +++ b/lib/core/pinyin/handle.ts @@ -7,7 +7,7 @@ import { } from '@/data/special'; import Surnames from '@/data/surname'; import DICT1 from '@/data/dict1'; -import { getCustomDict } from '@/core/custom'; +import { getCustomMultpileDict } from '@/core/custom'; import type { SingleWordResult, PinyinMode } from '../../common/type'; import { ACNormal } from '@/common/ac'; import { @@ -131,9 +131,9 @@ type GetMultiplePinyin = ( ) => SingleWordResult[]; const getMultiplePinyin: GetMultiplePinyin = (word, mode = 'normal') => { const wordCode = word.charCodeAt(0); - const customDict = getCustomDict(); + const customMultpileDict = getCustomMultpileDict(); const pinyin = - customDict[word] || + customMultpileDict[wordCode] || (mode === 'surname' ? Surnames[word] : '') || DICT1[wordCode] || ''; diff --git a/lib/core/polyphonic/index.ts b/lib/core/polyphonic/index.ts index 4d8b250..6db22e8 100644 --- a/lib/core/polyphonic/index.ts +++ b/lib/core/polyphonic/index.ts @@ -14,6 +14,7 @@ import { getFirstLetter, getFinalParts, } from '@/core/pinyin/handle'; +import { getCustomPolyphonicDict } from '../custom'; interface BasicOptions { /** @@ -205,7 +206,8 @@ function polyphonic( const getPolyphonicList = (text: string): SingleWordResult[] => { return text.split('').map((word) => { const wordCode = word.charCodeAt(0); - const pinyin = DICT1[wordCode] || ''; + const customPolyphonicDict = getCustomPolyphonicDict(); + const pinyin = customPolyphonicDict[wordCode] || DICT1[wordCode] || ''; return { origin: word, result: pinyin, diff --git a/lib/index.ts b/lib/index.ts index 37d13c7..10a9ed2 100644 --- a/lib/index.ts +++ b/lib/index.ts @@ -1,5 +1,5 @@ export { pinyin } from './core/pinyin'; -export { customPinyin } from './core/custom'; +export { customPinyin, clearCustomDict } from './core/custom'; export { match } from './core/match'; export { html } from './core/html'; export { polyphonic } from './core/polyphonic'; diff --git a/test/custom.test.js b/test/custom.test.js index 370c446..7c08ccf 100644 --- a/test/custom.test.js +++ b/test/custom.test.js @@ -1,12 +1,16 @@ -const { pinyin, customPinyin } = require('../'); +const { pinyin, customPinyin, clearCustomDict, polyphonic } = require('../'); const expect = require('chai').expect; +function clearAllCustomDicts() { + clearCustomDict(['pinyin', 'multiple', 'polyphonic']); +} + describe('customConfig', () => { it('[custom]custom none', () => { customPinyin(); const result = pinyin('干一行行一行'); expect(result).to.be.equal('gān yī xíng xíng yī xíng'); - customPinyin({}); + clearAllCustomDicts(); }); it('[custom]custom1', () => { @@ -15,7 +19,7 @@ describe('customConfig', () => { }); const result = pinyin('我姓能'); expect(result).to.be.equal('wǒ xìng nài'); - customPinyin({}); + clearAllCustomDicts(); }); it('[custom]custom2', () => { @@ -24,7 +28,7 @@ describe('customConfig', () => { }); const result = pinyin('爱好好多'); expect(result).to.be.equal('ài hào hǎo duō'); - customPinyin({}); + clearAllCustomDicts(); }); it('[custom]custom3', () => { @@ -33,7 +37,7 @@ describe('customConfig', () => { }); const result = pinyin('哈什玛'); expect(result).to.be.equal('hà shén mǎ'); - customPinyin({}); + clearAllCustomDicts(); }); it('[custom]custom4', () => { @@ -42,7 +46,7 @@ describe('customConfig', () => { }); const result = pinyin('暴虎冯河'); expect(result).to.be.equal('bào hǔ píng hé'); - customPinyin({}); + clearAllCustomDicts(); }); it('[custom]custom>5', () => { @@ -51,7 +55,7 @@ describe('customConfig', () => { }); const result = pinyin('干一行行一行'); expect(result).to.be.equal('gàn yī háng xíng yī háng'); - customPinyin({}); + clearAllCustomDicts(); }); it('[custom]custom with surname', () => { @@ -66,7 +70,7 @@ describe('customConfig', () => { const result2 = pinyin('啊乐嘉是', { mode: 'surname' }); expect(result2).to.be.equal('a lè jiā shì'); - customPinyin({}); + clearAllCustomDicts(); }); it('[custom]customs', () => { @@ -76,7 +80,7 @@ describe('customConfig', () => { }); const result = pinyin('好好'); expect(result).to.be.equal('hào hǎo'); - customPinyin({}); + clearAllCustomDicts(); }); it('[custom]custom with multiple', () => { @@ -89,8 +93,8 @@ describe('customConfig', () => { nonZh: 'removed', toneType: 'num', }); - expect(result).to.deep.equal(['en0']); - customPinyin({}); + expect(result).to.deep.equal(['ng4', 'ng2', 'ng3']); + clearAllCustomDicts(); }); it('[custom] ac high level', () => { @@ -99,24 +103,84 @@ describe('customConfig', () => { }); const result = pinyin('银行'); expect(result).to.be.equal('yin hang'); - customPinyin({}); + clearAllCustomDicts(); }); - it('[custom] double unicode', () => { + it('[custom] double unicode1', () => { customPinyin({ 𧒽: 'lei', }); const result = pinyin('𧒽沙发𧒽𧒽𧒽算法是'); expect(result).to.be.equal('lei shā fā lei lei lei suàn fǎ shì'); - customPinyin({}); + clearAllCustomDicts(); }); - it('[custom] double unicode', () => { + it('[custom] double unicode2', () => { customPinyin({ 𧒽𧒽: 'lei ke', }); const result = pinyin('𧒽沙发𧒽𧒽𧒽算法是'); expect(result).to.be.equal('𧒽 shā fā lei ke 𧒽 suàn fǎ shì'); - customPinyin({}); + clearAllCustomDicts(); + }); +}); + + +describe('custom for multiple', () => { + it('[custom]custom multiple1', () => { + customPinyin({ + 你好: 'mi sao' + }, { + multiple: 'add' + }); + const result = pinyin('你', { multiple: true }); + expect(result).to.be.equal('nǐ mi'); + clearAllCustomDicts(); + }); + + it('[custom]custom multiple2', () => { + customPinyin({ + 你好: 'mi kao' + }, { + multiple: 'add' + }); + const result = pinyin('好', { multiple: true }); + expect(result).to.be.equal('hǎo hào kao'); + clearAllCustomDicts(); + }); + + it('[custom]custom multiple duplicated', () => { + customPinyin({ + 你好: 'mi hǎo' + }, { + multiple: 'add' + }); + const result = pinyin('好', { multiple: true }); + expect(result).to.be.equal('hǎo hào'); + clearAllCustomDicts(); + }); + + it('[custom]custom multiple replace', () => { + customPinyin({ + 你好: 'mi kao' + }, { + multiple: 'replace' + }); + const result = pinyin('好', { multiple: true }); + expect(result).to.be.equal('kao'); + clearAllCustomDicts(); + }); +}); + +describe('custom for polyphonic', () => { + it('[custom]custom polyphonic1', () => { + customPinyin({ + 你好: 'mi kao' + }, { + polyphonic: 'add' + }); + const result = polyphonic('好好学习'); + expect(result).to.deep.equal(['hǎo hào kao', 'hǎo hào kao', 'xué', 'xí']); + clearAllCustomDicts(); }); }); diff --git a/types/core/custom/index.d.ts b/types/core/custom/index.d.ts index ebd87c9..224ea8d 100644 --- a/types/core/custom/index.d.ts +++ b/types/core/custom/index.d.ts @@ -1,11 +1,28 @@ +type CustomHandleType = 'add' | 'replace'; +type CustomDictType = 'pinyin' | 'multiple' | 'polyphonic'; +interface CustomPinyinOptions { + /** + * @description: multiple 对于 customPinyin 补充词汇的处理 + */ + multiple?: CustomHandleType; + /** + * @description: polyphonic 对于 customPinyin 补充词汇的处理 + */ + polyphonic?: CustomHandleType; +} /** * @description: 用户自定义拼音 * @param {{ [key: string]: string }} config 用户自定义的拼音映射(支持汉字、词语、句子的映射),若匹配到该映射,优先将汉字转换为该映射 + * @param {CustomPinyinOptions} options multiple/polyphonic 对于 customPinyin 补充词汇的处理 */ export declare function customPinyin(config?: { [key: string]: string; -}): void; +}, options?: CustomPinyinOptions): void; export declare const getCustomDict: () => { [key: string]: string; }; +export declare const getCustomMultpileDict: () => string[]; +export declare const getCustomPolyphonicDict: () => string[]; +export declare function clearCustomDict(dict: CustomDictType | CustomDictType[]): void; export declare function hasCustomConfig(): boolean; +export {}; diff --git a/types/index.d.ts b/types/index.d.ts index 90190bf..1b6e1c1 100644 --- a/types/index.d.ts +++ b/types/index.d.ts @@ -1,5 +1,5 @@ export { pinyin } from './core/pinyin'; -export { customPinyin } from './core/custom'; +export { customPinyin, clearCustomDict } from './core/custom'; export { match } from './core/match'; export { html } from './core/html'; export { polyphonic } from './core/polyphonic';