Skip to content

Commit

Permalink
feat: custom effect for multiple and polyphonic
Browse files Browse the repository at this point in the history
  • Loading branch information
zh-lx committed Jan 10, 2024
1 parent f2929b3 commit ecb131b
Show file tree
Hide file tree
Showing 7 changed files with 191 additions and 27 deletions.
89 changes: 85 additions & 4 deletions lib/core/custom/index.ts
Original file line number Diff line number Diff line change
@@ -1,13 +1,34 @@
import { ACNormal, PatternsNormal } from '@/common/ac';
import { getStringLength } from '@/common/utils';
import DICT1 from '@/data/dict1';
// Custom text -> pinyin map, keyed by the matched word/phrase.
// NOTE(review): presumably filled by customPinyin; the populating code is not visible in this view.
let customDict: { [key: string]: string } = {};
// Per-character readings consulted for `multiple` lookups; indexed by the
// character's UTF-16 code unit (`charCodeAt(0)`), like DICT1.
let customMultipleDict: string[] = [];
// Per-character readings consulted by polyphonic lookups; indexed the same way.
let customPolyphonicDict: string[] = [];

// How custom readings are merged into a per-character dictionary:
// 'add' appends to the existing readings, 'replace' overwrites them.
type CustomHandleType = 'add' | 'replace';

// Names of the custom dictionaries that clearCustomDict can reset.
type CustomDictType = 'pinyin' | 'multiple' | 'polyphonic';

// Optional second argument of customPinyin.
interface CustomPinyinOptions {
/**
* @description: How `multiple` lookups treat the words supplied via customPinyin.
* When omitted, the multiple dictionary is left untouched.
*/
multiple?: CustomHandleType;
/**
* @description: How polyphonic lookups treat the words supplied via customPinyin.
* When omitted, the polyphonic dictionary is left untouched.
*/
polyphonic?: CustomHandleType;
}

/**
* @description: 用户自定义拼音
* @param {{ [key: string]: string }} config 用户自定义的拼音映射(支持汉字、词语、句子的映射),若匹配到该映射,优先将汉字转换为该映射
* @param {CustomPinyinOptions} options multiple/polyphonic 对于 customPinyin 补充词汇的处理
*/
export function customPinyin(config: { [key: string]: string } = {}) {
customDict = {};
export function customPinyin(
config: { [key: string]: string } = {},
options?: CustomPinyinOptions
) {
const keys = Object.keys(config).sort(
(key1, key2) => getStringLength(key2) - getStringLength(key1)
);
Expand All @@ -20,15 +41,75 @@ export function customPinyin(config: { [key: string]: string } = {}) {
priority: 999 + getStringLength(key),
length: key.length,
}));
ACNormal.reset();
ACNormal.buildTrie([...PatternsNormal, ...customPatterns]);
ACNormal.buildTrie(customPatterns);
ACNormal.buildFailPointer();
// add words for multiple and polyphonic
if (options?.multiple) {
addCustomConfigToDict(config, customMultipleDict, options.multiple);
}
if (options?.polyphonic) {
addCustomConfigToDict(config, customPolyphonicDict, options.polyphonic);
}
}

/**
 * Copies the per-character readings of a custom pinyin config into one of the
 * character-indexed custom dictionaries (multiple / polyphonic).
 *
 * Every key of `config` is paired, character by character, with the
 * space-separated pinyin tokens of its value. A character without a matching
 * token is treated as having the empty reading `''`.
 *
 * @param config Map from a word/phrase to its space-separated pinyin.
 * @param dict Target dictionary, indexed by `charCodeAt(0)` of each character.
 *   It is mutated in place.
 * @param handleType `'replace'` overwrites the character's entry with the
 *   custom reading; `'add'` appends the custom reading to the existing ones
 *   (custom entry first, otherwise the built-in DICT1 entry) unless it is
 *   already present.
 */
function addCustomConfigToDict(
  config: { [key: string]: string },
  dict: string[],
  handleType: CustomHandleType
) {
  for (const key of Object.keys(config)) {
    // Tokenize once per entry instead of once per character.
    const pinyinList = config[key].split(' ');
    // Iterate by code point so a surrogate pair counts as ONE character and
    // the following characters stay aligned with their pinyin tokens.
    Array.from(key).forEach((word, index) => {
      const pinyin = pinyinList[index] || '';
      const wordCode = word.charCodeAt(0);
      if (handleType === 'replace') {
        // Overwrite whatever reading was stored for this character.
        dict[wordCode] = pinyin;
      } else if (handleType === 'add') {
        // Fall back to '' so characters missing from both dictionaries do not
        // crash on `.split`.
        const current = dict[wordCode] || DICT1[wordCode] || '';
        dict[wordCode] = current.split(' ').includes(pinyin)
          ? current
          : `${current} ${pinyin}`.trim();
      }
    });
  }
}

/** Returns the module's custom text -> pinyin map itself (not a copy). */
export const getCustomDict = () => customDict;

/**
 * Returns the character-indexed dictionary used for `multiple` lookups
 * (the module's own array, not a copy).
 * The misspelled name ("Multpile") is kept because other modules import it.
 */
export const getCustomMultpileDict = () => customMultipleDict;

/**
 * Returns the character-indexed dictionary used by polyphonic lookups
 * (the module's own array, not a copy).
 */
export const getCustomPolyphonicDict = () => customPolyphonicDict;

/**
 * Resets one or more of the custom dictionaries.
 *
 * - `'pinyin'`: removes every key from the custom text -> pinyin map and
 *   rebuilds the ACNormal matcher from the built-in patterns only.
 * - `'multiple'` / `'polyphonic'`: empties the corresponding
 *   character-indexed array in place.
 *
 * Names are compared exactly; unknown names are ignored. A missing argument,
 * or one that is neither a string nor an array, is reported with
 * `console.error` and nothing is cleared.
 *
 * @param dict A dictionary name, or a list of dictionary names, to clear.
 */
export function clearCustomDict(dict: CustomDictType | CustomDictType[]) {
  if (!dict || (typeof dict !== 'string' && !Array.isArray(dict))) {
    console.error('The parameter of clearCustomDict is not correct.');
    return;
  }
  // Normalize to a list so every name is matched by equality rather than by
  // substring / optional `indexOf` tricks.
  const targets: CustomDictType[] = Array.isArray(dict) ? dict : [dict];

  if (targets.includes('pinyin')) {
    // Delete keys instead of reassigning so existing references stay valid.
    for (const key of Object.keys(customDict)) {
      delete customDict[key];
    }
    ACNormal.reset();
    ACNormal.buildTrie([...PatternsNormal]);
    ACNormal.buildFailPointer();
  }
  if (targets.includes('multiple')) {
    customMultipleDict.length = 0;
  }
  if (targets.includes('polyphonic')) {
    customPolyphonicDict.length = 0;
  }
}

/** Reports whether the custom text -> pinyin map currently holds any entry. */
export function hasCustomConfig() {
  const entryCount = Object.keys(customDict).length;
  return entryCount > 0;
}
6 changes: 3 additions & 3 deletions lib/core/pinyin/handle.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import {
} from '@/data/special';
import Surnames from '@/data/surname';
import DICT1 from '@/data/dict1';
import { getCustomDict } from '@/core/custom';
import { getCustomMultpileDict } from '@/core/custom';
import type { SingleWordResult, PinyinMode } from '../../common/type';
import { ACNormal } from '@/common/ac';
import {
Expand Down Expand Up @@ -131,9 +131,9 @@ type GetMultiplePinyin = (
) => SingleWordResult[];
const getMultiplePinyin: GetMultiplePinyin = (word, mode = 'normal') => {
const wordCode = word.charCodeAt(0);
const customDict = getCustomDict();
const customMultpileDict = getCustomMultpileDict();
const pinyin =
customDict[word] ||
customMultpileDict[wordCode] ||
(mode === 'surname' ? Surnames[word] : '') ||
DICT1[wordCode] ||
'';
Expand Down
4 changes: 3 additions & 1 deletion lib/core/polyphonic/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import {
getFirstLetter,
getFinalParts,
} from '@/core/pinyin/handle';
import { getCustomPolyphonicDict } from '../custom';

interface BasicOptions {
/**
Expand Down Expand Up @@ -205,7 +206,8 @@ function polyphonic(
const getPolyphonicList = (text: string): SingleWordResult[] => {
return text.split('').map((word) => {
const wordCode = word.charCodeAt(0);
const pinyin = DICT1[wordCode] || '';
const customPolyphonicDict = getCustomPolyphonicDict();
const pinyin = customPolyphonicDict[wordCode] || DICT1[wordCode] || '';
return {
origin: word,
result: pinyin,
Expand Down
2 changes: 1 addition & 1 deletion lib/index.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
export { pinyin } from './core/pinyin';
export { customPinyin } from './core/custom';
export { customPinyin, clearCustomDict } from './core/custom';
export { match } from './core/match';
export { html } from './core/html';
export { polyphonic } from './core/polyphonic';
Expand Down
96 changes: 80 additions & 16 deletions test/custom.test.js
Original file line number Diff line number Diff line change
@@ -1,12 +1,16 @@
const { pinyin, customPinyin } = require('../');
const { pinyin, customPinyin, clearCustomDict, polyphonic } = require('../');
const expect = require('chai').expect;

// Test helper: resets all three custom dictionaries so cases do not affect each other.
function clearAllCustomDicts() {
  const everyDict = ['pinyin', 'multiple', 'polyphonic'];
  clearCustomDict(everyDict);
}

describe('customConfig', () => {
it('[custom]custom none', () => {
customPinyin();
const result = pinyin('干一行行一行');
expect(result).to.be.equal('gān yī xíng xíng yī xíng');
customPinyin({});
clearAllCustomDicts();
});

it('[custom]custom1', () => {
Expand All @@ -15,7 +19,7 @@ describe('customConfig', () => {
});
const result = pinyin('我姓能');
expect(result).to.be.equal('wǒ xìng nài');
customPinyin({});
clearAllCustomDicts();
});

it('[custom]custom2', () => {
Expand All @@ -24,7 +28,7 @@ describe('customConfig', () => {
});
const result = pinyin('爱好好多');
expect(result).to.be.equal('ài hào hǎo duō');
customPinyin({});
clearAllCustomDicts();
});

it('[custom]custom3', () => {
Expand All @@ -33,7 +37,7 @@ describe('customConfig', () => {
});
const result = pinyin('哈什玛');
expect(result).to.be.equal('hà shén mǎ');
customPinyin({});
clearAllCustomDicts();
});

it('[custom]custom4', () => {
Expand All @@ -42,7 +46,7 @@ describe('customConfig', () => {
});
const result = pinyin('暴虎冯河');
expect(result).to.be.equal('bào hǔ píng hé');
customPinyin({});
clearAllCustomDicts();
});

it('[custom]custom>5', () => {
Expand All @@ -51,7 +55,7 @@ describe('customConfig', () => {
});
const result = pinyin('干一行行一行');
expect(result).to.be.equal('gàn yī háng xíng yī háng');
customPinyin({});
clearAllCustomDicts();
});

it('[custom]custom with surname', () => {
Expand All @@ -66,7 +70,7 @@ describe('customConfig', () => {

const result2 = pinyin('啊乐嘉是', { mode: 'surname' });
expect(result2).to.be.equal('a lè jiā shì');
customPinyin({});
clearAllCustomDicts();
});

it('[custom]customs', () => {
Expand All @@ -76,7 +80,7 @@ describe('customConfig', () => {
});
const result = pinyin('好好');
expect(result).to.be.equal('hào hǎo');
customPinyin({});
clearAllCustomDicts();
});

it('[custom]custom with multiple', () => {
Expand All @@ -89,8 +93,8 @@ describe('customConfig', () => {
nonZh: 'removed',
toneType: 'num',
});
expect(result).to.deep.equal(['en0']);
customPinyin({});
expect(result).to.deep.equal(['ng4', 'ng2', 'ng3']);
clearAllCustomDicts();
});

it('[custom] ac high level', () => {
Expand All @@ -99,24 +103,84 @@ describe('customConfig', () => {
});
const result = pinyin('银行');
expect(result).to.be.equal('yin hang');
customPinyin({});
clearAllCustomDicts();
});

it('[custom] double unicode', () => {
it('[custom] double unicode1', () => {
customPinyin({
𧒽: 'lei',
});
const result = pinyin('𧒽沙发𧒽𧒽𧒽算法是');
expect(result).to.be.equal('lei shā fā lei lei lei suàn fǎ shì');
customPinyin({});
clearAllCustomDicts();
});

it('[custom] double unicode', () => {
it('[custom] double unicode2', () => {
customPinyin({
𧒽𧒽: 'lei ke',
});
const result = pinyin('𧒽沙发𧒽𧒽𧒽算法是');
expect(result).to.be.equal('𧒽 shā fā lei ke 𧒽 suàn fǎ shì');
customPinyin({});
clearAllCustomDicts();
});
});


// Checks that customPinyin's `multiple` option feeds the per-character
// readings returned by pinyin(..., { multiple: true }).
describe('custom for multiple', () => {
  // Registers `config` with the given merge mode, looks up `char`, and ALWAYS
  // clears the custom dictionaries afterwards, even when a call throws, so
  // that a failing case cannot leak state into the following ones.
  const readingsOf = (char, config, mode) => {
    try {
      customPinyin(config, { multiple: mode });
      return pinyin(char, { multiple: true });
    } finally {
      clearAllCustomDicts();
    }
  };

  it('[custom]custom multiple1', () => {
    const result = readingsOf('你', { 你好: 'mi sao' }, 'add');
    expect(result).to.be.equal('nǐ mi');
  });

  it('[custom]custom multiple2', () => {
    const result = readingsOf('好', { 你好: 'mi kao' }, 'add');
    expect(result).to.be.equal('hǎo hào kao');
  });

  // A reading that already exists must not be appended a second time.
  it('[custom]custom multiple duplicated', () => {
    const result = readingsOf('好', { 你好: 'mi hǎo' }, 'add');
    expect(result).to.be.equal('hǎo hào');
  });

  // 'replace' discards the built-in readings in favour of the custom one.
  it('[custom]custom multiple replace', () => {
    const result = readingsOf('好', { 你好: 'mi kao' }, 'replace');
    expect(result).to.be.equal('kao');
  });
});

// Checks that customPinyin's `polyphonic` option feeds the per-character
// readings returned by polyphonic().
describe('custom for polyphonic', () => {
  it('[custom]custom polyphonic1', () => {
    let result;
    try {
      customPinyin({ 你好: 'mi kao' }, { polyphonic: 'add' });
      result = polyphonic('好好学习');
    } finally {
      // Always reset custom state so a failure here cannot affect later cases.
      clearAllCustomDicts();
    }
    expect(result).to.deep.equal(['hǎo hào kao', 'hǎo hào kao', 'xué', 'xí']);
  });
});
19 changes: 18 additions & 1 deletion types/core/custom/index.d.ts
Original file line number Diff line number Diff line change
@@ -1,11 +1,28 @@
// How custom readings are merged into a per-character dictionary:
// 'add' appends to the existing readings, 'replace' overwrites them.
type CustomHandleType = 'add' | 'replace';
// Names of the custom dictionaries that clearCustomDict can reset.
type CustomDictType = 'pinyin' | 'multiple' | 'polyphonic';
// Optional second argument of customPinyin.
interface CustomPinyinOptions {
/**
* @description: How `multiple` lookups treat the words supplied via customPinyin.
*/
multiple?: CustomHandleType;
/**
* @description: How polyphonic lookups treat the words supplied via customPinyin.
*/
polyphonic?: CustomHandleType;
}
/**
* @description: 用户自定义拼音
* @param {{ [key: string]: string }} config 用户自定义的拼音映射(支持汉字、词语、句子的映射),若匹配到该映射,优先将汉字转换为该映射
* @param {CustomPinyinOptions} options multiple/polyphonic 对于 customPinyin 补充词汇的处理
*/
export declare function customPinyin(config?: {
[key: string]: string;
}): void;
}, options?: CustomPinyinOptions): void;
/** Returns the custom text -> pinyin map. */
export declare const getCustomDict: () => {
[key: string]: string;
};
/** Returns the custom dictionary used for `multiple` lookups (name spelling kept as published). */
export declare const getCustomMultpileDict: () => string[];
/** Returns the custom dictionary used by polyphonic lookups. */
export declare const getCustomPolyphonicDict: () => string[];
/** Clears the named custom dictionary, or each dictionary in the given list. */
export declare function clearCustomDict(dict: CustomDictType | CustomDictType[]): void;
/** Reports whether the custom text -> pinyin map has any entry. */
export declare function hasCustomConfig(): boolean;
export {};
2 changes: 1 addition & 1 deletion types/index.d.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
export { pinyin } from './core/pinyin';
export { customPinyin } from './core/custom';
export { customPinyin, clearCustomDict } from './core/custom';
export { match } from './core/match';
export { html } from './core/html';
export { polyphonic } from './core/polyphonic';
Expand Down

0 comments on commit ecb131b

Please sign in to comment.