Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: 优化拼音及部分词库 #186

Merged
merged 1 commit into from
Mar 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions lib/common/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@ export function getSplittedWord(string: string) {
}

/**
 * Reports whether `char` starts with a code unit in the range
 * U+4E00–U+9FA5 (decimal 19968–40869).
 *
 * Only the first UTF-16 code unit is inspected; any further characters in
 * the string are ignored. Non-string values and the empty string yield
 * `false`.
 *
 * @param char - The text whose first code unit is tested.
 * @returns `true` when the first code unit lies inside the range.
 */
export function isZhChar(char: string) {
  if (typeof char === 'string') {
    // charCodeAt(0) is NaN for '', which fails both comparisons below.
    const firstUnit = char.charCodeAt(0);
    return firstUnit >= 0x4e00 && firstUnit <= 0x9fa5;
  }
  return false;
}
4 changes: 2 additions & 2 deletions lib/core/pinyin/handle.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import {
SpecialFinalMap,
SpecialFinalList,
doubleFinalList,
getSepecialChangeTone,
processSepecialPinyin,
} from '@/data/special';
import Surnames from '@/data/surname';
import DICT1 from '@/data/dict1';
Expand Down Expand Up @@ -91,7 +91,7 @@ export const getPinyin = (
} else {
const char = word[i];
let pinyin: string = '';
pinyin = getSepecialChangeTone(char, word[i - 1], word[i + 1]);
pinyin = processSepecialPinyin(char, word[i - 1], word[i + 1]);
list[i] = {
origin: char,
result: pinyin,
Expand Down
2 changes: 1 addition & 1 deletion lib/data/dict2.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2115,7 +2115,7 @@ const DICT2: { [prop: string]: string } = {
刺猬: 'cì wei',
麦子: 'mài zi',
队伍: 'duì wu',
知了: 'zhī liao',
知了: 'zhī liǎo',
鱼儿: 'yú er',
馄饨: 'hún tun',
灯笼: 'dēng long',
Expand Down
1 change: 1 addition & 0 deletions lib/data/dict3.ts
Original file line number Diff line number Diff line change
Expand Up @@ -316,6 +316,7 @@ const DICT3: { [prop: string]: string } = {
咳特灵: 'ké tè líng',
开户行: 'kāi hù háng',
郦食其: 'lì yì jī',
花事了: 'huā shì liǎo',
};
export default DICT3;
export const Pattern3: Pattern[] = Object.keys(DICT3).map((key) => ({
Expand Down
73 changes: 48 additions & 25 deletions lib/data/special.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
import type { Pattern } from "../common/ac";
import { Priority } from "@/common/constant";
import { getSingleWordPinyin, getNumOfTone, getPinyinWithoutTone } from '../core/pinyin/handle';
import type { Pattern } from '../common/ac';
import { Priority } from '@/common/constant';
import {
getSingleWordPinyin,
getNumOfTone,
getPinyinWithoutTone,
} from '@/core/pinyin/handle';
import { isZhChar } from '@/common/utils';

export const InitialList = [
'zh',
Expand Down Expand Up @@ -115,6 +120,7 @@ const Numbers = {
千: 'qiān',
万: 'wàn',
亿: 'yì',
单: 'dān',
两: 'liǎng',
双: 'shuāng',
多: 'duō',
Expand All @@ -129,7 +135,7 @@ const NumberWordMap = {
斗: 'dǒu',
};
function genNumberDict() {
const dict: { [key: string]: string; } = {
const dict: { [key: string]: string } = {
十一: 'shí yī',
零一: 'líng yī',
第一: 'dì yī',
Expand All @@ -146,43 +152,46 @@ function genNumberDict() {
return dict;
}
const NumberDict = genNumberDict();
export const PatternNumberDict: Pattern[] = Object.keys(NumberDict).map((key) => ({
zh: key,
pinyin: NumberDict[key],
priority: Priority.DictNumber + key.length,
length: key.length,
}));


// One matcher pattern per NumberDict entry; the priority is the DictNumber
// base plus the key length, so it grows with the length of the key.
export const PatternNumberDict: Pattern[] = Object.keys(NumberDict).map(
  (zh) => {
    const size = zh.length;
    return {
      zh,
      pinyin: NumberDict[zh],
      priority: Priority.DictNumber + size,
      length: size,
    };
  }
);

/**
* @description: 特殊变调处理:https://zh.wiktionary.org/wiki/Appendix:%E2%80%9C%E4%B8%80%E2%80%9D%E5%8F%8A%E2%80%9C%E4%B8%8D%E2%80%9D%E7%9A%84%E5%8F%98%E8%B0%83
*/
const SpecialChangeToneMap = {
const inflectionMap = {
// 说不说,说一说,叠词之间发音为轻声
不: {
'bú': [4], // "不" 后面跟 4 声时,变调为 2 声
bú: [4], // "不" 后面跟 4 声时,变调为 2 声
},
一: {
'yí': [4], // "一" 后面跟 4 声时,变调为 2 声
'yì': [1, 2, 3],
}
}
const SpecialChangeToneIgnoreSuffix = ['的', '地', '而', '之', '后', '也', '还'];
export const SpecialChangeToneList = Object.keys(SpecialChangeToneMap);
export function getSepecialChangeTone(cur: string, pre: string, next: string) {
if (SpecialChangeToneList.indexOf(cur) === -1) {
yí: [4], // "一" 后面跟 4 声时,变调为 2 声
yì: [1, 2, 3],
},
};
const inflectionIgnoreSuffix = ['的', '地', '而', '之', '后', '也', '还'];
export const inflectionList = Object.keys(inflectionMap);

// 处理 一、不 的变调
export function processInflection(cur: string, pre: string, next: string) {
if (inflectionList.indexOf(cur) === -1) {
return getSingleWordPinyin(cur);
}
// 说不说,说一说,叠词之间发音为轻声
if (pre === next && getSingleWordPinyin(pre) !== pre) {
return getPinyinWithoutTone(getSingleWordPinyin(cur));
}
if (next && !SpecialChangeToneIgnoreSuffix.includes(next)) {
// 一、不的变调处理
if (next && !inflectionIgnoreSuffix.includes(next)) {
const nextPinyin = getSingleWordPinyin(next);
if (nextPinyin !== next) {
const nextTone = getNumOfTone(nextPinyin);
const pinyinMap = SpecialChangeToneMap[cur as keyof typeof SpecialChangeToneMap];
const pinyinMap = inflectionMap[cur as keyof typeof inflectionMap];
for (let pinyin in pinyinMap) {
const tones = pinyinMap[pinyin as keyof typeof pinyinMap] as number[];
if (tones.indexOf(Number(nextTone)) !== -1) {
Expand All @@ -191,5 +200,19 @@ export function getSepecialChangeTone(cur: string, pre: string, next: string) {
}
}
}
return getSingleWordPinyin(cur);
}

/**
 * Special handling for the character 了.
 *
 * @param cur - The character being converted.
 * @param pre - The character immediately before `cur`.
 * @returns `'liǎo'` when `cur` is 了 and `pre` is not a Chinese character
 *   according to `isZhChar`; otherwise `undefined`, so the caller can fall
 *   back to another rule.
 */
export function processInflectionLiao(cur: string, pre: string) {
  return cur === '了' && !isZhChar(pre) ? 'liǎo' : undefined;
}

/**
 * Resolves the pinyin of `cur` by trying the special rules in order and
 * returning the first non-empty result:
 * 1. the 了 rule (`processInflectionLiao`),
 * 2. the 一 / 不 rule (`processInflection`),
 * 3. the plain single-character lookup (`getSingleWordPinyin`).
 *
 * @param cur - The character being converted.
 * @param pre - The character immediately before `cur`.
 * @param next - The character immediately after `cur`.
 * @returns The pinyin chosen for `cur`.
 */
export function processSepecialPinyin(cur: string, pre: string, next: string) {
  const liaoResult = processInflectionLiao(cur, pre);
  if (liaoResult) {
    return liaoResult;
  }

  const inflected = processInflection(cur, pre, next);
  if (inflected) {
    return inflected;
  }

  return getSingleWordPinyin(cur);
}
47 changes: 20 additions & 27 deletions lib/data/surname.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import { Priority } from '@/common/constant';
import type { Pattern } from '../../lib/common/ac';
const Surnames: { [key: string]: string } = {
// TODO: 姓氏里有很多不是多音字或者是默认读音,可以从字典中去掉
南宫: 'nán gōng',
第五: 'dì wǔ',
万俟: 'mò qí',
Expand All @@ -13,10 +12,10 @@ const Surnames: { [key: string]: string } = {
闻人: 'wén rén',
东方: 'dōng fāng',
赫连: 'hè lián',
皇甫: 'huáng fǔ', // 如晚唐诗人皇甫松
皇甫: 'huáng fǔ',
尉迟: 'yù chí',
公羊: 'gōng yáng',
澹台: 'tán tái', // 如孔子弟子澹台灭明
澹台: 'tán tái',
公冶: 'gōng yě',
宗政: 'zōng zhèng',
濮阳: 'pú yáng',
Expand Down Expand Up @@ -61,6 +60,8 @@ const Surnames: { [key: string]: string } = {
左丘: 'zuǒ qiū',
东门: 'dōng mén',
西门: 'xī mén',
句龙: 'gōu lóng', // 如宋有句龙如渊,https://baike.baidu.com/item/%E5%8F%A5%E9%BE%99/1160043
毌丘: 'guàn qiū',
赵: 'zhào',
钱: 'qián',
孙: 'sūn',
Expand Down Expand Up @@ -141,7 +142,7 @@ const Surnames: { [key: string]: string } = {
邬: 'wū',
安: 'ān',
常: 'cháng',
乐: 'yuè lè', // 乐有两个读音【Yuè】和【Lè】。乐(Yuè)姓的名人有古代的军事家乐毅,而乐(Lè)姓如主持人乐嘉
乐: 'yuè',
于: 'yú',
时: 'shí',
傅: 'fù',
Expand Down Expand Up @@ -225,7 +226,7 @@ const Surnames: { [key: string]: string } = {
万: 'wàn',
支: 'zhī',
柯: 'kē',
昝: 'zǎn', // 如清代书画家昝茹颖
昝: 'zǎn',
管: 'guǎn',
卢: 'lú',
莫: 'mò',
Expand All @@ -234,15 +235,15 @@ const Surnames: { [key: string]: string } = {
裘: 'qiú',
缪: 'miào',
干: 'gān',
解: 'xiè', // 明代:解缙
解: 'xiè',
应: 'yīng',
宗: 'zōng',
丁: 'dīng',
宣: 'xuān',
贲: 'bēn',
邓: 'dèng',
郁: 'yù',
单: 'shàn', // 单雄信
单: 'shàn',
杭: 'háng',
洪: 'hóng',
包: 'bāo',
Expand All @@ -266,7 +267,7 @@ const Surnames: { [key: string]: string } = {
於: 'yū',
惠: 'huì',
甄: 'zhēn',
曲: 'qū', // 如唐代司空曲环
曲: 'qū',
家: 'jiā',
封: 'fēng',
芮: 'ruì',
Expand All @@ -286,7 +287,7 @@ const Surnames: { [key: string]: string } = {
巴: 'bā',
弓: 'gōng',
牧: 'mù',
隗: 'kuí, wěi', // 一读【kuí】,一读【wěi】
隗: 'wěi',
山: 'shān',
谷: 'gǔ',
车: 'chē',
Expand Down Expand Up @@ -456,16 +457,16 @@ const Surnames: { [key: string]: string } = {
巢: 'cháo',
关: 'guān',
蒯: 'kuǎi',
相: 'xiàng xiāng',
查: 'zhā', // 如金庸原名查良镛,也有读 chá 一说
相: 'xiàng',
查: 'zhā',
后: 'hòu',
荆: 'jīng',
红: 'hóng',
游: 'yóu',
竺: 'zhú',
权: 'quán',
逯: 'lù', // 如汉代大臣逯普
盖: 'gě gài guō guó', // 一读【gě】,一读【gài】。一般念【gě】,如现代京剧表演艺术家盖叫天。姓氏中也有读 guō、guó 一说,出自:https://weibo.com/7211561239/JdpQAzFoh?type=repost
逯: 'lù',
盖: 'gě',
益: 'yì',
桓: 'huán',
公: 'gōng',
Expand All @@ -474,28 +475,20 @@ const Surnames: { [key: string]: string } = {
言: 'yán',
福: 'fú',
肖: 'xiāo',
区: 'ōu', // 如柳宗元《童区寄传》中的区寄
覃: 'qín tán', // 一读【tán】,一读【qín】;一般读【qín】
区: 'ōu',
覃: 'qín',
朴: 'piáo',

// 增补 1
// 来自:《别再叫错人啦!这些易读错的姓氏需要好好学习一下!》
// http://m.xinhuanet.com/ah/2018-04/19/c_1122709044.htm
繁: 'pó', // 如写《定情诗》的汉末诗人繁钦
员: 'yùn', // 如唐代诗人员半千
句: 'gōu', // 如宋代进士句克俭
句龙: 'gōu lóng', // 如宋有句龙如渊,https://baike.baidu.com/item/%E5%8F%A5%E9%BE%99/1160043
要: 'yāo', // https://baike.baidu.com/item/%E8%A6%81%E5%A7%93/9252763
过: 'guō', // 如明代围棋国手过百龄,清代文人过春山。https://baike.baidu.com/item/%E8%BF%87%E5%A7%93/9822922
钻: 'zuān',
谌: 'shèn', // 如羽毛球运动员谌龙
折: 'shé zhé',
召: 'shào zhào', // 一读【shào】,得姓始祖为周武王之弟召公姬奭(shì)。一读【zhào】,为傣族姓。
毌丘: 'guàn qiū', // 不要读作 wú qiū 或 mǔ qiū,也不要写作“毋丘”或“母丘”。
谌: 'chén',
折: 'shé',
召: 'shào',
郄: 'qiè',

// 增补 2
撒: 'sǎ', // 如主持人撒贝宁(原名撒播),他本人在念自己名字的时候通常读作四声「sà」,但在这个视频 30 秒开始明确的说:「我这个姓念 sǎ」:https://v.cctv.com/2020/02/23/VIDEhOnwKFS2lsri9QL4I7xX200223.shtml
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

这些注释可以保留着?

Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

也可以

撒: 'sǎ',
};

export default Surnames;
Expand Down
8 changes: 5 additions & 3 deletions types/data/special.d.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import type { Pattern } from "../common/ac";
import type { Pattern } from '../common/ac';
export declare const InitialList: string[];
export declare const SpecialInitialList: string[];
export declare const SpecialFinalList: string[];
Expand Down Expand Up @@ -26,5 +26,7 @@ export declare const SpecialFinalMap: {
};
export declare const doubleFinalList: string[];
export declare const PatternNumberDict: Pattern[];
export declare const SpecialChangeToneList: string[];
export declare function getSepecialChangeTone(cur: string, pre: string, next: string): string;
export declare const inflectionList: string[];
export declare function processInflection(cur: string, pre: string, next: string): string | undefined;
export declare function processInflectionLiao(cur: string, pre: string): "liǎo" | undefined;
export declare function processSepecialPinyin(cur: string, pre: string, next: string): string;
Loading