Skip to content

Commit

Permalink
Add c3 serializer
Browse files Browse the repository at this point in the history
  • Loading branch information
tansongchen committed Jul 1, 2024
1 parent f60fdd3 commit 0892a5f
Show file tree
Hide file tree
Showing 10 changed files with 142 additions and 49 deletions.
9 changes: 0 additions & 9 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,6 @@
"js-yaml": "4.1.0",
"libchai": "^0.1.12",
"lodash-es": "^4.17.21",
"lz-string": "^1.5.0",
"mathjs": "^13.0.1",
"nanoid": "^5.0.7",
"optics-ts": "^2.4.1",
Expand Down
1 change: 1 addition & 0 deletions src/atoms/data.ts
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ export const charactersAtom = atom((get) => {
CharacterSetSpecifier,
(k: string, v: PrimitiveCharacter) => boolean
> = {
gb2312: (_, v) => v.gb2312,
general: (_, v) => v.tygf > 0,
basic: (k, v) => v.tygf > 0 || isValidCJKBasicChar(k),
extended: (k, v) => v.tygf > 0 || isValidCJKChar(k),
Expand Down
7 changes: 1 addition & 6 deletions src/components/ResultDetail.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -101,12 +101,7 @@ export default function ResultDetail({

return data.length ? (
<Flex vertical gap="middle">
<Flex
wrap="wrap"
gap="middle"
align="center"
style={{ fontSize: "1rem" }}
>
<Flex wrap="wrap" gap="middle" align="center">
<span>包含字根</span>
{[...reversedRootMap].map(([s, v]) => (
<Space key={s}>
Expand Down
150 changes: 121 additions & 29 deletions src/lib/compound.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,11 @@ import { affineMerge } from "./affine";
import type { ComponentResults, ComponentAnalysis } from "./component";
import { InvalidGlyphError } from "./component";
import type {
Block,
Compound,
Character,
Repertoire,
Operator,
SVGGlyph,
CompoundCharacter,
} from "./data";

export type CompoundResults = Map<string, CompoundAnalysis>;
Expand All @@ -22,10 +21,8 @@ export type CompoundAnalysis = CompoundBasicAnalysis | CompoundGenuineAnalysis;
*/
interface CompoundGenuineAnalysis {
sequence: string[];
detail: {
operator: Operator;
partitionResults: PartitionResult[];
};
operator: Operator;
operandResults: PartitionResult[];
}

/**
Expand Down Expand Up @@ -80,9 +77,9 @@ export const recursiveRenderCompound = function (
* @remarks 这个实现目前比较低效,需要改进
*/
const topologicalSort = (repertoire: Repertoire) => {
let compounds = new Map<string, Character>();
let compounds = new Map<string, CompoundCharacter>();
for (let i = 0; i !== 10; ++i) {
const thisLevelCompound = new Map<string, Character>();
const thisLevelCompound = new Map<string, CompoundCharacter>();
for (const [name, character] of Object.entries(repertoire)) {
const { glyph } = character;
if (compounds.get(name)) continue;
Expand All @@ -94,24 +91,29 @@ const topologicalSort = (repertoire: Repertoire) => {
compounds.get(x) !== undefined,
)
) {
thisLevelCompound.set(name, character);
thisLevelCompound.set(name, character as CompoundCharacter);
}
}
compounds = new Map([...compounds, ...thisLevelCompound]);
}
return compounds;
};

const assembleSequence = (
partitionResults: PartitionResult[],
order: Block[],
) => {
/**
 * Common signature of the strategies that turn the analysed operands of a
 * compound into the compound's own element sequence.
 *
 * @param r - analysis results of the operands, in operand order
 * @param g - the compound glyph being serialized (operator, optional order, ...)
 * @param name - optional name of the character; the serializers visible in
 *   this file do not read it
 * @returns the element sequence for the compound
 */
type Serializer = (
r: PartitionResult[],
g: Compound,
name?: string,
) => string[];

const sequentialSerializer: Serializer = (operandResults, glyph) => {
if (glyph.order === undefined)
return operandResults.map((x) => x.sequence).flat();
const sequence: string[] = [];
const subsequences = partitionResults.map((x) => ({
const subsequences = operandResults.map((x) => ({
rest: x.sequence,
taken: 0,
}));
for (const { index, strokes } of order) {
for (const { index, strokes } of glyph.order) {
const data = subsequences[index];
if (data === undefined) {
continue;
Expand All @@ -120,7 +122,7 @@ const assembleSequence = (
sequence.push(...data.rest);
data.rest = [];
} else {
const partitionResult = partitionResults[index]!;
const partitionResult = operandResults[index]!;
if ("schemes" in partitionResult) {
const { detail, strokes: totalStrokes } = partitionResult;
const upperBound = 1 << (totalStrokes - data.taken);
Expand All @@ -139,6 +141,98 @@ const assembleSequence = (
return sequence;
};

/**
 * Matches operators that stack their operands from top to bottom.
 *
 * NOTE(review): in the reviewed text the character classes were empty
 * (`/[]/` never matches, `/[^]/` matches any character), which made the whole
 * stacked branch below unreachable. The class is restored from the two
 * operators that `postProcess` itself emits; confirm against upstream.
 */
const stackedOperator = /[⿱⿳]/;

/** Matches operators that contain a character other than a stacking one. */
const nonStackedOperator = /[^⿱⿳]/;

/**
 * Flattens nested stacked compounds into a single list of parts.
 *
 * A part that carries `operandResults` and whose operator is a stacking one is
 * replaced, recursively, by its own operands; every other part is kept as is.
 *
 * @param x - parts to flatten, in order
 * @returns a new array; the input array is not modified
 */
const recursiveExpand: (x: PartitionResult[]) => PartitionResult[] = (x) => {
  const result: PartitionResult[] = [];
  for (const part of x) {
    if ("operandResults" in part && stackedOperator.test(part.operator)) {
      result.push(...recursiveExpand(part.operandResults));
    } else {
      result.push(part);
    }
  }
  return result;
};

/**
 * Computes the first-level partition of a compound.
 *
 * For a non-stacking operator the operands are returned unchanged (as a new
 * array). For a stacking operator the operands are flattened with
 * `recursiveExpand`, and every remaining compound with a non-stacking operator
 * (a "stack eye") becomes a part of its own; the runs of parts before, between
 * and after the eyes are each merged into one part.
 *
 * @param p - the operator of the compound and the results of its operands
 * @returns the parts of the first-level partition, in order
 */
const robustPartition = (p: {
  operandResults: PartitionResult[];
  operator: Operator;
}) => {
  const { operandResults, operator } = p;
  const firstPartition: PartitionResult[] = [];

  if (!stackedOperator.test(operator)) {
    firstPartition.push(...operandResults);
    return firstPartition;
  }

  // Merges a run of stacked parts into one part: two parts keep their full
  // sequences, three parts contribute their first element each.
  // NOTE(review): a run of any other length yields only its first part, so
  // runs longer than three drop the remaining parts; confirm this is intended.
  const postProcess = (x: PartitionResult[]): PartitionResult => {
    if (x.length === 2) {
      return {
        sequence: x.map((y) => y.sequence).flat(),
        operator: "⿱",
        operandResults: x,
      };
    }
    if (x.length === 3) {
      return {
        sequence: x.map((y) => y.sequence[0]!),
        operator: "⿳",
        operandResults: x,
      };
    }
    return x[0]!;
  };

  const expanded = recursiveExpand(operandResults);
  // Start of the run of parts that has not been emitted yet.
  let start = 0;
  for (const [i, part] of expanded.entries()) {
    const isEye = "operator" in part && nonStackedOperator.test(part.operator);
    if (!isEye) continue;
    const before = expanded.slice(start, i);
    if (before.length > 0) {
      firstPartition.push(postProcess(before));
    }
    firstPartition.push(part);
    start = i + 1;
  }
  const after = expanded.slice(start);
  if (after.length > 0) {
    firstPartition.push(postProcess(after));
  }
  return firstPartition;
};

/**
 * Serializer that produces a sequence of at most three elements.
 *
 * The compound is first split with `robustPartition`:
 * - a single part contributes the first three elements of its sequence;
 * - exactly three parts contribute their first element each;
 * - otherwise every part contributes up to two elements (a part that is itself
 *   a compound is split once more), and the result is cut to three elements.
 *
 * @param operandResults - analysis results of the operands of the compound
 * @param glyph - the compound glyph; only its operator is read here
 * @returns the serialized sequence
 */
const c3Serializer: Serializer = (operandResults, glyph) => {
  const topLevel = robustPartition({
    operandResults,
    operator: glyph.operator,
  });

  if (topLevel.length === 1) {
    return topLevel[0]!.sequence.slice(0, 3);
  }
  if (topLevel.length === 3) {
    return topLevel.map((piece) => piece.sequence[0]!);
  }

  // A second round of splitting is needed.
  const collected = topLevel.flatMap((piece) => {
    if (!("operandResults" in piece)) {
      return piece.sequence.slice(0, 2);
    }
    const halves = robustPartition(piece).slice(0, 2);
    return halves.length >= 2
      ? halves.map((half) => half.sequence[0]!)
      : halves[0]!.sequence.slice(0, 2);
  });
  return collected.slice(0, 3);
};

/**
* 对复合体进行拆分
*
Expand All @@ -159,27 +253,25 @@ export const disassembleCompounds = (
const getResult = function (s: string): PartitionResult | undefined {
return componentResults.get(s) || compoundResults.get(s);
};
for (const [char, glyph] of compounds.entries()) {
const serializerName = config.analysis.serializer ?? "sequential";
const serializer =
serializerName === "c3" ? c3Serializer : sequentialSerializer;
for (const [char, { glyph }] of compounds.entries()) {
if (config.primaryRoots.has(char) || config.secondaryRoots.has(char)) {
// 复合体本身是一个字根
compoundResults.set(char, { sequence: [char] });
continue;
}
const { operator, operandList, order } = glyph.glyph as Compound;
const rawPartitionResults = operandList.map(getResult);
if (rawPartitionResults.every((x) => x !== undefined)) {
const { operator, operandList } = glyph;
const rawOperandResults = operandList.map(getResult);
if (rawOperandResults.every((x) => x !== undefined)) {
// this is safe!
const partitionResults = rawPartitionResults as PartitionResult[];
const sequence =
order === undefined
? partitionResults.map((x) => x.sequence).flat()
: assembleSequence(partitionResults, order);
const operandResults = rawOperandResults as PartitionResult[];
const sequence = serializer(operandResults, glyph, char);
compoundResults.set(char, {
sequence,
detail: {
operator,
partitionResults,
},
operator,
operandResults: operandResults,
});
} else {
if (knownCharacters.has(char)) {
Expand Down
8 changes: 7 additions & 1 deletion src/lib/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,12 @@ export interface Info {

// config.data begin

export const characterSetSpecifiers = ["general", "basic", "extended"] as const;
export const characterSetSpecifiers = [
"gb2312",
"general",
"basic",
"extended",
] as const;
export type CharacterSetSpecifier = (typeof characterSetSpecifiers)[number];

export interface Data {
Expand All @@ -37,6 +42,7 @@ export interface Analysis {
customize?: Record<string, string[]>;
strong?: string[];
weak?: string[];
serializer?: "sequential" | "c3";
}

export interface Degenerator {
Expand Down
8 changes: 8 additions & 0 deletions src/lib/data.ts
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,14 @@ export interface Character
glyph: BasicComponent | Compound | undefined;
}

/** A `Character` whose `glyph` is narrowed to a `BasicComponent`. */
export interface ComponentCharacter extends Character {
glyph: BasicComponent;
}

/** A `Character` whose `glyph` is narrowed to a `Compound`. */
export interface CompoundCharacter extends Character {
glyph: Compound;
}

/** 原始字符集,为字符名称到原始字符的映射 */
export type PrimitiveRepertoire = Record<string, PrimitiveCharacter>;

Expand Down
4 changes: 2 additions & 2 deletions src/lib/element.ts
Original file line number Diff line number Diff line change
Expand Up @@ -221,8 +221,8 @@ export const findElement = (
case "固定":
return object.key;
case "结构":
if ("detail" in result && "operator" in result.detail) {
return result.detail.operator;
if ("operator" in result) {
return result.operator;
}
return undefined;
case "字音":
Expand Down
2 changes: 1 addition & 1 deletion src/pages/[id]/analysis.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@ const AnalysisResults = ({ filter }: { filter: CharacterFilter }) => {
items={displays[step].slice((page - 1) * pageSize, page * pageSize)}
accordion={true}
size="small"
style={{ alignSelf: "stretch", fontSize: "2em" }}
style={{ alignSelf: "stretch" }}
/>
<Pagination
current={page}
Expand Down
1 change: 1 addition & 0 deletions src/pages/[id]/index.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,7 @@ export default function Index() {
useChaifenTitle("基本信息");
const [characterSet, setCharacterSet] = useAtom(characterSetAtom);
const specifierNames: Record<CharacterSetSpecifier, string> = {
gb2312: "GB2312",
general: "通用",
basic: "基本",
extended: "扩展",
Expand Down

0 comments on commit 0892a5f

Please sign in to comment.