Merge pull request #229 from mermaid-js/feat/support-number-unit-1kg

Feat/support number unit 1kg
mermaid-js · Dec 14, 2024 · e81a020 · e81a020
2 parents ecbf24f + b7d4a34
commit e81a020
Show file tree

Hide file tree

Showing 9 changed files with 725 additions and 436 deletions.
diff --git a/src/g4/sequenceLexer.g4 b/src/g4/sequenceLexer.g4
@@ -34,7 +34,6 @@ ARROW
  : '->'
  ;
 
-// Tokens
 fragment HEX
     : [0-9a-fA-F]
     ;
@@ -99,13 +98,29 @@ ID
  : [a-zA-Z_] [a-zA-Z_0-9]*
  ;
 
+fragment UNIT
+ : [a-zA-Z]+
+ ;
+
+fragment DIGIT
+    : [0-9]
+    ;
+
 INT
- : [0-9]+
+ : DIGIT+
  ;
 
 FLOAT
- : [0-9]+ '.' [0-9]*
- | '.' [0-9]+
+ : DIGIT+ '.' DIGIT*
+ | '.' DIGIT+
+ ;
+
+MONEY
+    : '$' (INT | FLOAT)
+    ;
+
+NUMBER_UNIT
+ : (INT | FLOAT) UNIT
  ;
 
 // As long as the text starts with double quotes, we treat it as a string before a closing double quote or change line

diff --git a/src/g4/sequenceParser.g4 b/src/g4/sequenceParser.g4
@@ -125,12 +125,12 @@ opt
 critical
   : CRITICAL (OPAR atom? CPAR)? braceBlock
   | CRITICAL
-  ;
+ ;
 
 section
   : SECTION (OPAR atom? CPAR)? braceBlock
   | SECTION
-  ;
+ ;
 
 creation
  : creationBody (SCOL | braceBlock)?
@@ -291,9 +291,10 @@ expr
  | assignment expr                      #assignmentExpr
  ;
 
-// [Perf tuning] Merging below tokens does not help.
 atom
  : (INT | FLOAT)  #numberAtom
+ | NUMBER_UNIT    #numberUnitAtom
+ | MONEY          #moneyAtom
  | (TRUE | FALSE) #booleanAtom
  | ID             #idAtom
  | STRING         #stringAtom

diff --git a/src/generated-parser/sequenceLexer.js b/src/generated-parser/sequenceLexer.js
diff --git a/src/generated-parser/sequenceLexer.tokens b/src/generated-parser/sequenceLexer.tokens
@@ -57,15 +57,17 @@ DOT=56
 ID=57
 INT=58
 FLOAT=59
-STRING=60
-CR=61
-COMMENT=62
-OTHER=63
-DIVIDER=64
-EVENT_PAYLOAD_LXR=65
-EVENT_END=66
-TITLE_CONTENT=67
-TITLE_END=68
+MONEY=60
+NUMBER_UNIT=61
+STRING=62
+CR=63
+COMMENT=64
+OTHER=65
+DIVIDER=66
+EVENT_PAYLOAD_LXR=67
+EVENT_END=68
+TITLE_CONTENT=69
+TITLE_END=70
 'const'=2
 'readonly'=3
 'static'=4

diff --git a/src/generated-parser/sequenceParser.js b/src/generated-parser/sequenceParser.js
diff --git a/src/generated-parser/sequenceParser.tokens b/src/generated-parser/sequenceParser.tokens
@@ -57,15 +57,17 @@ DOT=56
 ID=57
 INT=58
 FLOAT=59
-STRING=60
-CR=61
-COMMENT=62
-OTHER=63
-DIVIDER=64
-EVENT_PAYLOAD_LXR=65
-EVENT_END=66
-TITLE_CONTENT=67
-TITLE_END=68
+MONEY=60
+NUMBER_UNIT=61
+STRING=62
+CR=63
+COMMENT=64
+OTHER=65
+DIVIDER=66
+EVENT_PAYLOAD_LXR=67
+EVENT_END=68
+TITLE_CONTENT=69
+TITLE_END=70
 'const'=2
 'readonly'=3
 'static'=4

diff --git a/src/generated-parser/sequenceParserListener.js b/src/generated-parser/sequenceParserListener.js
@@ -599,6 +599,24 @@ export default class sequenceParserListener extends antlr4.tree.ParseTreeListene
 	}
 
 
+	// Enter a parse tree produced by sequenceParser#numberUnitAtom.
+	enterNumberUnitAtom(ctx) {
+	}
+
+	// Exit a parse tree produced by sequenceParser#numberUnitAtom.
+	exitNumberUnitAtom(ctx) {
+	}
+
+
+	// Enter a parse tree produced by sequenceParser#moneyAtom.
+	enterMoneyAtom(ctx) {
+	}
+
+	// Exit a parse tree produced by sequenceParser#moneyAtom.
+	exitMoneyAtom(ctx) {
+	}
+
+
 	// Enter a parse tree produced by sequenceParser#booleanAtom.
 	enterBooleanAtom(ctx) {
 	}

diff --git a/src/parser/Atom/Money.spec.ts b/src/parser/Atom/Money.spec.ts
@@ -0,0 +1,74 @@
+import antlr4 from "antlr4";
+import { default as sequenceLexer } from "../../generated-parser/sequenceLexer";
+import { default as sequenceParser } from "../../generated-parser/sequenceParser";
+
+// Test-only shim: gives every AtomContext a getFormattedText() that prefers the MONEY token's text.
+sequenceParser.AtomContext.prototype.getFormattedText = function () {
+  // The guard covers contexts that do not define MONEY(); those return their raw text.
+  const moneyNode = this.MONEY ? this.MONEY() : null;
+  if (!moneyNode) return this.getText();
+  return moneyNode.getText();
+};
+
+function parseAtom(input: string) {
+  const tokenStream = new antlr4.CommonTokenStream(
+    new sequenceLexer(new antlr4.InputStream(input)),
+  );
+  // Enter at the `atom` rule so a lone literal can be parsed on its own.
+  return new sequenceParser(tokenStream).atom();
+}
+
+describe("Money", () => {
+  describe("valid cases", () => {
+    it("should parse simple money amount and verify token", () => {
+      const ast = parseAtom("$100");
+      const token = ast.MONEY().symbol;
+      expect(ast.getFormattedText()).toBe("$100");
+      expect(sequenceParser.symbolicNames[token.type]).toBe("MONEY");
+    });
+
+    it("should parse zero money amount and verify token", () => {
+      const ast = parseAtom("$0");
+      const token = ast.MONEY().symbol;
+      expect(ast.getFormattedText()).toBe("$0");
+      expect(sequenceParser.symbolicNames[token.type]).toBe("MONEY");
+    });
+
+    it("should parse large money amount and verify token", () => {
+      const ast = parseAtom("$1000000");
+      const token = ast.MONEY().symbol;
+      expect(ast.getFormattedText()).toBe("$1000000");
+      expect(sequenceParser.symbolicNames[token.type]).toBe("MONEY");
+    });
+
+    it("should parse money amount with leading zeros", () => {
+      const ast = parseAtom("$01");
+      expect(ast.getFormattedText()).toBe("$01");
+    });
+
+    it("should parse decimal money amounts and verify token", () => {
+      const ast1 = parseAtom("$1.50");
+      const token1 = ast1.MONEY().symbol;
+      const ast2 = parseAtom("$0.50");
+      const token2 = ast2.MONEY().symbol;
+      const ast3 = parseAtom("$.50");
+      const token3 = ast3.MONEY().symbol;
+
+      expect(ast1.getFormattedText()).toBe("$1.50");
+      expect(sequenceParser.symbolicNames[token1.type]).toBe("MONEY");
+      expect(ast2.getFormattedText()).toBe("$0.50");
+      expect(sequenceParser.symbolicNames[token2.type]).toBe("MONEY");
+      expect(ast3.getFormattedText()).toBe("$.50");
+      expect(sequenceParser.symbolicNames[token3.type]).toBe("MONEY");
+    });
+
+    // Sanity-check the generated parser metadata that the token assertions above rely on.
+    it("should expose MONEY in the generated parser metadata", () => {
+      const ast = parseAtom("$100");
+      const token = ast.MONEY().symbol;
+      expect(token.type).toBeGreaterThan(0);
+      expect(sequenceParser.symbolicNames[token.type]).toBe("MONEY");
+      expect(sequenceParser.ruleNames).toContain("atom");
+    });
+  });
+});
diff --git a/src/parser/Atom/NumberUnit.spec.ts b/src/parser/Atom/NumberUnit.spec.ts
@@ -0,0 +1,91 @@
+import antlr4 from "antlr4";
+import { default as sequenceLexer } from "../../generated-parser/sequenceLexer";
+import { default as sequenceParser } from "../../generated-parser/sequenceParser";
+
+// Test-only shim: gives every AtomContext a getFormattedText() that prefers the NUMBER_UNIT token's text.
+sequenceParser.AtomContext.prototype.getFormattedText = function () {
+  // The guard covers contexts that do not define NUMBER_UNIT(); those return their raw text.
+  const unitNode = this.NUMBER_UNIT ? this.NUMBER_UNIT() : null;
+  if (!unitNode) return this.getText();
+  return unitNode.getText();
+};
+
+function parseAtom(input: string) {
+  const tokenStream = new antlr4.CommonTokenStream(
+    new sequenceLexer(new antlr4.InputStream(input)),
+  );
+  // Enter at the `atom` rule so a lone literal can be parsed on its own.
+  return new sequenceParser(tokenStream).atom();
+}
+
+describe("NumberUnit", () => {
+  describe("valid cases", () => {
+    it("should parse simple number with unit and verify token", () => {
+      const ast = parseAtom("1kg");
+      const token = ast.NUMBER_UNIT().symbol;
+      expect(ast.getFormattedText()).toBe("1kg");
+      expect(sequenceParser.symbolicNames[token.type]).toBe("NUMBER_UNIT");
+    });
+
+    it("should parse zero with unit and verify token", () => {
+      const ast = parseAtom("0kg");
+      const token = ast.NUMBER_UNIT().symbol;
+      expect(ast.getFormattedText()).toBe("0kg");
+      expect(sequenceParser.symbolicNames[token.type]).toBe("NUMBER_UNIT");
+    });
+
+    it("should parse large number with unit and verify token", () => {
+      const ast = parseAtom("100day");
+      const token = ast.NUMBER_UNIT().symbol;
+      expect(ast.getFormattedText()).toBe("100day");
+      expect(sequenceParser.symbolicNames[token.type]).toBe("NUMBER_UNIT");
+    });
+
+    it("should parse multi-character units", () => {
+      const ast1 = parseAtom("100day");
+      const ast2 = parseAtom("5km");
+      expect(ast1.getFormattedText()).toBe("100day");
+      expect(ast2.getFormattedText()).toBe("5km");
+    });
+
+    it("should parse unit without number", () => {
+      const ast = parseAtom("kg");
+      expect(ast.getFormattedText()).toBe("kg");
+    });
+
+    it("should parse number with leading zeros", () => {
+      const ast = parseAtom("01h");
+      expect(ast.getFormattedText()).toBe("01h");
+    });
+
+    it("should parse large number with leading zeros", () => {
+      const ast = parseAtom("010hours");
+      expect(ast.getFormattedText()).toBe("010hours");
+    });
+
+    it("should parse decimal numbers with unit and verify token", () => {
+      const ast1 = parseAtom("1.5kg");
+      const token1 = ast1.NUMBER_UNIT().symbol;
+      const ast2 = parseAtom("0.5h");
+      const token2 = ast2.NUMBER_UNIT().symbol;
+      const ast3 = parseAtom(".5m");
+      const token3 = ast3.NUMBER_UNIT().symbol;
+
+      expect(ast1.getFormattedText()).toBe("1.5kg");
+      expect(sequenceParser.symbolicNames[token1.type]).toBe("NUMBER_UNIT");
+      expect(ast2.getFormattedText()).toBe("0.5h");
+      expect(sequenceParser.symbolicNames[token2.type]).toBe("NUMBER_UNIT");
+      expect(ast3.getFormattedText()).toBe(".5m");
+      expect(sequenceParser.symbolicNames[token3.type]).toBe("NUMBER_UNIT");
+    });
+
+    // Sanity-check the generated parser metadata that the token assertions above rely on.
+    it("should expose NUMBER_UNIT in the generated parser metadata", () => {
+      const ast = parseAtom("1kg");
+      const token = ast.NUMBER_UNIT().symbol;
+      expect(token.type).toBeGreaterThan(0);
+      expect(sequenceParser.symbolicNames[token.type]).toBe("NUMBER_UNIT");
+      expect(sequenceParser.ruleNames).toContain("atom");
+    });
+  });
+});