Skip to content

Defining Transliterate Rules

Moe Myat Zaw edited this page Jun 7, 2019 · 1 revision

The simplest way to define transliteration rules is to use rule items that contain only `from` and `to`:

[
  {
    "from": "\u103B([\u1000-\u1021])",
    "to": "$1\u103C"
  },
  {
    "from": "\u1039",
    "to": "\u103A"
  }
]

Alternatively, rules can be organized phase by phase, where each phase holds its own rule items together with template variables (`tplVar`) and sequence/loop variables (`tplSeq`):

[
  {
    "description": "Phase 1",
    "tplVar": {
      "#c": "\u1000-\u1021\u1086"
    },
    "tplSeq": {
      "#s": [["\u1000", "\u1060", 4], ["\u1005", "\u1065", 1]]
    },
    "rules": [
      {
        "description": "'revisit' and 'matchOnStart'",
        "from": "\u103F",
        "to": "\u1086",
        "revisit": true,
        "matchOnStart": true
      },
      {
        "description": "template variable and sequence variable",
        "from": "([#c])\u1039#s",
        "to": "$1#s",
        "tpl": true,
        "tplSeqName": "#s"
      },
      {
        "description": "'minLength' and 'quickTests'",
        "from": "\u100D\u1039\u100D",
        "to": "\u106E",
        "minLength": 3,
        "quickTests": [["\u100D", 0], ["\u1039", 1], ["\u100D", 2]]
      }
    ]
  },
  {
    "description": "Phase 2",
    "rules": [
      {
        "description": "match only without 'to'",
        "from": "[\u1041-\u1049]+[\u1040-\u1049]+\u102D"
      },
      {
        "from": "\u1040\u102D",
        "to": "\u101D\u102D"
      },
      {
        "description": "'when' options",
        "from": "\u1009([\u102D\u102E\u1032\u1036])(\u1037)",
        "to": "\u1025$1$2",
        "when": {
          "Zawgyi-One-2008": true
        }
      }
    ]
  }
]

Alternatively, the complete TranslitRule model can be used:

{
  "$schema": "http://schemas.dagonmetric.com/translit-rule/schema.json#",
  "tplVar": {
    "#c1": "\u1000-\u102A\u1086\u1040-\u1049",
    "#c2": "\u1000-\u1003\u1005\u1006-\u1008"
  },
  "phases": [
    {
      "tplSeq": {
        "#s1": [["\u102D", "\u108B", 1], ["\u102E", "\u108C", 1]],
        "#s2": [["\u1000", "\u1060", 4], ["\u1006", "\u1067", 3]]
      },
      "rules": [
        {
          "description": "'postRules'",
          "from": "\u1004\u103A\u1039([#c1]\u1039[#c2])#s1",
          "to": "$1#s1",
          "tpl": true,
          "tplSeqName": "#s1",
          "minLength": 7,
          "quickTests": [["#s1", 6]],
          "postRules": [
            {
              "description": "will be applied if the parent rule matched",
              "from": "([#c1])\u1039#s2",
              "to": "$1#s2",
              "tpl": true,
              "tplSeqName": "#s2"
            }
          ]
        }
      ]
    }
  ]
}

Learn more about the translit-rule schema at http://schemas.dagonmetric.com/translit-rule/schema.json, or see the TypeScript model in translit-rule.ts.