From ed67196074154013af96a40a1610a7981200087e Mon Sep 17 00:00:00 2001
From: Nobuaki Karasawa <nkarasawa@xcoo.jp>
Date: Wed, 25 Sep 2024 08:25:48 +0900
Subject: [PATCH 1/6] fix!: fix dna ins/delins unknown repeat notation

---
 src/clj_hgvs/mutation.cljc       | 44 +++++++++++++++++++++++---------
 test/clj_hgvs/mutation_test.cljc | 19 +++++++++++---
 2 files changed, 48 insertions(+), 15 deletions(-)

diff --git a/src/clj_hgvs/mutation.cljc b/src/clj_hgvs/mutation.cljc
index 5e2bd76..03d750d 100644
--- a/src/clj_hgvs/mutation.cljc
+++ b/src/clj_hgvs/mutation.cljc
@@ -430,7 +430,7 @@
 ;;;      g.122_123ins123_234inv (TODO)
 ;;;      g.122_123ins213_234invinsAins123_211inv (TODO)
 ;;;      g.549_550insN
-;;;      g.1134_1135ins(100)
+;;;      g.1134_1135insN[100]
 ;;;      g.?_?insNC_000023.10:(12345_23456)_(34567_45678)
 
 (defrecord DNAInsertion [coord-start coord-end alt]
@@ -445,10 +445,10 @@
                            (string? alt) (case ins-format
                                            :auto (if (and (every? #(= % \N) alt)
                                                           (>= (count alt) 10))
-                                                   (str "(" (count alt) ")")
+                                                   (str "N[" (count alt) "]")
                                                    alt)
                                            :bases alt
-                                           :count (str "(" (count alt) ")"))
+                                           :count (str "N[" (count alt) "]"))
                            (map? alt) [(:transcript alt)
                                        ":"
                                        (coord/format (:coord-start alt))
@@ -478,7 +478,7 @@
 (defn- parse-dna-insertion-alt
   [s kind]
   (or (re-matches #"[A-Z]+" s)
-      (some-> (re-matches #"\((\d+)\)" s)
+      (some-> (re-matches #"N\[(\d+)\]" s)
               (second)
               (intl/parse-long)
               (repeat "N")
@@ -624,20 +624,28 @@
 ;;; e.g. g.6775delinsGA
 ;;;      g.6775delTinsGA
 ;;;      c.145_147delinsTGG
+;;;      c.145_147delinsN[10]
 
 (defrecord DNAIndel [coord-start coord-end ref alt]
   Mutation
   (format [this] (format this nil))
-  (format [this {:keys [show-bases?] :or {show-bases? false}}]
+  (format [this {:keys [show-bases? ins-format] :or {show-bases? false ins-format :auto}}]
     (apply str (flatten [(coord/format coord-start)
-                         (if (and coord-end
-                                  (or (not (coord/comparable-coordinates? coord-start coord-end))
-                                      (neg? (compare coord-start coord-end))))
+                         (when (and coord-end
+                                    (or (not (coord/comparable-coordinates? coord-start coord-end))
+                                        (neg? (compare coord-start coord-end))))
                            ["_" (coord/format coord-end)])
                          "del"
-                         (if show-bases? ref)
+                         (when show-bases? ref)
                          "ins"
-                         alt])))
+                         (when (string? alt)
+                           (case ins-format
+                             :auto (if (and (every? #(= % \N) alt)
+                                            (>= (count alt) 10))
+                                     (str "N[" (count alt) "]")
+                                     alt)
+                             :bases alt
+                             :count (str "N[" (count alt) "]")))])))
   (plain [this]
     (into {:mutation "dna-indel"} (plain-coords this)))
 
@@ -663,6 +671,15 @@
                           :clj-hgvs.mutation.dna-indel/ref
                           :clj-hgvs.mutation.dna-indel/alt])))
 
+(defn- parse-dna-indel-alt
+  [s]
+  (or (re-matches #"[A-Z]+" s)  ; literal bases: returned unchanged
+      (some-> (re-matches #"N\[(\d+)\]" s)  ; unknown-repeat form such as "N[10]"
+              (second)  ; captured repeat count (still a string)
+              (intl/parse-long)
+              (repeat "N")
+              (#(apply str %)))))  ; join into one string of that many "N"s; nil if neither form matched
+
 (defn dna-indel
   "Constructor of DNAIndel. Throws an exception if any input is illegal."
   [coord-start coord-end ref alt]
@@ -673,13 +690,16 @@
   (DNAIndel. coord-start coord-end ref alt))
 
 (def ^:private dna-indel-re
-  #"([\d\-\+\*\?]+)(?:_([\d\-\+\*\?]+))?del([A-Z]+)?ins([A-Z]+)")
+  #"([\d\-\+\*\?]+)(?:_([\d\-\+\*\?]+))?del([A-Z]+)?ins(N\[\d+\]|[A-Z]+)") ; groups: start, optional end, optional ref, alt (either N[count] or bases)
 
 (defn parse-dna-indel
   [s kind]
   (let [[_ coord-s coord-e ref alt] (re-matches dna-indel-re s)
         parse-coord (coord-parser kind)]
-    (dna-indel (parse-coord coord-s) (some-> coord-e parse-coord) ref alt)))
+    (dna-indel (parse-coord coord-s)
+               (some-> coord-e parse-coord)  ; end coordinate group is optional in dna-indel-re
+               ref
+               (parse-dna-indel-alt alt))))  ; "N[n]" is expanded to n "N"s before the record is built
 
 (defmethod restore "dna-indel"
   [m]
diff --git a/test/clj_hgvs/mutation_test.cljc b/test/clj_hgvs/mutation_test.cljc
index a49a292..28eb922 100644
--- a/test/clj_hgvs/mutation_test.cljc
+++ b/test/clj_hgvs/mutation_test.cljc
@@ -342,7 +342,7 @@
                                        "N"))
 
 (def dna-insertion6sb "1134_1135insNNNNNNNNNN")
-(def dna-insertion6sc "1134_1135ins(10)")
+(def dna-insertion6sc "1134_1135insN[10]")
 (def dna-insertion6k :genome)
 (def dna-insertion6 (mut/dna-insertion (coord/genomic-coordinate 1134)
                                        (coord/genomic-coordinate 1135)
@@ -524,20 +524,33 @@
                                nil
                                "TGG"))
 
+(def dna-indel4sb "1134_1138delinsNNNNNNNNNN")
+(def dna-indel4sc "1134_1138delinsN[10]")
+(def dna-indel4k :genome)
+(def dna-indel4 (mut/dna-indel (coord/genomic-coordinate 1134)
+                               (coord/genomic-coordinate 1138)
+                               nil
+                               "NNNNNNNNNN"))
+
 (deftest format-dna-indel-test
   (testing "returns a string expression of a DNA indel"
     (are [m o s] (= (mut/format m o) s)
       dna-indel1 nil dna-indel1s
       dna-indel2 nil dna-indel2ss
       dna-indel2 {:show-bases? true} dna-indel2s
-      dna-indel3 nil dna-indel3s)))
+      dna-indel3 nil dna-indel3s
+      dna-indel4 {:ins-format :auto} dna-indel4sc
+      dna-indel4 {:ins-format :bases} dna-indel4sb
+      dna-indel4 {:ins-format :count} dna-indel4sc)))
 
 (deftest parse-dna-indel-test
   (testing "returns a correct DNAIndel"
     (are [s k m] (= (mut/parse-dna-indel s k) m)
       dna-indel1s dna-indel1k dna-indel1
       dna-indel2s dna-indel2k dna-indel2
-      dna-indel3s dna-indel3k dna-indel3))
+      dna-indel3s dna-indel3k dna-indel3
+      dna-indel4sb dna-indel4k dna-indel4
+      dna-indel4sc dna-indel4k dna-indel4))
   (testing "invalid DNA indel"
     (are [s k] (thrown? #?(:clj Throwable, :cljs js/Error)
                         (mut/parse-dna-indel s k))

From c2e025d6291b0ca75b473093c64cfdc16598d8ad Mon Sep 17 00:00:00 2001
From: Nobuaki Karasawa <nkarasawa@xcoo.jp>
Date: Wed, 25 Sep 2024 08:50:18 +0900
Subject: [PATCH 2/6] fix!: fix rna ins unknown repeat notation

---
 src/clj_hgvs/mutation.cljc       | 12 ++++++------
 test/clj_hgvs/mutation_test.cljc |  2 +-
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/clj_hgvs/mutation.cljc b/src/clj_hgvs/mutation.cljc
index 03d750d..adf6222 100644
--- a/src/clj_hgvs/mutation.cljc
+++ b/src/clj_hgvs/mutation.cljc
@@ -1056,7 +1056,7 @@
 ;;; RNA - insertion
 ;;;
 ;;; e.g. r.756_757insacu
-;;;      r.431_432ins(5)
+;;;      r.431_432insn[5]
 ;;;      r.123_124insL37425.1:23_361
 
 (defrecord RNAInsertion [coord-start coord-end alt]
@@ -1069,7 +1069,7 @@
          "ins"
          (cond
            (map? alt) (str (:genbank alt) ":" (:coord-start alt) "_" (:coord-end alt))
-           (re-matches #"n{2,}" alt) (str "(" (count alt) ")")
+           (re-matches #"n{2,}" alt) (str "n[" (count alt) "]")
            :else alt)))
   (plain [this]
     (into {:mutation "rna-insertion"} (plain-coords this))))
@@ -1094,9 +1094,9 @@
 
 (defn- parse-rna-alt-n
   [s]
-  (if-let [n (some-> (re-find #"\((\d)\)" s)
-                     second
-                     intl/parse-long)]
+  (when-let [n (some-> (re-find #"n\[(\d+)\]" s)  ; re-find: first "n[<digits>]" occurring anywhere in s
+                       second  ; captured repeat count (still a string)
+                       intl/parse-long)]  ; no match -> some-> yields nil -> the whole form returns nil
     (apply str (repeat n "n"))))
 
 (def ^:private genbank-re
@@ -1118,8 +1118,8 @@
     (rna-insertion (coord/parse-rna-coordinate coord-s)
                    (some-> coord-e coord/parse-rna-coordinate)
                    (cond
+                     (re-find #"n\[\d+\]" alt) (parse-rna-alt-n alt)
                      (re-find #"[a-z]+" alt) alt
-                     (re-find #"\(\d\)" alt) (parse-rna-alt-n alt)
                      :else (parse-rna-alt-genbank alt)))))
 
 (defmethod restore "rna-insertion"
diff --git a/test/clj_hgvs/mutation_test.cljc b/test/clj_hgvs/mutation_test.cljc
index 28eb922..d83482f 100644
--- a/test/clj_hgvs/mutation_test.cljc
+++ b/test/clj_hgvs/mutation_test.cljc
@@ -831,7 +831,7 @@
                       :coord-end (coord/plain (coord/rna-coordinate 757 nil nil))
                       :alt "acu"})
 
-(def rna-insertion2s "431_432ins(5)")
+(def rna-insertion2s "431_432insn[5]")
 (def rna-insertion2 (mut/rna-insertion (coord/rna-coordinate 431 nil nil)
                                        (coord/rna-coordinate 432 nil nil)
                                        "nnnnn"))

From e11455b174522fe97c282ecac50b57aa99413826 Mon Sep 17 00:00:00 2001
From: Nobuaki Karasawa <nkarasawa@xcoo.jp>
Date: Wed, 25 Sep 2024 09:04:09 +0900
Subject: [PATCH 3/6] fix!: fix protein ins unknown repeat notation

---
 src/clj_hgvs/mutation.cljc       | 12 ++++++++----
 test/clj_hgvs/mutation_test.cljc |  2 +-
 2 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/src/clj_hgvs/mutation.cljc b/src/clj_hgvs/mutation.cljc
index adf6222..7041543 100644
--- a/src/clj_hgvs/mutation.cljc
+++ b/src/clj_hgvs/mutation.cljc
@@ -1744,7 +1744,7 @@
 ;;; Protein - insertion
 ;;;
 ;;; e.g. Lys23_Leu24insArgSerGln
-;;;      Arg78_Gly79ins23
+;;;      Arg78_Gly79insX[23]
 
 (defrecord ProteinInsertion [ref-start coord-start ref-end coord-end alts]
   Mutation
@@ -1759,7 +1759,7 @@
                          (coord/format coord-end)
                          "ins"
                          (if (every? #(= % "Xaa") alts)
-                           (count alts)
+                           (str "X[" (count alts) "]")
                            (cond->> alts
                              (= amino-acid-format :short) (map ->short-amino-acid)))])))
   (plain [this]
@@ -1790,10 +1790,14 @@
   [s]
   (condp re-matches s
     #"([A-Z*]([a-z]{2})?)+" (mapv ->long-amino-acid (re-seq #"[A-Z*](?:[a-z]{2})?" s))
-    #"\d+" (vec (repeat (intl/parse-long s) "Xaa"))))
+    #"X\[\d+\]" (-> (re-find #"X\[(\d+)\]" s)
+                    second
+                    intl/parse-long
+                    (repeat "Xaa")
+                    vec)))
 
 (def ^:private protein-insertion-re
-  #"([A-Z](?:[a-z]{2})?)(\d+)_([A-Z](?:[a-z]{2})?)(\d+)ins([\da-zA-Z*]+)")
+  #"([A-Z](?:[a-z]{2})?)(\d+)_([A-Z](?:[a-z]{2})?)(\d+)ins([\da-zA-Z*\[\]]+)") ; alts group now also admits "[" and "]" so "X[5]" is captured
 
 (defn parse-protein-insertion
   [s]
diff --git a/test/clj_hgvs/mutation_test.cljc b/test/clj_hgvs/mutation_test.cljc
index d83482f..aa1e52d 100644
--- a/test/clj_hgvs/mutation_test.cljc
+++ b/test/clj_hgvs/mutation_test.cljc
@@ -1327,7 +1327,7 @@
                                                "Leu" (coord/protein-coordinate 24)
                                                ["Arg" "Ser" "Ter"]))
 
-(def protein-insertion3s "Arg78_Gly79ins5")
+(def protein-insertion3s "Arg78_Gly79insX[5]")
 (def protein-insertion3 (mut/protein-insertion "Arg" (coord/protein-coordinate 78)
                                                "Gly" (coord/protein-coordinate 79)
                                                ["Xaa" "Xaa" "Xaa" "Xaa" "Xaa"]))

From b551792360f474ffc6a56421eb44110b9bfcea06 Mon Sep 17 00:00:00 2001
From: Nobuaki Karasawa <nkarasawa@xcoo.jp>
Date: Tue, 15 Oct 2024 09:24:28 +0900
Subject: [PATCH 4/6] fix: add unknown repeat repairer

---
 src/clj_hgvs/repairer.cljc   | 15 +++++++++++++++
 test/clj_hgvs/core_test.cljc |  5 +++++
 2 files changed, 20 insertions(+)

diff --git a/src/clj_hgvs/repairer.cljc b/src/clj_hgvs/repairer.cljc
index ec76d7d..0470446 100644
--- a/src/clj_hgvs/repairer.cljc
+++ b/src/clj_hgvs/repairer.cljc
@@ -156,6 +156,20 @@
     (string/replace s #"del\d+ins" "delins")
     s))
 
+;; g.1134_1135ins(100) -> g.1134_1135insN[100]
+;; r.431_432ins(5) -> r.431_432insn[5]
+;; p.R78_G79ins23 -> p.R78_G79insX[23]
+(defn ^:no-doc replace-uncertain-bases
+  [s kind]  ; s: HGVS string to repair; kind: keyword selecting the N / n / X repeat symbol
+  (case kind
+    (:genome :mitochondria :coding-dna :non-coding-dna :circular-dna)
+    (string/replace s #"ins\((\d+)\)$" "insN[$1]")  ; trailing ins(<count>) -> insN[<count>]
+    (:rna)
+    (string/replace s #"ins\((\d+)\)$" "insn[$1]")  ; trailing ins(<count>) -> insn[<count>]
+    (:protein)
+    (string/replace s #"ins(\d+)(\)?)$" "insX[$1]$2")  ; bare count; optional ")" keeps p.(R78_G79ins23) repairable
+    s))  ; any other kind: returned unchanged
+
 ;; c.112GAT(14) -> c.112GAT[14]
 (defn ^:no-doc replace-repeated-seqs-parens1
   [s kind]
@@ -318,6 +332,7 @@
    remove-alternative
    remove-inv-bases
    remove-del-count-from-delins
+   replace-uncertain-bases
    replace-repeated-seqs-parens1
    replace-repeated-seqs-parens2
    remove-genomic-bases-from-protein
diff --git a/test/clj_hgvs/core_test.cljc b/test/clj_hgvs/core_test.cljc
index e2feeec..84ec891 100644
--- a/test/clj_hgvs/core_test.cljc
+++ b/test/clj_hgvs/core_test.cljc
@@ -332,6 +332,11 @@
     ;; remove-del-count-from-delins
     "c.123_124del2insCTGA" "c.123_124delinsCTGA"
 
+    ;; replace-uncertain-bases
+    "g.1134_1135ins(100)" "g.1134_1135insN[100]"
+    "r.431_432ins(5)"     "r.431_432insn[5]"
+    "p.R78_G79ins23"      "p.R78_G79insX[23]"
+
     ;; replace-repeated-seqs-parens1
     "c.112GAT(14)" "c.112GAT[14]"
 

From 4a0f78802ded0ca8b3b7d19562b633c8aa4664c1 Mon Sep 17 00:00:00 2001
From: Nobuaki Karasawa <nkarasawa@xcoo.jp>
Date: Wed, 16 Oct 2024 09:57:10 +0900
Subject: [PATCH 5/6] feat: support indel unknown insertion and ins-format

---
 src/clj_hgvs/mutation.cljc       |  68 ++++++++++++++-----
 test/clj_hgvs/mutation_test.cljc | 112 ++++++++++++++++++++++++++-----
 2 files changed, 146 insertions(+), 34 deletions(-)

diff --git a/src/clj_hgvs/mutation.cljc b/src/clj_hgvs/mutation.cljc
index 7041543..af01791 100644
--- a/src/clj_hgvs/mutation.cljc
+++ b/src/clj_hgvs/mutation.cljc
@@ -1062,14 +1062,19 @@
 (defrecord RNAInsertion [coord-start coord-end alt]
   Mutation
   (format [this] (format this nil))
-  (format [this _]
+  (format [this {:keys [ins-format] :or {ins-format :auto}}]  ; :ins-format defaults to :auto when absent
     (str (coord/format coord-start)
          "_"
          (coord/format coord-end)
          "ins"
          (cond
            (map? alt) (str (:genbank alt) ":" (:coord-start alt) "_" (:coord-end alt))
-           (re-matches #"n{2,}" alt) (str "n[" (count alt) "]")
+           (re-matches #"n{2,}" alt) (case ins-format  ; only runs of two or more "n" reach this case; it has no default clause
+                                       :auto (if (>= (count alt) 10)  ; compact form only for runs of 10 or more
+                                               (str "n[" (count alt) "]")
+                                               alt)  ; shorter runs are written out
+                                       :bases alt  ; always written out
+                                       :count (str "n[" (count alt) "]"))  ; always compact n[count]
            :else alt)))
   (plain [this]
     (into {:mutation "rna-insertion"} (plain-coords this))))
@@ -1238,20 +1243,28 @@
 ;;; e.g. r.775delinsga
 ;;;      r.775deluinsga
 ;;;      r.775_777delinsc
+;;;      r.775_777delinsn[10]
 
 (defrecord RNAIndel [coord-start coord-end ref alt]
   Mutation
   (format [this] (format this nil))
-  (format [this {:keys [show-bases?] :or {show-bases? false}}]
+  (format [this {:keys [show-bases? ins-format] :or {show-bases? false ins-format :auto}}]
     (str (coord/format coord-start)
-         (if (and coord-end
-                  (or (not (coord/comparable-coordinates? coord-start coord-end))
-                      (neg? (compare coord-start coord-end))))
+         (when (and coord-end
+                    (or (not (coord/comparable-coordinates? coord-start coord-end))
+                        (neg? (compare coord-start coord-end))))
            (str "_" (coord/format coord-end)))
          "del"
-         (if show-bases? ref)
+         (when show-bases? ref)
          "ins"
-         alt))
+         (if (re-matches #"n{2,}" alt)
+           (case ins-format
+             :auto (if (>= (count alt) 10)
+                     (str "n[" (count alt) "]")
+                     alt)
+             :bases alt
+             :count (str "n[" (count alt) "]"))
+           alt)))
   (plain [this]
     (into {:mutation "rna-indel"} (plain-coords this)))
 
@@ -1287,7 +1300,7 @@
   (RNAIndel. coord-start coord-end ref alt))
 
 (def ^:private rna-indel-re
-  #"([\d\-\+\*]+)(?:_([\d\-\+\*]+))?del([a-z]+)?ins([a-z]+)")
+  #"([\d\-\+\*]+)(?:_([\d\-\+\*]+))?del([a-z]+)?ins(n\[\d+\]|[a-z]+)") ; alt is n[count] or bases, never a mix such as "acn[5]"
 
 (defn parse-rna-indel
   [s]
@@ -1295,7 +1308,9 @@
     (rna-indel (coord/parse-rna-coordinate coord-s)
                (some-> coord-e coord/parse-rna-coordinate)
                ref
-               alt)))
+               (if (re-find #"n\[\d+\]" alt)
+                 (parse-rna-alt-n alt)
+                 alt))))
 
 (defmethod restore "rna-indel"
   [m]
@@ -1749,7 +1764,7 @@
 (defrecord ProteinInsertion [ref-start coord-start ref-end coord-end alts]
   Mutation
   (format [this] (format this nil))
-  (format [this {:keys [amino-acid-format] :or {amino-acid-format :long}}]
+  (format [this {:keys [amino-acid-format ins-format] :or {amino-acid-format :long ins-format :auto}}]
     (apply str (flatten [(cond-> ref-start
                            (= amino-acid-format :short) ->short-amino-acid)
                          (coord/format coord-start)
@@ -1759,7 +1774,14 @@
                          (coord/format coord-end)
                          "ins"
                          (if (every? #(= % "Xaa") alts)
-                           (str "X[" (count alts) "]")
+                           (let [alts (cond->> alts
+                                        (= amino-acid-format :short) (map ->short-amino-acid))]
+                             (case ins-format
+                               :auto (if (>= (count alts) 10)
+                                       (str "X[" (count alts) "]")
+                                       alts)
+                               :amino-acids alts
+                               :count (str "X[" (count alts) "]")))
                            (cond->> alts
                              (= amino-acid-format :short) (map ->short-amino-acid)))])))
   (plain [this]
@@ -1817,22 +1839,32 @@
 ;;;
 ;;; e.g. Cys28delinsTrpVal
 ;;;      Cys28_Lys29delinsTrp
+;;;      Cys28_Lys29delinsX[10]
 
 (defrecord ProteinIndel [ref-start coord-start ref-end coord-end alts]
   Mutation
   (format [this] (format this nil))
-  (format [this {:keys [amino-acid-format] :or {amino-acid-format :long}}]
+  (format [this {:keys [amino-acid-format ins-format] :or {amino-acid-format :long ins-format :auto}}]  ; defaults: :long names, :auto insertion form
     (apply str (flatten [(cond-> ref-start
                            (= amino-acid-format :short) ->short-amino-acid)
                          (coord/format coord-start)
-                         (if (should-show-end? ref-start coord-start ref-end coord-end)
+                         (when (should-show-end? ref-start coord-start ref-end coord-end)  ; end part is emitted only when this holds
                            ["_"
                             (cond-> ref-end
                               (= amino-acid-format :short) ->short-amino-acid)
                             (coord/format coord-end)])
                          "delins"
-                         (cond->> alts
-                           (= amino-acid-format :short) (map ->short-amino-acid))])))
+                         (if (every? #(= % "Xaa") alts)  ; inserted residues are all "Xaa"
+                           (let [alts (cond->> alts  ; shadows alts with short names when :short is requested
+                                        (= amino-acid-format :short) (map ->short-amino-acid))]
+                             (case ins-format  ; no default clause: only the three keys below are handled
+                               :auto (if (>= (count alts) 10)  ; compact form only for 10 or more residues
+                                       (str "X[" (count alts) "]")
+                                       alts)  ; fewer than 10: residues are written out
+                               :amino-acids alts  ; always written out
+                               :count (str "X[" (count alts) "]")))  ; always compact X[count]
+                           (cond->> alts  ; any other residue list: unchanged apart from :short naming
+                             (= amino-acid-format :short) (map ->short-amino-acid)))])))
   (plain [this]
     (into {:mutation "protein-indel"} (plain-coords this))))
 
@@ -1859,7 +1891,7 @@
   (ProteinIndel. ref-start coord-start ref-end coord-end alts))
 
 (def ^:private protein-indel-re
-  #"([A-Z](?:[a-z]{2})?)(\d+)(?:_([A-Z](?:[a-z]{2})?)(\d+))?delins([A-Z*][a-zA-Z*]*)?")
+  #"([A-Z](?:[a-z]{2})?)(\d+)(?:_([A-Z](?:[a-z]{2})?)(\d+))?delins([A-Z*][a-zA-Z*\[\]\d]*)?") ; alts group now also admits "[", "]" and digits so "X[10]" is captured
 
 (defn parse-protein-indel
   [s]
@@ -1868,7 +1900,7 @@
                    (coord/parse-protein-coordinate coord-s)
                    (->long-amino-acid ref-e)
                    (some-> coord-e coord/parse-protein-coordinate)
-                   (mapv ->long-amino-acid (some->> alts (re-seq #"[A-Z*](?:[a-z]{2})?"))))))
+                   (mapv ->long-amino-acid (some->> alts parse-protein-insertion-alts)))))
 
 (defmethod restore "protein-indel"
   [m]
diff --git a/test/clj_hgvs/mutation_test.cljc b/test/clj_hgvs/mutation_test.cljc
index aa1e52d..85061d7 100644
--- a/test/clj_hgvs/mutation_test.cljc
+++ b/test/clj_hgvs/mutation_test.cljc
@@ -831,13 +831,19 @@
                       :coord-end (coord/plain (coord/rna-coordinate 757 nil nil))
                       :alt "acu"})
 
-(def rna-insertion2s "431_432insn[5]")
+(def rna-insertion2sb "431_432insnnnnn")
+(def rna-insertion2sc "431_432insn[5]")
 (def rna-insertion2 (mut/rna-insertion (coord/rna-coordinate 431 nil nil)
                                        (coord/rna-coordinate 432 nil nil)
                                        "nnnnn"))
 
-(def rna-insertion3s "123_124insL37425.1:23_361")
-(def rna-insertion3 (mut/rna-insertion (coord/rna-coordinate 123 nil nil)
+(def rna-insertion3sc "431_432insn[10]")
+(def rna-insertion3 (mut/rna-insertion (coord/rna-coordinate 431 nil nil)
+                                       (coord/rna-coordinate 432 nil nil)
+                                       "nnnnnnnnnn"))
+
+(def rna-insertion4s "123_124insL37425.1:23_361")
+(def rna-insertion4 (mut/rna-insertion (coord/rna-coordinate 123 nil nil)
                                        (coord/rna-coordinate 124 nil nil)
                                        {:genbank "L37425.1"
                                         :coord-start 23
@@ -845,17 +851,22 @@
 
 (deftest format-rna-insertion-test
   (testing "returns a string expression of a RNA insertion"
-    (are [m s] (= (mut/format m nil) s)
-      rna-insertion1 rna-insertion1s
-      rna-insertion2 rna-insertion2s
-      rna-insertion3 rna-insertion3s)))
+    (are [m o s] (= (mut/format m o) s)
+      rna-insertion1 nil rna-insertion1s
+      rna-insertion2 {:ins-format :auto} rna-insertion2sb
+      rna-insertion2 {:ins-format :bases} rna-insertion2sb
+      rna-insertion2 {:ins-format :count} rna-insertion2sc
+      rna-insertion3 {:ins-format :auto} rna-insertion3sc
+      rna-insertion4 nil rna-insertion4s)))
 
 (deftest parse-rna-insertion-test
   (testing "returns a correct RNAInsertion"
     (are [s m] (= (mut/parse-rna-insertion s) m)
       rna-insertion1s rna-insertion1
-      rna-insertion2s rna-insertion2
-      rna-insertion3s rna-insertion3))
+      rna-insertion2sb rna-insertion2
+      rna-insertion2sc rna-insertion2
+      rna-insertion3sc rna-insertion3
+      rna-insertion4s rna-insertion4))
   (testing "invalid RNA insertion"
     (are [s] (thrown? #?(:clj Throwable, :cljs js/Error)
                       (mut/parse-rna-insertion s))
@@ -969,20 +980,40 @@
                                nil
                                "c"))
 
+(def rna-indel4sb "775_777delinsnnnnn")
+(def rna-indel4sc "775_777delinsn[5]")
+(def rna-indel4 (mut/rna-indel (coord/rna-coordinate 775 nil nil)
+                               (coord/rna-coordinate 777 nil nil)
+                               nil
+                               "nnnnn"))
+
+(def rna-indel5sc "775_777delinsn[10]")
+(def rna-indel5 (mut/rna-indel (coord/rna-coordinate 775 nil nil)
+                               (coord/rna-coordinate 777 nil nil)
+                               nil
+                               "nnnnnnnnnn"))
+
 (deftest format-rna-indel-test
   (testing "returns a string expression of a RNA indel"
     (are [m o s] (= (mut/format m o) s)
       rna-indel1 nil rna-indel1s
       rna-indel2 nil rna-indel2ss
       rna-indel2 {:show-bases? true} rna-indel2s
-      rna-indel3 nil rna-indel3s)))
+      rna-indel3 nil rna-indel3s
+      rna-indel4 {:ins-format :auto} rna-indel4sb
+      rna-indel4 {:ins-format :bases} rna-indel4sb
+      rna-indel4 {:ins-format :count} rna-indel4sc
+      rna-indel5 {:ins-format :auto} rna-indel5sc)))
 
 (deftest parse-rna-indel-test
   (testing "returns a correct RNAIndel"
     (are [s m] (= (mut/parse-rna-indel s) m)
       rna-indel1s rna-indel1
       rna-indel2s rna-indel2
-      rna-indel3s rna-indel3))
+      rna-indel3s rna-indel3
+      rna-indel4sb rna-indel4
+      rna-indel4sc rna-indel4
+      rna-indel5sc rna-indel5))
   (testing "invalid RNA indel"
     (are [s] (thrown? #?(:clj Throwable, :cljs js/Error)
                       (mut/parse-rna-indel s))
@@ -1327,11 +1358,20 @@
                                                "Leu" (coord/protein-coordinate 24)
                                                ["Arg" "Ser" "Ter"]))
 
-(def protein-insertion3s "Arg78_Gly79insX[5]")
+(def protein-insertion3sal "Arg78_Gly79insXaaXaaXaaXaaXaa")
+(def protein-insertion3sas "R78_G79insXXXXX")
+(def protein-insertion3sc "Arg78_Gly79insX[5]")
+(def protein-insertion3scs "R78_G79insX[5]")
 (def protein-insertion3 (mut/protein-insertion "Arg" (coord/protein-coordinate 78)
                                                "Gly" (coord/protein-coordinate 79)
                                                ["Xaa" "Xaa" "Xaa" "Xaa" "Xaa"]))
 
+(def protein-insertion4sc "Arg78_Gly79insX[10]")
+(def protein-insertion4scs "R78_G79insX[10]")
+(def protein-insertion4 (mut/protein-insertion "Arg" (coord/protein-coordinate 78)
+                                               "Gly" (coord/protein-coordinate 79)
+                                               ["Xaa" "Xaa" "Xaa" "Xaa" "Xaa" "Xaa" "Xaa" "Xaa" "Xaa" "Xaa"]))
+
 (deftest format-protein-insertion-test
   (testing "returns a string expression of a protein insertion"
     (are [m o s] (= (mut/format m o) s)
@@ -1339,7 +1379,14 @@
       protein-insertion1 {:amino-acid-format :short} protein-insertion1ss
       protein-insertion2 nil protein-insertion2s
       protein-insertion2 {:amino-acid-format :short} protein-insertion2ss
-      protein-insertion3 nil protein-insertion3s)))
+      protein-insertion3 {:ins-format :auto} protein-insertion3sal
+      protein-insertion3 {:amino-acid-format :short :ins-format :auto} protein-insertion3sas
+      protein-insertion3 {:ins-format :amino-acids} protein-insertion3sal
+      protein-insertion3 {:amino-acid-format :short :ins-format :amino-acids} protein-insertion3sas
+      protein-insertion3 {:ins-format :count} protein-insertion3sc
+      protein-insertion3 {:amino-acid-format :short :ins-format :count} protein-insertion3scs
+      protein-insertion4 {:ins-format :auto} protein-insertion4sc
+      protein-insertion4 {:amino-acid-format :short :ins-format :auto} protein-insertion4scs)))
 
 (deftest parse-protein-insertion-test
   (testing "returns a correct ProteinInsertion"
@@ -1348,7 +1395,12 @@
       protein-insertion1ss protein-insertion1
       protein-insertion2s protein-insertion2
       protein-insertion2ss protein-insertion2
-      protein-insertion3s protein-insertion3))
+      protein-insertion3sal protein-insertion3
+      protein-insertion3sas protein-insertion3
+      protein-insertion3sc protein-insertion3
+      protein-insertion3scs protein-insertion3
+      protein-insertion4sc protein-insertion4
+      protein-insertion4scs protein-insertion4))
   (testing "invalid protein insertion"
     (are [s] (thrown? #?(:clj Throwable, :cljs js/Error)
                       (mut/parse-protein-insertion s))
@@ -1389,6 +1441,20 @@
                                        "Lys" (coord/protein-coordinate 29)
                                        ["Ter"]))
 
+(def protein-indel4sal "Cys28_Lys29delinsXaaXaaXaaXaaXaa")
+(def protein-indel4sas "C28_K29delinsXXXXX")
+(def protein-indel4sc "Cys28_Lys29delinsX[5]")
+(def protein-indel4scs "C28_K29delinsX[5]")
+(def protein-indel4 (mut/protein-indel "Cys" (coord/protein-coordinate 28)
+                                       "Lys" (coord/protein-coordinate 29)
+                                       ["Xaa" "Xaa" "Xaa" "Xaa" "Xaa"]))
+
+(def protein-indel5sc "Cys28_Lys29delinsX[10]")
+(def protein-indel5scs "C28_K29delinsX[10]")
+(def protein-indel5 (mut/protein-indel "Cys" (coord/protein-coordinate 28)
+                                       "Lys" (coord/protein-coordinate 29)
+                                       ["Xaa" "Xaa" "Xaa" "Xaa" "Xaa" "Xaa" "Xaa" "Xaa" "Xaa" "Xaa"]))
+
 (deftest format-protein-indel-test
   (testing "returns a string expression of a protein indel"
     (are [m o s] (= (mut/format m o) s)
@@ -1397,7 +1463,15 @@
       protein-indel2 nil protein-indel2s
       protein-indel2 {:amino-acid-format :short} protein-indel2ss
       protein-indel3 nil protein-indel3s
-      protein-indel3 {:amino-acid-format :short} protein-indel3ss)))
+      protein-indel3 {:amino-acid-format :short} protein-indel3ss
+      protein-indel4 {:ins-format :auto} protein-indel4sal
+      protein-indel4 {:amino-acid-format :short :ins-format :auto} protein-indel4sas
+      protein-indel4 {:ins-format :amino-acids} protein-indel4sal
+      protein-indel4 {:amino-acid-format :short :ins-format :amino-acids} protein-indel4sas
+      protein-indel4 {:ins-format :count} protein-indel4sc
+      protein-indel4 {:amino-acid-format :short :ins-format :count} protein-indel4scs
+      protein-indel5 {:ins-format :auto} protein-indel5sc
+      protein-indel5 {:amino-acid-format :short :ins-format :auto} protein-indel5scs)))
 
 (deftest parse-protein-indel-test
   (testing "returns a correct ProteinIndel"
@@ -1407,7 +1481,13 @@
       protein-indel2s protein-indel2
       protein-indel2ss protein-indel2
       protein-indel3s protein-indel3
-      protein-indel3ss protein-indel3))
+      protein-indel3ss protein-indel3
+      protein-indel4sal protein-indel4
+      protein-indel4sas protein-indel4
+      protein-indel4sc protein-indel4
+      protein-indel4scs protein-indel4
+      protein-indel5sc protein-indel5
+      protein-indel5scs protein-indel5))
   (testing "invalid protein indel"
     (are [s] (thrown? #?(:clj Throwable, :cljs js/Error)
                       (mut/parse-protein-indel s))

From 36d828df711d6e71ecf600853c8d8e35faaa2ab9 Mon Sep 17 00:00:00 2001
From: Nobuaki Karasawa <nkarasawa@xcoo.jp>
Date: Wed, 16 Oct 2024 10:10:52 +0900
Subject: [PATCH 6/6] chore: Prepare for next development iteration
 (0.5.0-SNAPSHOT)

---
 project.clj | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/project.clj b/project.clj
index 0af15e3..d0d6315 100644
--- a/project.clj
+++ b/project.clj
@@ -1,4 +1,4 @@
-(defproject clj-hgvs "0.4.7"
+(defproject clj-hgvs "0.5.0-SNAPSHOT"
   :description "Clojure(Script) library for handling HGVS"
   :url "https://github.com/chrovis/clj-hgvs"
   :license {:name "Apache License, Version 2.0"