CASL-561 tag normalization

Signed-off-by: Jakub Amanowicz <[email protected]>
heremaps · Oct 18, 2024 · 575e106 · 575e106
1 parent 9979460
commit 575e106
Show file tree

Hide file tree

Showing 17 changed files with 484 additions and 167 deletions.
diff --git a/here-naksha-lib-base/src/commonMain/kotlin/naksha/base/NormalizerForm.kt b/here-naksha-lib-base/src/commonMain/kotlin/naksha/base/NormalizerForm.kt
@@ -4,7 +4,7 @@ import kotlin.js.ExperimentalJsExport
 import kotlin.js.JsExport
 
 /**
- * Please refer []Unicode Normalization Forms](https://www.unicode.org/reports/tr15/#Norm_Forms)
+ * Please refer to the [Unicode Normalization Forms](https://www.unicode.org/reports/tr15/#Norm_Forms)
  */
 @OptIn(ExperimentalJsExport::class)
 @JsExport

diff --git a/here-naksha-lib-model/src/commonMain/kotlin/naksha/model/Tag.kt b/here-naksha-lib-model/src/commonMain/kotlin/naksha/model/Tag.kt
@@ -3,6 +3,8 @@
 package naksha.model
 
 import naksha.base.*
+import naksha.base.NormalizerForm.NFD
+import naksha.base.NormalizerForm.NFKC
 import naksha.model.NakshaError.NakshaErrorCompanion.ILLEGAL_ARGUMENT
 import kotlin.js.JsExport
 import kotlin.js.JsName
@@ -16,10 +18,10 @@ import kotlin.jvm.JvmStatic
  * @property value the value of the tag; _null_, Boolean, String or Double.
  */
 @JsExport
-class Tag(): AnyObject() {
+class Tag() : AnyObject() {
 
     @JsName("of")
-    constructor(tag: String, key: String, value: Any?): this() {
+    constructor(tag: String, key: String, value: Any?) : this() {
         this.tag = tag
         this.key = key
         this.value = value
@@ -32,42 +34,19 @@ class Tag(): AnyObject() {
 
         @JvmStatic
         @JsStatic
-        fun parse(tag: String): Tag {
-            val i = tag.indexOf('=')
-            val key: String
-            val value: Any?
-            if (i > 1) {
-                if (tag[i-1] == ':') { // :=
-                    key = tag.substring(0, i-1).trim()
-                    val raw = tag.substring(i + 1).trim()
-                    value = if ("true".equals(raw, ignoreCase = true)) {
-                        true
-                    } else if ("false".equals(raw, ignoreCase = true)) {
-                        false
-                    } else {
-                        raw. toDouble()
-                    }
-                } else {
-                    key = tag.substring(0, i).trim()
-                    value = tag.substring(i + 1).trim()
-                }
-            } else {
-                key = tag
-                value = null
+        fun of(normalizedKey: String, normalizedValue: Any?): Tag = when (normalizedValue) {
+            null -> Tag(normalizedKey, normalizedKey, null)
+            is String -> Tag("$normalizedKey=$normalizedValue", normalizedKey, normalizedValue)
+            is Boolean -> Tag("$normalizedKey:=$normalizedValue", normalizedKey, normalizedValue)
+            is Number -> {
+                val doubleValue = normalizedValue.toDouble()
+                Tag("$normalizedKey:=$doubleValue", normalizedKey, doubleValue)
             }
-            return Tag(tag, key, value)
-        }
 
-        @JvmStatic
-        @JsStatic
-        fun of(key: String, value: Any?): Tag = when(value) {
-            // TODO: Fix normalization!
-            null -> Tag(key, key, null)
-            is String -> Tag("$key=$value", key, value)
-            is Boolean, Double -> Tag("$key:=$value", key, value)
-            is Number -> of(key, value.toDouble())
-            is Int64 -> of(key, value.toDouble())
-            else -> throw NakshaException(ILLEGAL_ARGUMENT, "Tag values can only be String, Boolean or Double")
+            else -> throw NakshaException(
+                ILLEGAL_ARGUMENT,
+                "Tag values can only be String, Boolean or Number"
+            )
         }
     }
 
@@ -81,6 +60,7 @@ class Tag(): AnyObject() {
         if (other is Tag) return tag == other.tag
         return false
     }
+
     override fun hashCode(): Int = tag.hashCode()
     override fun toString(): String = tag
 }
diff --git a/here-naksha-lib-model/src/commonMain/kotlin/naksha/model/TagList.kt b/here-naksha-lib-model/src/commonMain/kotlin/naksha/model/TagList.kt
@@ -3,7 +3,9 @@
 package naksha.model
 
 import naksha.base.ListProxy
-import naksha.model.XyzNs.XyzNsCompanion.normalizeTag
+import naksha.base.NormalizerForm
+import naksha.base.Platform
+import naksha.model.TagNormalizer.normalizeTag
 import kotlin.js.JsExport
 import kotlin.js.JsName
 import kotlin.js.JsStatic
@@ -24,21 +26,6 @@ open class TagList() : ListProxy<String>(String::class) {
         addTags(listOf(*tags), false)
     }
 
-    companion object TagList_C {
-        /**
-         * Create a tag list from the given array; the tags are normalized.
-         * @param tags the tags.
-         * @return the tag-list.
-         */
-        @JvmStatic
-        @JsStatic
-        fun fromArray(tags: Array<String>): TagList {
-            val list = TagList()
-            list.addAndNormalizeTags(*tags)
-            return list
-        }
-    }
-
     /**
      * Returns 'true' if the tag was removed, 'false' if it was not present.
      *
@@ -161,9 +148,44 @@ open class TagList() : ListProxy<String>(String::class) {
         return this
     }
 
+
     /**
      * Convert this tag-list into a tag-map.
      * @return this tag-list as tag-map.
      */
     fun toTagMap(): TagMap = TagMap(this)
+
+    companion object TagList_C {
+        /**
+         * Creates a new tag list and adds the given tags to it via [addAndNormalizeTags].
+         * @param tags the tags to add.
+         * @return the newly created tag-list.
+         */
+        @JvmStatic
+        @JsStatic
+        fun fromArray(tags: Array<String>): TagList {
+            val list = TagList()
+            list.addAndNormalizeTags(*tags)
+            return list
+        }
+
+        /**
+         * Normalizes every non-null tag of the given list in place, using [normalizeTag].
+         *
+         * @param tags the list to normalize; a _null_ or empty list is returned unchanged.
+         * @return the given list itself, with its content normalized.
+         */
+        @JvmStatic
+        @JsStatic
+        fun normalizeTags(tags: TagList?): TagList? {
+            if (!tags.isNullOrEmpty()) {
+                for ((idx, tag) in tags.withIndex()) {
+                    if (tag != null) {
+                        tags[idx] = normalizeTag(tag)
+                    }
+                }
+            }
+            return tags
+        }
+    }
 }
diff --git a/here-naksha-lib-model/src/commonMain/kotlin/naksha/model/TagMap.kt b/here-naksha-lib-model/src/commonMain/kotlin/naksha/model/TagMap.kt
@@ -3,24 +3,21 @@
 package naksha.model
 
 import naksha.base.MapProxy
-import naksha.model.request.query.*
 import kotlin.js.JsExport
 import kotlin.js.JsName
 
 // TODO: Document me!
 //       Improve me!
 
 @JsExport
-open class TagMap() : MapProxy<String, Tag>(String::class, Tag::class) {
+open class TagMap() : MapProxy<String, Any>(String::class, Any::class) {
 
     @Suppress("LeakingThis")
     @JsName("of")
-    constructor(tagList: TagList) : this(){
-        for (s in tagList) {
-            if (s == null) continue
-            val tag = Tag.parse(s)
-            put(tag.key, tag)
-        }
+    constructor(tagList: TagList) : this() {
+        tagList.filterNotNull()
+            .map { TagNormalizer.splitNormalizedTag(it) }
+            .forEach { tag -> put(tag.key, tag.value) }
     }
 
     /**
@@ -29,9 +26,8 @@ open class TagMap() : MapProxy<String, Tag>(String::class, Tag::class) {
      */
     fun toTagList(): TagList {
         val list = TagList()
-        for (e in this) {
-            val tag = e.value?.tag
-            if (tag != null) list.add(tag)
+        forEach { (key, value) ->
+            list.add(Tag.of(key, value).tag)
         }
         return list
     }

diff --git a/here-naksha-lib-model/src/commonMain/kotlin/naksha/model/TagNormalizer.kt b/here-naksha-lib-model/src/commonMain/kotlin/naksha/model/TagNormalizer.kt
@@ -0,0 +1,110 @@
+package naksha.model
+
+import naksha.base.NormalizerForm
+import naksha.base.NormalizerForm.NFD
+import naksha.base.NormalizerForm.NFKC
+import naksha.base.Platform
+import naksha.model.TagNormalizer.normalizeTag
+import naksha.model.TagNormalizer.splitNormalizedTag
+
+/**
+ * An object used for tag normalization and splitting.
+ *
+ * Normalization happens in [normalizeTag] and consists of the following steps:
+ * 1) Always: apply a normalization form (see [NormalizerForm])
+ * 2) Conditional: lowercase the whole tag
+ * 3) Conditional: remove all characters that are not printable ASCII
+ *
+ * The normalization form used in step #1 and the conditional steps depend on the tag prefix,
+ * which is matched case-insensitively.
+ *
+ * Splitting happens in [splitNormalizedTag]: the normalized tag is split into a key and a value,
+ * returned as a [Tag]. Not all tags can be split, this depends on their prefix as well.
+ *
+ * Summarised per-prefix behavior:
+ * ```
+ * +----------+------------+-----------+------------+-------+
+ * | prefix   | norm. form | lowercase | ASCII only | split |
+ * +----------+------------+-----------+------------+-------+
+ * | @        | NFKC       | false     | false      | true  |
+ * | ref_     | NFKC       | false     | false      | false |
+ * | ~        | NFD        | false     | true       | true  |
+ * | #        | NFD        | false     | true       | true  |
+ * | sourceID | NFKC       | false     | false      | false |
+ * | < ELSE > | NFD        | true      | true       | true  |
+ * +----------+------------+-----------+------------+-------+
+ * ```
+ *
+ * By default (no special prefix) a tag is normalized with NFD, lowercased, reduced to printable ASCII and splittable.
+ */
+object TagNormalizer {
+    private data class TagProcessingPolicy(
+        val normalizerForm: NormalizerForm,
+        val removeNonAscii: Boolean,
+        val lowercase: Boolean,
+        val split: Boolean
+    )
+
+    private val DEFAULT_POLICY = TagProcessingPolicy(NFD, removeNonAscii = true, lowercase = true, split = true)
+    private val PREFIX_TO_POLICY = mapOf(
+        "@" to TagProcessingPolicy(NFKC, removeNonAscii = false, lowercase = false, split = true),
+        "ref_" to TagProcessingPolicy(NFKC, removeNonAscii = false, lowercase = false, split = false),
+        "sourceID" to TagProcessingPolicy(NFKC, removeNonAscii = false, lowercase = false, split = false),
+        "~" to TagProcessingPolicy(NFD, removeNonAscii = true, lowercase = false, split = true),
+        "#" to TagProcessingPolicy(NFD, removeNonAscii = true, lowercase = false, split = true)
+    )
+
+    // Printable ASCII: U+0020 (space) through U+007E ('~'); DEL (127) and everything above is not part of it.
+    private val PRINTABLE_ASCII_CODES = 32..126
+
+    /**
+     * Normalizes a raw tag according to the policy selected by its prefix; see [TagNormalizer] for details.
+     *
+     * @param tag the raw tag.
+     * @return the normalized tag.
+     */
+    fun normalizeTag(tag: String): String {
+        val policy = policyFor(tag)
+        return Platform.normalize(tag, policy.normalizerForm)
+            .let { if (policy.lowercase) it.lowercase() else it }
+            .let { if (policy.removeNonAscii) removeNonAscii(it) else it }
+    }
+
+    /**
+     * Splits an already normalized tag into key and value; see [TagNormalizer] for details.
+     *
+     * `key=value` yields a String value, `key:=value` yields a Boolean (`true`/`false`, case-insensitive) or a Double.
+     * If the prefix forbids splitting, or the first `=` is not at index 2 or later, the whole tag is the key and the value is _null_.
+     *
+     * @param normalizedTag the tag, expected to be the output of [normalizeTag].
+     * @return the [Tag] holding the given tag, its key and its value.
+     * @throws NumberFormatException if the value after `:=` is neither a boolean nor a valid number.
+     */
+    fun splitNormalizedTag(normalizedTag: String): Tag {
+        if (!policyFor(normalizedTag).split) {
+            return Tag.of(normalizedKey = normalizedTag, normalizedValue = null)
+        }
+        val i = normalizedTag.indexOf('=')
+        // NOTE(review): 'i > 1' also leaves tags with a one-character key (e.g. "a=b") unsplit - confirm intended.
+        if (i <= 1) return Tag(normalizedTag, normalizedTag, null)
+        val raw = normalizedTag.substring(i + 1).trim()
+        if (normalizedTag[i - 1] != ':') { // plain '=': the value stays a string
+            return Tag(normalizedTag, normalizedTag.substring(0, i).trim(), raw)
+        }
+        val value: Any = when { // ':=' marks a boolean or numeric value
+            raw.equals("true", ignoreCase = true) -> true
+            raw.equals("false", ignoreCase = true) -> false
+            else -> raw.toDouble()
+        }
+        return Tag(normalizedTag, normalizedTag.substring(0, i - 1).trim(), value)
+    }
+
+    // Keeps only the characters whose code is in PRINTABLE_ASCII_CODES.
+    private fun removeNonAscii(text: String) = text.filter { it.code in PRINTABLE_ASCII_CODES }
+
+    // Returns the policy of the first prefix the tag starts with (ignoring case), DEFAULT_POLICY when none matches.
+    private fun policyFor(tag: String): TagProcessingPolicy =
+        PREFIX_TO_POLICY.entries
+            .firstOrNull { (prefix, _) -> tag.startsWith(prefix, ignoreCase = true) }
+            ?.value ?: DEFAULT_POLICY
+}