heremaps · Amaneusz · Oct 10, 2024 · Oct 18, 2024 · Oct 18, 2024 · Oct 21, 2024
diff --git a/here-naksha-lib-base/src/commonMain/kotlin/naksha/base/NormalizerForm.kt b/here-naksha-lib-base/src/commonMain/kotlin/naksha/base/NormalizerForm.kt
@@ -4,7 +4,7 @@ import kotlin.js.ExperimentalJsExport
 import kotlin.js.JsExport
 
 /**
- * Please refer []Unicode Normalization Forms](https://www.unicode.org/reports/tr15/#Norm_Forms)
+ * Please refer to [Unicode Normalization Forms](https://www.unicode.org/reports/tr15/#Norm_Forms).
  */
 @OptIn(ExperimentalJsExport::class)
 @JsExport

diff --git a/here-naksha-lib-model/src/commonMain/kotlin/naksha/model/Tag.kt b/here-naksha-lib-model/src/commonMain/kotlin/naksha/model/Tag.kt
diff --git a/here-naksha-lib-model/src/commonMain/kotlin/naksha/model/TagList.kt b/here-naksha-lib-model/src/commonMain/kotlin/naksha/model/TagList.kt
@@ -3,7 +3,9 @@
 package naksha.model
 
 import naksha.base.ListProxy
-import naksha.model.XyzNs.XyzNsCompanion.normalizeTag
+import naksha.base.NormalizerForm
+import naksha.base.Platform
+import naksha.model.TagNormalizer.TagNormalizer_C.normalizeTag
 import kotlin.js.JsExport
 import kotlin.js.JsName
 import kotlin.js.JsStatic
@@ -24,21 +26,6 @@ open class TagList() : ListProxy<String>(String::class) {
         addTags(listOf(*tags), false)
     }
 
-    companion object TagList_C {
-        /**
-         * Create a tag list from the given array; the tags are normalized.
-         * @param tags the tags.
-         * @return the tag-list.
-         */
-        @JvmStatic
-        @JsStatic
-        fun fromArray(tags: Array<String>): TagList {
-            val list = TagList()
-            list.addAndNormalizeTags(*tags)
-            return list
-        }
-    }
-
     /**
      * Returns 'true' if the tag was removed, 'false' if it was not present.
      *
@@ -161,9 +148,44 @@ open class TagList() : ListProxy<String>(String::class) {
         return this
     }
 
+
     /**
      * Convert this tag-list into a tag-map.
      * @return this tag-list as tag-map.
      */
     fun toTagMap(): TagMap = TagMap(this)
+
+    companion object TagList_C {
+        /**
+         * Builds a new tag-list out of the supplied array.
+         *
+         * The tags are added through [addAndNormalizeTags], so they are normalized on the way in.
+         * @param tags the raw tags to add.
+         * @return the freshly created tag-list.
+         */
+        @JvmStatic
+        @JsStatic
+        fun fromArray(tags: Array<String>): TagList {
+            return TagList().also { it.addAndNormalizeTags(*tags) }
+        }
+
+        /**
+         * Normalizes all tags of the given list in place.
+         *
+         * Every non-null entry is replaced, at the same index, by the result of [normalizeTag];
+         * null entries are left as they are. A null or empty list is returned without being touched.
+         *
+         * @param tags the list to normalize, may be null.
+         * @return the very same instance that was passed in (or null if null was given).
+         */
+        @JvmStatic
+        @JsStatic
+        fun normalizeTags(tags: TagList?): TagList? {
+            if (tags == null || tags.isEmpty()) return tags
+            tags.forEachIndexed { position, current ->
+                if (current != null) tags[position] = normalizeTag(current)
+            }
+            return tags
+        }
+    }
 }
diff --git a/here-naksha-lib-model/src/commonMain/kotlin/naksha/model/TagMap.kt b/here-naksha-lib-model/src/commonMain/kotlin/naksha/model/TagMap.kt
@@ -2,25 +2,32 @@
 
 package naksha.model
 
+import naksha.base.Int64
 import naksha.base.MapProxy
-import naksha.model.request.query.*
 import kotlin.js.JsExport
 import kotlin.js.JsName
 
-// TODO: Document me!
-//       Improve me!
-
+/**
+ * Map of tags persisted as (key, value) pairs where values are nullable.
+ * This class represents the persisted form of [TagList].
+ * It is stored as byte_array and can be accessed in PG via `naksha_tags` function.
+ *
+ * It is advised to only construct it in one of two ways:
+ * 1) Via [TagList]-based constructor
+ * 2) By deserializing byte array fetched from DB
+ *
+ * If, for some reason, an instance has to be constructed in another way, the tags should be prepared
+ * upfront using [TagNormalizer] (which is used, for example, by [TagList]).
+ */
 @JsExport
-open class TagMap() : MapProxy<String, Tag>(String::class, Tag::class) {
+open class TagMap() : MapProxy<String, Any>(String::class, Any::class) {
 
     @Suppress("LeakingThis")
     @JsName("of")
-    constructor(tagList: TagList) : this(){
-        for (s in tagList) {
-            if (s == null) continue
-            val tag = Tag.parse(s)
-            put(tag.key, tag)
-        }
+    constructor(tagList: TagList) : this() {
+        // Null entries are skipped; each remaining tag is split into its key and optional value.
+        val entries = tagList.filterNotNull().map { tag -> TagNormalizer.splitNormalizedTag(tag) }
+        for ((key, value) in entries) put(key, value)
     }
 
     /**
@@ -29,10 +36,28 @@ open class TagMap() : MapProxy<String, Tag>(String::class, Tag::class) {
      */
     fun toTagList(): TagList {
         val list = TagList()
-        for (e in this) {
-            val tag = e.value?.tag
-            if (tag != null) list.add(tag)
+        for ((key, value) in this) { // each entry becomes exactly one flattened tag string
+            list.add(flattenTag(key, value))
         }
         return list
     }
+
+    /**
+     * Renders a single (key, value) entry in the textual form used by [TagList].
+     * How the entry is rendered is decided by the runtime type of the value:
+     * - `null`: only the key is emitted, e.g. ('foo', null) -> 'foo'
+     * - [String]: key and value are joined by '=', e.g. ('foo', 'bar') -> 'foo=bar'
+     * - [Boolean], [Long], [Int64]: joined by ':=' using the value's own string form, e.g. 'foo:=true'
+     * - any other [Number]: joined by ':=' after conversion to [Double], e.g. 'foo:=12.34'
+     * @throws NakshaException with [NakshaError.ILLEGAL_ARGUMENT] for every other value type.
+     */
+    private fun flattenTag(key: String, value: Any?): String {
+        if (value == null) return key
+        if (value is String) return "$key=$value"
+        if (value is Boolean || value is Long || value is Int64) return "$key:=$value"
+        if (value is Number) return "$key:=${value.toDouble()}"
+        throw NakshaException(
+            NakshaError.ILLEGAL_ARGUMENT, "Tag values can only be String, Boolean or Number"
+        )
+    }
 }
diff --git a/here-naksha-lib-model/src/commonMain/kotlin/naksha/model/TagNormalizer.kt b/here-naksha-lib-model/src/commonMain/kotlin/naksha/model/TagNormalizer.kt
@@ -0,0 +1,129 @@
+package naksha.model
+
+import naksha.base.NormalizerForm
+import naksha.base.NormalizerForm.NFD
+import naksha.base.NormalizerForm.NFKC
+import naksha.base.Platform
+import naksha.model.TagNormalizer.TagNormalizer_C.normalizeTag
+import naksha.model.TagNormalizer.TagNormalizer_C.splitNormalizedTag
+
+/**
+ * An object used for Tag normalization and splitting.
+ *
+ * Process of normalization happens in [normalizeTag] method and includes following steps:
+ * 1) Always: apply normalization form (see [NormalizerForm])
+ * 2) Conditional: lowercase the whole tag
+ * 3) Conditional: remove all non-ASCII characters
+ *
+ * Normalization form used in step #1 and subsequent conditional steps depend on tag prefix.
+ *
+ * Process of splitting happens in [splitNormalizedTag] method.
+ * Note that not all tags can be split, it depends on their prefix.
+ *
+ * Summarised per-prefix behavior:
+ * +----------+------------+-----------+------------+-------+
+ * | prefix   | norm. form | lowercase | ASCII only | split |
+ * +----------+------------+-----------+------------+-------+
+ * | @        | NFKC       | false     | false      | true  |
+ * | ref_     | NFKC       | false     | false      | false |
+ * | ~        | NFD        | false     | true       | true  |
+ * | #        | NFD        | false     | true       | true  |
+ * | sourceID | NFKC       | false     | false      | false |
+ * | < ELSE > | NFD        | true      | true       | true  |
+ * +----------+------------+-----------+------------+-------+
+ *
+ * By default, (if no special prefix is found) tag is normalized with NFD, lowercased, cleaned of non-ASCII and splittable.
+ */
+class TagNormalizer private constructor() {
+    /** Set of processing rules that applies to a tag; which set is used depends on the tag prefix. */
+    private data class TagProcessingPolicy(
+        val normalizerForm: NormalizerForm,
+        val removeNonAscii: Boolean,
+        val lowercase: Boolean,
+        val split: Boolean
+    )
+
+    companion object TagNormalizer_C {
+        /** Policy of every tag that starts with none of the keys of [PREFIX_TO_POLICY]. */
+        private val DEFAULT_POLICY =
+            TagProcessingPolicy(NFD, removeNonAscii = true, lowercase = true, split = true)
+
+        /** Policies of the special prefixes; [policyFor] returns the first one whose prefix matches. */
+        private val PREFIX_TO_POLICY = mapOf(
+            "@" to TagProcessingPolicy(NFKC, removeNonAscii = false, lowercase = false, split = true),
+            "ref_" to TagProcessingPolicy(NFKC, removeNonAscii = false, lowercase = false, split = false),
+            "sourceID" to TagProcessingPolicy(NFKC, removeNonAscii = false, lowercase = false, split = false),
+            "~" to TagProcessingPolicy(NFD, removeNonAscii = true, lowercase = false, split = true),
+            "#" to TagProcessingPolicy(NFD, removeNonAscii = true, lowercase = false, split = true)
+        )
+
+        // Both tables are indexed by (character code - 32) and therefore cover the codes 32..127.
+        private val AS_IS: CharArray = CharArray(128 - 32) { offset -> (offset + 32).toChar() }
+        private val TO_LOWER: CharArray = CharArray(128 - 32) { offset -> (offset + 32).toChar().lowercaseChar() }
+
+        /**
+         * Normalizes a raw tag according to the policy of its prefix; see [TagNormalizer] for the rules.
+         *
+         * @param tag the raw tag.
+         * @return the normalized tag.
+         */
+        fun normalizeTag(tag: String): String {
+            val policy = policyFor(tag)
+            val normalized = Platform.normalize(tag, policy.normalizerForm)
+            val table = if (policy.lowercase) TO_LOWER else AS_IS
+            return when {
+                policy.removeNonAscii -> removeNonAscii(normalized, table)
+                policy.lowercase -> normalized.lowercase()
+                else -> normalized
+            }
+        }
+
+        /**
+         * Passes [input] character by character through [outputCharacterSet].
+         *
+         * The table is indexed by (character code - 32); characters that fall outside of the table
+         * are dropped, all others are replaced by the table entry.
+         */
+        private fun removeNonAscii(input: String, outputCharacterSet: CharArray): String = buildString {
+            for (ch in input) {
+                val slot = ch.code - 32
+                if (slot in outputCharacterSet.indices) append(outputCharacterSet[slot])
+            }
+        }
+
+        /**
+         * Splits an already normalized tag into key and optional value; see [TagNormalizer] for the rules.
+         *
+         * The tag is returned unsplit (whole tag as key, null as value) when the policy of its prefix
+         * disables splitting, or when its first '=' is missing or sits at index 0 or 1.
+         * Otherwise the text before the separator is the key and the text after it the value (both trimmed):
+         * - separator '=': the value stays a [String].
+         * - separator ':=': "true"/"false" (any case) become a [Boolean], everything else is parsed as [Double].
+         *
+         * NOTE(review): because of the index check a one-character key like "a=b" is never split - confirm intended.
+         * NOTE(review): a non-numeric value behind ':=' lets toDouble() throw - confirm callers expect that.
+         *
+         * @param normalizedTag the tag, expected to be normalized already.
+         * @return the key together with the String, Boolean, Double or null value.
+         */
+        fun splitNormalizedTag(normalizedTag: String): Pair<String, Any?> {
+            val unsplit: Pair<String, Any?> = normalizedTag to null
+            if (!policyFor(normalizedTag).split) return unsplit
+            val eq = normalizedTag.indexOf('=')
+            if (eq <= 1) return unsplit
+            val text = normalizedTag.substring(eq + 1).trim()
+            if (normalizedTag[eq - 1] != ':') return normalizedTag.substring(0, eq).trim() to text
+            val typed: Any = when {
+                "true".equals(text, ignoreCase = true) -> true
+                "false".equals(text, ignoreCase = true) -> false
+                else -> text.toDouble()
+            }
+            return normalizedTag.substring(0, eq - 1).trim() to typed
+        }
+
+        /** Returns the policy of the first known prefix that [tag] starts with, else [DEFAULT_POLICY]. */
+        private fun policyFor(tag: String): TagProcessingPolicy =
+            PREFIX_TO_POLICY.entries.firstOrNull { (prefix, _) -> tag.startsWith(prefix) }?.value
+                ?: DEFAULT_POLICY
+    }
+}