Skip to content

Commit

Permalink
CASL-561 tag normalization
Browse files Browse the repository at this point in the history
Signed-off-by: Jakub Amanowicz <[email protected]>
  • Loading branch information
Amaneusz committed Oct 18, 2024
1 parent 9979460 commit 575e106
Show file tree
Hide file tree
Showing 17 changed files with 484 additions and 167 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import kotlin.js.ExperimentalJsExport
import kotlin.js.JsExport

/**
* Please refer []Unicode Normalization Forms](https://www.unicode.org/reports/tr15/#Norm_Forms)
* Please refer [Unicode Normalization Forms](https://www.unicode.org/reports/tr15/#Norm_Forms)
*/
@OptIn(ExperimentalJsExport::class)
@JsExport
Expand Down
52 changes: 16 additions & 36 deletions here-naksha-lib-model/src/commonMain/kotlin/naksha/model/Tag.kt
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
package naksha.model

import naksha.base.*
import naksha.base.NormalizerForm.NFD
import naksha.base.NormalizerForm.NFKC
import naksha.model.NakshaError.NakshaErrorCompanion.ILLEGAL_ARGUMENT
import kotlin.js.JsExport
import kotlin.js.JsName
Expand All @@ -16,10 +18,10 @@ import kotlin.jvm.JvmStatic
* @property value the value of the tag; _null_, Boolean, String or Double.
*/
@JsExport
class Tag(): AnyObject() {
class Tag() : AnyObject() {

@JsName("of")
constructor(tag: String, key: String, value: Any?): this() {
constructor(tag: String, key: String, value: Any?) : this() {
this.tag = tag
this.key = key
this.value = value
Expand All @@ -32,42 +34,19 @@ class Tag(): AnyObject() {

@JvmStatic
@JsStatic
fun parse(tag: String): Tag {
val i = tag.indexOf('=')
val key: String
val value: Any?
if (i > 1) {
if (tag[i-1] == ':') { // :=
key = tag.substring(0, i-1).trim()
val raw = tag.substring(i + 1).trim()
value = if ("true".equals(raw, ignoreCase = true)) {
true
} else if ("false".equals(raw, ignoreCase = true)) {
false
} else {
raw. toDouble()
}
} else {
key = tag.substring(0, i).trim()
value = tag.substring(i + 1).trim()
}
} else {
key = tag
value = null
fun of(normalizedKey: String, normalizedValue: Any?): Tag = when (normalizedValue) {
null -> Tag(normalizedKey, normalizedKey, null)
is String -> Tag("$normalizedKey=$normalizedValue", normalizedKey, normalizedValue)
is Boolean -> Tag("$normalizedKey:=$normalizedValue", normalizedKey, normalizedValue)
is Number -> {
val doubleValue = normalizedValue.toDouble()
Tag("$normalizedKey:=$doubleValue", normalizedKey, doubleValue)
}
return Tag(tag, key, value)
}

@JvmStatic
@JsStatic
fun of(key: String, value: Any?): Tag = when(value) {
// TODO: Fix normalization!
null -> Tag(key, key, null)
is String -> Tag("$key=$value", key, value)
is Boolean, Double -> Tag("$key:=$value", key, value)
is Number -> of(key, value.toDouble())
is Int64 -> of(key, value.toDouble())
else -> throw NakshaException(ILLEGAL_ARGUMENT, "Tag values can only be String, Boolean or Double")
else -> throw NakshaException(
ILLEGAL_ARGUMENT,
"Tag values can only be String, Boolean or Number"
)
}
}

Expand All @@ -81,6 +60,7 @@ class Tag(): AnyObject() {
if (other is Tag) return tag == other.tag
return false
}

override fun hashCode(): Int = tag.hashCode()
override fun toString(): String = tag
}
54 changes: 38 additions & 16 deletions here-naksha-lib-model/src/commonMain/kotlin/naksha/model/TagList.kt
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@
package naksha.model

import naksha.base.ListProxy
import naksha.model.XyzNs.XyzNsCompanion.normalizeTag
import naksha.base.NormalizerForm
import naksha.base.Platform
import naksha.model.TagNormalizer.normalizeTag
import kotlin.js.JsExport
import kotlin.js.JsName
import kotlin.js.JsStatic
Expand All @@ -24,21 +26,6 @@ open class TagList() : ListProxy<String>(String::class) {
addTags(listOf(*tags), false)
}

companion object TagList_C {
/**
* Create a tag list from the given array; the tags are normalized.
* @param tags the tags.
* @return the tag-list.
*/
@JvmStatic
@JsStatic
fun fromArray(tags: Array<String>): TagList {
val list = TagList()
list.addAndNormalizeTags(*tags)
return list
}
}

/**
* Returns 'true' if the tag was removed, 'false' if it was not present.
*
Expand Down Expand Up @@ -161,9 +148,44 @@ open class TagList() : ListProxy<String>(String::class) {
return this
}


/**
 * Builds a [TagMap] from this tag-list.
 * @return a new tag-map constructed from this tag-list.
 */
fun toTagMap(): TagMap {
    return TagMap(this)
}

companion object TagList_C {
    /**
     * Builds a new tag-list out of the given array; every tag is normalized while being added.
     * @param tags the raw tags.
     * @return the newly created tag-list.
     */
    @JvmStatic
    @JsStatic
    fun fromArray(tags: Array<String>): TagList =
        TagList().also { created -> created.addAndNormalizeTags(*tags) }

    /**
     * Normalizes all tags of the given list in place.
     *
     * Entries that are `null` are left untouched; a `null` or empty list is returned as is.
     *
     * @param tags a list of tags.
     * @return the very same list instance, with its non-null entries replaced by their normalized form.
     */
    @JvmStatic
    @JsStatic
    fun normalizeTags(tags: TagList?): TagList? {
        // Nothing to do for a missing or empty list, hand it back unchanged.
        if (tags.isNullOrEmpty()) return tags
        tags.withIndex().forEach { (position, rawTag) ->
            if (rawTag != null) tags[position] = normalizeTag(rawTag)
        }
        return tags
    }
}
}
18 changes: 7 additions & 11 deletions here-naksha-lib-model/src/commonMain/kotlin/naksha/model/TagMap.kt
Original file line number Diff line number Diff line change
Expand Up @@ -3,24 +3,21 @@
package naksha.model

import naksha.base.MapProxy
import naksha.model.request.query.*
import kotlin.js.JsExport
import kotlin.js.JsName

// TODO: Document me!
// Improve me!

@JsExport
open class TagMap() : MapProxy<String, Tag>(String::class, Tag::class) {
open class TagMap() : MapProxy<String, Any>(String::class, Any::class) {

@Suppress("LeakingThis")
@JsName("of")
constructor(tagList: TagList) : this(){
for (s in tagList) {
if (s == null) continue
val tag = Tag.parse(s)
put(tag.key, tag)
}
constructor(tagList: TagList) : this() {
tagList.filterNotNull()
.map { TagNormalizer.splitNormalizedTag(it) }
.forEach { tag -> put(tag.key, tag.value) }
}

/**
Expand All @@ -29,9 +26,8 @@ open class TagMap() : MapProxy<String, Tag>(String::class, Tag::class) {
*/
fun toTagList(): TagList {
val list = TagList()
for (e in this) {
val tag = e.value?.tag
if (tag != null) list.add(tag)
forEach { (key, value) ->
list.add(Tag.of(key, value).tag)
}
return list
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
package naksha.model

import naksha.base.NormalizerForm
import naksha.base.NormalizerForm.NFD
import naksha.base.NormalizerForm.NFKC
import naksha.base.Platform
import naksha.model.TagNormalizer.normalizeTag
import naksha.model.TagNormalizer.splitNormalizedTag

/**
 * An object used for tag normalization and splitting.
 *
 * Normalization happens in [normalizeTag] and consists of the following steps:
 * 1) Always: apply a Unicode normalization form (see [NormalizerForm])
 * 2) Conditional: lowercase the whole tag
 * 3) Conditional: keep only printable ASCII characters (code points 32..126), dropping everything else
 *
 * The normalization form used in step #1 and whether the conditional steps are applied depend on the tag prefix.
 * Prefixes are matched case-insensitively.
 *
 * Splitting happens in [splitNormalizedTag].
 * It splits an already normalized tag into a (key, value) pair, returned as a [Tag].
 * Note that not all tags are split, this depends on their prefix as well.
 *
 * Summarised per-prefix behavior:
 * +----------+------------+-----------+------------+-------+
 * | prefix   | norm. form | lowercase | ASCII only | split |
 * +----------+------------+-----------+------------+-------+
 * | @        | NFKC       | false     | false      | true  |
 * | ref_     | NFKC       | false     | false      | false |
 * | ~        | NFD        | false     | true       | true  |
 * | #        | NFD        | false     | true       | true  |
 * | sourceID | NFKC       | false     | false      | false |
 * | < ELSE > | NFD        | true      | true       | true  |
 * +----------+------------+-----------+------------+-------+
 *
 * By default (if no special prefix is found) a tag is normalized with NFD, lowercased, reduced to printable
 * ASCII and is splittable.
 */
object TagNormalizer {
    /**
     * The processing rules that apply to all tags sharing a prefix.
     *
     * @property normalizerForm the Unicode normalization form to apply.
     * @property removeNonAscii whether characters outside of the printable ASCII range are dropped.
     * @property lowercase whether the tag is lowercased.
     * @property split whether the tag may be split into key and value.
     */
    private data class TagProcessingPolicy(
        val normalizerForm: NormalizerForm,
        val removeNonAscii: Boolean,
        val lowercase: Boolean,
        val split: Boolean
    )

    private val DEFAULT_POLICY = TagProcessingPolicy(NFD, removeNonAscii = true, lowercase = true, split = true)
    private val PREFIX_TO_POLICY = mapOf(
        "@" to TagProcessingPolicy(NFKC, removeNonAscii = false, lowercase = false, split = true),
        "ref_" to TagProcessingPolicy(NFKC, removeNonAscii = false, lowercase = false, split = false),
        "sourceID" to TagProcessingPolicy(NFKC, removeNonAscii = false, lowercase = false, split = false),
        "~" to TagProcessingPolicy(NFD, removeNonAscii = true, lowercase = false, split = true),
        "#" to TagProcessingPolicy(NFD, removeNonAscii = true, lowercase = false, split = true)
    )

    // Printable ASCII is space (32) up to and including tilde (126).
    // 127 is the DEL control character and 128 is already outside of ASCII, so both must be excluded.
    private val PRINTABLE_ASCII_CODES = 32..126

    /**
     * Normalizes a raw tag according to the policy selected by its prefix, see [TagNormalizer] for the rules.
     *
     * @param tag the raw tag.
     * @return the normalized tag.
     */
    fun normalizeTag(tag: String): String {
        val policy = policyFor(tag)
        var normalized = Platform.normalize(tag, policy.normalizerForm)
        if (policy.lowercase) normalized = normalized.lowercase()
        if (policy.removeNonAscii) normalized = removeNonAscii(normalized)
        return normalized
    }

    /**
     * Splits an already normalized tag into key and value, see [TagNormalizer] for the rules.
     *
     * - Tags whose prefix policy forbids splitting are returned with the whole tag as key and a `null` value.
     * - `key:=value` yields a typed value: `true` / `false` (case-insensitive) become a Boolean, everything
     *   else is parsed as a Double.
     * - `key=value` yields the value as a String.
     * - Everything else yields the whole tag as key and a `null` value.
     *
     * Key and value are trimmed.
     *
     * @param normalizedTag the tag, expected to be normalized already.
     * @return the tag split into key and value.
     * @throws NumberFormatException if the value of a `:=` tag is neither a boolean literal nor a valid number.
     */
    fun splitNormalizedTag(normalizedTag: String): Tag {
        if (!policyFor(normalizedTag).split) {
            return Tag.of(normalizedKey = normalizedTag, normalizedValue = null)
        }
        val separatorIndex = normalizedTag.indexOf('=')
        // NOTE(review): the separator is only honoured at index 2 or later, so a tag with a one-character
        // key such as "a=b" is NOT split and becomes a key without value - confirm this is intended.
        if (separatorIndex <= 1) {
            return Tag(normalizedTag, normalizedTag, null)
        }
        val rawValue = normalizedTag.substring(separatorIndex + 1).trim()
        val key: String
        val value: Any?
        if (normalizedTag[separatorIndex - 1] == ':') { // ":=" marks a typed (Boolean or Double) value
            key = normalizedTag.substring(0, separatorIndex - 1).trim()
            value = when {
                "true".equals(rawValue, ignoreCase = true) -> true
                "false".equals(rawValue, ignoreCase = true) -> false
                else -> rawValue.toDouble()
            }
        } else {
            key = normalizedTag.substring(0, separatorIndex).trim()
            value = rawValue
        }
        return Tag(normalizedTag, key, value)
    }

    /**
     * Returns the given text with every character outside of the printable ASCII range removed; this drops
     * control characters as well.
     */
    private fun removeNonAscii(text: String) =
        text.filter { it.code in PRINTABLE_ASCII_CODES }

    /**
     * Selects the policy of the first registered prefix the tag starts with (ignoring case), falling back to
     * the default policy when no prefix matches.
     */
    private fun policyFor(tag: String): TagProcessingPolicy {
        return PREFIX_TO_POLICY.entries
            .firstOrNull { (prefix, _) -> tag.startsWith(prefix, ignoreCase = true) }
            ?.value
            ?: DEFAULT_POLICY
    }
}
Loading

0 comments on commit 575e106

Please sign in to comment.