-
Notifications
You must be signed in to change notification settings - Fork 2
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
V3 casl 561 query tags #370
base: v3
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
This file was deleted.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,129 @@ | ||
package naksha.model | ||
|
||
import naksha.base.NormalizerForm | ||
import naksha.base.NormalizerForm.NFD | ||
import naksha.base.NormalizerForm.NFKC | ||
import naksha.base.Platform | ||
import naksha.model.TagNormalizer.TagNormalizer_C.normalizeTag | ||
import naksha.model.TagNormalizer.TagNormalizer_C.splitNormalizedTag | ||
|
||
/** | ||
* An object used for Tag normalization and splitting. | ||
* | ||
* Process of normalization happens in [normalizeTag] method and includes following steps: | ||
* 1) Always: apply normalization form (see [NormalizerForm]) | ||
* 2) Conditional: lowercase the whole tag | ||
* 3) Conditional: remove all non-ASCII characters | ||
* | ||
* Normalization form used in step #1 and subsequent conditional steps depend on tag prefix. | ||
* | ||
* Process of splitting happens in [splitNormalizedTag] method. | ||
* Note that not all tags can be split, it depends on their prefix. | ||
* | ||
* Summarised per-prefix behavior: | ||
* +----------+------------+-----------+------------+-------+
* | prefix   | norm. form | lowercase | ASCII only | split |
* +----------+------------+-----------+------------+-------+
* | @        | NFKC       | false     | false      | true  |
* | ref_     | NFKC       | false     | false      | false |
* | ~        | NFD        | false     | true       | true  |
* | #        | NFD        | false     | true       | true  |
* | sourceID | NFKC       | false     | false      | false |
* | < ELSE > | NFD        | true      | true       | true  |
* +----------+------------+-----------+------------+-------+
* | ||
* By default, (if no special prefix is found) tag is normalized with NFD, lowercased, cleaned of non-ASCII and splittable. | ||
*/ | ||
class TagNormalizer private constructor() { | ||
/**
 * Describes how a tag with a given prefix is processed.
 *
 * @property normalizerForm the normalization form that is applied to the raw tag first.
 * @property removeNonAscii when `true`, characters outside the kept ASCII table are dropped.
 * @property lowercase when `true`, the tag is lowercased.
 * @property split when `true`, the normalized tag may be split into a key and a value.
 */
private data class TagProcessingPolicy(
    val normalizerForm: NormalizerForm,
    val removeNonAscii: Boolean,
    val lowercase: Boolean,
    val split: Boolean,
)
|
||
companion object TagNormalizer_C { | ||
// Policy for tags that start with none of the prefixes listed in PREFIX_TO_POLICY.
private val DEFAULT_POLICY = TagProcessingPolicy(
    normalizerForm = NFD,
    removeNonAscii = true,
    lowercase = true,
    split = true
)

// Prefix -> policy table; prefixes are matched case-sensitively against the start of the tag.
private val PREFIX_TO_POLICY = mapOf(
    "@" to TagProcessingPolicy(
        normalizerForm = NFKC, removeNonAscii = false, lowercase = false, split = true
    ),
    "ref_" to TagProcessingPolicy(
        normalizerForm = NFKC, removeNonAscii = false, lowercase = false, split = false
    ),
    "sourceID" to TagProcessingPolicy(
        normalizerForm = NFKC, removeNonAscii = false, lowercase = false, split = false
    ),
    "~" to TagProcessingPolicy(
        normalizerForm = NFD, removeNonAscii = true, lowercase = false, split = true
    ),
    "#" to TagProcessingPolicy(
        normalizerForm = NFD, removeNonAscii = true, lowercase = false, split = true
    )
)
|
||
// Lookup tables for the character codes 32..127; index 0 corresponds to code 32 (space).
// AS_IS maps every character to itself.
private val AS_IS: CharArray = CharArray(128 - 32) { index -> (index + 32).toChar() }

// TO_LOWER maps every character to its lowercase counterpart.
private val TO_LOWER: CharArray = CharArray(128 - 32) { index ->
    val original = (index + 32).toChar()
    original.lowercaseChar()
}
|
||
/**
 * Main method for raw tag normalization. See [TagNormalizer] doc for more.
 *
 * The policy is selected from the prefix of [tag]. The tag is first normalized with the
 * policy's normalization form and then, depending on the policy, lowercased and/or reduced
 * to the characters of the ASCII lookup table.
 *
 * @param tag the raw tag.
 * @return the normalized tag.
 */
fun normalizeTag(tag: String): String {
    val policy = policyFor(tag)
    val normalized = Platform.normalize(tag, policy.normalizerForm)
    return when {
        // The lookup table performs the optional lowercasing while filtering.
        policy.removeNonAscii -> removeNonAscii(normalized, if (policy.lowercase) TO_LOWER else AS_IS)
        policy.lowercase -> normalized.lowercase()
        else -> normalized
    }
}
|
||
/**
 * Builds a copy of [input] that contains only characters covered by [outputCharacterSet].
 *
 * The table is indexed by `character code - 32`, so index 0 stands for the space character.
 * Characters whose shifted code falls outside the table are dropped; every other character
 * is replaced by the table entry at its index.
 *
 * @param input the text to filter.
 * @param outputCharacterSet the lookup table that supplies the output characters.
 * @return the filtered (and, depending on the table, transformed) text.
 */
private fun removeNonAscii(input: String, outputCharacterSet: CharArray): String {
    val tableSize = outputCharacterSet.size
    val result = StringBuilder()
    var position = 0
    while (position < input.length) {
        // Shift the code so that the space character (32) lands on table index 0.
        val tableIndex = input[position].code - 32
        if (tableIndex >= 0 && tableIndex < tableSize) {
            result.append(outputCharacterSet[tableIndex])
        }
        position++
    }
    return result.toString()
}
|
||
|
||
/**
 * Main method for normalized tag splitting. See [TagNormalizer] doc for more.
 *
 * If the policy selected by the tag's prefix does not allow splitting, the tag is returned
 * unchanged with a `null` value. Otherwise the first `=` decides the outcome:
 * - `key=value`: the value is the trimmed text after `=`, returned as a [String].
 * - `key:=value`: the value is a [Boolean] for `true`/`false` (case-insensitive),
 *   otherwise it is parsed as a [Double].
 * - no `=`, or no key character in front of `=` / `:=`: the whole tag is the key and the
 *   value is `null`.
 *
 * @param normalizedTag the tag to split; expected to be normalized already.
 * @return the key paired with its value, or with `null` when the tag was not split.
 * @throws NumberFormatException if the value of a `:=` tag is neither a boolean literal
 * nor a valid number.
 */
fun splitNormalizedTag(normalizedTag: String): Pair<String, Any?> {
    if (!policyFor(normalizedTag).split) {
        return normalizedTag to null
    }
    val i = normalizedTag.indexOf('=')
    // At least one key character must precede '='. The former `i > 1` check was off by one
    // and left single-character keys such as "a=1" unsplit.
    if (i < 1) {
        return normalizedTag to null
    }
    if (normalizedTag[i - 1] != ':') {
        val key = normalizedTag.substring(0, i).trim()
        val value = normalizedTag.substring(i + 1).trim()
        return key to value
    }
    // ':=' operator: a tag that starts with ":=" has no key, so it stays unsplit.
    if (i < 2) {
        return normalizedTag to null
    }
    val key = normalizedTag.substring(0, i - 1).trim()
    val raw = normalizedTag.substring(i + 1).trim()
    val value: Any = when {
        "true".equals(raw, ignoreCase = true) -> true
        "false".equals(raw, ignoreCase = true) -> false
        else -> raw.toDouble()
    }
    return key to value
}
|
||
/**
 * Selects the processing policy for [tag].
 *
 * The tag is compared case-sensitively against every prefix of [PREFIX_TO_POLICY], in the
 * map's iteration order; the first prefix the tag starts with wins. If no prefix matches,
 * [DEFAULT_POLICY] is returned.
 */
private fun policyFor(tag: String): TagProcessingPolicy {
    // Kept as a plain loop on purpose: the table is small and the lookup stays easy to read.
    for (entry in PREFIX_TO_POLICY.entries) {
        if (tag.startsWith(entry.key)) {
            return entry.value
        }
    }
    return DEFAULT_POLICY
}
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The code is wrong in one thing: The prefix is case-sensitive, so Actually, another thing of this notation is, that it is hard to read for everyone that is not an Java/Kotlin expert. I would like to avoid streaming-APIs, unless there is a big benefit, so this code should be (IMHO): private fun policyFor(tag: String): TagProcessingPolicy {
for ((prefix, policy) in PREFIX_TO_POLICY) {
if (tag.startsWith(prefix)) return policy
}
return DEFAULT_POLICY
} This does not make the code slower, nor longer, but much more readable IMHO. Let's please use loops, unless there is a really specific situation in which a streaming API provides an advantage. There is another point against streaming API, I saw this so often now. People tend to ignore that behind a stream there is a loop, and you then often find things like this, especially with inexperienced developers: val a = m.map { (k, v) -> v.a }
val b = m.map { (k, v) -> v.b }
... I really saw this often. If that would have been a loop, it would become clear that you can collect both properties in one loop, you do not need two loops, but the code hides the loop and therefore makes it hard to understand what happens. When loops can be collapsed, it should be done; looping over things is not free of cost IMHO. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The remark regarding readability is more crucial IMO, if you don't "feel it" then let's ditch it - c-like style should be fine for all so I'm switching to your suggestion |
||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This will fail for
a=1
, because in this case index will be 1