diff --git a/shared/src/main/kotlin/Strings.kt b/shared/src/main/kotlin/Strings.kt index 6dbdb8e758..84a43e2220 100644 --- a/shared/src/main/kotlin/Strings.kt +++ b/shared/src/main/kotlin/Strings.kt @@ -19,7 +19,6 @@ package ai.tock.shared import org.apache.commons.lang3.StringEscapeUtils.escapeHtml4 import java.text.Normalizer import java.util.Locale -import java.util.regex.Matcher import java.util.regex.Pattern @@ -82,36 +81,46 @@ fun concat(s1: String?, s2: String?): String { } private val trailingRegexp = "[.,:;?!]+$".toRegex() -private val accentsRegexp = "[\\p{InCombiningDiacriticalMarks}]".toRegex() +private val accentsRegexp = "\\p{InCombiningDiacriticalMarks}+".toRegex() -private var regexToDetectHTMLAllowedBalise = "(?:<CHANGE_IT)(.*?)(?:>)" +private const val HTML_TAG_PLACEHOLDER = "CHANGE_IT" +private var regexToDetectHTMLAllowedBalise = "(?:<$HTML_TAG_PLACEHOLDER)(.*?)(?:>)" private val regexToDetectNotAllowedValue = property("tock_safehtml_block_tag", "(?i)s*(script|iframe|object|embed|form|input|link|meta|onload|alert|onerror|href)[^>]") private val allowedList = listProperty("tock_safehtml_allowed_tag", listOf("ul", "li", "⭐")) -val htmlToFrenchLettre = mapOf("à" to "à", "â" to "â", "ä" to "ä", "ç" to "ç", "è" to "è", - "é" to "é", "ê" to "ê", "ë" to "ë", "î" to "î", "ï" to "ï", "ô" to "ô", - "ö" to "ö", "ù" to "ù", "û" to "û", "ü" to "ü", "ñ" to "ñ") +val htmlToFrenchLettre = mapOf( + "à" to "à", "â" to "â", "ä" to "ä", "ç" to "ç", + "è" to "è", "é" to "é", "ê" to "ê", "ë" to "ë", + "î" to "î", "ï" to "ï", "ô" to "ô", "ö" to "ö", + "ù" to "ù", "û" to "û", "ü" to "ü", "ñ" to "ñ" +) - -private fun String.removeTrailingPunctuation() = this.replace(trailingRegexp, "").trim() +private fun String.removeTrailingPunctuation() = replace(trailingRegexp, "").trim() fun String.stripAccents(): String = Normalizer.normalize(this, Normalizer.Form.NFD).replace(accentsRegexp, "") fun String.normalize(locale: Locale): String = - this.lowercase(locale).removeTrailingPunctuation().stripAccents() - -fun allowDiacriticsInRegexp(s: String) : String = s.replace("e", "[eéèêë]", ignoreCase = true) - .replace("a", "[aàáâãä]", ignoreCase = true) - .replace("i", "[iìíîï]", ignoreCase = true) - .replace("o", "[oòóôõöø]", ignoreCase = true) - .replace("u", "[uùúûü]", ignoreCase = true) - .replace("n", "[nñ]", ignoreCase = true) - .replace(" ", "['-_ ]") - .replace("c", "[cç]", ignoreCase = true) + lowercase(locale).removeTrailingPunctuation().stripAccents() + +fun allowDiacriticsInRegexp(s: String): String { + val replacements = mapOf( + 'e' to "[eéèêë]", + 'a' to "[aàáâãä]", + 'i' to "[iìíîï]", + 'o' to "[oòóôõöø]", + 'u' to "[uùúûü]", + 'n' to "[nñ]", + 'c' to "[cç]" + ) + + return s.fold("") { acc, c -> + acc + (replacements[c.lowercaseChar()] ?: c) + }.replace(" ", "['-_ ]") +} - fun safeHTML(value: String): String { +fun safeHTML(value: String): String { var simpelValue = escapeHtml4(value) @@ -141,7 +150,7 @@ fun allowDiacriticsInRegexp(s: String) : String = s.replace("e", "[eéèêë]", // Remove bad value simpelValue = extractAndRemoveBadValue(regexToDetectNotAllowedValue, simpelValue) - return removeNonAscii(simpelValue) + return filterAllowedAndStandardCharacters(simpelValue) } private fun detectIfHTMLBaliseIsAllowed(text: String, allowed: String): String? { @@ -151,41 +160,38 @@ private fun detectIfHTMLBaliseIsAllowed(text: String, allowed: String): String? private fun extractAndRemoveBadValue(regexValue: String, value: String): String { val pattern = Pattern.compile(regexValue, Pattern.MULTILINE) - val matcher: Matcher = pattern.matcher(value) - var tmp = value + val matcher = pattern.matcher(value) + var tmp = value while (matcher.find()) { for (i in 1..matcher.groupCount()) { - tmp = tmp.replace(matcher.group(i).toString(), "") + matcher.group(i)?.let { + tmp = tmp.replace(it, "") + } } } return tmp } -fun extractFullMatcherWithRegex(regexValue: String?, value: String?): String? { - if (regexValue == null || value == null) return null - var data: String? = null - val pattern = Pattern.compile(regexValue, Pattern.MULTILINE) - val matcher: Matcher = pattern.matcher(value) +fun extractFullMatcherWithRegex(regexPattern: String?, value: String?): String? { + if (regexPattern == null || value == null) return null - while (matcher.find()) { - data = matcher.group(0) - } - return data -} + val pattern = Pattern.compile(regexPattern, Pattern.MULTILINE) + val matcher = pattern.matcher(value) + return if (matcher.find()) matcher.group(0) else null +} -fun removeNonAscii(value: String): String { +fun filterAllowedAndStandardCharacters(value: String): String { val result = StringBuilder() - for (`val` in value.toCharArray()) { - if( allowedList.contains(`val`.toString())){ - result.append(`val`) - } - htmlToFrenchLettre.forEach { (key, value) -> - if (`val`.toString() == value) result.append(value) + for (char in value) { + when { + allowedList.contains(char.toString()) -> result.append(char) + htmlToFrenchLettre.values.contains(char.toString()) -> result.append(char) + char.code < 192 -> result.append(char) } - if (`val`.code < 192) result.append(`val`) } + return result.toString() } diff --git a/shared/src/test/kotlin/StringsTest.kt b/shared/src/test/kotlin/StringsTest.kt index b74087f40f..1ad0ccbef2 100644 --- a/shared/src/test/kotlin/StringsTest.kt +++ b/shared/src/test/kotlin/StringsTest.kt @@ -79,15 +79,15 @@ class StringsTest { } @Test - fun `Test good value with xss`() { + fun `Test different values with xss`() { val goodHTML = "