Skip to content

Commit

Permalink
[output sanitizer] wip
Browse files Browse the repository at this point in the history
  • Loading branch information
Pierre Therrode authored and Pierre Therrode committed Oct 24, 2024
1 parent 7c21bcf commit 8e5c88b
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 44 deletions.
88 changes: 47 additions & 41 deletions shared/src/main/kotlin/Strings.kt
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ package ai.tock.shared
import org.apache.commons.lang3.StringEscapeUtils.escapeHtml4
import java.text.Normalizer
import java.util.Locale
import java.util.regex.Matcher
import java.util.regex.Pattern


Expand Down Expand Up @@ -82,36 +81,46 @@ fun concat(s1: String?, s2: String?): String {
}

// Matches a run of one or more punctuation characters (. , : ; ? !) at the very end of the input.
private val trailingRegexp = "[.,:;?!]+$".toRegex()
// Matches characters from the Unicode "Combining Diacritical Marks" block.
// NOTE(review): accentsRegexp is declared twice in a row here (single mark vs. one-or-more marks);
// this looks like the before/after lines of a change shown together — confirm which one is live.
private val accentsRegexp = "[\\p{InCombiningDiacriticalMarks}]".toRegex()
private val accentsRegexp = "\\p{InCombiningDiacriticalMarks}+".toRegex()

// Regex template for an opening tag that captures everything between the tag name and '>'.
// The CHANGE_IT placeholder is presumably replaced by a concrete tag name in code not shown here — TODO confirm.
// NOTE(review): regexToDetectHTMLAllowedBalise is also declared twice (inline placeholder vs. constant).
private var regexToDetectHTMLAllowedBalise = "(?:<CHANGE_IT)(.*?)(?:>)"
private const val HTML_TAG_PLACEHOLDER = "CHANGE_IT"
private var regexToDetectHTMLAllowedBalise = "(?:<$HTML_TAG_PLACEHOLDER)(.*?)(?:>)"

// Pattern of blocked keywords, read from the "tock_safehtml_block_tag" property with the default below.
// NOTE(review): the leading `s*` matches zero or more literal 's' characters; a whitespace class
// (`\\s*`) may have been intended — confirm.
private val regexToDetectNotAllowedValue = property("tock_safehtml_block_tag", "(?i)s*(script|iframe|object|embed|form|input|link|meta|onload|alert|onerror|href)[^>]")
// Allow-list read from the "tock_safehtml_allowed_tag" property; defaults to "ul", "li" and the empty string.
private val allowedList = listProperty("tock_safehtml_allowed_tag", listOf("ul", "li", ""))

// Table of accented letters; the character filter in this file tests membership in its values.
// NOTE(review): every key is identical to its value as shown here. The name suggests the keys were
// meant to be HTML entities mapped to letters — confirm the literals against the real file.
// NOTE(review): the map is declared twice below (compact layout, then one group per line); this looks
// like the before/after lines of a change shown together.
val htmlToFrenchLettre = mapOf("à" to "à", "â" to "â", "ä" to "ä", "ç" to "ç", "è" to "è",
"é" to "é", "ê" to "ê", "ë" to "ë", "î" to "î", "ï" to "ï", "ô" to "ô",
"ö" to "ö", "ù" to "ù", "û" to "û", "ü" to "ü", "ñ" to "ñ")
val htmlToFrenchLettre = mapOf(
"à" to "à", "â" to "â", "ä" to "ä", "ç" to "ç",
"è" to "è", "é" to "é", "ê" to "ê", "ë" to "ë",
"î" to "î", "ï" to "ï", "ô" to "ô", "ö" to "ö",
"ù" to "ù", "û" to "û", "ü" to "ü", "ñ" to "ñ"
)


private fun String.removeTrailingPunctuation() = this.replace(trailingRegexp, "").trim()
/** Deletes whatever [trailingRegexp] matches in this string, then trims surrounding whitespace. */
private fun String.removeTrailingPunctuation(): String = trailingRegexp.replace(this, "").trim()

/**
 * Returns this string without accents: the text is first decomposed to Unicode NFD,
 * then everything matched by [accentsRegexp] is removed.
 */
fun String.stripAccents(): String {
    val decomposed = Normalizer.normalize(this, Normalizer.Form.NFD)
    return accentsRegexp.replace(decomposed, "")
}

/**
 * Normalizes this string in three steps: lower-case it with [locale], remove trailing
 * punctuation, then strip accents.
 */
fun String.normalize(locale: Locale): String {
    val lowered = lowercase(locale)
    val withoutTrailingPunctuation = lowered.removeTrailingPunctuation()
    return withoutTrailingPunctuation.stripAccents()
}

/**
 * Widens [s] for use inside a regular expression: the letters e, a, i, o, u, n and c
 * (matched case-insensitively) are each replaced by a character class that also accepts
 * their accented forms, and every space is replaced by the class `['-_ ]`.
 */
fun allowDiacriticsInRegexp(s: String): String {
    // These substitutions run first, in this exact order.
    val letterClasses = listOf(
        "e" to "[eéèêë]",
        "a" to "[aàáâãä]",
        "i" to "[iìíîï]",
        "o" to "[oòóôõöø]",
        "u" to "[uùúûü]",
        "n" to "[nñ]"
    )
    val widened = letterClasses.fold(s) { text, (plain, charClass) ->
        text.replace(plain, charClass, ignoreCase = true)
    }
    // The space substitution comes before the one for "c", as in the chained form.
    return widened
        .replace(" ", "['-_ ]")
        .replace("c", "[cç]", ignoreCase = true)
}
lowercase(locale).removeTrailingPunctuation().stripAccents()

/**
 * Widens [s] for use inside a regular expression.
 *
 * Each of the letters e, a, i, o, u, n and c (compared after lower-casing the character) is
 * replaced by a character class that also accepts its accented forms; every space is replaced
 * by the class `['-_ ]`; all other characters are copied unchanged.
 *
 * The result is assembled in a single pass with one builder instead of concatenating a new
 * string per character, and no lookup map is allocated per call.
 *
 * @param s the text to widen
 * @return the widened text
 */
fun allowDiacriticsInRegexp(s: String): String =
    buildString {
        for (c in s) {
            when (c.lowercaseChar()) {
                'e' -> append("[eéèêë]")
                'a' -> append("[aàáâãä]")
                'i' -> append("[iìíîï]")
                'o' -> append("[oòóôõöø]")
                'u' -> append("[uùúûü]")
                'n' -> append("[nñ]")
                'c' -> append("[cç]")
                // NOTE(review): inside a character class `'-_` is a RANGE (from ' to _), so this class
                // also accepts digits, upper-case letters and most ASCII punctuation. If only
                // apostrophe, hyphen, underscore and space were intended, the hyphen needs escaping
                // or moving to the edge of the class. Kept as-is because callers may depend on the
                // exact output.
                ' ' -> append("['-_ ]")
                else -> append(c)
            }
        }
    }

fun safeHTML(value: String): String {
fun safeHTML(value: String): String {

var simpelValue = escapeHtml4(value)

Expand Down Expand Up @@ -141,7 +150,7 @@ fun allowDiacriticsInRegexp(s: String) : String = s.replace("e", "[eéèêë]",
// Remove bad value
simpelValue = extractAndRemoveBadValue(regexToDetectNotAllowedValue, simpelValue)

return removeNonAscii(simpelValue)
return filterAllowedAndStandardCharacters(simpelValue)
}

private fun detectIfHTMLBaliseIsAllowed(text: String, allowed: String): String? {
Expand All @@ -151,41 +160,38 @@ private fun detectIfHTMLBaliseIsAllowed(text: String, allowed: String): String?

// Returns a copy of `value` from which the text captured by each group of `regexValue` has been
// removed: for every match, each capture group's text is deleted from the working copy with a
// literal (non-regex) replace.
// NOTE(review): `matcher` and `tmp` are each declared twice and two variants of the removal step
// appear below; this looks like the before/after lines of a change shown together — confirm which
// lines are live.
private fun extractAndRemoveBadValue(regexValue: String, value: String): String {
val pattern = Pattern.compile(regexValue, Pattern.MULTILINE)
val matcher: Matcher = pattern.matcher(value)
var tmp = value
val matcher = pattern.matcher(value)

var tmp = value
while (matcher.find()) {
// Group 0 (the whole match) is skipped; only explicit capture groups are removed.
for (i in 1..matcher.groupCount()) {
// Variant 1: a group that did not participate in the match is null, and toString() turns it
// into the text "null", which is then removed from the value.
tmp = tmp.replace(matcher.group(i).toString(), "")
// Variant 2: null-safe — groups that did not participate are skipped.
matcher.group(i)?.let {
tmp = tmp.replace(it, "")
}
}
}
return tmp
}

// Returns the full text (group 0) that a regex matches in `value`, or null when either argument is
// null or when nothing matches. The pattern is compiled with Pattern.MULTILINE.
// NOTE(review): two variants are shown together in this span. The one taking `regexValue` loops over
// every match and returns the LAST one; the one taking `regexPattern` returns the FIRST match.
// Confirm which one is live and whether any caller relies on last-match behavior or on the
// parameter name.
fun extractFullMatcherWithRegex(regexValue: String?, value: String?): String? {
if (regexValue == null || value == null) return null
var data: String? = null
val pattern = Pattern.compile(regexValue, Pattern.MULTILINE)
val matcher: Matcher = pattern.matcher(value)
fun extractFullMatcherWithRegex(regexPattern: String?, value: String?): String? {
if (regexPattern == null || value == null) return null

// Each iteration overwrites `data`, so the last match wins.
while (matcher.find()) {
data = matcher.group(0)
}
return data
}
val pattern = Pattern.compile(regexPattern, Pattern.MULTILINE)
val matcher = pattern.matcher(value)

// Only the first match is examined.
return if (matcher.find()) matcher.group(0) else null
}

// Builds a filtered copy of `value`. A character is kept when its one-character string is in
// allowedList, or is one of the htmlToFrenchLettre values, or when its char code is below 192;
// other characters are dropped.
// NOTE(review): two variants are shown together in this span. removeNonAscii uses independent `if`
// checks, so a character satisfying more than one condition is appended more than once;
// filterAllowedAndStandardCharacters uses `when`, so each character is appended at most once.
// Confirm which one is live.
fun removeNonAscii(value: String): String {
fun filterAllowedAndStandardCharacters(value: String): String {
val result = StringBuilder()
for (`val` in value.toCharArray()) {

if( allowedList.contains(`val`.toString())){
result.append(`val`)
}
htmlToFrenchLettre.forEach { (key, value) ->
if (`val`.toString() == value) result.append(value)
for (char in value) {
when {
// Only the first matching branch runs; characters matching none are skipped.
allowedList.contains(char.toString()) -> result.append(char)
htmlToFrenchLettre.values.contains(char.toString()) -> result.append(char)
char.code < 192 -> result.append(char)
}
if (`val`.code < 192) result.append(`val`)
}

return result.toString()
}
6 changes: 3 additions & 3 deletions shared/src/test/kotlin/StringsTest.kt
Original file line number Diff line number Diff line change
Expand Up @@ -79,15 +79,15 @@ class StringsTest {
}

@Test
fun `Test good value with xss`() {
fun `Test different values with xss`() {
val goodHTML = "<ul style=\"list-style: none;\"><li>⭐ Aux voyageurs Business Première</li></ul>"
assertEquals(safeHTML(goodHTML),"<ul style=\"list-style: none;\"><li>⭐ Aux voyageurs Business Première</li></ul>")

val badHTML = "<ul style=\"list-style: none;\"><li>⭐ Aux voyageurs Business Première</li></ul><sCriPt>alert('xss')</ScriPt>"
assertEquals(safeHTML(badHTML),"<ul style=\"list-style: none;\"><li>⭐ Aux voyageurs Business Première</li></ul>&lt;&gt;('xss')&lt;/&gt;")

val toto = "<ul style=\"list-style: none;\"><li>⭐ Aux voyageurs Business Première</li><IMG SRC=\"jav&#x09;ascript:aLerT('XSS');\"></ul>"
assertEquals(safeHTML(toto),"<ul style=\"list-style: none;\"><li>⭐ Aux voyageurs Business Première</li>&lt;IMG SRC=\"jav&amp;#x09;a:('XSS');\"&gt;</ul>")
val badImgSRC = "<ul style=\"list-style: none;\"><li>⭐ Aux voyageurs Business Première</li><IMG SRC=\"jav&#x09;ascript:aLerT('XSS');\"></ul>"
assertEquals(safeHTML(badImgSRC),"<ul style=\"list-style: none;\"><li>⭐ Aux voyageurs Business Première</li>&lt;IMG SRC=\"jav&amp;#x09;a:('XSS');\"&gt;</ul>")

val tryWithMaliciousXSS = "<ul style=\"list-style: none;\"><svg onload=\"alert(1)\"></svg><li>⭐ Aux voyageurs Business Première</li></ul>"
assertEquals(safeHTML(tryWithMaliciousXSS),"<ul style=\"list-style: none;\">&lt;svg =\"(1)\"&gt;&lt;/svg&gt;<li>⭐ Aux voyageurs Business Première</li></ul>")
Expand Down

0 comments on commit 8e5c88b

Please sign in to comment.