Skip to content

Commit

Permalink
core: add helper for escaping unicode chars (#1686)
Browse files Browse the repository at this point in the history
Add helper to encode designated special characters in a
string using the unicode escape sequence (\uXXXX).
  • Loading branch information
brharrington authored Aug 20, 2024
1 parent 3b1d5cf commit f753099
Show file tree
Hide file tree
Showing 2 changed files with 103 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,66 @@ object Strings {
}
}

/**
 * Walk the input one code point at a time and replace each code point accepted by
 * `isSpecial` with its unicode escape sequence. Code points that are not special are
 * copied to the result unchanged.
 */
def escape(input: String, isSpecial: Int => Boolean): String = {
  val out = new java.lang.StringBuilder(input.length)
  var pos = 0
  while (pos < input.length) {
    val codePoint = input.codePointAt(pos)
    if (!isSpecial(codePoint))
      out.appendCodePoint(codePoint)
    else
      escapeCodePoint(codePoint, out)
    // Advance by one or two chars depending on whether this was a surrogate pair
    pos += Character.charCount(codePoint)
  }
  out.toString
}

/**
 * Append the escaped form of a code point to the builder. Each escape is a backslash,
 * the letter 'u', and exactly four lowercase hex digits. Code points outside the basic
 * multilingual plane do not fit in four digits, so they are written as a UTF-16
 * surrogate pair (two consecutive escapes). This keeps every escape fixed-width so the
 * output can be decoded again by reading four digits per escape.
 */
private def escapeCodePoint(cp: Int, builder: java.lang.StringBuilder): Unit = {
  if (Character.isSupplementaryCodePoint(cp)) {
    appendEscapedChar(Character.highSurrogate(cp), builder)
    appendEscapedChar(Character.lowSurrogate(cp), builder)
  } else {
    appendEscapedChar(cp.toChar, builder)
  }
}

/** Append a single UTF-16 char as a backslash, 'u', and four zero-padded hex digits. */
private def appendEscapedChar(c: Char, builder: java.lang.StringBuilder): Unit = {
  val v = c.toInt
  builder.append('\\').append('u')
  // Character.forDigit yields lowercase digits for radix 16
  builder.append(Character.forDigit((v >> 12) & 0xF, 16))
  builder.append(Character.forDigit((v >> 8) & 0xF, 16))
  builder.append(Character.forDigit((v >> 4) & 0xF, 16))
  builder.append(Character.forDigit(v & 0xF, 16))
}

/**
 * Unescape unicode characters in the input string. An escape sequence is a backslash,
 * the letter 'u', and exactly four ASCII hex digits (upper or lower case). Any invalid
 * or unrecognized escape sequence is ignored and copied to the output as is. This
 * includes sequences that are too short, that use another escape type, or that contain
 * non-hex characters such as a sign.
 *
 * A supplementary character written as two consecutive escapes (a surrogate pair)
 * results in the corresponding pair of chars in the output.
 */
def unescape(input: String): String = {
  val length = input.length
  val builder = new java.lang.StringBuilder(length)
  var i = 0
  while (i < length) {
    val c = input.charAt(i)
    // A complete escape needs 6 characters: the backslash, 'u', and four hex digits.
    if (c == '\\' && length - i >= 6 && input.charAt(i + 1) == 'u') {
      val value = parseHex4(input, i + 2)
      if (value >= 0) {
        builder.append(value.toChar)
        i += 6
      } else {
        // Not a valid escape, keep the backslash and continue with the next char
        builder.append(c)
        i += 1
      }
    } else {
      builder.append(c)
      i += 1
    }
  }
  builder.toString
}

/**
 * Parse the four characters starting at `start` as ASCII hex digits. Returns the value
 * in the range 0 to 0xFFFF, or -1 if any of the characters is not a hex digit. The
 * caller must ensure that at least four characters are available.
 */
private def parseHex4(s: String, start: Int): Int = {
  var value = 0
  var j = 0
  while (j < 4 && value >= 0) {
    val c = s.charAt(start + j)
    val digit =
      if (c >= '0' && c <= '9') c - '0'
      else if (c >= 'a' && c <= 'f') c - 'a' + 10
      else if (c >= 'A' && c <= 'F') c - 'A' + 10
      else -1
    value = if (digit < 0) -1 else (value << 4) | digit
    j += 1
  }
  value
}

private val uriEscapes: Array[String] = {

def hex(c: Char) = "%%%02X".format(c.toInt)
Expand Down Expand Up @@ -181,7 +241,7 @@ object Strings {
* the only escaping necessary for '%', '&amp;', '+', '?', '=', and ' '.
*/
def urlEncode(s: String): String = {
val buf = new java.lang.StringBuilder
val buf = new java.lang.StringBuilder(s.length)
val size = s.length
var pos = 0
while (pos < size) {
Expand Down Expand Up @@ -212,7 +272,7 @@ object Strings {
* Decoded string.
*/
def hexDecode(input: String, escapeChar: Char = '%'): String = {
val buf = new java.lang.StringBuilder
val buf = new java.lang.StringBuilder(input.length)
val size = input.length
var pos = 0
while (pos < size) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,47 @@ class StringsSuite extends FunSuite {
}
}

test("escape") {
  // With every code point treated as special, each single-character string should
  // come back as a backslash, 'u', and the zero-padded value from zeroPad.
  (0 until Short.MaxValue).foreach { cp =>
    val expected = s"\\u${zeroPad(cp, 4)}"
    assertEquals(escape(Character.toString(cp), _ => true), expected)
  }
}

test("escape, comma and colon") {
  def isCommaOrColon(cp: Int): Boolean = cp == ',' || cp == ':'

  val plain = ":foo-bar,baz"
  val escaped = "\\u003afoo-bar\\u002cbaz"

  // Only the selected characters are escaped and the result decodes back
  assertEquals(escape(plain, isCommaOrColon), escaped)
  assertEquals(unescape(escaped), plain)
  // Input without any escapes passes through unescape unchanged
  assertEquals(unescape(plain), plain)
}

test("unescape") {
  // Decoding the escape built from each value should give back the single-character
  // string for that code point.
  (0 until Short.MaxValue).foreach { cp =>
    val escaped = s"\\u${zeroPad(cp, 4)}"
    assertEquals(unescape(escaped), Character.toString(cp))
  }
}

test("unescape, too short") {
  // Only three characters follow the 'u', so the text is expected back unchanged
  val truncated = "foo\\u000"
  assertEquals(unescape(truncated), truncated)
}

test("unescape, unknown type") {
  // An 'x' escape is not a unicode escape and is expected back unchanged
  val otherEscape = "foo\\x0000"
  assertEquals(unescape(otherEscape), otherEscape)
}

test("unescape, invalid") {
  // 'z' and 'y' are not hex digits, so the text is expected back unchanged
  val notHex = "foo\\uzyff"
  assertEquals(unescape(notHex), notHex)
}

test("urlDecode") {
val str = "a b %25 % %%% %21%zb"
val expected = "a b % % %%% !%zb"
Expand Down

0 comments on commit f753099

Please sign in to comment.