Skip to content

Commit

Permalink
Some UTF-8 validation. Also: Use isInString() instead of instr(). See…
Browse files Browse the repository at this point in the history
… stringfunctions.bi
  • Loading branch information
StringEpsilon committed Nov 14, 2017
1 parent 8b90086 commit e63d5e4
Show file tree
Hide file tree
Showing 2 changed files with 56 additions and 9 deletions.
15 changes: 11 additions & 4 deletions fbJson/JsonItem.bi
Original file line number Diff line number Diff line change
Expand Up @@ -404,7 +404,14 @@ sub JsonItem.Parse(jsonString as byte ptr, endIndex as integer)
end if

' Skipping the opening and closing brackets makes things a bit easier.
for i = parseStart to parseEnd
for i = parseStart to parseEnd

if ( validateCodepoint(jsonstring[i]) = false ) then
currentItem->_datatype = malformed
currentItem->_error = "Invalid codepoint."
return
end if

' Because strings can contain JSON tokens, we handle them separately:
if ( jsonString[i] = jsonToken.Quote AndAlso (I = 0 orElse jsonString[i-1] <> jsonToken.BackSlash) ) then
isStringOpen = not(isStringOpen)
Expand All @@ -416,7 +423,7 @@ sub JsonItem.Parse(jsonString as byte ptr, endIndex as integer)
case keyToken
if child = 0 then child = new JsonItem()
fastmid (child->_key, jsonString, valuestart, i - valueStart)
if ( instr(child->_key, "\") <> 0 ) then
if ( isInString(child->_key, jsonToken.backslash) <> 0 ) then
if ( DeEscapeString(child->_key) = false ) then
child->setErrorMessage(invalidEscapeSequence, jsonstring, i)
end if
Expand Down Expand Up @@ -519,7 +526,7 @@ sub JsonItem.Parse(jsonString as byte ptr, endIndex as integer)
else
' If we are in a string IN a value, we add up the length.
if ( state = valueToken ) then
valueLength +=1
valueLength +=1
end if
end if

Expand Down Expand Up @@ -560,7 +567,7 @@ sub JsonItem.Parse(jsonString as byte ptr, endIndex as integer)
if ( jsonstring[valueStart+valueLength-1] ) then
FastMid(child->_value, jsonString, valuestart+1, valueLength-2)
child->_dataType = jsonDataType.jsonString
if ( instr(child->_value, "\") <> 0 ) then
if ( isinstring(child->_value, jsonToken.backslash) <> 0 ) then
if ( DeEscapeString(child->_value) = false ) then
FastMid(child->_value, jsonString, valuestart+1, valueLength-2)
child->setErrorMessage(invalidEscapeSequence, jsonstring, i)
Expand Down
50 changes: 45 additions & 5 deletions fbJson/StringFunctions.bi
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,28 @@ namespace fbJsonInternal

const replacementChar as string = "�"

' Coarse single-byte UTF-8 sanity check.
'
' codepoint: one raw byte of the input. It is declared as a signed `byte`,
'            so values 128..255 arrive as negative numbers.
' Returns false when the byte is one of the values rejected below
' (192, 193, 245..255), true otherwise. Sequence-level checks (surrogates,
' overlong encodings, stray continuation bytes) are still TODO.
function validateCodepoint(byref codepoint as byte) as boolean
	' Reinterpret the signed byte as its unsigned value (0..255). Comparing the
	' signed value directly against 191 would always succeed, making every
	' check below unreachable.
	dim as integer octet = codepoint and &hFF

	' Anything below 191 *should* be valid.
	if (octet < 191) then
		return true
	end if

	select case as const octet
		' These byte values are straight up invalid no matter what:
		case 192, 193, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255
			return false
		case 237
			' TODO: Validate against surrogate pairs, which are invalid in UTF-8.
			return true
		case 224, 240, 244
			' TODO: Validate against overlong encoding.
			return true
		case else
			' TODO: Validate against "unexpected continuation".
	end select
	return true
end function

' Allows us to interact directly with the FB-Internal string-structure.
' Don't use it, unless you know what you're doing.
type fbString
Expand All @@ -19,6 +41,16 @@ type fbString
dim as integer size
end type

' Turns `destination` into a string of `length` space characters (ASCII 32)
' by writing directly to its descriptor through the fbString overlay.
'
' destination: string whose buffer is reused when its recorded size already
'              equals `length`, and replaced otherwise.
' length:      number of space characters the string should hold afterwards.
'
' NOTE(review): relies on fbString exposing stringData / length / size as used
' below; the full type declaration is not visible here - confirm.
sub FastSpace(byref destination as string, length as uinteger)
	dim as fbString ptr destinationPtr = cast(fbString ptr, @destination)

	if ( destinationPtr->size <> length ) then
		deallocate destinationPtr->stringData
		destinationPtr->stringData = allocate(length)
		' Record the new capacity. Without this the guard above keeps comparing
		' against the old size and a later call can memset past a smaller buffer.
		destinationPtr->size = length
	end if

	if ( destinationPtr->stringData = 0 ) then
		' Nothing to fill: either length is 0 or the allocation failed.
		' Leave a consistent empty descriptor instead of writing through null.
		destinationPtr->size = 0
		destinationPtr->length = 0
		exit sub
	end if

	memset(destinationPtr->stringData, 32, length)
	destinationPtr->length = length
end sub

sub FastLeft(byref destination as string, length as uinteger)
dim as fbString ptr destinationPtr = cast(fbString ptr, @destination)
destinationPtr->length = IIF(length < destinationPtr->length, length, destinationPtr->length)
Expand All @@ -36,11 +68,19 @@ sub FastMid(byref destination as string, byref source as byte ptr, start as uint
memcpy( destinationPtr->stringData, source+start, destinationPtr->size )
end sub

' Reports whether the byte `query` occurs anywhere in `target`.
'
' target: string to search; only its logical contents (len(target) bytes)
'         are examined.
' query:  byte value to look for.
' Returns true if the byte is present, false otherwise (including for an
' empty string).
function isInString(byref target as string, byref query as byte) as boolean
	' Search the logical length, not the allocated capacity: the buffer may be
	' larger than the string (e.g. after FastLeft shortens it) and the bytes
	' past the end are stale.
	dim as integer logicalLength = len(target)
	if ( logicalLength = 0 ) then return false

	return memchr( strptr(target), query, logicalLength ) <> 0
end function


function EscapedToUtf8(byref codepoint as long) as string
dim result as string

if codePoint <= &h7F then
result = space(1)
fastSpace(result, 1)
result[0] = codePoint
return result
endif
Expand All @@ -51,20 +91,20 @@ function EscapedToUtf8(byref codepoint as long) as string
end if

if (codepoint <= &h7FF) then
result = space(2)
fastSpace(result, 2)
result[0] = &hC0 OR (codepoint SHR 6) AND &h1F
result[1] = &h80 OR codepoint AND &h3F
return result
end if
if (codepoint <= &hFFFF) then
result = space(3)
fastSpace(result, 3)
result[0] = &hE0 OR codepoint SHR 12 AND &hF
result[1] = &h80 OR codepoint SHR 6 AND &h3F
result[2] = &h80 OR codepoint AND &h3F
return result
end if

result = space(4)
fastSpace(result, 4)
result[0] = &hF0 OR codepoint SHR 18 AND &h7
result[1] = &h80 OR codepoint SHR 12 AND &h3F
result[2] = &h80 OR codepoint SHR 6 AND &h3F
Expand Down Expand Up @@ -137,7 +177,7 @@ function DeEscapeString(byref escapedString as string) as boolean

end if

if (glyph = "") then
if (glyph = "" ) then
return false
end if

Expand Down

0 comments on commit e63d5e4

Please sign in to comment.