Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Generalize JSON code to work with either --unicode-char mode #118

Merged
merged 17 commits into from
Apr 19, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 46 additions & 2 deletions src/Collections/Sequences/Seq.dfy
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,26 @@ module {:options "-functionSyntax:4"} Seq {
*
***********************************************************/

/* If a predicate is true at every index of a sequence,
it is true for every member of the sequence as a collection.
Useful for converting quantifiers between the two forms
to satisfy a precondition in the latter form. */
lemma IndexingImpliesMembership<T>(p: T -> bool, xs: seq<T>)
requires forall i | 0 <= i < |xs| :: p(xs[i])
ensures forall t | t in xs :: p(t)
{
}

/* If a predicate is true for every member of a sequence as a collection,
it is true at every index of the sequence.
Useful for converting quantifiers between the two forms
to satisfy a precondition in the latter form. */
lemma MembershipImpliesIndexing<T>(p: T -> bool, xs: seq<T>)
requires forall t | t in xs :: p(t)
ensures forall i | 0 <= i < |xs| :: p(xs[i])
{
}

/* Is true if the sequence xs is a prefix of the sequence ys. */
ghost predicate IsPrefix<T>(xs: seq<T>, ys: seq<T>)
ensures IsPrefix(xs, ys) ==> (|xs| <= |ys| && xs == ys[..|xs|])
Expand Down Expand Up @@ -603,8 +623,12 @@ module {:options "-functionSyntax:4"} Seq {
ensures forall i {:trigger result[i]} :: 0 <= i < |xs| ==> result[i] == f(xs[i]);
reads set i, o | 0 <= i < |xs| && o in f.reads(xs[i]) :: o
{
if |xs| == 0 then []
else [f(xs[0])] + Map(f, xs[1..])
// This uses a sequence comprehension because it will usually be
// more efficient when compiled, allocating the storage for |xs| elements
// once instead of creating a chain of |xs| single element concatenations.
seq(|xs|, i requires 0 <= i < |xs| && f.requires(xs[i])
reads set i,o | 0 <= i < |xs| && o in f.reads(xs[i]) :: o
=> f(xs[i]))
}

/* Applies a function to every element of a sequence, returning a Result value (which is a
Expand Down Expand Up @@ -803,6 +827,26 @@ module {:options "-functionSyntax:4"} Seq {
}
}

/* Optimized implementation of Flatten(Map(f, xs)). */
function {:opaque} FlatMap<T,R>(f: (T ~> seq<R>), xs: seq<T>): (result: seq<R>)
requires forall i :: 0 <= i < |xs| ==> f.requires(xs[i])
ensures result == Flatten(Map(f, xs));
reads set i, o | 0 <= i < |xs| && o in f.reads(xs[i]) :: o
{
Flatten(Map(f, xs))
}
by method {
result := [];
ghost var unflattened: seq<seq<R>> := [];
for i := |xs| downto 0
invariant unflattened == Map(f, xs[i..])
invariant result == Flatten(unflattened)
{
var next := f(xs[i]);
unflattened := [next] + unflattened;
result := next + result;
}
}

/**********************************************************
*
Expand Down
82 changes: 51 additions & 31 deletions src/JSON/Deserializer.dfy
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ include "../BoundedInts.dfy"

include "Utils/Views.dfy"
include "Utils/Vectors.dfy"
include "Utils/Unicode.dfy"
include "Errors.dfy"
include "AST.dfy"
include "Grammar.dfy"
Expand All @@ -25,7 +24,7 @@ module {:options "-functionSyntax:4"} JSON.Deserializer {
import opened Logarithm
import opened Power
import opened Utils.Str
import Utils.Unicode
import opened UnicodeStrings

import AST
import Spec
Expand All @@ -39,60 +38,81 @@ module {:options "-functionSyntax:4"} JSON.Deserializer {
js.At(0) == 't' as byte
}

function UnsupportedEscape16(code: seq<uint16>): DeserializationError {
UnsupportedEscape(FromUTF16Checked(code).UnwrapOr("Couldn't decode UTF-16"))
}

module Uint16StrConversion refines Str.ParametricConversion {
import opened BoundedInts

type Char = uint16
}

const HEX_TABLE_16: map<Uint16StrConversion.Char, nat> :=
map[
'0' as uint16 := 0, '1' as uint16 := 1, '2' as uint16 := 2, '3' as uint16 := 3, '4' as uint16 := 4,
'5' as uint16 := 5, '6' as uint16 := 6, '7' as uint16 := 7, '8' as uint16 := 8, '9' as uint16 := 9,
'a' as uint16 := 0xA, 'b' as uint16 := 0xB, 'c' as uint16 := 0xC, 'd' as uint16 := 0xD, 'e' as uint16 := 0xE, 'f' as uint16 := 0xF,
'A' as uint16 := 0xA, 'B' as uint16 := 0xB, 'C' as uint16 := 0xC, 'D' as uint16 := 0xD, 'E' as uint16 := 0xE, 'F' as uint16 := 0xF
]

function ToNat16(str: Uint16StrConversion.String): uint16
requires |str| <= 4
requires forall c | c in str :: c in HEX_TABLE_16
{
Uint16StrConversion.ToNat_bound(str, 16, HEX_TABLE_16);
var hd := Uint16StrConversion.ToNat_any(str, 16, HEX_TABLE_16);
assert hd < 0x1_0000 by { reveal Pow(); }
hd as uint16
}

// TODO: Verify this function
function Unescape(str: string, start: nat := 0): DeserializationResult<string>
function Unescape(str: seq<uint16>, start: nat := 0): DeserializationResult<seq<uint16>>
decreases |str| - start
{ // Assumes UTF-16 strings
if start >= |str| then Success([])
else if str[start] == '\\' then
else if str[start] == '\\' as uint16 then
if |str| == start + 1 then
Failure(EscapeAtEOS)
else
var c := str[start + 1];
if c == 'u' then
if c == 'u' as uint16 then
if |str| <= start + 6 then
Failure(EscapeAtEOS)
else
var code := str[start + 2..start + 6];
if exists c | c in code :: c !in Str.HEX_TABLE then
Failure(UnsupportedEscape(code))
if exists c | c in code :: c !in HEX_TABLE_16 then
Failure(UnsupportedEscape16(code))
else
var tl :- Unescape(str, start + 6);
var hd := Str.ToNat(code, 16);
assert hd < 0x10000 by { reveal Pow(); }
if 0xD7FF < hd then
Failure(UnsupportedEscape(code))
else
Success([hd as char])

var hd := ToNat16(code);
Success([hd])
else
var unescaped: uint16 := match c
case '\"' => 0x22 as uint16 // quotation mark
case '\\' => 0x5C as uint16 // reverse solidus
case 'b' => 0x08 as uint16 // backspace
case 'f' => 0x0C as uint16 // form feed
case 'n' => 0x0A as uint16 // line feed
case 'r' => 0x0D as uint16 // carriage return
case 't' => 0x09 as uint16 // tab
case 0x22 => 0x22 as uint16 // \" => quotation mark
case 0x5C => 0x5C as uint16 // \\ => reverse solidus
case 0x62 => 0x08 as uint16 // \b => backspace
case 0x66 => 0x0C as uint16 // \f => form feed
case 0x6E => 0x0A as uint16 // \n => line feed
case 0x72 => 0x0D as uint16 // \r => carriage return
case 0x74 => 0x09 as uint16 // \t => tab
case _ => 0 as uint16;
if unescaped == 0 as uint16 then
Failure(UnsupportedEscape(str[start..start+2]))
if unescaped as int == 0 then
Failure(UnsupportedEscape16(str[start..start+2]))
else
var tl :- Unescape(str, start + 2);
Success([unescaped as char] + tl)
Success([unescaped] + tl)
else
var tl :- Unescape(str, start + 1);
Success([str[start]] + tl)
}

function Transcode8To16Unescaped(str: seq<byte>): DeserializationResult<string>
// TODO Optimize with a function by method
{
Unescape(Unicode.Transcode8To16(str))
}

function String(js: Grammar.jstring): DeserializationResult<string> {
Transcode8To16Unescaped(js.contents.Bytes())
// TODO Optimize with a function by method
var asUtf32 :- FromUTF8Checked(js.contents.Bytes()).ToResult'(DeserializationError.InvalidUnicode);
var asUint16 :- ToUTF16Checked(asUtf32).ToResult'(DeserializationError.InvalidUnicode);
var unescaped :- Unescape(asUint16);
FromUTF16Checked(unescaped).ToResult'(DeserializationError.InvalidUnicode)
}

module ByteStrConversion refines Str.ParametricConversion {
Expand Down
4 changes: 4 additions & 0 deletions src/JSON/Errors.dfy
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ module {:options "-functionSyntax:4"} JSON.Errors {
| ReachedEOF
| ExpectingByte(expected: byte, b: opt_byte)
| ExpectingAnyByte(expected_sq: seq<byte>, b: opt_byte)
| InvalidUnicode
{
function ToString() : string {
match this
Expand All @@ -36,19 +37,22 @@ module {:options "-functionSyntax:4"} JSON.Errors {
var c := if b > 0 then "'" + [b as char] + "'" else "EOF";
var c0s := seq(|bs0|, idx requires 0 <= idx < |bs0| => bs0[idx] as char);
"Expecting one of '" + c0s + "', read " + c
case InvalidUnicode => "Invalid Unicode sequence"
}
}

datatype SerializationError =
| OutOfMemory
| IntTooLarge(i: int)
| StringTooLong(s: string)
| InvalidUnicode
{
function ToString() : string {
match this
case OutOfMemory => "Out of memory"
case IntTooLarge(i: int) => "Integer too large: " + Str.OfInt(i)
case StringTooLong(s: string) => "String too long: " + s
case InvalidUnicode => "Invalid Unicode sequence"
}
}

Expand Down
29 changes: 1 addition & 28 deletions src/JSON/Serializer.dfy
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ include "../Math.dfy"

include "Utils/Views.dfy"
include "Utils/Vectors.dfy"
include "Utils/Unicode.dfy"
include "Errors.dfy"
include "AST.dfy"
include "Grammar.dfy"
Expand All @@ -24,7 +23,6 @@ module {:options "-functionSyntax:4"} JSON.Serializer {
import opened Wrappers
import opened BoundedInts
import opened Utils.Str
import Utils.Unicode

import AST
import Spec
Expand All @@ -43,37 +41,12 @@ module {:options "-functionSyntax:4"} JSON.Serializer {
View.OfBytes(if b then TRUE else FALSE)
}

function Transcode16To8Escaped(str: string, start: uint32 := 0): bytes {
Unicode.Transcode16To8(Spec.Escape(str))
} // FIXME speed up using a `by method`
// by method {
// var len := |str| as uint32;
// if len == 0 {
// return [];
// }
// var st := new Vectors.Vector(0, len);
// var c0: uint16 := 0;
// var c1: uint16 := str[0] as uint16;
// var idx: uint32 := 0;
// while idx < len {
// var c0 := c1;
// var c1 := str[idx + 1] as uint16;
// if c0 < 0xD800 || c0 > 0xDBFF {
// Utf8Encode(st, Unicode.Utf16Decode1(c0));
// idx := idx +1;
// } else {
// Utf8Encode(st, Unicode.Utf16Decode2(c0, c1));
// idx := idx + 2;
// }
// }
// }

function CheckLength<T>(s: seq<T>, err: SerializationError): Outcome<SerializationError> {
Need(|s| < TWO_TO_THE_32, err)
}

function String(str: string): Result<jstring> {
var bs := Transcode16To8Escaped(str);
var bs :- Spec.EscapeToUTF8(str);
:- CheckLength(bs, StringTooLong(str));
Success(Grammar.JString(Grammar.DOUBLEQUOTE, View.OfBytes(bs), Grammar.DOUBLEQUOTE))
}
Expand Down
Loading