From 3ff49f887f825a4e13d39d923363df13b638a6a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Pit-Claudel?= Date: Fri, 5 May 2023 23:42:46 +0200 Subject: [PATCH] JSON support (#51) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Mikaël Mayer Co-authored-by: Alex Chew Co-authored-by: Robin Salkeld Co-authored-by: Fabio Madge --- .github/workflows/check-examples-in-docs.yml | 2 +- .github/workflows/check-format.yml | 4 +- .github/workflows/nightly.yml | 7 +- .github/workflows/reusable-tests.yml | 11 +- .github/workflows/tests.yml | 6 +- lit.site.cfg | 2 + src/BoundedInts.dfy | 27 +- src/Collections/Sequences/Seq.dfy | 47 +- src/JSON/API.dfy | 40 + src/JSON/ConcreteSyntax.Spec.dfy | 121 +++ src/JSON/ConcreteSyntax.SpecProperties.dfy | 53 ++ src/JSON/Deserializer.dfy | 175 ++++ src/JSON/Errors.dfy | 61 ++ src/JSON/Grammar.dfy | 102 +++ src/JSON/README.md | 122 +++ src/JSON/Serializer.dfy | 153 ++++ src/JSON/Spec.dfy | 148 ++++ src/JSON/Tests.dfy | 136 +++ src/JSON/Tutorial.dfy | 286 +++++++ src/JSON/Tutorial.dfy.expect | 4 + src/JSON/Utils/Cursors.dfy | 336 ++++++++ src/JSON/Utils/Lexers.dfy | 53 ++ src/JSON/Utils/Parsers.dfy | 63 ++ src/JSON/Utils/Seq.dfy | 23 + src/JSON/Utils/Str.dfy | 258 ++++++ src/JSON/Utils/Vectors.dfy | 217 +++++ src/JSON/Utils/Views.Writers.dfy | 121 +++ src/JSON/Utils/Views.dfy | 123 +++ src/JSON/Values.dfy | 18 + src/JSON/ZeroCopy/API.dfy | 47 ++ src/JSON/ZeroCopy/Deserializer.dfy | 786 ++++++++++++++++++ src/JSON/ZeroCopy/Serializer.dfy | 343 ++++++++ src/Math.dfy | 5 + src/NonlinearArithmetic/DivMod.dfy | 16 + src/NonlinearArithmetic/Logarithm.dfy | 103 +++ src/Unicode/UnicodeEncodingForm.dfy | 17 + src/Unicode/UnicodeStrings.dfy | 62 ++ src/Unicode/UnicodeStringsWithUnicodeChar.dfy | 86 ++ .../UnicodeStringsWithoutUnicodeChar.dfy | 52 ++ src/Wrappers.dfy | 6 + 40 files changed, 4213 insertions(+), 29 deletions(-) create mode 100644 src/JSON/API.dfy create mode 100644 src/JSON/ConcreteSyntax.Spec.dfy create mode 100644 src/JSON/ConcreteSyntax.SpecProperties.dfy create mode 100644 src/JSON/Deserializer.dfy create mode 100644 src/JSON/Errors.dfy create mode 100644 src/JSON/Grammar.dfy create mode 100644 src/JSON/README.md create mode 100644 src/JSON/Serializer.dfy create mode 100644 src/JSON/Spec.dfy create mode 100644 src/JSON/Tests.dfy create mode 100644 src/JSON/Tutorial.dfy create mode 100644 src/JSON/Tutorial.dfy.expect create mode 100644 src/JSON/Utils/Cursors.dfy create mode 100644 src/JSON/Utils/Lexers.dfy create mode 100644 src/JSON/Utils/Parsers.dfy create mode 100644 src/JSON/Utils/Seq.dfy create mode 100644 src/JSON/Utils/Str.dfy create mode 100644 src/JSON/Utils/Vectors.dfy create mode 100644 src/JSON/Utils/Views.Writers.dfy create mode 100644 src/JSON/Utils/Views.dfy create mode 100644 src/JSON/Values.dfy create mode 100644 src/JSON/ZeroCopy/API.dfy create mode 100644 src/JSON/ZeroCopy/Deserializer.dfy create mode 100644 src/JSON/ZeroCopy/Serializer.dfy create mode 100644 src/NonlinearArithmetic/Logarithm.dfy create mode 100644 src/Unicode/UnicodeStrings.dfy create mode 100644 src/Unicode/UnicodeStringsWithUnicodeChar.dfy create mode 100644 src/Unicode/UnicodeStringsWithoutUnicodeChar.dfy diff --git a/.github/workflows/check-examples-in-docs.yml b/.github/workflows/check-examples-in-docs.yml index 7635cd64..a30d8b46 100644 --- a/.github/workflows/check-examples-in-docs.yml +++ b/.github/workflows/check-examples-in-docs.yml @@ -15,7 +15,7 @@ jobs: strategy: fail-fast: false matrix: - 
version: [ 3.12.0, 3.13.1, 4.0.0 ] + version: [ 3.13.1, 4.0.0 ] os: [ ubuntu-latest ] runs-on: ${{ matrix.os }} diff --git a/.github/workflows/check-format.yml b/.github/workflows/check-format.yml index 8ac260af..56fdcbe5 100644 --- a/.github/workflows/check-format.yml +++ b/.github/workflows/check-format.yml @@ -13,9 +13,9 @@ jobs: - uses: actions/checkout@v3 - name: Install Dafny - uses: dafny-lang/setup-dafny-action@v1 + uses: dafny-lang/setup-dafny-action@v1.6.1 with: - dafny-version: "nightly-2023-02-15-567a5ba" + dafny-version: "4.0.0" - name: Install lit run: pip install lit OutputCheck diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index a4cca8d2..ff144cfa 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -14,12 +14,7 @@ jobs: verification: strategy: matrix: - # nightly-latest to catch anything that breaks these tests in current development - # 2/18/2023 version is the first that supports logging, but it is not supported by setup-dafny-action 1.6.1 - # 3.11.0 supports new CLI but does not support logging - # setup-dafny-action does not yet support 3.13.1 or recent nightly-lates - - version: [ nightly-latest, nightly-2023-02-18-ef4f346, 3.11.0, 3.12.0, 3.13.1, 4.0.0 ] + version: [ nightly-latest ] uses: ./.github/workflows/reusable-tests.yml with: diff --git a/.github/workflows/reusable-tests.yml b/.github/workflows/reusable-tests.yml index 812b0caa..7c78aef0 100644 --- a/.github/workflows/reusable-tests.yml +++ b/.github/workflows/reusable-tests.yml @@ -39,23 +39,14 @@ jobs: - name: Set up JS dependencies run: npm install bignumber.js - - name: Verify Code and Examples without logging - id: nolog - if: inputs.dafny-version == '3.11.0' - run: lit --time-tests -v . - - name: Verify Code and Examples - id: withlog - if: steps.nolog.conclusion == 'skipped' run: | lit --time-tests -v --param 'dafny_params=--log-format trx --log-format csv' . - name: Generate Report - if: always() && steps.withlog.conclusion != 'skipped' - run: find . -name '*.csv' -print0 | xargs -0 --verbose dafny-reportgenerator summarize-csv-results --max-duration-seconds 10 + run: find . 
-name '*.csv' -print0 | xargs -0 --verbose dafny-reportgenerator summarize-csv-results --max-resource-count 40000000

     - uses: actions/upload-artifact@v2 # upload test results
-      if: always() && steps.withlog.conclusion != 'skipped'
       with:
         name: verification-results
         path: '**/TestResults/*.trx'
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 1379bd25..b42bc958 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -13,11 +13,7 @@ jobs:
   verification:
     strategy:
       matrix:
-        # nightly-latest to catch anything that breaks these tests in current development
-        # 2/18/2023 version is the first that supports logging, but it is not supported by dafny-setup-action 1.6.1
-        # 3.11.0 supports new CLI but does not support logging
-        # setup-dafny-action does not yet support 3.13.1 or recent nightly-latest
-        version: [ 3.11.0, 3.12.0, 3.13.1, 4.0.0 ]
+        version: [ 3.13.1, 4.0.0 ]

   uses: ./.github/workflows/reusable-tests.yml
   with:
diff --git a/lit.site.cfg b/lit.site.cfg
index 60666266..67a1314e 100644
--- a/lit.site.cfg
+++ b/lit.site.cfg
@@ -140,6 +140,7 @@
 resolveArgs = ' resolve --use-basename-for-filename ' + standardArguments
 verifyArgs = ' verify --use-basename-for-filename --cores:2 --verification-time-limit:300 ' + standardArguments
 buildArgs = ' build --use-basename-for-filename --cores:2 --verification-time-limit:300 ' + standardArguments
 runArgs = ' run --use-basename-for-filename --cores:2 --verification-time-limit:300 ' + standardArguments
+testArgs = ' test --use-basename-for-filename --cores:2 --verification-time-limit:300 ' + standardArguments

 config.substitutions.append( ('%trargs', '--use-basename-for-filename --cores:2 --verification-time-limit:300' ) )
@@ -148,6 +149,7 @@
 config.substitutions.append( ('%verify', dafnyExecutable + verifyArgs ) )
 config.substitutions.append( ('%translate', dafnyExecutable + ' translate' ) )
 config.substitutions.append( ('%build', dafnyExecutable + buildArgs ) )
 config.substitutions.append( ('%run', dafnyExecutable + runArgs ) )
+config.substitutions.append( ('%test', dafnyExecutable + testArgs ) )

 # config.substitutions.append( ('%repositoryRoot', repositoryRoot) )
 # config.substitutions.append( ('%binaryDir', binaryDir) )
diff --git a/src/BoundedInts.dfy b/src/BoundedInts.dfy
index df143075..33768d5d 100644
--- a/src/BoundedInts.dfy
+++ b/src/BoundedInts.dfy
@@ -2,7 +2,6 @@ module {:options "-functionSyntax:4"} BoundedInts {
   const TWO_TO_THE_0: int := 1
-
   const TWO_TO_THE_1: int := 2
   const TWO_TO_THE_2: int := 4
   const TWO_TO_THE_4: int := 16
@@ -20,12 +19,12 @@ module {:options "-functionSyntax:4"} BoundedInts {
   const TWO_TO_THE_512: int := 0x1_00000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000;

-  newtype uint8 = x: int | 0 <= x < TWO_TO_THE_8
+  newtype uint8 = x: int | 0 <= x < TWO_TO_THE_8
   newtype uint16 = x: int | 0 <= x < TWO_TO_THE_16
   newtype uint32 = x: int | 0 <= x < TWO_TO_THE_32
   newtype uint64 = x: int | 0 <= x < TWO_TO_THE_64

-  newtype int8 = x: int | -0x80 <= x < 0x80
+  newtype int8 = x: int | -0x80 <= x < 0x80
   newtype int16 = x: int | -0x8000 <= x < 0x8000
   newtype int32 = x: int | -0x8000_0000 <= x < 0x8000_0000
   newtype int64 = x: int | -0x8000_0000_0000_0000 <= x < 0x8000_0000_0000_0000
@@ -35,4 +34,26 @@ module {:options "-functionSyntax:4"} BoundedInts {
   newtype nat32 = x: int | 0 <= x < 0x8000_0000
   newtype nat64 = x: int | 0 <= x < 0x8000_0000_0000_0000

+  const UINT8_MAX: uint8 := 0xFF
+  const UINT16_MAX: uint16 := 0xFFFF
+  const UINT32_MAX: uint32 := 0xFFFF_FFFF
+  const UINT64_MAX: uint64 := 0xFFFF_FFFF_FFFF_FFFF
+
+  const INT8_MIN: int8 := -0x80
+  const INT8_MAX: int8 := 0x7F
+  const INT16_MIN: int16 := -0x8000
+  const INT16_MAX: int16 := 0x7FFF
+  const INT32_MIN: int32 := -0x8000_0000
+  const INT32_MAX: int32 := 0x7FFFFFFF
+  const INT64_MIN: int64 := -0x8000_0000_0000_0000
+  const INT64_MAX: int64 := 0x7FFFFFFF_FFFFFFFF
+
+  const NAT8_MAX: nat8 := 0x7F
+  const NAT16_MAX: nat16 := 0x7FFF
+  const NAT32_MAX: nat32 := 0x7FFFFFFF
+  const NAT64_MAX: nat64 := 0x7FFFFFFF_FFFFFFFF
+
+  type byte = uint8
+  type bytes = seq<uint8>
+  newtype opt_byte = c: int | -1 <= c < TWO_TO_THE_8
 }
diff --git a/src/Collections/Sequences/Seq.dfy b/src/Collections/Sequences/Seq.dfy
index 815970b9..a71a487c 100644
--- a/src/Collections/Sequences/Seq.dfy
+++ b/src/Collections/Sequences/Seq.dfy
@@ -90,6 +90,26 @@ module {:options "-functionSyntax:4"} Seq {
    *
    ***********************************************************/

+  /* If a predicate is true at every index of a sequence,
+     it is true for every member of the sequence as a collection.
+     Useful for converting quantifiers between the two forms
+     to satisfy a precondition in the latter form. */
+  lemma IndexingImpliesMembership<T>(p: T -> bool, xs: seq<T>)
+    requires forall i | 0 <= i < |xs| :: p(xs[i])
+    ensures forall t | t in xs :: p(t)
+  {
+  }
+
+  /* If a predicate is true for every member of a sequence as a collection,
+     it is true at every index of the sequence.
+     Useful for converting quantifiers between the two forms
+     to satisfy a precondition in the latter form. */
+  lemma MembershipImpliesIndexing<T>(p: T -> bool, xs: seq<T>)
+    requires forall t | t in xs :: p(t)
+    ensures forall i | 0 <= i < |xs| :: p(xs[i])
+  {
+  }
+
   /* Is true if the sequence xs is a prefix of the sequence ys. */
   ghost predicate IsPrefix<T>(xs: seq<T>, ys: seq<T>)
     ensures IsPrefix(xs, ys) ==> (|xs| <= |ys| && xs == ys[..|xs|])
@@ -603,8 +623,12 @@
     ensures forall i {:trigger result[i]} :: 0 <= i < |xs| ==> result[i] == f(xs[i]);
     reads set i, o | 0 <= i < |xs| && o in f.reads(xs[i]) :: o
   {
-    if |xs| == 0 then []
-    else [f(xs[0])] + Map(f, xs[1..])
+    // This uses a sequence comprehension because it will usually be
+    // more efficient when compiled, allocating the storage for |xs| elements
+    // once instead of creating a chain of |xs| single element concatenations.
+    seq(|xs|, i requires 0 <= i < |xs| && f.requires(xs[i])
+              reads set i,o | 0 <= i < |xs| && o in f.reads(xs[i]) :: o
+      => f(xs[i]))
   }

   /* Applies a function to every element of a sequence, returning a Result value (which is a
@@ -803,6 +827,25 @@
     }
   }

+  /* Optimized implementation of Flatten(Map(f, xs)). */
+  function FlatMap<T, R>(f: (T ~> seq<R>), xs: seq<T>): (result: seq<R>)
+    requires forall i :: 0 <= i < |xs| ==> f.requires(xs[i])
+    reads set i, o | 0 <= i < |xs| && o in f.reads(xs[i]) :: o
+  {
+    Flatten(Map(f, xs))
+  }
+  by method {
+    result := [];
+    ghost var unflattened: seq<seq<R>> := [];
+    for i := |xs| downto 0
+      invariant unflattened == Map(f, xs[i..])
+      invariant result == Flatten(unflattened)
+    {
+      var next := f(xs[i]);
+      unflattened := [next] + unflattened;
+      result := next + result;
+    }
+  }

 /**********************************************************
  *
diff --git a/src/JSON/API.dfy b/src/JSON/API.dfy
new file mode 100644
index 00000000..74f9c89e
--- /dev/null
+++ b/src/JSON/API.dfy
@@ -0,0 +1,40 @@
+// RUN: %verify "%s" --unicode-char:false ../Unicode/UnicodeStringsWithoutUnicodeChar.dfy
+// RUN: %verify "%s" --unicode-char:true ../Unicode/UnicodeStringsWithUnicodeChar.dfy
+
+include "Serializer.dfy"
+include "Deserializer.dfy"
+include "ZeroCopy/API.dfy"
+
+module {:options "-functionSyntax:4"} JSON.API {
+  import opened BoundedInts
+  import opened Errors
+  import Values
+  import Serializer
+  import Deserializer
+  import ZeroCopy = ZeroCopy.API
+
+  function {:opaque} Serialize(js: Values.JSON) : (bs: SerializationResult<seq<byte>>)
+  {
+    var js :- Serializer.JSON(js);
+    ZeroCopy.Serialize(js)
+  }
+
+  method SerializeAlloc(js: Values.JSON) returns (bs: SerializationResult<array<byte>>)
+  {
+    var js :- Serializer.JSON(js);
+    bs := ZeroCopy.SerializeAlloc(js);
+  }
+
+  method SerializeInto(js: Values.JSON, bs: array<byte>) returns (len: SerializationResult<uint32>)
+    modifies bs
+  {
+    var js :- Serializer.JSON(js);
+    len := ZeroCopy.SerializeInto(js, bs);
+  }
+
+  function {:opaque} Deserialize(bs: seq<byte>) : (js: DeserializationResult<Values.JSON>)
+  {
+    var js :- ZeroCopy.Deserialize(bs);
+    Deserializer.JSON(js)
+  }
+}
diff --git a/src/JSON/ConcreteSyntax.Spec.dfy b/src/JSON/ConcreteSyntax.Spec.dfy
new file mode 100644
index 00000000..20510aab
--- /dev/null
+++ b/src/JSON/ConcreteSyntax.Spec.dfy
@@ -0,0 +1,121 @@
+// RUN: %verify "%s"
+
+include "Grammar.dfy"
+
+module {:options "-functionSyntax:4"} JSON.ConcreteSyntax.Spec {
+  import opened BoundedInts
+
+  import Vs = Utils.Views.Core
+  import opened Grammar
+
+  function View(v: Vs.View) : bytes {
+    v.Bytes()
+  }
+
+  function Structural<T>(self: Structural<T>, fT: T -> bytes): bytes {
+    View(self.before) + fT(self.t) + View(self.after)
+  }
+
+  function StructuralView(self: Structural<Vs.View>): bytes {
+    Structural(self, View)
+  }
+
+  function Maybe<T>(self: Maybe<T>, fT: T -> bytes): (bs: bytes)
+    ensures self.Empty? ==> bs == []
+    ensures self.NonEmpty? ==> bs == fT(self.t)
+  {
+    if self.Empty?
then [] else fT(self.t) + } + + function ConcatBytes(ts: seq, fT: T --> bytes) : bytes + requires forall d | d in ts :: fT.requires(d) + { + if |ts| == 0 then [] + else fT(ts[0]) + ConcatBytes(ts[1..], fT) + } + + function Bracketed(self: Bracketed, fDatum: Suffixed --> bytes): bytes + requires forall d | d < self :: fDatum.requires(d) + { + StructuralView(self.l) + + ConcatBytes(self.data, fDatum) + + StructuralView(self.r) + } + + function KeyValue(self: jKeyValue): bytes { + String(self.k) + StructuralView(self.colon) + Value(self.v) + } + + function Frac(self: jfrac): bytes { + View(self.period) + View(self.num) + } + + function Exp(self: jexp): bytes { + View(self.e) + View(self.sign) + View(self.num) + } + + function Number(self: jnumber): bytes { + View(self.minus) + View(self.num) + Maybe(self.frac, Frac) + Maybe(self.exp, Exp) + } + + function String(self: jstring): bytes { + View(self.lq) + View(self.contents) + View(self.rq) + } + + function CommaSuffix(c: Maybe>): bytes { + // BUG(https://github.com/dafny-lang/dafny/issues/2179) + Maybe>(c, StructuralView) + } + + function Member(self: jmember) : bytes { + KeyValue(self.t) + CommaSuffix(self.suffix) + } + + function Item(self: jitem) : bytes { + Value(self.t) + CommaSuffix(self.suffix) + } + + function Object(obj: jobject) : bytes { + Bracketed(obj, (d: jmember) requires d < obj => Member(d)) + } + + function Array(arr: jarray) : bytes { + Bracketed(arr, (d: jitem) requires d < arr => Item(d)) + } + + function Value(self: Value) : bytes { + match self { + case Null(n) => View(n) + case Bool(b) => View(b) + case String(str) => String(str) + case Number(num) => Number(num) + case Object(obj) => Object(obj) + case Array(arr) => Array(arr) + } + } + + lemma UnfoldValueNumber(v: Value) + requires v.Number? + ensures Value(v) == Number(v.num) + { + assert Value(v) == match v { case Number(num) => Number(num) case _ => []}; + } + + lemma UnfoldValueObject(v: Value) + requires v.Object? + ensures Value(v) == Object(v.obj) + { + assert Value(v) == match v { case Object(obj) => Object(obj) case _ => []}; + } + + lemma UnfoldValueArray(v: Value) + requires v.Array? + ensures Value(v) == Array(v.arr) + { + assert Value(v) == match v { case Array(arr) => Array(arr) case _ => []}; + } + + function JSON(js: JSON) : bytes { + Structural(js, Value) + } +} diff --git a/src/JSON/ConcreteSyntax.SpecProperties.dfy b/src/JSON/ConcreteSyntax.SpecProperties.dfy new file mode 100644 index 00000000..4e5889f8 --- /dev/null +++ b/src/JSON/ConcreteSyntax.SpecProperties.dfy @@ -0,0 +1,53 @@ +// RUN: %verify "%s" --unicode-char:false ../Unicode/UnicodeStringsWithoutUnicodeChar.dfy +// RUN: %verify "%s" --unicode-char:true ../Unicode/UnicodeStringsWithUnicodeChar.dfy + +include "ConcreteSyntax.Spec.dfy" + +module {:options "-functionSyntax:4"} JSON.ConcreteSyntax.SpecProperties +// Some useful properties about the functions used in `ConcreteSyntax.Spec`. 
+{ + import opened BoundedInts + + import Vs = Utils.Views.Core + import opened Grammar + import Spec + + lemma Bracketed_Morphism(bracketed: Bracketed) + ensures forall pd0: Suffixed --> bytes, pd1: Suffixed --> bytes + | && (forall d | d < bracketed :: pd0.requires(d)) + && (forall d | d < bracketed :: pd1.requires(d)) + && (forall d | d < bracketed :: pd0(d) == pd1(d)) + :: Spec.Bracketed(bracketed, pd0) == Spec.Bracketed(bracketed, pd1) + { + forall pd0: Suffixed --> bytes, pd1: Suffixed --> bytes + | && (forall d | d < bracketed :: pd0.requires(d)) + && (forall d | d < bracketed :: pd1.requires(d)) + && (forall d | d < bracketed :: pd0(d) == pd1(d)) + { + calc { + Spec.Bracketed(bracketed, pd0); + { ConcatBytes_Morphism(bracketed.data, pd0, pd1); } + Spec.Bracketed(bracketed, pd1); + } + } + } + + lemma {:induction ts} ConcatBytes_Morphism(ts: seq, pt0: T --> bytes, pt1: T --> bytes) + requires forall d | d in ts :: pt0.requires(d) + requires forall d | d in ts :: pt1.requires(d) + requires forall d | d in ts :: pt0(d) == pt1(d) + ensures Spec.ConcatBytes(ts, pt0) == Spec.ConcatBytes(ts, pt1) + {} + + lemma {:induction ts0} ConcatBytes_Linear(ts0: seq, ts1: seq, pt: T --> bytes) + requires forall d | d in ts0 :: pt.requires(d) + requires forall d | d in ts1 :: pt.requires(d) + ensures Spec.ConcatBytes(ts0 + ts1, pt) == Spec.ConcatBytes(ts0, pt) + Spec.ConcatBytes(ts1, pt) + { + if |ts0| == 0 { + assert [] + ts1 == ts1; + } else { + assert ts0 + ts1 == [ts0[0]] + (ts0[1..] + ts1); + } + } +} diff --git a/src/JSON/Deserializer.dfy b/src/JSON/Deserializer.dfy new file mode 100644 index 00000000..bc158985 --- /dev/null +++ b/src/JSON/Deserializer.dfy @@ -0,0 +1,175 @@ +// RUN: %verify "%s" --unicode-char:false ../Unicode/UnicodeStringsWithoutUnicodeChar.dfy +// RUN: %verify "%s" --unicode-char:true ../Unicode/UnicodeStringsWithUnicodeChar.dfy + +/// ================================================== +/// Deserialization from JSON.Grammar to JSON.Values +/// ================================================== +/// +/// For the spec, see ``JSON.Spec.dfy``. 
+ +include "../Collections/Sequences/Seq.dfy" +include "../BoundedInts.dfy" + +include "Utils/Views.dfy" +include "Utils/Vectors.dfy" +include "Errors.dfy" +include "Values.dfy" +include "Grammar.dfy" +include "Spec.dfy" + +module {:options "-functionSyntax:4"} JSON.Deserializer { + import Seq + + import opened Wrappers + import opened BoundedInts + import opened Logarithm + import opened Power + import opened Utils.Str + import opened UnicodeStrings + + import Values + import Spec + import opened Errors + import opened Utils.Vectors + import opened Grammar + import opened Utils.Views.Core + + function Bool(js: Grammar.jbool): bool { + assert js.Bytes() in {Grammar.TRUE, Grammar.FALSE}; + js.At(0) == 't' as byte + } + + function UnsupportedEscape16(code: seq): DeserializationError { + UnsupportedEscape(FromUTF16Checked(code).UnwrapOr("Couldn't decode UTF-16")) + } + + module Uint16StrConversion refines Str.ParametricConversion { + import opened BoundedInts + + type Char = uint16 + } + + const HEX_TABLE_16: map := + map[ + '0' as uint16 := 0, '1' as uint16 := 1, '2' as uint16 := 2, '3' as uint16 := 3, '4' as uint16 := 4, + '5' as uint16 := 5, '6' as uint16 := 6, '7' as uint16 := 7, '8' as uint16 := 8, '9' as uint16 := 9, + 'a' as uint16 := 0xA, 'b' as uint16 := 0xB, 'c' as uint16 := 0xC, 'd' as uint16 := 0xD, 'e' as uint16 := 0xE, 'f' as uint16 := 0xF, + 'A' as uint16 := 0xA, 'B' as uint16 := 0xB, 'C' as uint16 := 0xC, 'D' as uint16 := 0xD, 'E' as uint16 := 0xE, 'F' as uint16 := 0xF + ] + + function ToNat16(str: Uint16StrConversion.String): uint16 + requires |str| <= 4 + requires forall c | c in str :: c in HEX_TABLE_16 + { + Uint16StrConversion.ToNat_bound(str, 16, HEX_TABLE_16); + var hd := Uint16StrConversion.ToNat_any(str, 16, HEX_TABLE_16); + assert hd < 0x1_0000 by { reveal Pow(); } + hd as uint16 + } + + function {:tailrecursion} {:vcs_split_on_every_assert} Unescape(str: seq, start: nat := 0, prefix: seq := []): DeserializationResult> + decreases |str| - start + { // Assumes UTF-16 strings + if start >= |str| then Success(prefix) + else if str[start] == '\\' as uint16 then + if |str| == start + 1 then + Failure(EscapeAtEOS) + else + var c := str[start + 1]; + if c == 'u' as uint16 then + if |str| <= start + 6 then + Failure(EscapeAtEOS) + else + var code := str[start + 2..start + 6]; + if exists c | c in code :: c !in HEX_TABLE_16 then + Failure(UnsupportedEscape16(code)) + else + var hd := ToNat16(code); + Unescape(str, start + 6, prefix + [hd]) + else + var unescaped: uint16 := match c + case 0x22 => 0x22 as uint16 // \" => quotation mark + case 0x5C => 0x5C as uint16 // \\ => reverse solidus + case 0x62 => 0x08 as uint16 // \b => backspace + case 0x66 => 0x0C as uint16 // \f => form feed + case 0x6E => 0x0A as uint16 // \n => line feed + case 0x72 => 0x0D as uint16 // \r => carriage return + case 0x74 => 0x09 as uint16 // \t => tab + case _ => 0 as uint16; + if unescaped as int == 0 then + Failure(UnsupportedEscape16(str[start..start+2])) + else + Unescape(str, start + 2, prefix + [unescaped]) + else + Unescape(str, start + 1, prefix + [str[start]]) + } + + function String(js: Grammar.jstring): DeserializationResult { + var asUtf32 :- FromUTF8Checked(js.contents.Bytes()).ToResult'(DeserializationError.InvalidUnicode); + var asUint16 :- ToUTF16Checked(asUtf32).ToResult'(DeserializationError.InvalidUnicode); + var unescaped :- Unescape(asUint16); + FromUTF16Checked(unescaped).ToResult'(DeserializationError.InvalidUnicode) + } + + module ByteStrConversion refines 
Str.ParametricConversion { + import opened BoundedInts + type Char = byte + } + + const DIGITS := + map[ + '0' as uint8 := 0, '1' as uint8 := 1, '2' as uint8 := 2, '3' as uint8 := 3, + '4' as uint8 := 4, '5' as uint8 := 5, '6' as uint8 := 6, '7' as uint8 := 7, + '8' as uint8 := 8, '9' as uint8 := 9 + ] + + const MINUS := '-' as uint8 + + function ToInt(sign: jsign, n: jnum): DeserializationResult { + var n: int := ByteStrConversion.ToNat_any(n.Bytes(), 10, DIGITS); + Success(if sign.Char?('-') then -n else n) + } + + function Number(js: Grammar.jnumber): DeserializationResult { + var JNumber(minus, num, frac, exp) := js; + var n :- + ToInt(minus, num); + var e10 :- match exp + case Empty => Success(0) + case NonEmpty(JExp(_, sign, num)) => ToInt(sign, num); + match frac + case Empty => Success(Values.Decimal(n, e10)) + case NonEmpty(JFrac(_, num)) => + var pow10 := num.Length() as int; + var frac :- ToInt(minus, num); + Success(Values.Decimal(n * Pow(10, pow10) + frac, e10 - pow10)) + } + + function KeyValue(js: Grammar.jKeyValue): DeserializationResult<(string, Values.JSON)> { + var k :- String(js.k); + var v :- Value(js.v); + Success((k, v)) + } + + function Object(js: Grammar.jobject): DeserializationResult> { + Seq.MapWithResult(d requires d in js.data => KeyValue(d.t), js.data) + } + + function Array(js: Grammar.jarray): DeserializationResult> { + Seq.MapWithResult(d requires d in js.data => Value(d.t), js.data) + } + + function Value(js: Grammar.Value): DeserializationResult { + match js + case Null(_) => Success(Values.Null()) + case Bool(b) => Success(Values.Bool(Bool(b))) + case String(str) => var s :- String(str); Success(Values.String(s)) + case Number(dec) => var n :- Number(dec); Success(Values.Number(n)) + case Object(obj) => var o :- Object(obj); Success(Values.Object(o)) + case Array(arr) => var a :- Array(arr); Success(Values.Array(a)) + } + + function JSON(js: Grammar.JSON): DeserializationResult { + Value(js.t) + } +} diff --git a/src/JSON/Errors.dfy b/src/JSON/Errors.dfy new file mode 100644 index 00000000..891cffc3 --- /dev/null +++ b/src/JSON/Errors.dfy @@ -0,0 +1,61 @@ +// RUN: %verify "%s" + +include "../Wrappers.dfy" +include "../BoundedInts.dfy" +include "Utils/Str.dfy" + +module {:options "-functionSyntax:4"} JSON.Errors { + import Wrappers + import opened BoundedInts + import Utils.Str + + datatype DeserializationError = + | UnterminatedSequence + | UnsupportedEscape(str: string) + | EscapeAtEOS + | EmptyNumber + | ExpectingEOF + | IntOverflow + | ReachedEOF + | ExpectingByte(expected: byte, b: opt_byte) + | ExpectingAnyByte(expected_sq: seq, b: opt_byte) + | InvalidUnicode + { + function ToString() : string { + match this + case UnterminatedSequence => "Unterminated sequence" + case UnsupportedEscape(str) => "Unsupported escape sequence: " + str + case EscapeAtEOS => "Escape character at end of string" + case EmptyNumber => "Number must contain at least one digit" + case ExpectingEOF => "Expecting EOF" + case IntOverflow => "Input length does not fit in a 32-bit counter" + case ReachedEOF => "Reached EOF" + case ExpectingByte(b0, b) => + var c := if b > 0 then "'" + [b as char] + "'" else "EOF"; + "Expecting '" + [b0 as char] + "', read " + c + case ExpectingAnyByte(bs0, b) => + var c := if b > 0 then "'" + [b as char] + "'" else "EOF"; + var c0s := seq(|bs0|, idx requires 0 <= idx < |bs0| => bs0[idx] as char); + "Expecting one of '" + c0s + "', read " + c + case InvalidUnicode => "Invalid Unicode sequence" + } + } + + datatype SerializationError = + | 
OutOfMemory + | IntTooLarge(i: int) + | StringTooLong(s: string) + | InvalidUnicode + { + function ToString() : string { + match this + case OutOfMemory => "Out of memory" + case IntTooLarge(i: int) => "Integer too large: " + Str.OfInt(i) + case StringTooLong(s: string) => "String too long: " + s + case InvalidUnicode => "Invalid Unicode sequence" + } + } + + type SerializationResult<+T> = Wrappers.Result + type DeserializationResult<+T> = Wrappers.Result +} diff --git a/src/JSON/Grammar.dfy b/src/JSON/Grammar.dfy new file mode 100644 index 00000000..c32b8b3c --- /dev/null +++ b/src/JSON/Grammar.dfy @@ -0,0 +1,102 @@ +// RUN: %verify "%s" + +/// ========================================== +/// Low-level JSON grammar (Concrete syntax) +/// ========================================== +/// +/// See ``JSON.Values`` for the high-level interface. + +include "../BoundedInts.dfy" +include "Utils/Views.dfy" + +module {:options "-functionSyntax:4"} JSON.Grammar { + import opened BoundedInts + import opened Utils.Views.Core + + const EMPTY := View.OfBytes([]) + const DOUBLEQUOTE := View.OfBytes(['\"' as byte]) + const PERIOD := View.OfBytes(['.' as byte]) + const E := View.OfBytes(['e' as byte]) + const COLON := View.OfBytes([':' as byte]) + const COMMA := View.OfBytes([',' as byte]) + const LBRACE := View.OfBytes(['{' as byte]) + const RBRACE := View.OfBytes(['}' as byte]) + const LBRACKET := View.OfBytes(['[' as byte]) + const RBRACKET := View.OfBytes([']' as byte]) + const MINUS := View.OfBytes(['-' as byte]) + + type jchar = v: View | v.Length() == 1 witness View.OfBytes(['b' as byte]) + type jquote = v: View | v.Char?('\"') witness DOUBLEQUOTE + type jperiod = v: View | v.Char?('.') witness PERIOD + type je = v: View | v.Char?('e') || v.Char?('E') witness E + type jcolon = v: View | v.Char?(':') witness COLON + type jcomma = v: View | v.Char?(',') witness COMMA + type jlbrace = v: View | v.Char?('{') witness LBRACE + type jrbrace = v: View | v.Char?('}') witness RBRACE + type jlbracket = v: View | v.Char?('[') witness LBRACKET + type jrbracket = v: View | v.Char?(']') witness RBRACKET + type jminus = v: View | v.Char?('-') || v.Empty? witness MINUS + type jsign = v: View | v.Char?('-') || v.Char?('+') || v.Empty? witness EMPTY + + predicate Blank?(b: byte) { b == 0x20 || b == 0x09 || b == 0x0A || b == 0x0D } + ghost predicate Blanks?(v: View) { forall b | b in v.Bytes() :: Blank?(b) } + type jblanks = v: View | Blanks?(v) witness View.OfBytes([]) + + datatype Structural<+T> = + Structural(before: jblanks, t: T, after: jblanks) + + datatype Maybe<+T> = Empty() | NonEmpty(t: T) + + datatype Suffixed<+T, +S> = + Suffixed(t: T, suffix: Maybe>) + + type SuffixedSequence<+D, +S> = s: seq> | NoTrailingSuffix(s) + ghost predicate NoTrailingSuffix(s: seq>) { + forall idx | 0 <= idx < |s| :: s[idx].suffix.Empty? <==> idx == |s| - 1 + } + + datatype Bracketed<+L, +D, +S, +R> = + Bracketed(l: Structural, data: SuffixedSequence, r: Structural) + + const NULL: bytes := ['n' as byte, 'u' as byte, 'l' as byte, 'l' as byte] + const TRUE: bytes := ['t' as byte, 'r' as byte, 'u' as byte, 'e' as byte] + const FALSE: bytes := ['f' as byte, 'a' as byte, 'l' as byte, 's' as byte, 'e' as byte] + + ghost predicate Null?(v: View) { v.Bytes() == NULL } + ghost predicate Bool?(v: View) { v.Bytes() in {TRUE, FALSE} } + predicate Digit?(b: byte) { '0' as byte <= b <= '9' as byte } + ghost predicate Digits?(v: View) { forall b | b in v.Bytes() :: Digit?(b) } + ghost predicate Num?(v: View) { Digits?(v) && !v.Empty? 
 }
+  ghost predicate Int?(v: View) { v.Char?('0') || (Num?(v) && v.At(0) != '0' as byte) }
+
+  type jnull = v: View | Null?(v) witness View.OfBytes(NULL)
+  type jbool = v: View | Bool?(v) witness View.OfBytes(TRUE)
+  type jdigits = v: View | Digits?(v) witness View.OfBytes([])
+  type jnum = v: View | Num?(v) witness View.OfBytes(['0' as byte])
+  type jint = v: View | Int?(v) witness View.OfBytes(['0' as byte])
+  type jstr = v: View | true witness View.OfBytes([]) // TODO: Enforce quoting and escaping
+  datatype jstring = JString(lq: jquote, contents: jstr, rq: jquote)
+  datatype jKeyValue = KeyValue(k: jstring, colon: Structural<jcolon>, v: Value)
+
+  // TODO enforce no leading space before closing bracket to disambiguate WS { WS WS } WS
+  type jobject = Bracketed<jlbrace, jKeyValue, jcomma, jrbrace>
+  type jarray = Bracketed<jlbracket, Value, jcomma, jrbracket>
+  type jmembers = SuffixedSequence<jKeyValue, jcomma>
+  type jmember = Suffixed<jKeyValue, jcomma>
+  type jitems = SuffixedSequence<Value, jcomma>
+  type jitem = Suffixed<Value, jcomma>
+
+  datatype jfrac = JFrac(period: jperiod, num: jnum)
+  datatype jexp = JExp(e: je, sign: jsign, num: jnum)
+  datatype jnumber = JNumber(minus: jminus, num: jnum, frac: Maybe<jfrac>, exp: Maybe<jexp>)
+
+  datatype Value =
+    | Null(n: jnull)
+    | Bool(b: jbool)
+    | String(str: jstring)
+    | Number(num: jnumber)
+    | Object(obj: jobject)
+    | Array(arr: jarray)
+
+  type JSON = Structural<Value>
+}
diff --git a/src/JSON/README.md b/src/JSON/README.md
new file mode 100644
index 00000000..e81467da
--- /dev/null
+++ b/src/JSON/README.md
@@ -0,0 +1,122 @@
# JSON

JSON serialization and deserialization in Dafny, as described in [RFC 8259](https://datatracker.ietf.org/doc/html/rfc8259).

This library provides two APIs:

- A low-level (zero-copy) API that is efficient, verified (see [What is verified?](#what-is-verified) below for details) and allows incremental changes (re-serialization is much faster for unchanged objects), but is more cumbersome to use. This API operates on concrete syntax trees that capture details of punctuation and blanks and represent strings using unescaped, undecoded utf-8 byte sequences.

- A high-level API built on top of the previous one. This API is more convenient to use, but it is unverified and less efficient. It produces abstract datatype value trees that represent strings using Dafny's built-in `string` type.

Both APIs provide functions for serialization (JSON values to utf-8 bytes) and deserialization (utf-8 bytes to JSON values). See the Unicode module in `../Unicode` if you need to read or produce JSON text in other encodings.

## Library usage

The tutorial in [`Tutorial.dfy`](Tutorial.dfy) shows how to import the library, call the high-level API, and use the low-level API to make localized modifications to a partial parse of a JSON AST.
The main entry points are `API.Serialize` (to go from a JSON value to utf-8 bytes), and `API.Deserialize` (for the reverse operation): + + +```dafny +include "src/JSON/API.dfy" +// Or "..WithUnicodeChar.dfy" if you are using --unicode-char:true +include "src/Unicode/UnicodeStringsWithoutUnicodeChar.dfy" + +import JSON.API +import opened UnicodeStrings +import opened JSON.Values +import opened Wrappers + +method Test(){ + var CITY_JS :- expect ToUTF8Checked(@"{""Cities"": [{ + ""Name"": ""Boston"", + ""Founded"": 1630, + ""Population"": 689386, + ""Area (km2)"": 4584.2}]}"); + + var CITY_VALUE := Object([("Cities", Array([ + Object([ + ("Name", String("Boston")), + ("Founded", Number(Int(1630))), + ("Population", Number(Int(689386))), + ("Area (km2)", Number(Decimal(45842, -1)))])]))]); + + expect API.Deserialize(CITY_JS) == Success(CITY_VALUE); + + var EXPECTED :- expect ToUTF8Checked( + @"{""Cities"":[{""Name"":""Boston"",""Founded"":1630,""Population"":689386,""Area (km2)"":45842e-1}]}" + ); + + expect API.Serialize(CITY_VALUE) == Success(EXPECTED); +} +``` + +## What is verified? + +The zero-copy serializer is proved sound and complete against a simple functional specification found in [`ConcreteSyntax.Spec.dfy`](./ConcreteSyntax.Spec.dfy). The low-level deserializer is proven sound, but not complete, against that same specification: if a value is deserialized successfully, then re-serializing recovers the original bytestring. + +### Useful submodules + +Most of the contents of the `Utils/` directory are not specific to JSON. They are not sufficiently documented to be moved to the main library yet, but they provide useful functionality: + +- [`Views.dfy`](Utils/Views.dfy) and [`Views.Writers.dfy`](Utils/Views.Writers.dfy) implement slices over byte strings whose bounds are representable as `int32` native integers. Adjacent slices can be merged in time `O(1)` (but see [“Caveats”](#caveats) below). To be part of the main library, it would have to be generalized to slices over any sequences and to indices represented using arbitrary integers (though Dafny does not currently have a way to have modularity over int width). + +- [`Cursors.dfy`](Utils/Cursors.dfy) implements slices augmented with an inner pointer tracking a position. Functions are provided to skip over a specific byte or bytes, over bytes matching a given predicate, or over bytes recognized by a given lexer, and ghost functions are provided to express the fact that a cursor was obtained by trimming a prefix of another cursor. Cursors are used in the implementation of the parser: after skipping over a given construct, a cursor can be split into a view (capturing the part of the string matching the construct) and a new cursor starting at the previous position, with its pointer reset to the beginning of the string. To become part of the main library, cursors would need to be generalized in the same way as views above. + +- [`Lexers.dfy`](Utils/Lexers.dfy) contains a state machine that recognizes quoted strings and skips over backslash-escaped quotes. State-machine-based lexers are used in `Cursors.dfy` to skip over complex constructs (like quoted strings), though in practice the low-level parser uses a custom function-by-method to speed up this specific case. + +- [`Unicode.dfy`](Utils/Unicode.dfy) implements conversions between UTF-8 and UTF-16. These conversions would have to be more efficient to be included in the main library. 
[This page](http://bjoern.hoehrmann.de/utf-8/decoder/dfa/) provides reasonably efficient DFAs for this task; it would be an interesting project to port them to Dafny and verify them against the Unicode library in [`../Unicode/`](../Unicode/).

- [`Vectors.dfy`](Utils/Vectors.dfy) implements resizable arrays (the C++ standard library calls them vectors). This module is reasonably close to being generally useful, but the way it tracks references needs to be revised if it is to be used for storing reference types. It would also be good to generalize the index type that it uses (at the moment, it only accepts `int32` indices).

- [`Str.dfy`](Utils/Str.dfy) implements functions to convert between (big-endian) strings and numbers. This module shares similarities with [LittleEndianNat.dfy](../Collections/Sequences/LittleEndianNat.dfy) (except for the digit order), although `LittleEndianNat.dfy` makes the base a module parameter instead of a function argument.

- [`Parsers.dfy`](Utils/Parsers.dfy) has definitions of well-formedness for parsers (stating that they must consume part of their input). This file would have to be significantly expanded to create a composable library of parsing combinators to be useful as part of the main library.

## Caveats

- Not all parts of this library are verified. In particular, the high-level API is not verified (the most complex part of it is the string escaping code).

- The optimization used to merge contiguous slices can have poor performance in corner cases. Specifically, the function `View.Adjacent`, which checks whether two slices can be concatenated efficiently, uses Dafny's value equality, not reference equality, to check whether both slices point into the same string. The value-equality check is `O(1)` when the strings are physically equal, but may take linear time otherwise.

  The worst-case behavior happens when serializing a low-level JSON object in which the `View`s all point to (physically) different strings with equal contents. This cannot happen when modifying the result of a previous parse (in that case, all views will share the same underlying storage, and the comparison will be fast).

  This issue could be fixed by using reference equality, but doing so requires defining an external function to expose (a restricted form of) reference equality on values (exposing reference equality in general on values is not sound). A good description of the relevant technique can be found in chapter 9 of “Verasco: a Formally Verified C Static Analyzer” by Jacques-Henri Jourdan.
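  As a rough sketch of that technique (hypothetical code, not something this library ships): physical equality can be exposed as an external function whose only trusted property is the sound direction, namely that physical equality implies value equality.

  ```dafny
  // Hypothetical sketch: an externally implemented, restricted reference-equality
  // test. The postcondition is the only axiom about it, and it is sound: if the
  // runtime reports the two strings as physically equal, they are equal as values.
  method {:extern} PhysicalEq(s1: string, s2: string) returns (b: bool)
    ensures b ==> s1 == s2
  ```

  A `true` answer lets the caller conclude `s1 == s2` in constant time; on `false`, the caller falls back to the ordinary value comparison.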
### Implementation notes

- The division into a low-level and a high-level API achieves two desirable outcomes:

  - Programs that modify a small part of a JSON object can be implemented much more efficiently than with a traditional JSON AST ↔ bytes API, since unmodified parts of the decoded JSON object can be reserialized very quickly.

  - Specs and proofs for the low-level API are simple because the low-level API captures all encoding details (including the amount of whitespace used), which makes serialization and deserialization uniquely determined. As a result, serialization is really the (left) inverse of deserialization.

- Most of the low-level API uses bounded integers (`int32`), so the library cannot deserialize more than 4GB of JSON text.

- To support in-place updates, the low-level API supports serializing into an existing buffer. Instead of repeatedly checking for overflows, the implementation uses saturating addition and checks for overflow only once, after writing the whole object. This optimization was inspired by Go's error-handling style, described in [Errors are values](https://go.dev/blog/errors-are-values).

- Dafny does not support floating point numbers (and the JSON spec does not mandate a specific precision), so the library uses a pair `(n, e10)` instead, representing the number `n * 10^e10`.

- Workarounds for known Dafny bugs are indicated by the keyword [`BUG`](https://github.com/dafny-lang/libraries/search?q=BUG) in the sources.

## TODOs and contribution opportunities

### Verification

The high-level API is unverified. Unlike the low-level API, general JSON serialization and deserialization are not inverses of each other, because deserializing an object and re-serializing it may add or remove whitespace or change the way strings or numbers are represented (e.g. the string `"a"` may be serialized as `"\u0061"` and the number 1 as 1.0e0, 10e-1, etc.). As a result, an executable serializer is not a sufficient specification of the format. Techniques to write specifications and verify serializers and deserializers in such cases are described in e.g. [Narcissus: correct-by-construction derivation of decoders and encoders from binary formats](https://dl.acm.org/doi/abs/10.1145/3341686).

Note that it would be possible (and a great start) to show that the deserializer supports everything that the serializer may produce. This would not require a generalized specification (one that allows variation in serialization choices) of the format.

Beyond this large task, some small lemmas that should hold but were not needed by this codebase are commented out and marked as `// TODO`.

### Performance problems in the high-level API

The high-level API does string encoding and escaping as two separate passes that both construct sequences. Adding `by method` blocks and fusing these two passes would provide significant gains.

### Performance opportunities in the low-level API

The low-level API is written in the pure subset of Dafny. As a result, it creates lots of short-lived records (especially cursors, defined in [`Cursors.dfy`](Utils/Cursors.dfy)). A linear version of Dafny would be able to mutate these structures in place. A traditional reuse analysis (see e.g. Schulte & Grieskamp, “Generating Efficient Portable Code for a Strict Applicative Language”, or any of the OPAL papers) would achieve the same result. Large gains could already be achieved simply by improving the compilation of records in the C# backend (e.g. using structs for small records and removing unnecessary interfaces).

The optimization to merge contiguous strings could be made to use physical equality. This would provide little gain in the typical case, but would eliminate the worst case described in [“Caveats”](#caveats) above.

The low-level serializer does not re-serialize unchanged parts of an AST, but it still re-writes the whole object. This could be eliminated when changes to the object do not change its overall length.
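To make the `(n, e10)` number representation from the implementation notes concrete, here is a short sketch (not part of the library sources; it assumes only the `Decimal` and `Int` constructors defined in `Values.dfy`):

```dafny
include "Values.dfy"

module DecimalExample {
  import opened JSON.Values

  method Demo() {
    // Dafny has no floating-point literals, so 4584.2 is encoded as 45842 * 10^-1:
    var area := Decimal(45842, -1);
    // Int(n) is shorthand for Decimal(n, 0):
    assert Int(1630) == Decimal(1630, 0);
  }
}
```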
diff --git a/src/JSON/Serializer.dfy b/src/JSON/Serializer.dfy new file mode 100644 index 00000000..1f936deb --- /dev/null +++ b/src/JSON/Serializer.dfy @@ -0,0 +1,153 @@ +// RUN: %verify "%s" --unicode-char:false ../Unicode/UnicodeStringsWithoutUnicodeChar.dfy +// RUN: %verify "%s" --unicode-char:true ../Unicode/UnicodeStringsWithUnicodeChar.dfy + +/// ================================================ +/// Serialization from JSON.Values to JSON.Grammar +/// ================================================ +/// +/// For the spec, see ``JSON.Spec.dfy``. + +include "../Collections/Sequences/Seq.dfy" +include "../BoundedInts.dfy" +include "../Math.dfy" + +include "Utils/Views.dfy" +include "Utils/Vectors.dfy" +include "Errors.dfy" +include "Values.dfy" +include "Grammar.dfy" +include "Spec.dfy" + +module {:options "-functionSyntax:4"} JSON.Serializer { + import Seq + import Math + import opened Wrappers + import opened BoundedInts + import opened Utils.Str + + import Values + import Spec + import opened Errors + import opened Utils.Vectors + import opened Grammar + import opened Utils.Views.Core + + type Result<+T> = SerializationResult + + type bytes = seq + type bytes32 = bs: bytes | |bs| < TWO_TO_THE_32 + type string32 = s: string | |s| < TWO_TO_THE_32 + + function Bool(b: bool): jbool { + View.OfBytes(if b then TRUE else FALSE) + } + + function CheckLength(s: seq, err: SerializationError): Outcome { + Need(|s| < TWO_TO_THE_32, err) + } + + function String(str: string): Result { + var bs :- Spec.EscapeToUTF8(str); + :- CheckLength(bs, StringTooLong(str)); + Success(Grammar.JString(Grammar.DOUBLEQUOTE, View.OfBytes(bs), Grammar.DOUBLEQUOTE)) + } + + function Sign(n: int): jminus { + View.OfBytes(if n < 0 then ['-' as byte] else []) + } + + module ByteStrConversion refines Str.ParametricConversion { + import opened BoundedInts + type Char = uint8 + } + + const DIGITS := [ + '0' as uint8, '1' as uint8, '2' as uint8, '3' as uint8, + '4' as uint8, '5' as uint8, '6' as uint8, '7' as uint8, + '8' as uint8, '9' as uint8 + ] + + const MINUS := '-' as uint8 + + function Int'(n: int) : (str: bytes) + ensures forall c | c in str :: c in DIGITS || c == MINUS + { + ByteStrConversion.OfInt_any(n, DIGITS, MINUS) + } + + function Int(n: int) : Result { + var bs := Int'(n); + :- CheckLength(bs, IntTooLarge(n)); + Success(View.OfBytes(bs)) + } + + function Number(dec: Values.Decimal): Result { + var minus: jminus := Sign(dec.n); + var num: jnum :- Int(Math.Abs(dec.n)); + var frac: Maybe := Empty(); + var exp: Maybe :- + if dec.e10 == 0 then + Success(Empty()) + else + var e: je := View.OfBytes(['e' as byte]); + var sign: jsign := Sign(dec.e10); + var num: jnum :- Int(Math.Abs(dec.e10)); + Success(NonEmpty(JExp(e, sign, num))); + Success(JNumber(minus, num, Empty, exp)) + } + + function MkStructural(v: T): Structural { + Structural(EMPTY, v, EMPTY) + } + + const COLON: Structural := + MkStructural(Grammar.COLON) + + function KeyValue(kv: (string, Values.JSON)): Result { + var k :- String(kv.0); + var v :- Value(kv.1); + Success(Grammar.KeyValue(k, COLON, v)) + } + + function MkSuffixedSequence(ds: seq, suffix: Structural, start: nat := 0) + : SuffixedSequence + decreases |ds| - start + { + if start >= |ds| then [] + else if start == |ds| - 1 then [Suffixed(ds[start], Empty)] + else [Suffixed(ds[start], NonEmpty(suffix))] + MkSuffixedSequence(ds, suffix, start + 1) + } + + const COMMA: Structural := + MkStructural(Grammar.COMMA) + + function Object(obj: seq<(string, Values.JSON)>): Result { + var items :- 
Seq.MapWithResult(v requires v in obj => KeyValue(v), obj); + Success(Bracketed(MkStructural(LBRACE), + MkSuffixedSequence(items, COMMA), + MkStructural(RBRACE))) + } + + + function Array(arr: seq): Result { + var items :- Seq.MapWithResult(v requires v in arr => Value(v), arr); + Success(Bracketed(MkStructural(LBRACKET), + MkSuffixedSequence(items, COMMA), + MkStructural(RBRACKET))) + } + + function Value(js: Values.JSON): Result { + match js + case Null => Success(Grammar.Null(View.OfBytes(NULL))) + case Bool(b) => Success(Grammar.Bool(Bool(b))) + case String(str) => var s :- String(str); Success(Grammar.String(s)) + case Number(dec) => var n :- Number(dec); Success(Grammar.Number(n)) + case Object(obj) => var o :- Object(obj); Success(Grammar.Object(o)) + case Array(arr) => var a :- Array(arr); Success(Grammar.Array(a)) + } + + function JSON(js: Values.JSON): Result { + var val :- Value(js); + Success(MkStructural(val)) + } +} diff --git a/src/JSON/Spec.dfy b/src/JSON/Spec.dfy new file mode 100644 index 00000000..8a76ea1e --- /dev/null +++ b/src/JSON/Spec.dfy @@ -0,0 +1,148 @@ +// RUN: %verify "%s" --unicode-char:false ../Unicode/UnicodeStringsWithoutUnicodeChar.dfy +// RUN: %verify "%s" --unicode-char:true ../Unicode/UnicodeStringsWithUnicodeChar.dfy + +/// ================================================ +/// Serialization from Values.JSON to bytes (Spec) +/// ================================================ +/// +/// This is the high-level spec. For the implementation, see +/// ``JSON.Serializer.dfy``. + +include "../BoundedInts.dfy" +include "../NonlinearArithmetic/Logarithm.dfy" +include "../Collections/Sequences/Seq.dfy" +/// include one of these two files externally as well: +/// "../Unicode/UnicodeStringsWithoutUnicodeChar.dfy" +/// "../Unicode/UnicodeStringsWithUnicodeChar.dfy" + +include "Values.dfy" +include "Errors.dfy" +include "Utils/Str.dfy" + +module {:options "-functionSyntax:4"} JSON.Spec { + import opened BoundedInts + + import opened Utils.Str + import opened Values + import opened Wrappers + import opened Errors + import opened UnicodeStrings + import opened Logarithm + + import Seq + + type bytes = seq + type Result<+T> = SerializationResult + + function EscapeUnicode(c: uint16): seq { + var sStr := Str.OfNat(c as nat, 16); + Seq.MembershipImpliesIndexing(c => 0 <= c as int < 128, sStr); + var s := ASCIIToUTF16(sStr); + assert |s| <= 4 by { + assert c as nat <= 0xFFFF; + assert Log(16, c as nat) <= Log(16, 0xFFFF) by { + LemmaLogIsOrdered(16, c as nat, 0xFFFF); + } + assert Log(16, 0xFFFF) == 3 by { reveal Log(); } + } + s + seq(4 - |s|, _ => ' ' as uint16) + } + + function Escape(str: seq, start: nat := 0): seq + decreases |str| - start + { + if start >= |str| then [] + else + (match str[start] + case 0x22 => ASCIIToUTF16("\\\"") // quotation mark + case 0x5C => ASCIIToUTF16("\\\\") // reverse solidus + case 0x08 => ASCIIToUTF16("\\b") // backspace + case 0x0C => ASCIIToUTF16("\\f") // form feed + case 0x0A => ASCIIToUTF16("\\n") // line feed + case 0x0D => ASCIIToUTF16("\\r") // carriage return + case 0x09 => ASCIIToUTF16("\\t") // tab + case c => + if c < 0x001F then ASCIIToUTF16("\\u") + EscapeUnicode(c) + else [str[start]]) + + Escape(str, start + 1) + } + + function EscapeToUTF8(str: string, start: nat := 0): Result { + var utf16 :- ToUTF16Checked(str).ToResult'(SerializationError.InvalidUnicode); + var escaped := Escape(utf16); + var utf32 :- FromUTF16Checked(escaped).ToResult'(SerializationError.InvalidUnicode); + 
ToUTF8Checked(utf32).ToResult'(SerializationError.InvalidUnicode) + } + + // Can fail due to invalid UTF-16 sequences in a string when --unicode-char is off + function String(str: string): Result { + var inBytes :- EscapeToUTF8(str); + Success(ASCIIToUTF8("\"") + inBytes + ASCIIToUTF8("\"")) + } + + lemma OfIntOnlyASCII(n: int) + ensures + && var s := Str.OfInt(n); + && forall i | 0 <= i < |s| :: 0 <= s[i] as int < 128 + { + var s := Str.OfInt(n); + forall i | 0 <= i < |s| ensures 0 <= s[i] as int < 128 { + if i == 0 { + } else { + var isHexDigit := c => c in HEX_DIGITS; + assert CharStrConversion.NumberStr(s, '-', isHexDigit); + assert isHexDigit(s[i]); + } + } + } + + function IntToBytes(n: int): bytes { + var s := Str.OfInt(n); + OfIntOnlyASCII(n); + ASCIIToUTF8(s) + } + + function Number(dec: Decimal): Result { + Success(IntToBytes(dec.n) + + (if dec.e10 == 0 then [] + else ASCIIToUTF8("e") + IntToBytes(dec.e10))) + } + + function KeyValue(kv: (string, JSON)): Result { + var key :- String(kv.0); + var value :- JSON(kv.1); + Success(key + ASCIIToUTF8(":") + value) + } + + function Join(sep: bytes, items: seq>): Result { + if |items| == 0 then + Success([]) + else + var first :- items[0]; + if |items| == 1 then + Success(first) + else + var rest :- Join(sep, items[1..]); + Success(first + sep + rest) + } + + function Object(obj: seq<(string, JSON)>): Result { + var middle :- Join(ASCIIToUTF8(","), seq(|obj|, i requires 0 <= i < |obj| => KeyValue(obj[i]))); + Success(ASCIIToUTF8("{") + middle + ASCIIToUTF8("}")) + } + + function Array(arr: seq): Result { + var middle :- Join(ASCIIToUTF8(","), seq(|arr|, i requires 0 <= i < |arr| => JSON(arr[i]))); + Success(ASCIIToUTF8("[") + middle + ASCIIToUTF8("]")) + } + + function JSON(js: JSON): Result { + match js + case Null => Success(ASCIIToUTF8("null")) + case Bool(b) => Success(if b then ASCIIToUTF8("true") else ASCIIToUTF8("false")) + case String(str) => String(str) + case Number(dec) => Number(dec) + case Object(obj) => Object(obj) + case Array(arr) => Array(arr) + } +} diff --git a/src/JSON/Tests.dfy b/src/JSON/Tests.dfy new file mode 100644 index 00000000..8051e75f --- /dev/null +++ b/src/JSON/Tests.dfy @@ -0,0 +1,136 @@ +// RUN: %run "%s" --unicode-char:false --input ../Unicode/UnicodeStringsWithoutUnicodeChar.dfy +// RUN: %run "%s" --unicode-char:true --input ../Unicode/UnicodeStringsWithUnicodeChar.dfy + +// TODO: Test for Java and other target languages too + +include "Errors.dfy" +include "API.dfy" +include "ZeroCopy/API.dfy" +include "../Collections/Sequences/Seq.dfy" + +abstract module JSON.Tests.Wrapper { + import Utils.Str + import opened BoundedInts + import opened UnicodeStrings + import opened Errors + + type JSON + method Deserialize(bs: bytes) returns (js: DeserializationResult) + method SpecSerialize(js: JSON) returns (bs: SerializationResult) + method Serialize(js: JSON) returns (bs: SerializationResult) + method Check(bs: bytes, js: JSON, bs': bytes, sbs': bytes, js': JSON) + + method TestBytestring(bs: bytes, indent: string) { + var js :- expect Deserialize(bs); + // print indent, "=> ", js, "\n"; + var bs' :- expect Serialize(js); + print indent, "=> ", FromUTF8Checked(bs'), "\n"; + var sbs' :- expect SpecSerialize(js); + print indent, "=> ", FromUTF8Checked(sbs'), "\n"; + var js' :- expect Deserialize(bs'); + Check(bs, js, bs', sbs', js'); + } + + method TestString(str: string, indent: string) { + var bs :- expect ToUTF8Checked(str); + TestBytestring(bs, indent); + } + + method TestStrings(vectors: seq) { + for i := 0 
to |vectors| { + var input := vectors[i]; + var idx := Str.OfInt(i); + var indent := seq(|idx| + 1, _ => ' '); + print "[", idx, "]: ", input, "\n"; + TestString(input, indent); + print "\n"; + } + } +} + +module JSON.Tests.ZeroCopyWrapper refines Wrapper { + import opened Wrappers + import Grammar + import ZeroCopy.API + import ConcreteSyntax.Spec + + type JSON = Grammar.JSON + + method Deserialize(bs: bytes) returns (js: DeserializationResult) { + js := API.Deserialize(bs); + } + + method Serialize(js: JSON) returns (bs: SerializationResult) { + // print "Count: ", wr.chain.Count(), "\n"; + bs := API.Serialize(js); + } + + method SpecSerialize(js: JSON) returns (bs: SerializationResult) { + bs := Success(Spec.JSON(js)); + } + + method Check(bs: bytes, js: JSON, bs': bytes, sbs': bytes, js': JSON) { + expect sbs' == bs' == bs; + expect js' == js; // This doesn't hold in general, since the views could be different + } +} + +module JSON.Tests.AbstractSyntaxWrapper refines Wrapper { + import opened Wrappers + import Grammar + import API + import Values + import Spec + + type JSON = Values.JSON + + method Deserialize(bs: bytes) returns (js: DeserializationResult) { + js := API.Deserialize(bs); + } + + method Serialize(js: JSON) returns (bs: SerializationResult) { + bs := API.Serialize(js); + } + + method SpecSerialize(js: JSON) returns (bs: SerializationResult) { + bs := Spec.JSON(js); + } + + method Check(bs: bytes, js: JSON, bs': bytes, sbs': bytes, js': JSON) { + expect sbs' == bs'; // Serializing changes number representations, escapes, and spacing, so no == bs + expect js' == js; + } +} + +module JSON.Tests { + const VECTORS := [ + "true", + "false", + "null", + "\"\"", + "\"string\"", + "[\"A\"]", + "-123.456e-18", + "[]", + "[ ]", + "[1]", + "[1, 2]", + "{}", + "{ \"a\": 1 }", + "{ \"a\": \"b\" }", + "{ \"some\" : \"string\", \"and\": [ \"a number\", -123.456e-18 ] }", + + " true ", + " { } ", + "\"\\t\\r\\n\\f\"", + "\"∀ABC // \\u2200ABC\"", // ∀ + "\"🇫🇷 // \\u1f1eb\\u1f1EBABC\"", // 🇫🇷 + + "[true, false , null, { \"some\" : \"string\", \"and\": [ \"a number\", -123.456e-18 ] } ] " + ]; + + method Main() { + ZeroCopyWrapper.TestStrings(VECTORS); + AbstractSyntaxWrapper.TestStrings(VECTORS); + } +} diff --git a/src/JSON/Tutorial.dfy b/src/JSON/Tutorial.dfy new file mode 100644 index 00000000..04800fd5 --- /dev/null +++ b/src/JSON/Tutorial.dfy @@ -0,0 +1,286 @@ +// RUN: %run "%s" --unicode-char:false --input ../Unicode/UnicodeStringsWithoutUnicodeChar.dfy +// RUN: %run "%s" --unicode-char:true --input ../Unicode/UnicodeStringsWithUnicodeChar.dfy + +/// # Using the JSON library + +include "API.dfy" +include "ZeroCopy/API.dfy" + +/// This library offers two APIs: a high-level one (giving abstract value trees +/// with no concrete syntactic details) and a low-level one (including all +/// information about blanks, separator positions, character escapes, etc.). +/// +/// ## High-level API (JSON values) + +module {:options "-functionSyntax:4"} JSON.Examples.AbstractSyntax { + import API + import opened Values + import opened Wrappers + +/// Note that you will need to include one of the two files that defines UnicodeStrings +/// according to whether you are using --unicode-char:false or --unicode-char:true. +/// See ../../Unicode/UnicodeStrings.dfy for more details. + + import opened UnicodeStrings + +/// The high-level API works with fairly simple datatype values that contain native Dafny +/// strings: + + method {:test} Test() { + +/// Use `API.Deserialize` to deserialize a byte string. 
+///
+/// For example, here is how to decode the JSON text `"[true]"`. (We need to
+/// convert from Dafny's native strings to byte strings because Dafny does not
+/// have syntax for byte strings; in a real application, we would be reading and
+/// writing raw bytes directly from disk or from the network instead).
+
+    var SIMPLE_JS :- expect ToUTF8Checked("[true]");
+    var SIMPLE_VALUE := Array([Bool(true)]);
+    expect API.Deserialize(SIMPLE_JS) == Success(SIMPLE_VALUE);
+
+/// Here is a larger object, written using a verbatim string (introduced with
+/// `@"`; in verbatim strings, `""` represents a single double-quote character):
+
+    var CITIES_JS :- expect ToUTF8Checked(@"{
+      ""Cities"": [
+        {
+          ""Name"": ""Boston"",
+          ""Founded"": 1630,
+          ""Population"": 689386,
+          ""Area (km2)"": 4584.2
+        }, {
+          ""Name"": ""Rome"",
+          ""Founded"": -753,
+          ""Population"": 2.873e6,
+          ""Area (km2)"": 1285
+        }, {
+          ""Name"": ""Paris"",
+          ""Founded"": null,
+          ""Population"": 2.161e6,
+          ""Area (km2)"": 2383.5
+        }
+      ]
+    }");
+    var CITIES_VALUE :=
+      Object([
+        ("Cities", Array([
+          Object([
+            ("Name", String("Boston")),
+            ("Founded", Number(Int(1630))),
+            ("Population", Number(Int(689386))),
+            ("Area (km2)", Number(Decimal(45842, -1)))
+          ]),
+          Object([
+            ("Name", String("Rome")),
+            ("Founded", Number(Int(-753))),
+            ("Population", Number(Decimal(2873, 3))),
+            ("Area (km2)", Number(Int(1285)))
+          ]),
+          Object([
+            ("Name", String("Paris")),
+            ("Founded", Null),
+            ("Population", Number(Decimal(2161, 3))),
+            ("Area (km2)", Number(Decimal(23835, -1)))
+          ])
+        ]))
+      ]);
+    expect API.Deserialize(CITIES_JS) == Success(CITIES_VALUE);
+
+/// Serialization works similarly, with `API.Serialize`. For this first example
+/// the generated string matches what we started with exactly:
+
+    expect API.Serialize(SIMPLE_VALUE) == Success(SIMPLE_JS);
+
+/// For more complex objects, the generated layout may not be exactly the same;
+/// note in particular how the representation of numbers and the whitespace
+/// have changed.
+
+    var EXPECTED :- expect ToUTF8Checked(
+      @"{""Cities"":[{""Name"":""Boston"",""Founded"":1630,""Population"":689386,""Area (km2)"":45842e-1},{""Name"":""Rome"",""Founded"":-753,""Population"":2873e3,""Area (km2)"":1285},{""Name"":""Paris"",""Founded"":null,""Population"":2161e3,""Area (km2)"":23835e-1}]}"
+    );
+
+    expect API.Serialize(CITIES_VALUE) == Success(EXPECTED);
+
+/// Additional methods are defined in `API.dfy` to serialize an object into an
+/// existing buffer or into an array.
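+///
+/// As a quick sketch of the allocating variant (this assumes the high-level
+/// API mirrors the zero-copy `SerializeAlloc` entry point defined later in
+/// this patch; the name is an assumption here, not established by this file):
+
+    // Hypothetical: SerializeAlloc is assumed to allocate and fill a fresh
+    // byte array with the serialized form.
+    var allocated :- expect API.SerializeAlloc(SIMPLE_VALUE);
+    expect allocated[..] == SIMPLE_JS;
+
+///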
Below is the smaller example from the +/// README, as a sanity check: + + var CITY_JS :- expect ToUTF8Checked(@"{""Cities"": [{ + ""Name"": ""Boston"", + ""Founded"": 1630, + ""Population"": 689386, + ""Area (km2)"": 4584.2}]}"); + + var CITY_VALUE := + Object([("Cities", Array([ + Object([ + ("Name", String("Boston")), + ("Founded", Number(Int(1630))), + ("Population", Number(Int(689386))), + ("Area (km2)", Number(Decimal(45842, -1)))])]))]); + + expect API.Deserialize(CITY_JS) == Success(CITY_VALUE); + + var EXPECTED' :- expect ToUTF8Checked( + @"{""Cities"":[{""Name"":""Boston"",""Founded"":1630,""Population"":689386,""Area (km2)"":45842e-1}]}" + ); + + expect API.Serialize(CITY_VALUE) == Success(EXPECTED'); + } +} + +/// ## Low-level API (concrete syntax) +/// +/// If you care about low-level performance, or about preserving existing +/// formatting as much as possible, you may prefer to use the lower-level API: + +module {:options "-functionSyntax:4"} JSON.Examples.ConcreteSyntax { + import ZeroCopy.API + import opened UnicodeStrings + import opened Grammar + import opened Wrappers + +/// The low-level API works with ASTs that record all details of formatting and +/// encoding: each node contains pointers to parts of a string, such that +/// concatenating the fields of all nodes reconstructs the serialized value. + + method {:test} Test() { + +/// The low-level API exposes the same functions and methods as the high-level +/// one, but the type that they consume and produce is `Grammar.JSON` (defined +/// in `Grammar.dfy` as a `Grammar.Value` surrounded by optional whitespace) +/// instead of `Values.JSON` (defined in `Values.dfy`). Since `Grammar.JSON` contains +/// all formatting information, re-serializing an object produces the original +/// value: + + var CITIES :- expect ToUTF8Checked(@"{ + ""Cities"": [ + { + ""Name"": ""Boston"", + ""Founded"": 1630, + ""Population"": 689386, + ""Area (km2)"": 4600 + }, { + ""Name"": ""Rome"", + ""Founded"": -753, + ""Population"": 2.873e6, + ""Area (km2)"": 1285 + }, { + ""Name"": ""Paris"", + ""Founded"": null, + ""Population"": 2.161e6, + ""Area (km2)"": 2383.5 + } + ] + }"); + + var deserialized :- expect API.Deserialize(CITIES); + expect API.Serialize(deserialized) == Success(CITIES); + +/// Since the formatting is preserved, it is also possible to write +/// minimally-invasive transformations over an AST. For example, let's replace +/// `null` in the object above with `"Unknown"`. 
+///
+/// First, we construct a JSON value for the string `"Unknown"`; this could be
+/// done by hand using `View.OfBytes()`, but using `API.Deserialize` is even
+/// simpler:
+    var UNKNOWN_JS :- expect ToUTF8Checked(@"""Unknown""");
+    var UNKNOWN :- expect API.Deserialize(UNKNOWN_JS);
+
+/// `UNKNOWN` is of type `Grammar.JSON`, which contains optional whitespace and
+/// a `Grammar.Value` under the name `UNKNOWN.t`, which we can use in the
+/// replacement:
+
+    var without_null := deserialized.(t := ReplaceNull(deserialized.t, UNKNOWN.t));
+
+/// Then, if we reserialize, we see that all formatting (and, in fact, all of
+/// the serialization work) has been reused:
+
+    var expected_js :- expect ToUTF8Checked(@"{
+      ""Cities"": [
+        {
+          ""Name"": ""Boston"",
+          ""Founded"": 1630,
+          ""Population"": 689386,
+          ""Area (km2)"": 4600
+        }, {
+          ""Name"": ""Rome"",
+          ""Founded"": -753,
+          ""Population"": 2.873e6,
+          ""Area (km2)"": 1285
+        }, {
+          ""Name"": ""Paris"",
+          ""Founded"": ""Unknown"",
+          ""Population"": 2.161e6,
+          ""Area (km2)"": 2383.5
+        }
+      ]
+    }");
+    var actual_js :- expect API.Serialize(without_null);
+    expect actual_js == expected_js;
+  }
+
+/// All that remains is to write the recursive traversal:
+
+  import Seq
+
+  function ReplaceNull(js: Value, replacement: Value): Value {
+    match js
+
+/// Non-recursive cases are untouched:
+
+    case Bool(_) => js
+    case String(_) => js
+    case Number(_) => js
+
+/// `Null` is replaced with the new `replacement` value:
+
+    case Null(_) => replacement
+
+/// … and objects and arrays are traversed recursively (only the data part is
+/// traversed: other fields record information about the formatting of braces,
+/// square brackets, and whitespace, and can thus be reused without
+/// modifications):
+
+    case Object(obj) =>
+      Object(obj.(data := MapSuffixedSequence(obj.data, (s: Suffixed) requires s in obj.data =>
+        s.t.(v := ReplaceNull(s.t.v, replacement)))))
+    case Array(arr) =>
+      Array(arr.(data := MapSuffixedSequence(arr.data, (s: Suffixed) requires s in arr.data =>
+        ReplaceNull(s.t, replacement))))
+  }
+
+/// Note that well-formedness criteria on the low-level AST are enforced using
+/// subset types, which is why we need a bit more work to iterate over the
+/// sequences of key-value pairs and of values in objects and arrays.
+/// Specifically, we need to prove that mapping over these sequences doesn't
+/// introduce dangling punctuation (`NoTrailingSuffix`). We package this
+/// reasoning into a `MapSuffixedSequence` function:
+
+  function MapSuffixedSequence(sq: SuffixedSequence, fn: Suffixed --> D)
+    : SuffixedSequence
+    requires forall suffixed | suffixed in sq :: fn.requires(suffixed)
+  {
+    // BUG(https://github.com/dafny-lang/dafny/issues/2184)
+    // BUG(https://github.com/dafny-lang/dafny/issues/2690)
+    var fn' := (sf: Suffixed) requires (ghost var in_sq := sf => sf in sq; in_sq(sf)) => sf.(t := fn(sf));
+    var sq' := Seq.Map(fn', sq);
+
+    assert NoTrailingSuffix(sq') by {
+      forall idx | 0 <= idx < |sq'| ensures sq'[idx].suffix.Empty? <==> idx == |sq'| - 1 {
+        calc {
+          sq'[idx].suffix.Empty?;
+          fn'(sq[idx]).suffix.Empty?;
+          sq[idx].suffix.Empty?;
+          idx == |sq| - 1;
+          idx == |sq'| - 1;
+        }
+      }
+    }
+
+    sq'
+  }
+}
+
+/// The examples in this file can be run with `Dafny -compile:4 -runAllTests:1`
+/// (the tests produce no output, but their calls to `expect` will be checked
+/// dynamically).
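
The traversal above generalizes with minimal changes: only the case of interest is rewritten, and `MapSuffixedSequence` already packages the `NoTrailingSuffix` reasoning. As a sketch (hypothetical, mirroring the shape of `ReplaceNull` above; not verified as part of this patch), a variant that replaces every string value instead:

    function ReplaceStrings(js: Value, replacement: Value): Value {
      match js
      // Non-recursive cases other than String are kept as-is:
      case Bool(_) => js
      case Null(_) => js
      case Number(_) => js
      // Every string value is swapped for the replacement:
      case String(_) => replacement
      // Containers recurse exactly as in ReplaceNull, reusing all formatting:
      case Object(obj) =>
        Object(obj.(data := MapSuffixedSequence(obj.data, (s: Suffixed) requires s in obj.data =>
          s.t.(v := ReplaceStrings(s.t.v, replacement)))))
      case Array(arr) =>
        Array(arr.(data := MapSuffixedSequence(arr.data, (s: Suffixed) requires s in arr.data =>
          ReplaceStrings(s.t, replacement))))
    }

Because the proof obligation lives inside `MapSuffixedSequence`, such variants need no additional verification work.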
diff --git a/src/JSON/Tutorial.dfy.expect b/src/JSON/Tutorial.dfy.expect new file mode 100644 index 00000000..db4fbc9c --- /dev/null +++ b/src/JSON/Tutorial.dfy.expect @@ -0,0 +1,4 @@ + +Dafny program verifier finished with 3 verified, 0 errors +JSON.Examples.AbstractSyntax.Test: PASSED +JSON.Examples.ConcreteSyntax.Test: PASSED diff --git a/src/JSON/Utils/Cursors.dfy b/src/JSON/Utils/Cursors.dfy new file mode 100644 index 00000000..28d1f490 --- /dev/null +++ b/src/JSON/Utils/Cursors.dfy @@ -0,0 +1,336 @@ +// RUN: %verify "%s" + +include "../../BoundedInts.dfy" +include "../../Wrappers.dfy" +include "Views.dfy" +include "Lexers.dfy" + +module {:options "-functionSyntax:4"} JSON.Utils.Cursors { + import opened BoundedInts + import opened Wrappers + + import opened Vs = Views.Core + import opened Lx = Lexers.Core + + datatype Split<+T> = SP(t: T, cs: FreshCursor) { + ghost predicate BytesSplitFrom?(cs0: Cursor, spec: T -> bytes) { + cs0.Bytes() == spec(t) + cs.Bytes() + } + + ghost predicate SplitFrom?(cs0: Cursor, spec: T -> bytes) { + cs.SplitFrom?(cs0) && BytesSplitFrom?(cs0, spec) + } + + ghost predicate StrictlySplitFrom?(cs0: Cursor, spec: T -> bytes) { + cs.StrictlySplitFrom?(cs0) && BytesSplitFrom?(cs0, spec) + } + } + + // LATER: Make this a newtype and put members here instead of requiring `Valid?` everywhere + type Cursor = ps: Cursor_ | ps.Valid? + witness Cursor([], 0, 0, 0) + type FreshCursor = ps: Cursor | ps.BOF? + witness Cursor([], 0, 0, 0) + + datatype CursorError<+R> = + | EOF + | ExpectingByte(expected: byte, b: opt_byte) + | ExpectingAnyByte(expected_sq: seq, b: opt_byte) + | OtherError(err: R) + { // TODO: Include positions in errors + function ToString(pr: R -> string) : string { + match this + case EOF => "Reached EOF" + case ExpectingByte(b0, b) => + var c := if b > 0 then "'" + [b as char] + "'" else "EOF"; + "Expecting '" + [b0 as char] + "', read " + c + case ExpectingAnyByte(bs0, b) => + var c := if b > 0 then "'" + [b as char] + "'" else "EOF"; + var c0s := seq(|bs0|, idx requires 0 <= idx < |bs0| => bs0[idx] as char); + "Expecting one of '" + c0s + "', read " + c + case OtherError(err) => pr(err) + } + } + type CursorResult<+R> = Result> + + datatype Cursor_ = Cursor(s: bytes, beg: uint32, point: uint32, end: uint32) { + ghost const Valid?: bool := + 0 <= beg as int <= point as int <= end as int <= |s| < TWO_TO_THE_32; + + const BOF? := + point == beg + + const EOF? := + point == end + + static function OfView(v: View) : FreshCursor { + Cursor(v.s, v.beg, v.beg, v.end) + } + + static function OfBytes(bs: bytes) : FreshCursor + requires |bs| < TWO_TO_THE_32 + { + Cursor(bs, 0, 0, |bs| as uint32) + } + + function Bytes() : bytes + requires Valid? + { + s[beg..end] + } + + ghost predicate StrictlyAdvancedFrom?(other: Cursor): (b: bool) + requires Valid? + ensures b ==> + SuffixLength() < other.SuffixLength() + ensures b ==> + beg == other.beg && end == other.end ==> + forall idx | beg <= idx < point :: s[idx] == other.s[idx] + { + && s == other.s + && beg == other.beg + && end == other.end + && point > other.point + } + + ghost predicate AdvancedFrom?(other: Cursor) + requires Valid? + { + || this == other + || StrictlyAdvancedFrom?(other) + } + + ghost predicate StrictSuffixOf?(other: Cursor) + requires Valid? + ensures StrictSuffixOf?(other) ==> + Length() < other.Length() + { + && s == other.s + && beg > other.beg + && end == other.end + } + + ghost predicate SuffixOf?(other: Cursor) + requires Valid? 
+ { + || this == other + || StrictSuffixOf?(other) + } + + ghost predicate StrictlySplitFrom?(other: Cursor) + requires Valid? + { + && BOF? + && StrictSuffixOf?(other) + } + + ghost predicate SplitFrom?(other: Cursor) + requires Valid? + { + || this == other + || StrictlySplitFrom?(other) + } + + function Prefix() : View requires Valid? { + View(s, beg, point) + } + + function Suffix() : Cursor requires Valid? { + this.(beg := point) + } + + function Split() : (sp: Split) requires Valid? + ensures sp.SplitFrom?(this, (v: View) => v.Bytes()) + ensures beg != point ==> sp.StrictlySplitFrom?(this, (v: View) => v.Bytes()) + { + SP(this.Prefix(), this.Suffix()) + } + + function PrefixLength() : uint32 requires Valid? { + point - beg + } + + function SuffixLength() : uint32 requires Valid? { + end - point + } + + function Length() : uint32 requires Valid? { + end - beg + } + + lemma PrefixSuffixLength() + requires Valid? + ensures Length() == PrefixLength() + SuffixLength() + {} + + ghost predicate ValidIndex?(idx: uint32) { + beg as int + idx as int < end as int + } + + function At(idx: uint32) : byte + requires Valid? + requires ValidIndex?(idx) + { + s[beg + idx] + } + + ghost predicate ValidSuffixIndex?(idx: uint32) { + point as int + idx as int < end as int + } + + function SuffixAt(idx: uint32) : byte + requires Valid? + requires ValidSuffixIndex?(idx) + { + s[point + idx] + } + + function Peek(): (r: opt_byte) + requires Valid? + ensures r < 0 <==> EOF? + { + if EOF? then -1 + else SuffixAt(0) as opt_byte + } + + predicate LookingAt(c: char): (b: bool) + requires Valid? + requires c as int < 256 + ensures b <==> !EOF? && SuffixAt(0) == c as byte + { + Peek() == c as opt_byte + } + + function Skip(n: uint32): (ps: Cursor) + requires Valid? + requires point as int + n as int <= end as int + ensures n == 0 ==> ps == this + ensures n > 0 ==> ps.StrictlyAdvancedFrom?(this) + { + this.(point := point + n) + } + + function Unskip(n: uint32): Cursor + requires Valid? + requires beg as int <= point as int - n as int + { + this.(point := point - n) + } + + function Get(err: R): (ppr: CursorResult) + requires Valid? + ensures ppr.Success? ==> ppr.value.StrictlyAdvancedFrom?(this) + { + if EOF? then Failure(OtherError(err)) + else Success(Skip(1)) + } + + function AssertByte(b: byte): (pr: CursorResult) + requires Valid? + ensures pr.Success? ==> !EOF? + ensures pr.Success? ==> s[point] == b + ensures pr.Success? ==> pr.value.StrictlyAdvancedFrom?(this) + { + var nxt := Peek(); + if nxt == b as opt_byte then Success(Skip(1)) + else Failure(ExpectingByte(b, nxt)) + } + + function {:tailrecursion} AssertBytes(bs: bytes, offset: uint32 := 0): (pr: CursorResult) + requires Valid? + requires |bs| < TWO_TO_THE_32 + requires offset <= |bs| as uint32 + requires forall b | b in bs :: b as int < 256 + decreases SuffixLength() + ensures pr.Success? ==> pr.value.AdvancedFrom?(this) + ensures pr.Success? && offset < |bs| as uint32 ==> pr.value.StrictlyAdvancedFrom?(this) + ensures pr.Success? ==> s[point..pr.value.point] == bs[offset..] + { + if offset == |bs| as uint32 then Success(this) + else + var ps :- AssertByte(bs[offset] as byte); + ps.AssertBytes(bs, offset + 1) + } + + function AssertChar(c0: char): (pr: CursorResult) + requires Valid? + requires c0 as int < 256 + ensures pr.Success? ==> pr.value.StrictlyAdvancedFrom?(this) + { + AssertByte(c0 as byte) + } + + function SkipByte(): (ps: Cursor) + requires Valid? + decreases SuffixLength() + ensures ps.AdvancedFrom?(this) + ensures !EOF? 
==> ps.StrictlyAdvancedFrom?(this) + { + if EOF? then this + else Skip(1) + } + + function SkipIf(p: byte -> bool): (ps: Cursor) + requires Valid? + decreases SuffixLength() + ensures ps.AdvancedFrom?(this) + ensures !EOF? && p(SuffixAt(0)) ==> ps.StrictlyAdvancedFrom?(this) + { + if EOF? || !p(SuffixAt(0)) then this + else Skip(1) + } + + function SkipWhile(p: byte -> bool): (ps: Cursor) + requires Valid? + decreases SuffixLength() + ensures ps.AdvancedFrom?(this) + ensures forall idx | point <= idx < ps.point :: p(ps.s[idx]) + { + if EOF? || !p(SuffixAt(0)) then this + else Skip(1).SkipWhile(p) + } by method { + var point' := this.point; + var end := this.end; + while point' < end && p(this.s[point']) + invariant this.(point := point').Valid? + invariant this.(point := point').SkipWhile(p) == this.SkipWhile(p) + { + point' := point' + 1; + } + return Cursor(this.s, this.beg, point', this.end); + } + + function SkipWhileLexer(step: Lexer, st: A) + : (pr: CursorResult) + requires Valid? + decreases SuffixLength() + ensures pr.Success? ==> pr.value.AdvancedFrom?(this) + { + match step(st, Peek()) + case Accept => Success(this) + case Reject(err) => Failure(OtherError(err)) + case Partial(st) => + if EOF? then Failure(EOF) + else Skip(1).SkipWhileLexer(step, st) + } by method { + var point' := point; + var end := this.end; + var st' := st; + while true + invariant this.(point := point').Valid? + invariant this.(point := point').SkipWhileLexer(step, st') == this.SkipWhileLexer(step, st) + decreases this.(point := point').SuffixLength() + { + var eof := point' == end; + var minusone: opt_byte := -1; // BUG(https://github.com/dafny-lang/dafny/issues/2191) + var c := if eof then minusone else this.s[point'] as opt_byte; + match step(st', c) + case Accept => return Success(Cursor(this.s, this.beg, point', this.end)); + case Reject(err) => return Failure(OtherError(err)); + case Partial(st'') => + if eof { return Failure(EOF); } + else { st' := st''; point' := point' + 1; } + } + } + } +} diff --git a/src/JSON/Utils/Lexers.dfy b/src/JSON/Utils/Lexers.dfy new file mode 100644 index 00000000..a600b3a5 --- /dev/null +++ b/src/JSON/Utils/Lexers.dfy @@ -0,0 +1,53 @@ +// RUN: %verify "%s" + +include "../../Wrappers.dfy" +include "../../BoundedInts.dfy" + +module {:options "-functionSyntax:4"} JSON.Utils.Lexers { + module Core { + import opened Wrappers + import opened BoundedInts + + datatype LexerResult<+T, +R> = + // A Lexer may return three results: + | Accept // The input is valid. + | Reject(err: R) // The input is not valid; `err` says why. + | Partial(st: T) // More input is needed to finish lexing. 
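+
+    // For illustration, a sketch of a lexer written against this interface
+    // (hypothetical; not part of this patch and not used below): its state
+    // counts the ASCII digits read so far, it requests more input while it
+    // sees digits, accepts once a nonempty run ends (including at EOF, where
+    // the driver passes -1), and rejects an empty run. `SkipWhileLexer` in
+    // Cursors.dfy feeds such a function one `opt_byte` at a time.
+    //
+    //   function DigitRun(count: nat, byte: opt_byte): LexerResult<nat, string> {
+    //     if '0' as int16 <= byte <= '9' as int16 then Partial(count + 1)
+    //     else if count > 0 then Accept
+    //     else Reject("expected at least one digit")
+    //   }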
+ + type Lexer = (T, opt_byte) -> LexerResult + } + + module Strings { + import opened Core + import opened BoundedInts + + type StringBodyLexerState = /* escaped: */ bool + const StringBodyLexerStart: StringBodyLexerState := false; + + function StringBody(escaped: StringBodyLexerState, byte: opt_byte) + : LexerResult + { + if byte == '\\' as opt_byte then Partial(!escaped) + else if byte == '\"' as opt_byte && !escaped then Accept + else Partial(false) + } + + datatype StringLexerState = Start | Body(escaped: bool) | End + const StringLexerStart: StringLexerState := Start; + + function String(st: StringLexerState, byte: opt_byte) + : LexerResult + { + match st + case Start() => + if byte == '\"' as opt_byte then Partial(Body(false)) + else Reject("String must start with double quote") + case End() => + Accept + case Body(escaped) => + if byte == '\\' as opt_byte then Partial(Body(!escaped)) + else if byte == '\"' as opt_byte && !escaped then Partial(End) + else Partial(Body(false)) + } + } +} diff --git a/src/JSON/Utils/Parsers.dfy b/src/JSON/Utils/Parsers.dfy new file mode 100644 index 00000000..637a24bd --- /dev/null +++ b/src/JSON/Utils/Parsers.dfy @@ -0,0 +1,63 @@ +// RUN: %verify "%s" + +include "../../BoundedInts.dfy" +include "../../Wrappers.dfy" +include "Cursors.dfy" + +module {:options "-functionSyntax:4"} JSON.Utils.Parsers { + import opened BoundedInts + import opened Wrappers + + import opened Views.Core + import opened Cursors + + type SplitResult<+T, +R> = Result, CursorError> + + type Parser = p: Parser_ | p.Valid?() + // BUG(https://github.com/dafny-lang/dafny/issues/2103) + witness ParserWitness() // BUG(https://github.com/dafny-lang/dafny/issues/2175) + datatype Parser_ = Parser(fn: FreshCursor -> SplitResult, + ghost spec: T -> bytes) { + ghost predicate Valid?() { + forall cs': FreshCursor :: fn(cs').Success? ==> fn(cs').value.StrictlySplitFrom?(cs', spec) + } + } + + function {:opaque} ParserWitness(): (p: Parser_) + ensures p.Valid?() + { + Parser(_ => Failure(EOF), _ => []) + } + + // BUG(https://github.com/dafny-lang/dafny/issues/2137): It would be much + // nicer if `SubParser` was a special case of `Parser`, but that would require + // making `fn` in parser a partial function `-->`. The problem with that is + // that we would then have to restrict the `Valid?` clause of `Parser` on + // `fn.requires()`, thus making it unprovable in the `SubParser` case (`fn` + // for subparsers is typically a lambda, and the `requires` of lambdas are + // essentially uninformative/opaque). + datatype SubParser_ = SubParser( + ghost cs: Cursor, + ghost pre: FreshCursor -> bool, + fn: FreshCursor --> SplitResult, + ghost spec: T -> bytes) + { + ghost predicate Valid?() { + && (forall cs': FreshCursor | pre(cs') :: fn.requires(cs')) + && (forall cs': FreshCursor | cs'.StrictlySplitFrom?(cs) :: pre(cs')) + && (forall cs': FreshCursor | pre(cs') :: fn(cs').Success? 
==> fn(cs').value.StrictlySplitFrom?(cs', spec)) + } + } + + type SubParser = p: SubParser_ | p.Valid?() + witness SubParserWitness() // BUG(https://github.com/dafny-lang/dafny/issues/2175) + + function {:opaque} SubParserWitness(): (subp: SubParser_) + ensures subp.Valid?() + { + SubParser(Cursor([], 0, 0, 0), + (cs: FreshCursor) => false, + (cs: FreshCursor) => Failure(EOF), + _ => []) + } +} diff --git a/src/JSON/Utils/Seq.dfy b/src/JSON/Utils/Seq.dfy new file mode 100644 index 00000000..3293db29 --- /dev/null +++ b/src/JSON/Utils/Seq.dfy @@ -0,0 +1,23 @@ +// RUN: %verify "%s" + +module {:options "-functionSyntax:4"} JSON.Utils.Seq { + lemma Neutral(l: seq) + ensures l == l + [] + {} + + lemma Assoc(a: seq, b: seq, c: seq) + // `a + b + c` is parsed as `(a + b) + c` + ensures a + b + c == a + (b + c) + {} + + + lemma Assoc'(a: seq, b: seq, c: seq) + // `a + b + c` is parsed as `(a + b) + c` + ensures a + (b + c) == a + b + c + {} + + lemma Assoc2(a: seq, b: seq, c: seq, d: seq) + // `a + b + c + d` is parsed as `((a + b) + c) + d` + ensures a + b + c + d == a + (b + c + d) + {} +} diff --git a/src/JSON/Utils/Str.dfy b/src/JSON/Utils/Str.dfy new file mode 100644 index 00000000..9f099419 --- /dev/null +++ b/src/JSON/Utils/Str.dfy @@ -0,0 +1,258 @@ +// RUN: %verify --disable-nonlinear-arithmetic "%s" + +include "../../BoundedInts.dfy" +include "../../Wrappers.dfy" +include "../../NonlinearArithmetic/Mul.dfy" +include "../../NonlinearArithmetic/DivMod.dfy" +include "../../NonlinearArithmetic/Logarithm.dfy" +include "../../NonlinearArithmetic/Power.dfy" + +module {:options "-functionSyntax:4"} JSON.Utils.Str { + import opened Wrappers + import opened Power + import opened Logarithm + + abstract module ParametricConversion { + import opened Wrappers + import opened Mul + import opened DivMod + import opened Power + import opened Logarithm + + type Char(==) + type String = seq + + // FIXME the design in LittleEndianNat makes BASE a module-level constant + // instead of a function argument + function Digits(n: nat, base: int): (digits: seq) + requires base > 1 + decreases n + ensures n == 0 ==> |digits| == 0 + ensures n > 0 ==> |digits| == Log(base, n) + 1 + ensures forall d | d in digits :: 0 <= d < base + { + if n == 0 then + assert Pow(base, 0) == 1 by { reveal Pow(); } + [] + else + LemmaDivPosIsPosAuto(); LemmaDivDecreasesAuto(); + var digits' := Digits(n / base, base); + var digits := digits' + [n % base]; + assert |digits| == Log(base, n) + 1 by { + assert |digits| == |digits'| + 1; + if n < base { + LemmaLog0(base, n); + assert n / base == 0 by { LemmaBasicDiv(base); } + } else { + LemmaLogS(base, n); + assert n / base > 0 by { LemmaDivNonZeroAuto(); } + } + } + digits + } + + function OfDigits(digits: seq, chars: seq) : (str: String) + requires forall d | d in digits :: 0 <= d < |chars| + ensures forall c | c in str :: c in chars + ensures |str| == |digits| + { + if digits == [] then [] + else + assert digits[0] in digits; + assert forall d | d in digits[1..] :: d in digits; + [chars[digits[0]]] + OfDigits(digits[1..], chars) + } + + function OfNat_any(n: nat, chars: seq) : (str: String) + requires |chars| > 1 + ensures |str| == Log(|chars|, n) + 1 + ensures forall c | c in str :: c in chars + { + var base := |chars|; + if n == 0 then reveal Log(); [chars[0]] + else OfDigits(Digits(n, base), chars) + } + + predicate NumberStr(str: String, minus: Char, is_digit: Char -> bool) { + str != [] ==> + && (str[0] == minus || is_digit(str[0])) + && forall c | c in str[1..] 
:: is_digit(c) + } + + function OfInt_any(n: int, chars: seq, minus: Char) : (str: String) + requires |chars| > 1 + ensures NumberStr(str, minus, c => c in chars) + { + if n >= 0 then OfNat_any(n, chars) + else [minus] + OfNat_any(-n, chars) + } + + function ToNat_any(str: String, base: nat, digits: map) : (n: nat) + requires base > 0 + requires forall c | c in str :: c in digits + { + if str == [] then 0 + else + LemmaMulNonnegativeAuto(); + ToNat_any(str[..|str| - 1], base, digits) * base + digits[str[|str| - 1]] + } + + lemma {:induction false} ToNat_bound(str: String, base: nat, digits: map) + requires base > 0 + requires forall c | c in str :: c in digits + requires forall c | c in str :: digits[c] < base + ensures ToNat_any(str, base, digits) < Pow(base, |str|) + { + if str == [] { + reveal Pow(); + } else { + calc <= { + ToNat_any(str, base, digits); + ToNat_any(str[..|str| - 1], base, digits) * base + digits[str[|str| - 1]]; + ToNat_any(str[..|str| - 1], base, digits) * base + (base - 1); + { ToNat_bound(str[..|str| - 1], base, digits); + LemmaMulInequalityAuto(); } + (Pow(base, |str| - 1) - 1) * base + base - 1; + { LemmaMulIsDistributiveAuto(); } + Pow(base, |str| - 1) * base - 1; + { reveal Pow(); LemmaMulIsCommutativeAuto(); } + Pow(base, |str|) - 1; + } + } + } + + function ToInt_any(str: String, minus: Char, base: nat, digits: map) : (s: int) + requires base > 0 + requires str != [minus] + requires NumberStr(str, minus, c => c in digits) + { + if [minus] <= str then -(ToNat_any(str[1..], base, digits) as int) + else + assert str == [] || str == [str[0]] + str[1..]; + ToNat_any(str, base, digits) + } + } + + abstract module ParametricEscaping { + import opened Wrappers + + type Char(==) + type String = seq + + function Escape(str: String, special: set, escape: Char): String { + if str == [] then str + else if str[0] in special then [escape, str[0]] + Escape(str[1..], special, escape) + else [str[0]] + Escape(str[1..], special, escape) + } + + datatype UnescapeError = + EscapeAtEOS + + function Unescape(str: String, escape: Char): Result { + if str == [] then Success(str) + else if str[0] == escape then + if |str| > 1 then var tl :- Unescape(str[2..], escape); Success([str[1]] + tl) + else Failure(EscapeAtEOS) + else var tl :- Unescape(str[1..], escape); Success([str[0]] + tl) + } + + lemma {:induction false} Unescape_Escape(str: String, special: set, escape: Char) + requires escape in special + ensures Unescape(Escape(str, special, escape), escape) == Success(str) + { + if str == [] { + } else { + assert str == [str[0]] + str[1..]; + Unescape_Escape(str[1..], special, escape); + } + } + } + + module CharStrConversion refines ParametricConversion { + type Char = char + } + + module CharStrEscaping refines ParametricEscaping { + type Char = char + } + + const HEX_DIGITS: seq := "0123456789ABCDEF" + + const HEX_TABLE := + map[ + '0' := 0, '1' := 1, '2' := 2, '3' := 3, '4' := 4, '5' := 5, '6' := 6, '7' := 7, '8' := 8, '9' := 9, + 'a' := 0xA, 'b' := 0xB, 'c' := 0xC, 'd' := 0xD, 'e' := 0xE, 'f' := 0xF, + 'A' := 0xA, 'B' := 0xB, 'C' := 0xC, 'D' := 0xD, 'E' := 0xE, 'F' := 0xF + ] + + function OfNat(n: nat, base: int := 10) : (str: string) + requires 2 <= base <= 16 + ensures |str| == Log(base, n) + 1 + ensures forall c | c in str :: c in HEX_DIGITS[..base] + { + CharStrConversion.OfNat_any(n, HEX_DIGITS[..base]) + } + + function OfInt(n: int, base: int := 10) : (str: string) + requires 2 <= base <= 16 + ensures CharStrConversion.NumberStr(str, '-', c => c in HEX_DIGITS[..base]) + { + 
CharStrConversion.OfInt_any(n, HEX_DIGITS[..base], '-') + } + + function ToNat(str: string, base: int := 10) : (n: nat) + requires 2 <= base <= 16 + requires forall c | c in str :: c in HEX_TABLE && HEX_TABLE[c] as int < base + ensures n < Pow(base, |str|) + { + CharStrConversion.ToNat_bound(str, base, HEX_TABLE); + CharStrConversion.ToNat_any(str, base, HEX_TABLE) + } + + function ToInt(str: string, base: int := 10) : (n: int) + requires str != "-" + requires 2 <= base <= 16 + requires CharStrConversion.NumberStr(str, '-', (c: char) => c in HEX_TABLE && HEX_TABLE[c] as int < base) + { + CharStrConversion.ToInt_any(str, '-', base, HEX_TABLE) + } + + function EscapeQuotes(str: string): string { + CharStrEscaping.Escape(str, {'\"', '\''}, '\\') + } + + function UnescapeQuotes(str: string): Result { + CharStrEscaping.Unescape(str, '\\') + } + + method Test() { // FIXME {:test}? + expect OfInt(0, 10) == "0"; + expect OfInt(3, 10) == "3"; + expect OfInt(302, 10) == "302"; + expect OfInt(-3, 10) == "-3"; + expect OfInt(-302, 10) == "-302"; + } + + function OfBool(b: bool) : string { + if b then "true" else "false" + } + + function OfChar(c: char) : string { + [c] + } + + function Join(sep: string, strs: seq) : string { + if |strs| == 0 then "" + else if |strs| == 1 then strs[0] + else strs[0] + sep + Join(sep, strs[1..]) + } + + function Concat(strs: seq) : string { + if |strs| == 0 then "" + else strs[0] + Concat(strs[1..]) + } + + lemma Concat_Join(strs: seq) + ensures Concat(strs) == Join("", strs) + {} +} diff --git a/src/JSON/Utils/Vectors.dfy b/src/JSON/Utils/Vectors.dfy new file mode 100644 index 00000000..726e4d4d --- /dev/null +++ b/src/JSON/Utils/Vectors.dfy @@ -0,0 +1,217 @@ +// RUN: %verify "%s" + +include "../../BoundedInts.dfy" +include "../../Wrappers.dfy" + +module {:options "-functionSyntax:4"} JSON.Utils.Vectors { + import opened BoundedInts + import opened Wrappers + + datatype VectorError = OutOfMemory + const OOM_FAILURE := Fail(OutOfMemory) + + class Vector { + ghost var items : seq + ghost var Repr : set + + const a: A + var size: uint32 + var capacity: uint32 + var data: array + + const MAX_CAPACITY: uint32 := UINT32_MAX + const MAX_CAPACITY_BEFORE_DOUBLING: uint32 := UINT32_MAX / 2 + + ghost predicate Valid?() + reads this, Repr + ensures Valid?() ==> this in Repr + { + && Repr == {this, data} + && capacity != 0 + && data.Length == capacity as int + && size <= capacity + && size as int == |items| + && items == data[..size] + } + + constructor(a0: A, initial_capacity: uint32 := 8) + requires initial_capacity > 0 + ensures size == 0 + ensures items == [] + ensures fresh(Repr) + ensures Valid?() + { + a := a0; + items := []; + size := 0; + capacity := initial_capacity; + data := new A[initial_capacity](_ => a0); + Repr := {this, data}; + } + + function At(idx: uint32) : (a: A) + reads this, this.data + requires idx < size + requires Valid?() + ensures a == data[idx] == items[idx] + { + data[idx] + } + + function Top() : (a: A) + reads this, this.data + requires 0 < size + requires Valid?() + ensures a == data[size - 1] == items[size - 1] + { + data[size - 1] + } + + method Put(idx: uint32, a: A) + requires idx < size + requires Valid?() + modifies data, `items + ensures Valid?() + ensures items == old(items)[idx := a] + { + data[idx] := a; + items := items[idx := a]; + } + + method CopyFrom(new_data: array, count: uint32) + requires count as int <= new_data.Length + requires count <= capacity + requires data.Length == capacity as int + modifies data + ensures data[..count] 
== new_data[..count] + ensures data[count..] == old(data[count..]) + { + for idx: uint32 := 0 to count + invariant idx <= capacity + invariant data.Length == capacity as int + invariant data[..idx] == new_data[..idx] + invariant data[count..] == old(data[count..]) + { + data[idx] := new_data[idx]; + } + } + + method Realloc(new_capacity: uint32) + requires Valid?() + requires new_capacity > capacity + modifies `capacity, `data, `Repr, data + ensures Valid?() + ensures capacity == new_capacity + ensures fresh(data) + { + var old_data, old_capacity := data, capacity; + data, capacity := new A[new_capacity](_ => a), new_capacity; + CopyFrom(old_data, old_capacity); + Repr := {this, data}; + } + + function DefaultNewCapacity(capacity: uint32) : uint32 { + if capacity < MAX_CAPACITY_BEFORE_DOUBLING + then 2 * capacity + else MAX_CAPACITY + } + + method ReallocDefault() returns (o: Outcome) + requires Valid?() + modifies `capacity, `data, `Repr, data + ensures Valid?() + ensures o.Fail? <==> old(capacity) == MAX_CAPACITY + ensures o.Fail? ==> + && unchanged(this) + && unchanged(data) + ensures o.Pass? ==> + && fresh(data) + && old(capacity) < MAX_CAPACITY + && capacity == old(if capacity < MAX_CAPACITY_BEFORE_DOUBLING + then 2 * capacity else MAX_CAPACITY) + + { + if capacity == MAX_CAPACITY { + return Fail(OutOfMemory); + } + Realloc(DefaultNewCapacity(capacity)); + return Pass; + } + + method Ensure(reserved: uint32) returns (o: Outcome) + requires Valid?() + modifies this, `data + ensures Valid?() + modifies `capacity, `data, `Repr, data + ensures reserved <= capacity - size ==> + o.Pass? + ensures o.Pass? ==> + old(size as int + reserved as int) <= capacity as int + ensures o.Fail? ==> + reserved > MAX_CAPACITY - size + { + if reserved > MAX_CAPACITY - size { + return Fail(OutOfMemory); + } + if reserved <= capacity - size { + return Pass; + } + var new_capacity := capacity; + while reserved > new_capacity - size + decreases MAX_CAPACITY - new_capacity + invariant new_capacity >= capacity + { + new_capacity := DefaultNewCapacity(new_capacity); + } + Realloc(new_capacity); + return Pass; + } + + method PopFast() + requires Valid?() + requires size > 0 + modifies `size, `items + ensures Valid?() + ensures size == old(size) - 1 + ensures items == old(items[..|items| - 1]) + { + size := size - 1; + items := items[..|items| - 1]; + } + + method PushFast(a: A) + requires Valid?() + requires size < capacity + modifies `size, `items, data + ensures Valid?() + ensures size == old(size) + 1 + ensures items == old(items) + [a] + { + data[size] := a; + size := size + 1; + items := items + [a]; + } + + method Push(a: A) returns (o: Outcome) + requires Valid?() + modifies this, data + ensures Valid?() + ensures o.Fail? ==> + && unchanged(this) + && unchanged(data) + ensures o.Pass? ==> + && old(size) < MAX_CAPACITY + && size == old(size) + 1 + && items == old(items) + [a] + && capacity >= old(capacity) + && if old(size == capacity) then fresh(data) else unchanged(`data) + { + if size == capacity { + var d := ReallocDefault(); + if d.Fail? 
{ return d; } + } + PushFast(a); + return Pass; + } + } +} diff --git a/src/JSON/Utils/Views.Writers.dfy b/src/JSON/Utils/Views.Writers.dfy new file mode 100644 index 00000000..07656761 --- /dev/null +++ b/src/JSON/Utils/Views.Writers.dfy @@ -0,0 +1,121 @@ +// RUN: %verify "%s" + +include "../../BoundedInts.dfy" +include "../../Wrappers.dfy" +include "Views.dfy" + +module {:options "-functionSyntax:4"} JSON.Utils.Views.Writers { + import opened BoundedInts + import opened Wrappers + + import opened Core + + datatype Chain = + | Empty + | Chain(previous: Chain, v: View) + { + function Length() : nat { + if Empty? then 0 + else previous.Length() + v.Length() as int + } + + function Count() : nat { + if Empty? then 0 + else previous.Count() + 1 + } + + function Bytes() : (bs: bytes) + ensures |bs| == Length() + { + if Empty? then [] + else previous.Bytes() + v.Bytes() + } + + function Append(v': View): (c: Chain) + ensures c.Bytes() == Bytes() + v'.Bytes() + { + if Chain? && Adjacent(v, v') then + Chain(previous, Merge(v, v')) + else + Chain(this, v') + } + + method {:tailrecursion} CopyTo(dest: array, end: uint32) + requires end as int == Length() <= dest.Length + modifies dest + ensures dest[..end] == Bytes() + ensures dest[end..] == old(dest[end..]) + { + if Chain? { + var end := end - v.Length(); + v.CopyTo(dest, end); + previous.CopyTo(dest, end); + } + } + } + + type Writer = w: Writer_ | w.Valid? witness Writer(0, Chain.Empty) + datatype Writer_ = Writer(length: uint32, chain: Chain) + { + static const Empty: Writer := Writer(0, Chain.Empty) + + const Empty? := chain.Empty? + const Unsaturated? := length != UINT32_MAX + + ghost function Length() : nat { chain.Length() } + + ghost const Valid? := + length == // length is a saturating counter + if chain.Length() >= TWO_TO_THE_32 then UINT32_MAX + else chain.Length() as uint32 + + function Bytes() : (bs: bytes) + ensures |bs| == Length() + { + chain.Bytes() + } + + static function SaturatedAddU32(a: uint32, b: uint32): uint32 { + if a <= UINT32_MAX - b then a + b + else UINT32_MAX + } + + function {:opaque} Append(v': View): (rw: Writer) + requires Valid? + ensures rw.Unsaturated? <==> v'.Length() < UINT32_MAX - length + ensures rw.Bytes() == Bytes() + v'.Bytes() + { + Writer(SaturatedAddU32(length, v'.Length()), + chain.Append(v')) + } + + function Then(fn: Writer ~> Writer) : Writer + reads fn.reads + requires Valid? + requires fn.requires(this) + { + fn(this) + } + + method {:tailrecursion} CopyTo(dest: array) + requires Valid? + requires Unsaturated? + requires Length() <= dest.Length + modifies dest + ensures dest[..length] == Bytes() + ensures dest[length..] == old(dest[length..]) + { + chain.CopyTo(dest, length); + } + + method ToArray() returns (bs: array) + requires Valid? + requires Unsaturated? + ensures fresh(bs) + ensures bs[..] == Bytes() + { + bs := new byte[length]; + CopyTo(bs); + } + } +} diff --git a/src/JSON/Utils/Views.dfy b/src/JSON/Utils/Views.dfy new file mode 100644 index 00000000..d655b359 --- /dev/null +++ b/src/JSON/Utils/Views.dfy @@ -0,0 +1,123 @@ +// RUN: %verify "%s" + +include "../../BoundedInts.dfy" + +module {:options "-functionSyntax:4"} JSON.Utils.Views.Core { + import opened BoundedInts + + type View = v: View_ | v.Valid? witness View([], 0, 0) + datatype View_ = View(s: bytes, beg: uint32, end: uint32) { + ghost const Valid?: bool := + 0 <= beg as int <= end as int <= |s| < TWO_TO_THE_32; + + static const Empty: View := + View([], 0, 0) + + const Empty? 
:= + beg == end + + function Length(): uint32 requires Valid? { + end - beg + } + + function Bytes(): bytes requires Valid? { + s[beg..end] + } + + static function OfBytes(bs: bytes) : (v: View) + requires |bs| < TWO_TO_THE_32 + ensures v.Bytes() == bs + { + View(bs, 0 as uint32, |bs| as uint32) + } + + static function OfString(s: string) : bytes + requires forall c: char | c in s :: c as int < 256 + { + seq(|s|, i requires 0 <= i < |s| => + assert s[i] in s; s[i] as byte) + } + + ghost predicate SliceOf?(v': View) { + v'.s == s && v'.beg <= beg && end <= v'.end + } + + ghost predicate StrictPrefixOf?(v': View) { + v'.s == s && v'.beg == beg && end < v'.end + } + + ghost predicate StrictSuffixOf?(v': View) { + v'.s == s && v'.beg < beg && end == v'.end + } + + predicate Byte?(c: byte) + requires Valid? + { + Bytes() == [c] + } by method { + return Length() == 1 && At(0) == c; + } + + predicate Char?(c: char) + requires Valid? + requires c as int < 256 + { + Byte?(c as byte) + } + + ghost predicate ValidIndex?(idx: uint32) { + beg as int + idx as int < end as int + } + + function At(idx: uint32) : byte + requires Valid? + requires ValidIndex?(idx) + { + s[beg + idx] + } + + function Peek(): (r: opt_byte) + requires Valid? + ensures r < 0 <==> Empty? + { + if Empty? then -1 + else At(0) as opt_byte + } + + method CopyTo(dest: array, start: uint32 := 0) + requires Valid? + requires start as int + Length() as int <= dest.Length + requires start as int + Length() as int < TWO_TO_THE_32 + modifies dest + ensures dest[start..start + Length()] == Bytes() + ensures dest[start + Length()..] == old(dest[start + Length()..]) + { + for idx := 0 to Length() + invariant dest[start..start + idx] == Bytes()[..idx] + invariant dest[start + Length()..] == old(dest[start + Length()..]) + { + dest[start + idx] := s[beg + idx]; + } + } + } + + predicate Adjacent(lv: View, rv: View) { + // Compare endpoints first to short-circuit the potentially-costly string + // comparison + && lv.end == rv.beg + // We would prefer to use reference equality here, but doing so in a sound + // way is tricky (see chapter 9 of ‘Verasco: a Formally Verified C Static + // Analyzer’ by Jacques-Henri Jourdan for details). The runtime optimizes + // the common case of physical equality and otherwise performs a length + // check, so the worst case (checking for adjacency in two slices that have + // equal but not physically-equal contents) is hopefully not too common. 
+ && lv.s == rv.s + } + + function Merge(lv: View, rv: View) : (v: View) + requires Adjacent(lv, rv) + ensures v.Bytes() == lv.Bytes() + rv.Bytes() + { + lv.(end := rv.end) + } +} diff --git a/src/JSON/Values.dfy b/src/JSON/Values.dfy new file mode 100644 index 00000000..76952ec2 --- /dev/null +++ b/src/JSON/Values.dfy @@ -0,0 +1,18 @@ +// RUN: %verify "%s" + +module {:options "-functionSyntax:4"} JSON.Values { + datatype Decimal = + Decimal(n: int, e10: int) // (n) * 10^(e10) + + function Int(n: int): Decimal { + Decimal(n, 0) + } + + datatype JSON = + | Null + | Bool(b: bool) + | String(str: string) + | Number(num: Decimal) + | Object(obj: seq<(string, JSON)>) // Not a map to preserve order + | Array(arr: seq) +} diff --git a/src/JSON/ZeroCopy/API.dfy b/src/JSON/ZeroCopy/API.dfy new file mode 100644 index 00000000..21b1cc84 --- /dev/null +++ b/src/JSON/ZeroCopy/API.dfy @@ -0,0 +1,47 @@ +// RUN: %verify "%s" --unicode-char:false ../../Unicode/UnicodeStringsWithoutUnicodeChar.dfy +// RUN: %verify "%s" --unicode-char:true ../../Unicode/UnicodeStringsWithUnicodeChar.dfy + +include "../Grammar.dfy" +include "../ConcreteSyntax.Spec.dfy" +include "Serializer.dfy" +include "Deserializer.dfy" + +module {:options "-functionSyntax:4"} JSON.ZeroCopy.API { + import opened BoundedInts + import opened Wrappers + + import opened Errors + import Grammar + import ConcreteSyntax.Spec + import Serializer + import Deserializer + + function {:opaque} Serialize(js: Grammar.JSON) : (bs: SerializationResult>) + ensures bs == Success(Spec.JSON(js)) + { + Success(Serializer.Text(js).Bytes()) + } + + method SerializeAlloc(js: Grammar.JSON) returns (bs: SerializationResult>) + ensures bs.Success? ==> fresh(bs.value) + ensures bs.Success? ==> bs.value[..] == Spec.JSON(js) + { + bs := Serializer.Serialize(js); + } + + method SerializeInto(js: Grammar.JSON, bs: array) returns (len: SerializationResult) + modifies bs + ensures len.Success? ==> len.value as int <= bs.Length + ensures len.Success? ==> bs[..len.value] == Spec.JSON(js) + ensures len.Success? ==> bs[len.value..] == old(bs[len.value..]) + ensures len.Failure? ==> unchanged(bs) + { + len := Serializer.SerializeTo(js, bs); + } + + function {:opaque} Deserialize(bs: seq) : (js: DeserializationResult) + ensures js.Success? ==> bs == Spec.JSON(js.value) + { + Deserializer.API.OfBytes(bs) + } +} diff --git a/src/JSON/ZeroCopy/Deserializer.dfy b/src/JSON/ZeroCopy/Deserializer.dfy new file mode 100644 index 00000000..1f4a0adb --- /dev/null +++ b/src/JSON/ZeroCopy/Deserializer.dfy @@ -0,0 +1,786 @@ +// RUN: %verify "%s" + +include "../Utils/Seq.dfy" +include "../Errors.dfy" +include "../Grammar.dfy" +include "../ConcreteSyntax.Spec.dfy" +include "../ConcreteSyntax.SpecProperties.dfy" +include "../Utils/Parsers.dfy" + +module {:options "-functionSyntax:4"} JSON.ZeroCopy.Deserializer { + module Core { + import opened BoundedInts + import opened Wrappers + + import ConcreteSyntax.Spec + import Vs = Utils.Views.Core + import opened Utils.Cursors + import opened Utils.Parsers + import opened Grammar + import Errors + import opened Seq = Utils.Seq + + + type JSONError = Errors.DeserializationError + type Error = CursorError + type ParseResult<+T> = SplitResult + type Parser = Parsers.Parser + type SubParser = Parsers.SubParser + + // BUG(https://github.com/dafny-lang/dafny/issues/2179) + const SpecView := (v: Vs.View) => Spec.View(v); + + function {:opaque} Get(cs: FreshCursor, err: JSONError): (pr: ParseResult) + ensures pr.Success? 
==> pr.value.StrictlySplitFrom?(cs, SpecView) + { + var cs :- cs.Get(err); + Success(cs.Split()) + } + + function {:opaque} WS(cs: FreshCursor): (sp: Split) + ensures sp.SplitFrom?(cs, SpecView) + { + cs.SkipWhile(Blank?).Split() + } by method { + reveal WS(); + var point' := cs.point; + var end := cs.end; + while point' < end && Blank?(cs.s[point']) + invariant cs.(point := point').Valid? + invariant cs.(point := point').SkipWhile(Blank?) == cs.SkipWhile(Blank?) + { + point' := point' + 1; + } + return Cursor(cs.s, cs.beg, point', cs.end).Split(); + } + + function {:opaque} Structural(cs: FreshCursor, parser: Parser) + : (pr: ParseResult>) + requires forall cs :: parser.fn.requires(cs) + ensures pr.Success? ==> pr.value.StrictlySplitFrom?(cs, st => Spec.Structural(st, parser.spec)) + { + var SP(before, cs) := WS(cs); + var SP(val, cs) :- parser.fn(cs); + var SP(after, cs) := WS(cs); + Success(SP(Grammar.Structural(before, val, after), cs)) + } + + type jopt = v: Vs.View | v.Length() <= 1 witness Vs.View.OfBytes([]) + + function {:opaque} TryStructural(cs: FreshCursor) + : (sp: Split>) + ensures sp.SplitFrom?(cs, st => Spec.Structural(st, SpecView)) + { + var SP(before, cs) := WS(cs); + var SP(val, cs) := cs.SkipByte().Split(); + var SP(after, cs) := WS(cs); + SP(Grammar.Structural(before, val, after), cs) + } + + type ValueParser = sp: SubParser | + forall t :: sp.spec(t) == Spec.Value(t) + witness * + } + type Error = Core.Error + + abstract module SequenceParams { + import opened BoundedInts + + import opened Grammar + import opened Utils.Cursors + import opened Core + + const OPEN: byte + const CLOSE: byte + + type TElement + + ghost function ElementSpec(t: TElement): bytes + + function Element(cs: FreshCursor, json: ValueParser) + : (pr: ParseResult) + requires cs.StrictlySplitFrom?(json.cs) + decreases cs.Length() + ensures pr.Success? ==> pr.value.StrictlySplitFrom?(cs, ElementSpec) + } + + abstract module Sequences { + import opened Wrappers + import opened BoundedInts + import opened Params: SequenceParams + + import ConcreteSyntax.SpecProperties + import opened Vs = Utils.Views.Core + import opened Grammar + import opened Utils.Cursors + import Utils.Parsers + import opened Core + + const SEPARATOR: byte := ',' as byte + + type jopen = v: Vs.View | v.Byte?(OPEN) witness Vs.View.OfBytes([OPEN]) + type jclose = v: Vs.View | v.Byte?(CLOSE) witness Vs.View.OfBytes([CLOSE]) + type TBracketed = Bracketed + type TSuffixedElement = Suffixed + + ghost function SuffixedElementSpec(e: TSuffixedElement): bytes { + ElementSpec(e.t) + Spec.CommaSuffix(e.suffix) + } + + ghost function BracketedSpec(ts: TBracketed): bytes { + Spec.Bracketed(ts, SuffixedElementSpec) + } + + ghost function SuffixedElementsSpec(ts: seq): bytes { + Spec.ConcatBytes(ts, SuffixedElementSpec) + } + + function {:opaque} Open(cs: FreshCursor) + : (pr: ParseResult) + ensures pr.Success? ==> pr.value.StrictlySplitFrom?(cs, _ => [OPEN]) + { + var cs :- cs.AssertByte(OPEN); + Success(cs.Split()) + } + + function {:opaque} Close(cs: FreshCursor) + : (pr: ParseResult) + ensures pr.Success? 
==> pr.value.StrictlySplitFrom?(cs, _ => [CLOSE]) + { + var cs :- cs.AssertByte(CLOSE); + Success(cs.Split()) + } + + function {:opaque} BracketedFromParts(ghost cs: Cursor, + open: Split>, + elems: Split>, + close: Split>) + : (sp: Split) + requires Grammar.NoTrailingSuffix(elems.t) + requires open.StrictlySplitFrom?(cs, c => Spec.Structural(c, SpecView)) + requires elems.SplitFrom?(open.cs, SuffixedElementsSpec) + requires close.StrictlySplitFrom?(elems.cs, c => Spec.Structural(c, SpecView)) + ensures sp.StrictlySplitFrom?(cs, BracketedSpec) + { + var sp := SP(Grammar.Bracketed(open.t, elems.t, close.t), close.cs); + calc { + cs.Bytes(); + Spec.Structural(open.t, SpecView) + open.cs.Bytes(); + { assert open.cs.Bytes() == SuffixedElementsSpec(elems.t) + elems.cs.Bytes(); } + Spec.Structural(open.t, SpecView) + (SuffixedElementsSpec(elems.t) + elems.cs.Bytes()); + { Seq.Assoc'(Spec.Structural(open.t, SpecView), SuffixedElementsSpec(elems.t), elems.cs.Bytes()); } + Spec.Structural(open.t, SpecView) + SuffixedElementsSpec(elems.t) + elems.cs.Bytes(); + { assert elems.cs.Bytes() == Spec.Structural(close.t, SpecView) + close.cs.Bytes(); } + Spec.Structural(open.t, SpecView) + SuffixedElementsSpec(elems.t) + (Spec.Structural(close.t, SpecView) + close.cs.Bytes()); + { Seq.Assoc'(Spec.Structural(open.t, SpecView) + SuffixedElementsSpec(elems.t), Spec.Structural(close.t, SpecView), close.cs.Bytes()); } + Spec.Structural(open.t, SpecView) + SuffixedElementsSpec(elems.t) + Spec.Structural(close.t, SpecView) + close.cs.Bytes(); + Spec.Bracketed(sp.t, SuffixedElementSpec) + close.cs.Bytes(); + } + assert sp.StrictlySplitFrom?(cs, BracketedSpec); + sp + } + + function {:opaque} AppendWithSuffix(ghost cs0: FreshCursor, + ghost json: ValueParser, + elems: Split>, + elem: Split, + sep: Split>) + : (elems': Split>) + requires elems.cs.StrictlySplitFrom?(json.cs) + requires elems.SplitFrom?(cs0, SuffixedElementsSpec) + requires elem.StrictlySplitFrom?(elems.cs, ElementSpec) + requires sep.StrictlySplitFrom?(elem.cs, c => Spec.Structural(c, SpecView)) + requires forall e | e in elems.t :: e.suffix.NonEmpty? + ensures elems'.StrictlySplitFrom?(cs0, SuffixedElementsSpec) + ensures forall e | e in elems'.t :: e.suffix.NonEmpty? 
+ ensures elems'.cs.Length() < elems.cs.Length() + ensures elems'.cs.StrictlySplitFrom?(json.cs) + { + var suffixed := Suffixed(elem.t, NonEmpty(sep.t)); + var elems' := SP(elems.t + [suffixed], sep.cs); // DISCUSS: Moving this down doubles the verification time + + assert cs0.Bytes() == SuffixedElementsSpec(elems'.t) + sep.cs.Bytes() by { + assert {:focus} cs0.Bytes() == SuffixedElementsSpec(elems.t) + (ElementSpec(suffixed.t) + Spec.CommaSuffix(suffixed.suffix)) + sep.cs.Bytes() by { + assert cs0.Bytes() == SuffixedElementsSpec(elems.t) + ElementSpec(suffixed.t) + Spec.CommaSuffix(suffixed.suffix) + sep.cs.Bytes() by { + assert cs0.Bytes() == SuffixedElementsSpec(elems.t) + elems.cs.Bytes(); + assert elems.cs.Bytes() == ElementSpec(suffixed.t) + elem.cs.Bytes(); + assert elem.cs.Bytes() == Spec.CommaSuffix(suffixed.suffix) + sep.cs.Bytes(); + Seq.Assoc'(SuffixedElementsSpec(elems.t), ElementSpec(suffixed.t), elem.cs.Bytes()); + Seq.Assoc'(SuffixedElementsSpec(elems.t) + ElementSpec(suffixed.t), Spec.CommaSuffix(suffixed.suffix), sep.cs.Bytes()); + } + Seq.Assoc(SuffixedElementsSpec(elems.t), ElementSpec(suffixed.t), Spec.CommaSuffix(suffixed.suffix)); + } + assert SuffixedElementsSpec(elems.t) + (ElementSpec(suffixed.t) + Spec.CommaSuffix(suffixed.suffix)) + sep.cs.Bytes() == SuffixedElementsSpec(elems'.t) + sep.cs.Bytes() by { + assert SuffixedElementsSpec(elems.t) + SuffixedElementSpec(suffixed) == SuffixedElementsSpec(elems.t + [suffixed]) by { + SpecProperties.ConcatBytes_Linear(elems.t, [suffixed], SuffixedElementSpec); + assert Spec.ConcatBytes(elems.t, SuffixedElementSpec) + Spec.ConcatBytes([suffixed], SuffixedElementSpec) == Spec.ConcatBytes(elems.t + [suffixed], SuffixedElementSpec); + } + } + } + assert elems'.StrictlySplitFrom?(cs0, SuffixedElementsSpec); + assert forall e | e in elems'.t :: e.suffix.NonEmpty? by { assert elems'.t == elems.t + [suffixed]; } + assert {:split_here} elems'.cs.Length() < elems.cs.Length(); + elems' + } + + function {:opaque} AppendLast(ghost cs0: FreshCursor, + ghost json: ValueParser, + elems: Split>, + elem: Split, + sep: Split>) + : (elems': Split>) + requires elems.cs.StrictlySplitFrom?(json.cs) + requires elems.SplitFrom?(cs0, SuffixedElementsSpec) + requires elem.StrictlySplitFrom?(elems.cs, ElementSpec) + requires sep.StrictlySplitFrom?(elem.cs, c => Spec.Structural(c, SpecView)) + requires forall e | e in elems.t :: e.suffix.NonEmpty? 
+ ensures elems'.StrictlySplitFrom?(cs0, SuffixedElementsSpec) + ensures NoTrailingSuffix(elems'.t) + ensures elems'.cs.Length() < elems.cs.Length() + ensures elems'.cs.StrictlySplitFrom?(json.cs) + ensures sep.StrictlySplitFrom?(elems'.cs, c => Spec.Structural(c, SpecView)) + { + var suffixed := Suffixed(elem.t, Empty()); + var elems' := SP(elems.t + [suffixed], elem.cs); + + assert cs0.Bytes() == SuffixedElementsSpec(elems'.t) + elem.cs.Bytes() by { + assert cs0.Bytes() == SuffixedElementsSpec(elems.t) + ElementSpec(suffixed.t) + elem.cs.Bytes() by { + assert elem.t == suffixed.t; + } + assert SuffixedElementsSpec(elems.t) + ElementSpec(suffixed.t) + elem.cs.Bytes() == SuffixedElementsSpec(elems'.t) + elem.cs.Bytes() by { + assert SuffixedElementsSpec(elems.t) + SuffixedElementSpec(suffixed) == SuffixedElementsSpec(elems.t + [suffixed]) by { + SpecProperties.ConcatBytes_Linear(elems.t, [suffixed], SuffixedElementSpec); + assert Spec.ConcatBytes(elems.t, SuffixedElementSpec) + Spec.ConcatBytes([suffixed], SuffixedElementSpec) == Spec.ConcatBytes(elems.t + [suffixed], SuffixedElementSpec); + } + } + } + + assert elems'.StrictlySplitFrom?(cs0, SuffixedElementsSpec); + elems' + } + + // The implementation and proof of this function is more painful than + // expected due to the tail recursion. + function {:opaque} {:tailrecursion} Elements( + ghost cs0: FreshCursor, + json: ValueParser, + open: Split>, + elems: Split> + ) // DISCUSS: Why is this function reverified once per instantiation of the module? + : (pr: ParseResult) + requires open.StrictlySplitFrom?(cs0, c => Spec.Structural(c, SpecView)) + requires elems.cs.StrictlySplitFrom?(json.cs) + requires elems.SplitFrom?(open.cs, SuffixedElementsSpec) + requires forall e | e in elems.t :: e.suffix.NonEmpty? + decreases elems.cs.Length() + ensures pr.Success? ==> pr.value.StrictlySplitFrom?(cs0, BracketedSpec) + { + var elem :- Element(elems.cs, json); + var sep := Core.TryStructural(elem.cs); + var s0 := sep.t.t.Peek(); + if s0 == SEPARATOR as opt_byte then + assert AppendWithSuffix.requires(open.cs, json, elems, elem, sep) by { + assert {:focus} elems.cs.StrictlySplitFrom?(json.cs); + assert elems.SplitFrom?(open.cs, SuffixedElementsSpec); + assert elem.StrictlySplitFrom?(elems.cs, ElementSpec); + assert sep.StrictlySplitFrom?(elem.cs, c => Spec.Structural(c, SpecView)); + assert forall e | e in elems.t :: e.suffix.NonEmpty?; + assert {:split_here} true; + } + var elems := AppendWithSuffix(open.cs, json, elems, elem, sep); + Elements(cs0, json, open, elems) + else if s0 == CLOSE as opt_byte then + assert AppendLast.requires(open.cs, json, elems, elem, sep) by { + assert sep.StrictlySplitFrom?(elem.cs, c => Spec.Structural(c, SpecView)); + assert elems.cs.StrictlySplitFrom?(json.cs); + assert elems.SplitFrom?(open.cs, SuffixedElementsSpec); + assert elem.StrictlySplitFrom?(elems.cs, ElementSpec); + } + var elems' := AppendLast(open.cs, json, elems, elem, sep); + assert elems'.SplitFrom?(open.cs, SuffixedElementsSpec) by { + assert elems'.StrictlySplitFrom?(open.cs, SuffixedElementsSpec); + } + var bracketed := BracketedFromParts(cs0, open, elems', sep); + assert bracketed.StrictlySplitFrom?(cs0, BracketedSpec); + Success(bracketed) + else + var separator := SEPARATOR; + var pr := Failure(ExpectingAnyByte([CLOSE, separator], s0)); + pr + } + + function {:opaque} Bracketed(cs: FreshCursor, json: ValueParser) + : (pr: ParseResult) + requires cs.SplitFrom?(json.cs) + ensures pr.Success? 
==> pr.value.StrictlySplitFrom?(cs, BracketedSpec) + { + var open :- Core.Structural(cs, Parsers.Parser(Open, SpecView)); + assert open.cs.StrictlySplitFrom?(json.cs); + var elems := SP([], open.cs); + if open.cs.Peek() == CLOSE as opt_byte then + var close :- Core.Structural(open.cs, Parsers.Parser(Close, SpecView)); + Success(BracketedFromParts(cs, open, elems, close)) + else + Elements(cs, json, open, elems) + } + + lemma Valid(x: TBracketed) + ensures x.l.t.Byte?(OPEN) + ensures x.r.t.Byte?(CLOSE) + ensures NoTrailingSuffix(x.data) + ensures forall pf | pf in x.data :: + pf.suffix.NonEmpty? ==> pf.suffix.t.t.Byte?(SEPARATOR) + { // DISCUSS: Why is this lemma needed? Why does it require a body? + var xlt: jopen := x.l.t; + var xrt: jclose := x.r.t; + forall pf | pf in x.data + ensures pf.suffix.NonEmpty? ==> pf.suffix.t.t.Byte?(SEPARATOR) + { + if pf.suffix.NonEmpty? { + var xtt := pf.suffix.t.t; + } + } + } + } + + module API { + import opened BoundedInts + import opened Wrappers + + import opened Vs = Utils.Views.Core + import opened Grammar + import opened Core + import opened Errors + import Utils.Cursors + import Values + + function LiftCursorError(err: Cursors.CursorError): DeserializationError { + match err + case EOF => ReachedEOF + case ExpectingByte(expected, b) => ExpectingByte(expected, b) + case ExpectingAnyByte(expected_sq, b) => ExpectingAnyByte(expected_sq, b) + case OtherError(err) => err + } + + function {:opaque} JSON(cs: Cursors.FreshCursor) : (pr: DeserializationResult>) + ensures pr.Success? ==> pr.value.StrictlySplitFrom?(cs, Spec.JSON) + { + Core.Structural(cs, Parsers.Parser(Values.Value, Spec.Value)).MapFailure(LiftCursorError) + } + + function {:opaque} Text(v: View) : (jsr: DeserializationResult) + ensures jsr.Success? ==> v.Bytes() == Spec.JSON(jsr.value) + { + var SP(text, cs) :- JSON(Cursors.Cursor.OfView(v)); + assert Cursors.SP(text, cs).BytesSplitFrom?(Cursors.Cursor.OfView(v), Spec.JSON); + assert v.Bytes() == Spec.JSON(text) + cs.Bytes(); + :- Need(cs.EOF?, Errors.ExpectingEOF); + assert cs.Bytes() == []; + Success(text) + } + + function {:opaque} OfBytes(bs: bytes) : (jsr: DeserializationResult) + ensures jsr.Success? ==> bs == Spec.JSON(jsr.value) + { + :- Need(|bs| < TWO_TO_THE_32, Errors.IntOverflow); + Text(Vs.View.OfBytes(bs)) + } + } + + module Values { + import opened BoundedInts + import opened Wrappers + + import opened Grammar + import opened Utils.Cursors + import opened Core + + import Strings + import Numbers + import Objects + import Arrays + import Constants + + import ConcreteSyntax.SpecProperties + + function {:opaque} Value(cs: FreshCursor) : (pr: ParseResult) + decreases cs.Length(), 1 + ensures pr.Success? 
==> pr.value.StrictlySplitFrom?(cs, Spec.Value) + { + var c := cs.Peek(); + if c == '{' as opt_byte then + var SP(obj, cs') :- Objects.Object(cs, ValueParser(cs)); + var v := Grammar.Object(obj); + var sp := SP(v, cs'); + assert sp.StrictlySplitFrom?(cs, Spec.Value) by { + assert SP(obj, cs').StrictlySplitFrom?(cs, Spec.Object); + Spec.UnfoldValueObject(v); + } + Success(sp) + else if c == '[' as opt_byte then + var SP(arr, cs') :- Arrays.Array(cs, ValueParser(cs)); + var v := Grammar.Array(arr); + var sp := SP(v, cs'); + assert sp.StrictlySplitFrom?(cs, Spec.Value) by { + assert SP(arr, cs').StrictlySplitFrom?(cs, Spec.Array); + Spec.UnfoldValueArray(v); + } + Success(sp) + else if c == '\"' as opt_byte then + var SP(str, cs) :- Strings.String(cs); + Success(SP(Grammar.String(str), cs)) + else if c == 't' as opt_byte then + var SP(cst, cs) :- Constants.Constant(cs, TRUE); + Success(SP(Grammar.Bool(cst), cs)) + else if c == 'f' as opt_byte then + var SP(cst, cs) :- Constants.Constant(cs, FALSE); + Success(SP(Grammar.Bool(cst), cs)) + else if c == 'n' as opt_byte then + var SP(cst, cs) :- Constants.Constant(cs, NULL); + Success(SP(Grammar.Null(cst), cs)) + else + var SP(num, cs') :- Numbers.Number(cs); + var v := Grammar.Number(num); + var sp := SP(v, cs'); + assert sp.StrictlySplitFrom?(cs, Spec.Value) by { + assert SP(num, cs').StrictlySplitFrom?(cs, Spec.Number); + Spec.UnfoldValueNumber(v); + } + assert sp.StrictlySplitFrom?(cs, Spec.Value); + Success(sp) + } + + function {:opaque} ValueParser(cs: FreshCursor) : (p: ValueParser) + decreases cs.Length(), 0 + ensures cs.SplitFrom?(p.cs) + { + var pre := (ps': FreshCursor) => ps'.Length() < cs.Length(); + var fn := (ps': FreshCursor) requires pre(ps') => Value(ps'); + Parsers.SubParser(cs, pre, fn, Spec.Value) + } + } + + module Constants { + import opened BoundedInts + import opened Wrappers + + import opened Grammar + import opened Core + import opened Utils.Cursors + + function {:opaque} Constant(cs: FreshCursor, expected: bytes) : (pr: ParseResult) + requires |expected| < TWO_TO_THE_32 + ensures pr.Success? ==> pr.value.t.Bytes() == expected + ensures pr.Success? ==> pr.value.SplitFrom?(cs, _ => expected) + { + var cs :- cs.AssertBytes(expected); + Success(cs.Split()) + } + } + + module Strings { + import opened Wrappers + import opened BoundedInts + + import opened Grammar + import opened Utils.Cursors + import opened LC = Utils.Lexers.Core + import opened Utils.Lexers.Strings + import opened Utils.Parsers + import opened Core + + function {:opaque} StringBody(cs: Cursor): (pr: CursorResult) + ensures pr.Success? ==> pr.value.AdvancedFrom?(cs) + { + cs.SkipWhileLexer(Strings.StringBody, StringBodyLexerStart) + } by method { + reveal StringBody(); + var escaped := false; + for point' := cs.point to cs.end + invariant cs.(point := point').Valid? + invariant cs.(point := point').SkipWhileLexer(Strings.StringBody, escaped) == StringBody(cs) + { + var byte := cs.s[point']; + if byte == '\"' as byte && !escaped { + return Success(Cursor(cs.s, cs.beg, point', cs.end)); + } else if byte == '\\' as byte { + escaped := !escaped; + } else { + escaped := false; + } + } + return Failure(EOF); + } + + function Quote(cs: FreshCursor) : (pr: ParseResult) + ensures pr.Success? ==> pr.value.StrictlySplitFrom?(cs, SpecView) + { + var cs :- cs.AssertChar('\"'); + Success(cs.Split()) + } + + function {:opaque} String(cs: FreshCursor): (pr: ParseResult) + ensures pr.Success? 
==> pr.value.StrictlySplitFrom?(cs, Spec.String) + { + var SP(lq, cs) :- Quote(cs); + var contents :- StringBody(cs); + var SP(contents, cs) := contents.Split(); + var SP(rq, cs) :- Quote(cs); + Success(SP(Grammar.JString(lq, contents, rq), cs)) + } + } + + module Numbers { + import opened BoundedInts + import opened Wrappers + + import opened Grammar + import opened Utils.Cursors + import opened Core + + function {:opaque} Digits(cs: FreshCursor) : (sp: Split) + ensures sp.SplitFrom?(cs, SpecView) + { + cs.SkipWhile(Digit?).Split() + } + + function {:opaque} NonEmptyDigits(cs: FreshCursor) : (pr: ParseResult) + ensures pr.Success? ==> pr.value.StrictlySplitFrom?(cs, SpecView) + { + var sp := Digits(cs); + :- Need(!sp.t.Empty?, OtherError(Errors.EmptyNumber)); + Success(sp) + } + + function {:opaque} NonZeroInt(cs: FreshCursor) : (pr: ParseResult) + requires cs.Peek() != '0' as opt_byte + ensures pr.Success? ==> pr.value.StrictlySplitFrom?(cs, SpecView) + { + NonEmptyDigits(cs) + } + + function {:opaque} OptionalMinus(cs: FreshCursor) : (sp: Split) + ensures sp.SplitFrom?(cs, SpecView) + { + cs.SkipIf(c => c == '-' as byte).Split() + } + + function {:opaque} OptionalSign(cs: FreshCursor) : (sp: Split) + ensures sp.SplitFrom?(cs, SpecView) + { + cs.SkipIf(c => c == '-' as byte || c == '+' as byte).Split() + } + + function {:opaque} TrimmedInt(cs: FreshCursor) : (pr: ParseResult) + ensures pr.Success? ==> pr.value.StrictlySplitFrom?(cs, SpecView) + { + var sp := cs.SkipIf(c => c == '0' as byte).Split(); + if sp.t.Empty? then NonZeroInt(sp.cs) + else Success(sp) + } + + function {:opaque} {:vcs_split_on_every_assert} Exp(cs: FreshCursor) : (pr: ParseResult>) + ensures pr.Success? ==> pr.value.SplitFrom?(cs, exp => Spec.Maybe(exp, Spec.Exp)) + { + var SP(e, cs) := + cs.SkipIf(c => c == 'e' as byte || c == 'E' as byte).Split(); + if e.Empty? then + Success(SP(Empty(), cs)) + else + assert e.Char?('e') || e.Char?('E'); + var SP(sign, cs) := OptionalSign(cs); + var SP(num, cs) :- NonEmptyDigits(cs); + Success(SP(NonEmpty(JExp(e, sign, num)), cs)) + } + + function {:opaque} Frac(cs: FreshCursor) : (pr: ParseResult>) + ensures pr.Success? ==> pr.value.SplitFrom?(cs, frac => Spec.Maybe(frac, Spec.Frac)) + { + var SP(period, cs) := + cs.SkipIf(c => c == '.' as byte).Split(); + if period.Empty? 
then + Success(SP(Empty(), cs)) + else + var SP(num, cs) :- NonEmptyDigits(cs); + Success(SP(NonEmpty(JFrac(period, num)), cs)) + } + + function {:opaque} NumberFromParts( + ghost cs: Cursor, + minus: Split, num: Split, + frac: Split>, exp: Split> + ) + : (sp: Split) + requires minus.SplitFrom?(cs, SpecView) + requires num.StrictlySplitFrom?(minus.cs, SpecView) + requires frac.SplitFrom?(num.cs, frac => Spec.Maybe(frac, Spec.Frac)) + requires exp.SplitFrom?(frac.cs, exp => Spec.Maybe(exp, Spec.Exp)) + ensures sp.StrictlySplitFrom?(cs, Spec.Number) + { + var sp := SP(Grammar.JNumber(minus.t, num.t, frac.t, exp.t), exp.cs); + assert cs.Bytes() == Spec.Number(sp.t) + exp.cs.Bytes() by { + assert cs.Bytes() == Spec.View(minus.t) + Spec.View(num.t) + Spec.Maybe(frac.t, Spec.Frac) + Spec.Maybe(exp.t, Spec.Exp) + exp.cs.Bytes() by { + assert cs.Bytes() == Spec.View(minus.t) + minus.cs.Bytes(); + assert minus.cs.Bytes() == Spec.View(num.t) + num.cs.Bytes(); + assert num.cs.Bytes() == Spec.Maybe(frac.t, Spec.Frac) + frac.cs.Bytes(); + assert frac.cs.Bytes() == Spec.Maybe(exp.t, Spec.Exp) + exp.cs.Bytes(); + Seq.Assoc'(Spec.View(minus.t), Spec.View(num.t), num.cs.Bytes()); + Seq.Assoc'(Spec.View(minus.t) + Spec.View(num.t), Spec.Maybe(frac.t, Spec.Frac), frac.cs.Bytes()); + Seq.Assoc'(Spec.View(minus.t) + Spec.View(num.t) + Spec.Maybe(frac.t, Spec.Frac), Spec.Maybe(exp.t, Spec.Exp), exp.cs.Bytes()); + } + } + assert sp.StrictlySplitFrom?(cs, Spec.Number); + + + + sp + } + + function {:opaque} Number(cs: FreshCursor) : (pr: ParseResult) + ensures pr.Success? ==> pr.value.StrictlySplitFrom?(cs, Spec.Number) + { + var minus := OptionalMinus(cs); + var num :- TrimmedInt(minus.cs); + var frac :- Frac(num.cs); + var exp :- Exp(frac.cs); + Success(NumberFromParts(cs, minus, num, frac, exp)) + } + } + + module ArrayParams refines SequenceParams { + import opened Strings + import opened Wrappers + + type TElement = Value + + const OPEN := '[' as byte + const CLOSE := ']' as byte + + function ElementSpec(t: TElement) : bytes { + Spec.Value(t) + } + function {:opaque} Element(cs: FreshCursor, json: ValueParser) : (pr: ParseResult) + { + json.fn(cs) + } + } + + module Arrays refines Sequences { + import opened Params = ArrayParams + + lemma BracketedToArray(arr: jarray) + ensures Spec.Bracketed(arr, SuffixedElementSpec) == Spec.Array(arr) + { + var rItem := (d: jitem) requires d < arr => Spec.Item(d); + assert Spec.Bracketed(arr, SuffixedElementSpec) == Spec.Bracketed(arr, rItem) by { + SpecProperties.Bracketed_Morphism(arr); + assert forall d | d < arr :: SuffixedElementSpec(d) == rItem(d); + } + calc { + Spec.Bracketed(arr, SuffixedElementSpec); + Spec.Bracketed(arr, rItem); + Spec.Array(arr); + } + } + + function {:opaque} Array(cs: FreshCursor, json: ValueParser) + : (pr: ParseResult) + requires cs.SplitFrom?(json.cs) + ensures pr.Success? ==> pr.value.StrictlySplitFrom?(cs, Spec.Array) + { + var sp :- Bracketed(cs, json); + assert sp.StrictlySplitFrom?(cs, BracketedSpec); + BracketedToArray(sp.t); + Success(sp) + } + } + + module ObjectParams refines SequenceParams { + import opened Wrappers + import Strings + + type TElement = jKeyValue + + const OPEN := '{' as byte + const CLOSE := '}' as byte + + function Colon(cs: FreshCursor) : (pr: ParseResult) // DISCUSS: Why can't I make this opaque? + ensures pr.Success? 
==> pr.value.StrictlySplitFrom?(cs, SpecView) + { + var cs :- cs.AssertChar(':'); + Success(cs.Split()) + } + + function {:opaque} KeyValueFromParts(ghost cs: Cursor, k: Split, + colon: Split>, v: Split) + : (sp: Split) + requires k.StrictlySplitFrom?(cs, Spec.String) + requires colon.StrictlySplitFrom?(k.cs, c => Spec.Structural(c, SpecView)) + requires v.StrictlySplitFrom?(colon.cs, Spec.Value) + ensures sp.StrictlySplitFrom?(cs, ElementSpec) + { + var sp := SP(Grammar.KeyValue(k.t, colon.t, v.t), v.cs); + assert cs.Bytes() == Spec.KeyValue(sp.t) + v.cs.Bytes() by { + assert cs.Bytes() == Spec.String(k.t) + Spec.Structural(colon.t, SpecView) + Spec.Value(v.t) + v.cs.Bytes() by { + assert cs.Bytes() == Spec.String(k.t) + k.cs.Bytes(); + assert k.cs.Bytes() == Spec.Structural(colon.t, SpecView) + colon.cs.Bytes(); + assert colon.cs.Bytes() == Spec.Value(v.t) + v.cs.Bytes(); + Seq.Assoc'(Spec.String(k.t), Spec.Structural(colon.t, SpecView), colon.cs.Bytes()); + Seq.Assoc'(Spec.String(k.t) + Spec.Structural(colon.t, SpecView), Spec.Value(v.t), v.cs.Bytes()); + } + } + assert sp.StrictlySplitFrom?(cs, ElementSpec); + sp + } + + function ElementSpec(t: TElement) : bytes { + Spec.KeyValue(t) + } + function {:opaque} Element(cs: FreshCursor, json: ValueParser) + : (pr: ParseResult) + { + var k :- Strings.String(cs); + assert k.cs.StrictlySplitFrom?(json.cs); + + var p := Parsers.Parser(Colon, SpecView); + assert p.Valid?(); + var colon :- Core.Structural(k.cs, p); + assert colon.StrictlySplitFrom?(k.cs, st => Spec.Structural(st, p.spec)); + assert colon.cs.StrictlySplitFrom?(json.cs); + + var v :- json.fn(colon.cs); + var kv := KeyValueFromParts(cs, k, colon, v); + Success(kv) + } + } + + module Objects refines Sequences { + import opened Params = ObjectParams + + lemma {:vcs_split_on_every_assert} BracketedToObject(obj: jobject) + ensures Spec.Bracketed(obj, SuffixedElementSpec) == Spec.Object(obj) + { + var rMember := (d: jmember) requires d < obj => Spec.Member(d); + assert Spec.Bracketed(obj, SuffixedElementSpec) == Spec.Bracketed(obj, rMember) by { + SpecProperties.Bracketed_Morphism(obj); + assert forall d | d < obj :: SuffixedElementSpec(d) == rMember(d); + } + calc { + Spec.Bracketed(obj, SuffixedElementSpec); + Spec.Bracketed(obj, rMember); + Spec.Object(obj); + } + } + + function {:opaque} Object(cs: FreshCursor, json: ValueParser) + : (pr: ParseResult) + requires cs.SplitFrom?(json.cs) + ensures pr.Success? ==> pr.value.StrictlySplitFrom?(cs, Spec.Object) + { + var sp :- Bracketed(cs, json); + assert sp.StrictlySplitFrom?(cs, BracketedSpec); + BracketedToObject(sp.t); + Success(sp) + } + } +} diff --git a/src/JSON/ZeroCopy/Serializer.dfy b/src/JSON/ZeroCopy/Serializer.dfy new file mode 100644 index 00000000..11030e2d --- /dev/null +++ b/src/JSON/ZeroCopy/Serializer.dfy @@ -0,0 +1,343 @@ +// RUN: %verify "%s" + +include "../Utils/Seq.dfy" +include "../Errors.dfy" +include "../ConcreteSyntax.Spec.dfy" +include "../ConcreteSyntax.SpecProperties.dfy" +include "../Utils/Views.Writers.dfy" + +module {:options "-functionSyntax:4"} JSON.ZeroCopy.Serializer { + import opened BoundedInts + import opened Wrappers + + import opened Seq = Utils.Seq + import opened Errors + import ConcreteSyntax.Spec + import ConcreteSyntax.SpecProperties + import opened Grammar + import opened Utils.Views.Writers + import opened Vs = Utils.Views.Core // DISCUSS: Module naming convention? + + method Serialize(js: JSON) returns (rbs: SerializationResult>) + ensures rbs.Success? 
==> fresh(rbs.value) + ensures rbs.Success? ==> rbs.value[..] == Spec.JSON(js) + { + var writer := Text(js); + :- Need(writer.Unsaturated?, OutOfMemory); + var bs := writer.ToArray(); + return Success(bs); + } + + method SerializeTo(js: JSON, dest: array) returns (len: SerializationResult) + modifies dest + ensures len.Success? ==> len.value as int <= dest.Length + ensures len.Success? ==> dest[..len.value] == Spec.JSON(js) + ensures len.Success? ==> dest[len.value..] == old(dest[len.value..]) + ensures len.Failure? ==> unchanged(dest) + { + var writer := Text(js); + :- Need(writer.Unsaturated?, OutOfMemory); + :- Need(writer.length as int <= dest.Length, OutOfMemory); + writer.CopyTo(dest); + return Success(writer.length); + } + + function {:opaque} Text(js: JSON) : (wr: Writer) + ensures wr.Bytes() == Spec.JSON(js) + { + JSON(js) + } + + function {:opaque} JSON(js: JSON, writer: Writer := Writer.Empty) : (wr: Writer) + ensures wr.Bytes() == writer.Bytes() + Spec.JSON(js) + { + Seq.Assoc2(writer.Bytes(),js.before.Bytes(), Spec.Value(js.t), js.after.Bytes()); + writer + .Append(js.before) + .Then(wr => Value(js.t, wr)) + .Append(js.after) + } + + function {:opaque} {:vcs_split_on_every_assert} Value(v: Value, writer: Writer) : (wr: Writer) + decreases v, 4 + ensures wr.Bytes() == writer.Bytes() + Spec.Value(v) + { + match v + case Null(n) => writer.Append(n) + case Bool(b) => var wr := writer.Append(b); wr + case String(str) => var wr := String(str, writer); wr + case Number(num) => assert Grammar.Number(num) == v by { Spec.UnfoldValueNumber(v); } var wr := Number(num, writer); wr + case Object(obj) => assert Grammar.Object(obj) == v; assert Spec.Value(v) == Spec.Object(obj); var wr := Object(obj, writer); wr + case Array(arr) => assert Grammar.Array(arr) == v; assert Spec.Value(v) == Spec.Array(arr); var wr := Array(arr, writer); assert wr.Bytes() == writer.Bytes() + Spec.Value(v); wr + } + + function {:opaque} String(str: jstring, writer: Writer) : (wr: Writer) + decreases str, 0 + ensures wr.Bytes() == writer.Bytes() + Spec.String(str) + { + writer + .Append(str.lq) + .Append(str.contents) + .Append(str.rq) + } + + function {:opaque} Number(num: jnumber, writer: Writer) : (wr: Writer) + decreases num, 0 + ensures wr.Bytes() == writer.Bytes() + Spec.Number(num) + { + var wr := writer.Append(num.minus).Append(num.num); + + var wr := if num.frac.NonEmpty? then + wr.Append(num.frac.t.period).Append(num.frac.t.num) + else wr; + assert wr.Bytes() == writer.Bytes() + Spec.View(num.minus) + Spec.View(num.num) + Spec.Maybe(num.frac, Spec.Frac) by { + assert num.frac.Empty? ==> wr.Bytes() == writer.Bytes() + Spec.View(num.minus) + Spec.View(num.num) + []; + } + + var wr := if num.exp.NonEmpty? then + wr.Append(num.exp.t.e).Append(num.exp.t.sign).Append(num.exp.t.num) + else wr; + assert wr.Bytes() == writer.Bytes() + Spec.View(num.minus) + Spec.View(num.num) + Spec.Maybe(num.frac, Spec.Frac) + Spec.Maybe(num.exp, Spec.Exp) by { + if num.exp.NonEmpty? 
{} else { + assert wr.Bytes() == writer.Bytes() + Spec.View(num.minus) + Spec.View(num.num) + Spec.Maybe(num.frac, Spec.Frac); + assert wr.Bytes() == writer.Bytes() + Spec.View(num.minus) + Spec.View(num.num) + Spec.Maybe(num.frac, Spec.Frac) + []; + } + } + wr + } + + // DISCUSS: Can't be opaque, due to the lambda + function StructuralView(st: Structural, writer: Writer) : (wr: Writer) + ensures wr.Bytes() == writer.Bytes() + Spec.Structural(st, Spec.View) + { + writer.Append(st.before).Append(st.t).Append(st.after) + } + + lemma StructuralViewEns(st: Structural, writer: Writer) + ensures StructuralView(st, writer).Bytes() == writer.Bytes() + Spec.Structural(st, Spec.View) + {} + + lemma {:axiom} Assume(b: bool) ensures b + + // FIXME refactor below to merge + + lemma BracketedToObject(obj: jobject) + ensures Spec.Bracketed(obj, Spec.Member) == Spec.Object(obj) + { + var rMember := (d: jmember) requires d < obj => Spec.Member(d); + assert Spec.Bracketed(obj, Spec.Member) == Spec.Bracketed(obj, rMember) by { + // We call ``ConcatBytes`` with ``Spec.Member``, whereas the spec calls it + // with ``(d: jmember) requires d in obj.data => Spec.Member(d)``. That's + // why we need an explicit cast, which is performed by the lemma below. + SpecProperties.Bracketed_Morphism(obj); + assert forall d | d < obj :: Spec.Member(d) == rMember(d); + } + calc { + Spec.Bracketed(obj, Spec.Member); + Spec.Bracketed(obj, rMember); + Spec.Object(obj); + } + } + + function {:opaque} Object(obj: jobject, writer: Writer) : (wr: Writer) + decreases obj, 3 + ensures wr.Bytes() == writer.Bytes() + Spec.Object(obj) + { + var wr := StructuralView(obj.l, writer); + StructuralViewEns(obj.l, writer); + var wr := Members(obj, wr); + var wr := StructuralView(obj.r, wr); + Seq.Assoc2(writer.Bytes(), Spec.Structural(obj.l, Spec.View), Spec.ConcatBytes(obj.data, Spec.Member), Spec.Structural(obj.r, Spec.View)); + assert wr.Bytes() == writer.Bytes() + Spec.Bracketed(obj, Spec.Member); + assert Spec.Bracketed(obj, Spec.Member) == Spec.Object(obj) by { BracketedToObject(obj); } + wr + } + + lemma BracketedToArray(arr: jarray) + ensures Spec.Bracketed(arr, Spec.Item) == Spec.Array(arr) + { + var rItem := (d: jitem) requires d < arr => Spec.Item(d); + assert Spec.Bracketed(arr, Spec.Item) == Spec.Bracketed(arr, rItem) by { + SpecProperties.Bracketed_Morphism(arr); + assert forall d | d < arr :: Spec.Item(d) == rItem(d); + } + calc { + Spec.Bracketed(arr, Spec.Item); + Spec.Bracketed(arr, rItem); + Spec.Array(arr); + } + } + + function {:opaque} Array(arr: jarray, writer: Writer) : (wr: Writer) + decreases arr, 3 + ensures wr.Bytes() == writer.Bytes() + Spec.Array(arr) + { + var wr := StructuralView(arr.l, writer); + StructuralViewEns(arr.l, writer); + var wr := Items(arr, wr); + var wr := StructuralView(arr.r, wr); + Seq.Assoc2(writer.Bytes(), Spec.Structural(arr.l, Spec.View), Spec.ConcatBytes(arr.data, Spec.Item), Spec.Structural(arr.r, Spec.View)); + assert wr.Bytes() == writer.Bytes() + Spec.Bracketed(arr, Spec.Item); + assert Spec.Bracketed(arr, Spec.Item) == Spec.Array(arr) by { BracketedToArray(arr); } + wr + } + + function {:opaque} Members(obj: jobject, writer: Writer) : (wr: Writer) + decreases obj, 2 + ensures wr.Bytes() == writer.Bytes() + Spec.ConcatBytes(obj.data, Spec.Member) + { + MembersSpec(obj, obj.data, writer) + } by method { + wr := MembersImpl(obj, writer); + Assume(false); // BUG(https://github.com/dafny-lang/dafny/issues/2180) + } + + function {:opaque} Items(arr: jarray, writer: Writer) : (wr: Writer) + 
decreases arr, 2 + ensures wr.Bytes() == writer.Bytes() + Spec.ConcatBytes(arr.data, Spec.Item) + { + ItemsSpec(arr, arr.data, writer) + } by method { + wr := ItemsImpl(arr, writer); + Assume(false); // BUG(https://github.com/dafny-lang/dafny/issues/2180) + } + + ghost function MembersSpec(obj: jobject, members: seq<jmember>, writer: Writer) : (wr: Writer) + requires forall j | 0 <= j < |members| :: members[j] < obj + decreases obj, 1, members + ensures wr.Bytes() == writer.Bytes() + Spec.ConcatBytes(members, Spec.Member) + { // TR elimination doesn't work for mutually recursive methods, so this + // function is only used as a spec for Members. + if members == [] then writer + else + var butLast, last := members[..|members|-1], members[|members|-1]; + assert members == butLast + [last]; + var wr := MembersSpec(obj, butLast, writer); + var wr := Member(obj, last, wr); + assert wr.Bytes() == writer.Bytes() + (Spec.ConcatBytes(butLast, Spec.Member) + Spec.ConcatBytes([last], Spec.Member)) by { + Seq.Assoc(writer.Bytes(), Spec.ConcatBytes(butLast, Spec.Member), Spec.ConcatBytes([last], Spec.Member)); + } + SpecProperties.ConcatBytes_Linear(butLast, [last], Spec.Member); + wr + } // No by method block here, because the loop invariant in the method version + // needs to call MembersSpec and the termination checker gets confused by + // that. Instead, see Members above. // DISCUSS + + // DISCUSS: Is there a way to avoid passing the ghost `v` around while + // maintaining the termination argument? Maybe the lambda for elements will be enough? + + ghost function SequenceSpec<T>(v: Value, items: seq<T>, + spec: T -> bytes, impl: (Value, T, Writer) --> Writer, + writer: Writer) + : (wr: Writer) + requires forall item, wr | item in items :: impl.requires(v, item, wr) + requires forall item, wr | item in items :: impl(v, item, wr).Bytes() == wr.Bytes() + spec(item) + decreases v, 1, items + ensures wr.Bytes() == writer.Bytes() + Spec.ConcatBytes(items, spec) + { // TR elimination doesn't work for mutually recursive methods, so this + // function is only used as a spec for Items. + if items == [] then writer + else + var writer := SequenceSpec(v, items[..|items|-1], spec, impl, writer); + assert items == items[..|items|-1] + [items[|items|-1]]; + SpecProperties.ConcatBytes_Linear(items[..|items|-1], [items[|items|-1]], spec); + impl(v, items[|items|-1], writer) + } // No by method block here, because the loop invariant in the method version + // needs to call `SequenceSpec` and the termination checker gets confused by + // that. Instead, see `Items` above. // DISCUSS + + + ghost function ItemsSpec(arr: jarray, items: seq<jitem>, writer: Writer) : (wr: Writer) + requires forall j | 0 <= j < |items| :: items[j] < arr + decreases arr, 1, items + ensures wr.Bytes() == writer.Bytes() + Spec.ConcatBytes(items, Spec.Item) + { // TR elimination doesn't work for mutually recursive methods, so this + // function is only used as a spec for Items.
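+    // Peeling off the *last* item here mirrors the left-to-right loop in
+    // ItemsImpl below, which maintains the invariant
+    //   wr == ItemsSpec(arr, items[..i], writer)
+    // so each loop iteration corresponds to one unfolding of this function.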
+ if items == [] then writer + else + var butLast, last := items[..|items|-1], items[|items|-1]; + assert items == butLast + [last]; + var wr := ItemsSpec(arr, butLast, writer); + var wr := Item(arr, last, wr); + assert wr.Bytes() == writer.Bytes() + (Spec.ConcatBytes(butLast, Spec.Item) + Spec.ConcatBytes([last], Spec.Item)) by { + Seq.Assoc(writer.Bytes(), Spec.ConcatBytes(butLast, Spec.Item), Spec.ConcatBytes([last], Spec.Item)); + } + SpecProperties.ConcatBytes_Linear(butLast, [last], Spec.Item); + wr + } // No by method block here, because the loop invariant in the method version + // needs to call ItemsSpec and the termination checker gets confused by + // that. Instead, see Items above. // DISCUSS + + method MembersImpl(obj: jobject, writer: Writer) returns (wr: Writer) + decreases obj, 1 + ensures wr == MembersSpec(obj, obj.data, writer); + { + wr := writer; + var members := obj.data; + assert wr == MembersSpec(obj, members[..0], writer); + for i := 0 to |members| // FIXME uint32 + invariant wr == MembersSpec(obj, members[..i], writer) + { + assert members[..i+1][..i] == members[..i]; + wr := Member(obj, members[i], wr); + } + assert members[..|members|] == members; + assert wr == MembersSpec(obj, members, writer); + } + + method ItemsImpl(arr: jarray, writer: Writer) returns (wr: Writer) + decreases arr, 1 + ensures wr == ItemsSpec(arr, arr.data, writer); + { + wr := writer; + var items := arr.data; + assert wr == ItemsSpec(arr, items[..0], writer); + for i := 0 to |items| // FIXME uint32 + invariant wr == ItemsSpec(arr, items[..i], writer) + { + assert items[..i+1][..i] == items[..i]; + wr := Item(arr, items[i], wr); + } + assert items[..|items|] == items; + } + + function {:opaque} Member(ghost obj: jobject, m: jmember, writer: Writer) : (wr: Writer) + requires m < obj + decreases obj, 0 + ensures wr.Bytes() == writer.Bytes() + Spec.Member(m) + { + var wr := String(m.t.k, writer); + var wr := StructuralView(m.t.colon, wr); + var wr := Value(m.t.v, wr); + assert wr.Bytes() == writer.Bytes() + (Spec.String(m.t.k) + Spec.Structural(m.t.colon, Spec.View) + Spec.Value(m.t.v)) by { + Seq.Assoc2( writer.Bytes(), Spec.String(m.t.k), Spec.Structural(m.t.colon, Spec.View), Spec.Value(m.t.v)); + } + var wr := if m.suffix.Empty? then wr else StructuralView(m.suffix.t, wr); + assert wr.Bytes() == writer.Bytes() + Spec.KeyValue(m.t) + Spec.CommaSuffix(m.suffix) by { + if m.suffix.Empty? { + Neutral(Spec.KeyValue(m.t)); + Seq.Assoc'(writer.Bytes(), Spec.KeyValue(m.t), []); + } + else { + assert Spec.StructuralView(m.suffix.t) == Spec.CommaSuffix(m.suffix); + } + } + assert wr.Bytes() == writer.Bytes() + (Spec.KeyValue(m.t) + Spec.CommaSuffix(m.suffix)) by { + Seq.Assoc(writer.Bytes(), Spec.KeyValue(m.t), Spec.CommaSuffix(m.suffix)); + } + wr + } + + function {:opaque} Item(ghost arr: jarray, m: jitem, writer: Writer) : (wr: Writer) + requires m < arr + decreases arr, 0 + ensures wr.Bytes() == writer.Bytes() + Spec.Item(m) + { + var wr := Value(m.t, writer); + var wr := if m.suffix.Empty? 
then wr else StructuralView(m.suffix.t, wr); + assert wr.Bytes() == writer.Bytes() + (Spec.Value(m.t) + Spec.CommaSuffix(m.suffix)) by { + Seq.Assoc(writer.Bytes(), Spec.Value(m.t), Spec.CommaSuffix(m.suffix)); + } + wr + } +} diff --git a/src/Math.dfy b/src/Math.dfy index 6ec5d2ed..17cf15f2 100644 --- a/src/Math.dfy +++ b/src/Math.dfy @@ -22,4 +22,9 @@ module {:options "-functionSyntax:4"} Math { a } + function Abs(a: int): (a': int) + ensures a' >= 0 + { + if a >= 0 then a else -a + } } diff --git a/src/NonlinearArithmetic/DivMod.dfy b/src/NonlinearArithmetic/DivMod.dfy index 89e22bcc..de38effd 100644 --- a/src/NonlinearArithmetic/DivMod.dfy +++ b/src/NonlinearArithmetic/DivMod.dfy @@ -111,6 +111,22 @@ module {:options "-functionSyntax:4"} DivMod { } } + lemma LemmaDivNonZero(x: int, d: int) + requires x >= d > 0 + ensures x / d > 0 + { + LemmaDivPosIsPosAuto(); + if x / d == 0 { + LemmaSmallDivConverseAuto(); + } + } + + lemma LemmaDivNonZeroAuto() + ensures forall x, d {:trigger x / d } | x >= d > 0 :: x / d > 0 + { + forall x, d | x >= d > 0 { LemmaDivNonZero(x, d); } + } + /* given two fractions with the same numerator, the order of numbers is determined by the denominators. However, if the numerator is 0, the fractions are equal regardless of the denominators' values */ diff --git a/src/NonlinearArithmetic/Logarithm.dfy b/src/NonlinearArithmetic/Logarithm.dfy new file mode 100644 index 00000000..936d7ac2 --- /dev/null +++ b/src/NonlinearArithmetic/Logarithm.dfy @@ -0,0 +1,103 @@ +// RUN: %verify --disable-nonlinear-arithmetic "%s" + +include "Mul.dfy" +include "DivMod.dfy" +include "Power.dfy" + +module {:options "-functionSyntax:4"} Logarithm { + import opened Mul + import opened DivMod + import opened Power + + function {:opaque} Log(base: nat, pow: nat): nat + requires base > 1 + decreases pow + { + if pow < base then 0 + else + LemmaDivPosIsPosAuto(); LemmaDivDecreasesAuto(); + 1 + Log(base, pow / base) + } + + lemma {:induction false} LemmaLog0(base: nat, pow: nat) + requires base > 1 + requires pow < base + ensures Log(base, pow) == 0 + { + reveal Log(); + } + + lemma {:induction false} LemmaLogS(base: nat, pow: nat) + requires base > 1 + requires pow >= base + ensures pow / base >= 0 + ensures Log(base, pow) == 1 + Log(base, pow / base) + { + LemmaDivPosIsPosAuto(); + reveal Log(); + } + + lemma {:induction false} LemmaLogSAuto() + ensures forall base: nat, pow: nat + {:trigger Log(base, pow / base)} + | && base > 1 + && pow >= base + :: && pow / base >= 0 + && Log(base, pow) == 1 + Log(base, pow / base) + { + forall base: nat, pow: nat | && base > 1 && pow >= base + ensures && pow / base >= 0 + && Log(base, pow) == 1 + Log(base, pow / base) + { + LemmaLogS(base, pow); + } + } + + lemma {:induction false} LemmaLogIsOrdered(base: nat, pow: nat, pow': nat) + requires base > 1 + requires pow <= pow' + ensures Log(base, pow) <= Log(base, pow') + decreases pow + { + reveal Log(); + if pow' < base { + assert Log(base, pow) == 0 == Log(base, pow'); + } else if pow < base { + assert Log(base, pow) == 0; + } else { + LemmaDivPosIsPosAuto(); LemmaDivDecreasesAuto(); LemmaDivIsOrderedAuto(); + LemmaLogIsOrdered(base, pow / base, pow' / base); + } + } + + lemma {:induction false} LemmaLogPow(base: nat, n: nat) + requires base > 1 + ensures (LemmaPowPositive(base, n); Log(base, Pow(base, n)) == n) + { + if n == 0 { + reveal Pow(); + reveal Log(); + } else { + LemmaPowPositive(base, n); + calc { + Log(base, Pow(base, n)); + { reveal Pow(); } + Log(base, base * Pow(base, n - 1)); + { 
LemmaPowPositive(base, n - 1); + LemmaMulIncreases(Pow(base, n - 1), base); + LemmaLogS(base, base * Pow(base, n - 1)); } + 1 + Log(base, (base * Pow(base, n - 1)) / base); + { LemmaDivMultiplesVanish(Pow(base, n - 1), base); } + 1 + Log(base, Pow(base, n - 1)); + { LemmaLogPow(base, n - 1); } + 1 + (n - 1); + } + } + } + + // TODO + // lemma {:induction false} Pow_Log(base: nat, pow: nat) + // requires base > 1 + // requires pow > 0 + // ensures Pow(base, Log(base, pow)) <= pow < Pow(base, Log(base, pow) + 1) +} diff --git a/src/Unicode/UnicodeEncodingForm.dfy b/src/Unicode/UnicodeEncodingForm.dfy index d7f8a463..8119b0e0 100644 --- a/src/Unicode/UnicodeEncodingForm.dfy +++ b/src/Unicode/UnicodeEncodingForm.dfy @@ -239,6 +239,23 @@ abstract module {:options "-functionSyntax:4"} UnicodeEncodingForm { LemmaFlattenMinimalWellFormedCodeUnitSubsequences(ms); Seq.Flatten(ms) } + by method { + // Optimize to avoid allocating the intermediate unflattened sequence. + // We can't quite use Seq.FlatMap easily because we need to prove the result + // is not just a seq but a WellFormedCodeUnitSeq. + // TODO: We can be even more efficient by using a JSON.Utils.Vectors.Vector instead. + s := []; + ghost var unflattened: seq<MinimalWellFormedCodeUnitSeq> := []; + for i := |vs| downto 0 + invariant unflattened == Seq.Map(EncodeScalarValue, vs[i..]) + invariant s == Seq.Flatten(unflattened) + { + var next: MinimalWellFormedCodeUnitSeq := EncodeScalarValue(vs[i]); + unflattened := [next] + unflattened; + LemmaPrependMinimalWellFormedCodeUnitSubsequence(next, s); + s := next + s; + } + } /** * Returns the scalar value sequence encoded by the given well-formed Unicode string. diff --git a/src/Unicode/UnicodeStrings.dfy b/src/Unicode/UnicodeStrings.dfy new file mode 100644 index 00000000..879edcc8 --- /dev/null +++ b/src/Unicode/UnicodeStrings.dfy @@ -0,0 +1,62 @@ +// RUN: %verify %s + +/// Converting between strings and UTF-8/UTF-16 +/// ============================================= +/// +/// This abstract module defines an interface for converting +/// between the Dafny `string` type and sequences of UTF-8 and UTF-16 +/// codepoints, where codepoints are represented as bounded ints +/// (`uint8` and `uint16`). +/// +/// Because the `--unicode-char` option changes the meaning of the `char` +/// type and hence the `string` type, there are two different concrete +/// implementations in separate files. +/// Only include the one that matches your value of that option! +/// +/// If you also want to maintain code that works for either mode, +/// you have two options: +/// +/// 1. Implement your logic in an abstract module as well that +/// imports this one, and define two different refining modules +/// that import the appropriate UnicodeStrings module (see the sketch below). +/// 2. Do not `include` any of these three files in your source code, +/// instead passing the appropriate file to Dafny when verifying and building, +/// so that references to `UnicodeStrings` can be resolved. +/// See the JSON modules as an example. +/// +/// Option 2 avoids needing to write boilerplate refining modules, +/// but is less IDE-friendly until we have better project configuration support.
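+///
+/// As a minimal sketch of option 1 (the `AbstractGreeter` and
+/// `GreeterWithUnicodeChar` module names here are hypothetical, not part of
+/// this library), client logic can be written once against an abstract import
+/// and then refined per mode:
+///
+///   abstract module AbstractGreeter {
+///     import Strings : AbstractUnicodeStrings
+///
+///     method Greet() {
+///       var utf8 := Strings.ToUTF8Checked("Hello!");
+///       expect utf8.Some?;
+///       print utf8.value, "\n";
+///     }
+///   }
+///
+///   module GreeterWithUnicodeChar refines AbstractGreeter {
+///     import Strings = UnicodeStrings // from UnicodeStringsWithUnicodeChar.dfy
+///   }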
+ +include "../BoundedInts.dfy" +include "../Wrappers.dfy" +include "../Collections/Sequences/Seq.dfy" +include "Utf8EncodingForm.dfy" +include "Utf16EncodingForm.dfy" + +abstract module {:options "-functionSyntax:4"} AbstractUnicodeStrings { + + import Seq + + import opened Wrappers + import opened BoundedInts + + function ToUTF8Checked(s: string): Option> + + function ASCIIToUTF8(s: string): seq + requires forall i | 0 <= i < |s| :: 0 <= s[i] as int < 128 + { + Seq.Map(c requires 0 <= c as int < 128 => c as uint8, s) + } + + function FromUTF8Checked(bs: seq): Option + + function ToUTF16Checked(s: string): Option> + + function ASCIIToUTF16(s: string): seq + requires forall i | 0 <= i < |s| :: 0 <= s[i] as int < 128 + { + Seq.Map(c requires 0 <= c as int < 128 => c as uint16, s) + } + + function FromUTF16Checked(bs: seq): Option +} diff --git a/src/Unicode/UnicodeStringsWithUnicodeChar.dfy b/src/Unicode/UnicodeStringsWithUnicodeChar.dfy new file mode 100644 index 00000000..31a418e2 --- /dev/null +++ b/src/Unicode/UnicodeStringsWithUnicodeChar.dfy @@ -0,0 +1,86 @@ +// RUN: %verify --unicode-char:true %s + +/// Converting between strings and UTF-8/UTF-16 +/// ============================================= +/// +/// Implementation of `AbstractUnicodeStrings` for `--unicode-char:true`. +/// See `UnicodeStrings.dfy` for details. + +include "UnicodeStrings.dfy" +include "../Wrappers.dfy" +include "../Collections/Sequences/Seq.dfy" + +module {:options "-functionSyntax:4"} UnicodeStrings refines AbstractUnicodeStrings { + + import Unicode + import Utf8EncodingForm + import Utf16EncodingForm + + lemma {:vcs_split_on_every_assert} CharIsUnicodeScalarValue(c: char) + ensures + && var asBits := c as bv24; + && asBits <= 0x10_FFFF + && (0 <= asBits < Unicode.HIGH_SURROGATE_MIN || Unicode.LOW_SURROGATE_MAX < asBits) + { + assert c as int < 0x11_0000; + // This seems to be just too expensive to verify for such a wide bit-vector type, + // but is clearly true given the above. + assume {:axiom} c as bv24 < 0x11_0000 as bv24; + var asBits := c as int as bv24; + assert (asBits < Unicode.HIGH_SURROGATE_MIN || asBits > Unicode.LOW_SURROGATE_MAX); + assert asBits <= 0x10_FFFF; + } + + lemma UnicodeScalarValueIsChar(sv: Unicode.ScalarValue) + ensures + && var asInt := sv as int; + && (0 <= asInt < 0xD800 || 0xE000 <= asInt < 0x11_0000) + { + var asInt := sv as int; + assert (asInt < 0xD800 || asInt > 0xDFFF); + assert (asInt < 0xDBFF || asInt > 0xDC00); + } + + function CharAsUnicodeScalarValue(c: char): Unicode.ScalarValue { + CharIsUnicodeScalarValue(c); + c as Unicode.ScalarValue + } + + function CharFromUnicodeScalarValue(sv: Unicode.ScalarValue): char { + UnicodeScalarValueIsChar(sv); + // TODO: Can we avoid the extra cast to int? + sv as int as char + } + + function ToUTF8Checked(s: string): Option> + ensures ToUTF8Checked(s).Some? + { + var asCodeUnits := Seq.Map(CharAsUnicodeScalarValue, s); + var asUtf8CodeUnits := Utf8EncodingForm.EncodeScalarSequence(asCodeUnits); + var asBytes := Seq.Map(cu => cu as uint8, asUtf8CodeUnits); + Some(asBytes) + } + + function FromUTF8Checked(bs: seq): Option { + var asCodeUnits := Seq.Map(c => c as Utf8EncodingForm.CodeUnit, bs); + var utf32 :- Utf8EncodingForm.DecodeCodeUnitSequenceChecked(asCodeUnits); + var asChars := Seq.Map(CharFromUnicodeScalarValue, utf32); + Some(asChars) + } + + function ToUTF16Checked(s: string): Option> + ensures ToUTF16Checked(s).Some? 
+ { + var asCodeUnits := Seq.Map(CharAsUnicodeScalarValue, s); + var asUtf16CodeUnits := Utf16EncodingForm.EncodeScalarSequence(asCodeUnits); + var asBytes := Seq.Map(cu => cu as uint16, asUtf16CodeUnits); + Some(asBytes) + } + + function FromUTF16Checked(bs: seq): Option { + var asCodeUnits := Seq.Map(c => c as Utf16EncodingForm.CodeUnit, bs); + var utf32 :- Utf16EncodingForm.DecodeCodeUnitSequenceChecked(asCodeUnits); + var asChars := Seq.Map(CharFromUnicodeScalarValue, utf32); + Some(asChars) + } +} diff --git a/src/Unicode/UnicodeStringsWithoutUnicodeChar.dfy b/src/Unicode/UnicodeStringsWithoutUnicodeChar.dfy new file mode 100644 index 00000000..9c5b5f59 --- /dev/null +++ b/src/Unicode/UnicodeStringsWithoutUnicodeChar.dfy @@ -0,0 +1,52 @@ +// RUN: %verify --unicode-char:false %s + +/// Converting between strings and UTF-8/UTF-16 +/// ============================================= +/// +/// Implementation of `AbstractUnicodeStrings` for `--unicode-char:false`. +/// See `UnicodeStrings.dfy` for details. + +include "UnicodeStrings.dfy" +include "../Wrappers.dfy" +include "../Collections/Sequences/Seq.dfy" + +module {:options "-functionSyntax:4"} UnicodeStrings refines AbstractUnicodeStrings { + + import Unicode + import Utf8EncodingForm + import Utf16EncodingForm + + predicate IsWellFormedString(s: string) + ensures |s| == 0 ==> IsWellFormedString(s) + { + Utf16EncodingForm.IsWellFormedCodeUnitSequence(Seq.Map(c => c as Utf16EncodingForm.CodeUnit, s)) + } + + function ToUTF8Checked(s: string): Option> { + var asCodeUnits := Seq.Map(c => c as Utf16EncodingForm.CodeUnit, s); + var utf32 :- Utf16EncodingForm.DecodeCodeUnitSequenceChecked(asCodeUnits); + var asUtf8CodeUnits := Utf8EncodingForm.EncodeScalarSequence(utf32); + Some(Seq.Map(c => c as byte, asUtf8CodeUnits)) + } + + function {:vcs_split_on_every_assert} FromUTF8Checked(bs: seq): Option { + var asCodeUnits := Seq.Map(c => c as Utf8EncodingForm.CodeUnit, bs); + var utf32 :- Utf8EncodingForm.DecodeCodeUnitSequenceChecked(asCodeUnits); + var asUtf16CodeUnits := Utf16EncodingForm.EncodeScalarSequence(utf32); + Some(Seq.Map(cu => cu as char, asUtf16CodeUnits)) + } + + function ToUTF16Checked(s: string): Option> { + if Utf16EncodingForm.IsWellFormedCodeUnitSequence(Seq.Map(c => c as Utf16EncodingForm.CodeUnit, s)) then + Some(Seq.Map(c => c as uint16, s)) + else + None + } + + function FromUTF16Checked(bs: seq): Option { + if Utf16EncodingForm.IsWellFormedCodeUnitSequence(Seq.Map(c => c as Utf16EncodingForm.CodeUnit, bs)) then + Some(Seq.Map(c => c as char, bs)) + else + None + } +} diff --git a/src/Wrappers.dfy b/src/Wrappers.dfy index 852d9a8c..167cb574 100644 --- a/src/Wrappers.dfy +++ b/src/Wrappers.dfy @@ -14,6 +14,12 @@ module {:options "-functionSyntax:4"} Wrappers { case None() => Failure("Option is None") } + function ToResult'(error: R): Result { + match this + case Some(v) => Success(v) + case None() => Failure(error) + } + function UnwrapOr(default: T): T { match this case Some(v) => v