Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ABNF parser rewrite #213

Merged
merged 23 commits into from
Jan 21, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
7dba220
A complete re-write of the ABNF parser. Removed dependencies on uri-j…
ldthomas Apr 29, 2024
c885a24
Updated package/siwe-parser. package.json: 1)the dependency on apg-js…
ldthomas May 6, 2024
e179393
Committed four files missed in the last commit.
ldthomas May 6, 2024
a99fe58
Merge remote-tracking branch 'upstream/main'
ldthomas May 6, 2024
d623f92
Merge branch 'fix'. Used 'fix' to make and test changes required for …
ldthomas May 6, 2024
b113a20
Modified the ABNF grammar and parser to match the spec for (present/e…
ldthomas May 6, 2024
8551663
Added new function "isUri()" to "packages/siwe-parser/lib/abnf.ts". A…
ldthomas May 8, 2024
36fd7b8
Modifed the ParsedMessage callbacks to validate the semantics as well…
ldthomas May 11, 2024
b52ce69
Updated validation of date-times in ParsedMessage. Fixed message obje…
ldthomas May 13, 2024
ee48397
Removed debugging (doTrace) code from t-chars.test.ts".
ldthomas May 18, 2024
5874106
Fixed siwe/lib/utils.ts bug. Converted all unit tests to JSON file me…
ldthomas May 24, 2024
60c01d0
Removed a development-only script from siwe package.json.
ldthomas May 25, 2024
449e6d6
remove commented code
chunningham Nov 25, 2024
95730e2
fix invalid opts check
chunningham Nov 25, 2024
add709f
fix tests, SiweErrorType has no values, is not object
chunningham Nov 25, 2024
b2b982b
Fix test
w4ll3 Dec 2, 2024
b53e7d9
Fix typos
w4ll3 Dec 2, 2024
1310a47
Lint files
w4ll3 Dec 2, 2024
c375dc0
Typos
w4ll3 Dec 2, 2024
3ceee36
Upgrade to ES2018
w4ll3 Dec 2, 2024
5c8388c
Remove deprecated method
w4ll3 Dec 2, 2024
3d4ed3d
Bump version
w4ll3 Dec 2, 2024
7b7bb4f
Bump siwe-parser version
w4ll3 Jan 21, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1,104 changes: 483 additions & 621 deletions package-lock.json

Large diffs are not rendered by default.

507 changes: 145 additions & 362 deletions packages/siwe-parser/lib/abnf.ts

Large diffs are not rendered by default.

529 changes: 529 additions & 0 deletions packages/siwe-parser/lib/callbacks.ts

Large diffs are not rendered by default.

51 changes: 27 additions & 24 deletions packages/siwe-parser/lib/parsers.test.ts
Original file line number Diff line number Diff line change
@@ -1,34 +1,37 @@
import { ParsedMessage } from "./abnf";
import * as fs from "fs";

const parsingPositive: object = JSON.parse(fs.readFileSync('../../test/parsing_positive.json', 'utf8'));
const parsingNegative: object = JSON.parse(fs.readFileSync('../../test/parsing_negative.json', 'utf8'));
const parsingPositive: object = JSON.parse(
fs.readFileSync("../../test/parsing_positive.json", "utf8")
);
const parsingNegative: object = JSON.parse(
fs.readFileSync("../../test/parsing_negative.json", "utf8")
);

//
describe("Successfully parses with ABNF Client", () => {
test.concurrent.each(Object.entries(parsingPositive))(
"Parses message successfully: %s",
(test_name, test) => {
const parsedMessage = new ParsedMessage(test.message);
for (const [field, value] of Object.entries(test.fields)) {
if (value === null) {
expect(parsedMessage[field]).toBeUndefined();
}
else if (typeof value === "object") {
expect(parsedMessage[field]).toStrictEqual(value);
} else {
expect(parsedMessage[field]).toBe(value);
}
}
}
);
test.concurrent.each(Object.entries(parsingPositive))(
"Parses message successfully: %s",
(test_name, test) => {
const parsedMessage = new ParsedMessage(test.message);
for (const [field, value] of Object.entries(test.fields)) {
if (value === null) {
expect(parsedMessage[field]).toBeUndefined();
} else if (typeof value === "object") {
expect(parsedMessage[field]).toStrictEqual(value);
} else {
expect(parsedMessage[field]).toBe(value);
}
}
}
);
});

describe("Successfully fails with ABNF Client", () => {
test.concurrent.each(Object.entries(parsingNegative))(
"Fails to parse message: %s",
(test_name, test) => {
expect(() => new ParsedMessage(test)).toThrow();
}
);
test.concurrent.each(Object.entries(parsingNegative))(
"Fails to parse message: %s",
(test_name, test) => {
expect(() => new ParsedMessage(test)).toThrow();
}
);
});
10 changes: 4 additions & 6 deletions packages/siwe-parser/lib/parsers.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
import { ParsedMessage as ABNFParsedMessage } from "./abnf";
export * from './utils';
export {
ABNFParsedMessage as ParsedMessage
};


import { isUri as ABNFisUri } from "./abnf";
export * from "./utils";
export { ABNFParsedMessage as ParsedMessage };
export { ABNFisUri as isUri };
212 changes: 212 additions & 0 deletions packages/siwe-parser/lib/siwe-abnf.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,212 @@
; LDT 05/06/2024
; modified in several significant ways
; 1) Literal strings are replaced with numbers and ranges (%d32 & %d32-126, etc.) when possible.
; TRB and especially TRG operators are much more efficient than TLS operators.
; 2) Two rules, authority and URI, are used multiple times in different contexts. These rules will be reproduced and renamed
; in order to a) recognize the context and b) remove unneccary callback functions for certain contexts.
; This will simiplify recognizing contexts AND remove unneccesary callbacks
; 2.a) domain is defined as authority-d which is identical to authority except that there will be no
; callback functions defined on authority-d or any of its *-d components.
; 2.b) The resource URI is defined as URI-r and its components defined as *-r.
; In this way, callback functions can be defined on URI and is components while
; leaving URI-r to be parsed identically with no unnecessary callback functions to slow it down.
; 3) IPv6address does not work because of APG's "first-success disambiguation" and "greedy" repetitions.
; IPv6address redefined and validations moved to callback functions (semantic vs syntactic validation)
; Redefinition requires negative look-ahead operators, https://en.wikipedia.org/wiki/Syntactic_predicate
; That is SABNF instead of simple ABNF.
; 4) IPv4address fails because of "first-success disambiguation".
; This could be fixed with rearrangement of the alternative terms. However, it would still not
; accept zero-padded (leading zeros) decimal octets.
; Therefore, IPv4address is also done with callback functions and semantic validation.
; 5) The negative look-ahead operator is also needed in the definition of host to
; prevent failure with a reg-name that begins with an IPv4 address.
; 6) NOTE: host = 1.1.1.256 is a valid host name even though it is an invalid IPv4address.
; The IPv4address alternative fails but the reg-name alternative succeeds.
; 7) The Ethereum spec (https://eips.ethereum.org/EIPS/eip-4361) message format ABNF
; allows for empty statements. Because of the "first success disambiguation" of APG
; the an explicit "empty-statement" rule is required to match the spec's intent.


sign-in-with-ethereum =
oscheme domain %s" wants you to sign in with your Ethereum account:" LF
address LF
((LF statement LF LF) / empty-statement / (LF LF))
%s"URI: " URI LF
%s"Version: " version LF
%s"Chain ID: " chain-id LF
%s"Nonce: " nonce LF
%s"Issued At: " issued-at
[ LF ex-title expiration-time ]
[ LF nb-title not-before ]
[ LF ri-title request-id ]
[ LF re-title resources ]
ex-title = %s"Expiration Time: "
nb-title = %s"Not Before: "
ri-title = %s"Request ID: "
re-title = %s"Resources:"
oscheme = [ ALPHA *( ALPHA / DIGIT / %d43 / %d45-46 ) "://" ]
domain = authority-d
address = "0x" 40*40HEXDIG
; Must also conform to captilization
; checksum encoding specified in EIP-55
; where applicable (EOAs).

statement = 1*( %d97-122 / %d65-90 / %d48-57 / %d32-33 / %d35-36 / %d38-59 / %d61 / %d63-64 / %d91 / %d93 / %d95 / %d126)
; The purpose is to exclude LF (line breaks).
; LDT 10/04/2023: Do you mean %d32-126? All printing characters
empty-statement = LF LF LF
version = "1"
nonce = 8*( ALPHA / DIGIT )
issued-at = date-time
expiration-time = date-time
not-before = date-time
request-id = *pchar
chain-id = 1*DIGIT
; See EIP-155 for valid CHAIN_IDs.
resources = *( LF resource )
resource = "- " URI-r

; ------------------------------------------------------------------------------
; RFC 3986

URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
hier-part = "//" authority path-abempty
/ path-absolute
/ path-rootless
/ path-empty
scheme = ALPHA *( ALPHA / DIGIT / %d43 / %d45-46 )
authority = [ userinfo-at ] host [ ":" port ]
path-abempty = *( "/" segment )
path-absolute = "/" [ segment-nz *( "/" segment ) ]
path-rootless = segment-nz *( "/" segment )
path-empty = ""
userinfo-at = userinfo %d64
; userinfo redefined to include the "@" so that it will fail without it
; otherwise userinfo can match host and then the parser will backtrack
; incorrectly keeping the captured userinfo phrase
userinfo = *(%d97-122 / %d65-90 / %d48-57 / pct-encoded / %d33 / %d36 / %d38-46 / %d58-59 / %d61 / %d95 / %d126)
host = IP-literal / (IPv4address !reg-name-char) / reg-name
; negative look-ahead required to prevent IPv4address from being recognized as first part of reg-name
; same fix as https://github.com/garycourt/uri-js/issues/4
IP-literal = "[" ( IPv6address / IPvFuture ) "]"
IPvFuture = "v" 1*HEXDIG "." 1*( %d97-122 / %d65-90 / %d48-57 / %d33 / %d36 /%d38-46 / %d58-59 /%d61 /%d95 / %d126 )
IPv6address = nodcolon / dcolon
nodcolon = (h16n *h16cn) [%d58 IPv4address]
dcolon = [h16 *h16c] %d58.58 (((h16n *h16cn) [%d58 IPv4address]) / [IPv4address])
h16 = 1*4HEXDIG
h16c = %d58 1*4HEXDIG
h16n = 1*4HEXDIG !%d46
h16cn = %d58 1*4HEXDIG !%d46
IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
; Here we will will use callback functions to evaluate and validate the (possibly zero-padded) dec-octet.
dec-octet = *3dec-digit
dec-digit = %d48-57
reg-name = *reg-name-char
reg-name-char = %d97-122 / %d65-90 / %d48-57 / pct-encoded / %d33 / %d36 / %d38-46 / %d59 / %d61 /%d95 / %d126
port = *DIGIT
query = *(pchar / %d47 / %d63)
fragment = *(pchar / %d47 / %d63)

; URI-r is a redefiniton of URI but without the callback functions attached to it
; it reuses athority-d from domain
URI-r = scheme-r ":" hier-part-r [ "?" query-r ] [ "#" fragment-r ]
hier-part-r = "//" authority-d path-abempty-r
/ path-absolute-r
/ path-rootless-r
/ path-empty-r
scheme-r = ALPHA *( ALPHA / DIGIT / %d43 / %d45-46 )
query-r = *(pchar / %d47 / %d63)
fragment-r = *(pchar / %d47 / %d63)

; authority-d is a redefinition of authority for capturing the domian phrase
; but without callback functions
; it is reused for URI- for the same reason
authority-d = [ userinfo-d %d64 ] host-d [ ":" port-d ]
userinfo-d = *(%d97-122 / %d65-90 / %d48-57 / pct-encoded / %d33 / %d36 / %d38-46 / %d58-59 / %d61 / %d95 / %d126)
host-d = IP-literal / (IPv4address !reg-name-char) / reg-name
port-d = *DIGIT

; for use with URI-r
path-abempty-r = *( "/" segment )
path-absolute-r = "/" [ segment-nz *( "/" segment ) ]
path-rootless-r = segment-nz *( "/" segment )
path-empty-r = ""
segment = *pchar
segment-nz = 1*pchar
pchar = (%d97-122 / %d65-90 / %d48-57 / pct-encoded / %d33 / %d36 / %d38-46 /%d58-59 / %d61 / %d64 / %d95 / %d126)
pct-encoded = %d37 HEXDIG HEXDIG

; no longer needed - expanded for all usage for fewer branches in the parse there
; and more efficient use of the TBS & TRG operators in place of TLS and rule names
; does not work with APG probably because of "first-success disambiguation" and greedy repetitions.
; will replace with semantic checking of valid number of h16s
;IPv6address = 6( h16 ":" ) ls32
; / "::" 5( h16 ":" ) ls32
; / [ h16 ] "::" 4( h16 ":" ) ls32
; / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
; / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
; / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32
; / [ *4( h16 ":" ) h16 ] "::" ls32
; / [ *5( h16 ":" ) h16 ] "::" h16
; / [ *6( h16 ":" ) h16 ] "::"
;ls32 = ( h16 ":" h16 ) / IPv4address
; dec-octet does not work because of "first-success disambiguation".
; Must have the longest (3-digit) numbers first.
; Even so, this form does not accept leading zeros.
; There does not seem to be a clear standard for this (https://en.wikipedia.org/wiki/Dot-decimal_notation)
; however and early RFC 790 did show leading-zero padding of the three digits.
;dec-octet = DIGIT ; 0-9
; / %x31-39 DIGIT ; 10-99
; / "1" 2DIGIT ; 100-199
; / "2" %x30-34 DIGIT ; 200-249
; / "25" %x30-35 ; 250-255
;statement = 1*( reserved / unreserved / " " )
;scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
;authority = [ userinfo "@" ] host [ ":" port ]
;userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
;query = *( pchar / "/" / "?" )
;fragment = *( pchar / "/" / "?" )
;IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
;reg-name = *( unreserved / pct-encoded / sub-delims )
;pct-encoded = "%" HEXDIG HEXDIG
;pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
;path-empty = 0pchar; deprecated - empty literal string, "", is more efficient
;unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
;reserved = gen-delims / sub-delims
;gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
;sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
; / "*" / "+" / "," / ";" / "="
;HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F"

; ------------------------------------------------------------------------------
; RFC 3339

date-fullyear = 4DIGIT
date-month = 2DIGIT ; 01-12
date-mday = 2DIGIT ; 01-28, 01-29, 01-30, 01-31 based on
; month/year
time-hour = 2DIGIT ; 00-23
time-minute = 2DIGIT ; 00-59
time-second = 2DIGIT ; 00-58, 00-59, 00-60 based on leap second
; rules
time-secfrac = "." 1*DIGIT
time-numoffset = ("+" / "-") time-hour ":" time-minute
time-offset = "Z" / time-numoffset

partial-time = time-hour ":" time-minute ":" time-second
[time-secfrac]
full-date = date-fullyear "-" date-month "-" date-mday
full-time = partial-time time-offset

date-time = full-date "T" full-time

; ------------------------------------------------------------------------------
; RFC 5234

ALPHA = %x41-5A / %x61-7A ; A-Z / a-z
LF = %x0A
; linefeed
DIGIT = %x30-39
; 0-9
HEXDIG = %d48-57 / %d65-70 / %d97-102

Loading
Loading