Skip to content

Commit

Permalink
Merge pull request #74 from shwestrick/allow-extended-text-consts
Browse files Browse the repository at this point in the history
SuccessorML extended text constants
  • Loading branch information
shwestrick authored Jan 9, 2023
2 parents a67db32 + 266c422 commit 5188a13
Show file tree
Hide file tree
Showing 12 changed files with 161 additions and 31 deletions.
6 changes: 5 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -117,4 +117,8 @@ SuccessorML optional bar syntax is allowed.
SuccessorML record punning syntax is allowed.

`-allow-or-pats [true|false]` (default `false`) controls whether or not
SuccessorML or-pattern syntax is allowed.
SuccessorML or-pattern syntax is allowed.

`allow-extended-text-consts [true|false]` (default `false`) controls whether
or not SuccessorML extended text constants are allowed. Enable this to allow
for UTF-8 characters within strings.
2 changes: 0 additions & 2 deletions src/ast/sources.mlb
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,8 @@ local
MaybeLongToken.sml
AstType.sml
Ast.sml
AstAllows.sml
in
structure Token
structure MaybeLongToken
structure Ast
structure AstAllows
end
23 changes: 21 additions & 2 deletions src/ast/AstAllows.sml → src/base/AstAllows.sml
Original file line number Diff line number Diff line change
Expand Up @@ -6,17 +6,36 @@
structure AstAllows:
sig
type t
val make: {topExp: bool, optBar: bool, recordPun: bool, orPat: bool} -> t

val make:
{ topExp: bool
, optBar: bool
, recordPun: bool
, orPat: bool
, extendedText: bool
}
-> t

val topExp: t -> bool
val optBar: t -> bool
val recordPun: t -> bool
val orPat: t -> bool
val extendedText: t -> bool
end =
struct
datatype t = T of {topExp: bool, optBar: bool, recordPun: bool, orPat: bool}
datatype t =
T of
{ topExp: bool
, optBar: bool
, recordPun: bool
, orPat: bool
, extendedText: bool
}

fun make x = T x
fun topExp (T x) = #topExp x
fun optBar (T x) = #optBar x
fun recordPun (T x) = #recordPun x
fun orPat (T x) = #orPat x
fun extendedText (T x) = #extendedText x
end
4 changes: 3 additions & 1 deletion src/base/sources.mlb
Original file line number Diff line number Diff line change
Expand Up @@ -32,4 +32,6 @@ DocVar.sml
Tab.sml

PrettySimpleDoc.sml
PrettyTabbedDoc.sml
PrettyTabbedDoc.sml

AstAllows.sml
26 changes: 18 additions & 8 deletions src/lex-mlb/MLBLexer.sml
Original file line number Diff line number Diff line change
Expand Up @@ -26,14 +26,24 @@ struct


fun expectSMLToken check src =
case Lexer.next src of
NONE => raise Fail "Lexer bug!"
| SOME ptok =>
(** TODO: some inefficiency here *)
if check (Token.fromPre ptok) then
success (MLBToken.Pretoken.fromSMLPretoken ptok)
else
raise Fail "Lexer bug!"
let
val smlLexerAllows = AstAllows.make
{ topExp = false
, optBar = false
, recordPun = false
, orPat = false
, extendedText = false
}
in
case Lexer.next smlLexerAllows src of
NONE => raise Fail "Lexer bug!"
| SOME ptok =>
(** TODO: some inefficiency here *)
if check (Token.fromPre ptok) then
success (MLBToken.Pretoken.fromSMLPretoken ptok)
else
raise Fail "Lexer bug!"
end


fun next (src: Source.t) : MLBToken.Pretoken.t option =
Expand Down
80 changes: 68 additions & 12 deletions src/lex/Lexer.sml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
(** Copyright (c) 2020 Sam Westrick
(** Copyright (c) 2020-2023 Sam Westrick
*
* See the file LICENSE for details.
*)
Expand All @@ -8,12 +8,12 @@ sig
(** Get the next token in the given source. If there isn't one, returns NONE.
* raises Error if there's a problem.
*)
val next: Source.t -> Token.Pretoken.t option
val next: AstAllows.t -> Source.t -> Token.Pretoken.t option

(** Get all the tokens in the given source.
* raises Error if there's a problem.
*)
val tokens: Source.t -> Token.t Seq.t
val tokens: AstAllows.t -> Source.t -> Token.t Seq.t
end =
struct

Expand All @@ -28,7 +28,7 @@ struct
{header = "SYNTAX ERROR", pos = pos, what = what, explain = explain})


fun next (src: Source.t) : Token.Pretoken.t option =
fun next allows (src: Source.t) : Token.Pretoken.t option =
let
val startOffset = Source.absoluteStartOffset src
val src = Source.wholeFile src
Expand Down Expand Up @@ -57,6 +57,17 @@ struct
check (fn c' => c = c')


fun isPrint c =
let val i = Char.ord c
in 32 <= i andalso i <= 126
end

fun isMaybeUnicode c =
let val i = Char.ord c
in (128 <= i andalso i <= 253) (* ?? *)
end


(** ====================================================================
* STRING HANDLING
*)
Expand All @@ -80,24 +91,69 @@ struct
*)

fun advance_oneCharOrEscapeSequenceInString s (args as {stringStart}) =
if is backslash at s then
if
is backslash at s
then
advance_inStringEscapeSequence (s + 1) args
else if is #"\"" at s then

else if
is #"\"" at s
then
NONE
else if is #"\n" at s orelse isEndOfFileAt s then

else if
is #"\n" at s orelse isEndOfFileAt s
then
error
{ pos = slice (stringStart, stringStart + 1)
, what = "Unclosed string."
, explain = NONE
}
else if not (check Char.isPrint at s) then

else if
check isMaybeUnicode at s andalso not (AstAllows.extendedText allows)
then
error
{ pos = slice (s, s + 1)
, what = "Invalid character."
, explain = SOME
"Strings can only contain printable (visible or \
\whitespace) ASCII characters."
"There might be a Unicode (UTF-8) byte here. In Standard ML, \
\strings may only contain printable ASCII characters. However, \
\SuccessorML allows for UTF-8. To enable this feature, \
\use the command-line argument \
\'-allow-extended-text-consts true'"
}

else if
not (check isPrint at s) andalso not (check isMaybeUnicode at s)
andalso AstAllows.extendedText allows
then
error
{ pos = slice (s, s + 1)
, what = "Invalid character."
, explain = SOME
"There is an invalid byte here which may or may not be \
\visible. The \
\byte is invalid because it is not a printable ASCII \
\character, and also because it does \
\not appear to be UTF-8. \
\(UTF-8 bytes are allowed here due to either the \
\command-line argument '-allow-extended-text-consts true' or \
\an MLB annotation \"allowExtendedTextConsts true\".) "
}

else if
not (AstAllows.extendedText allows) andalso not (check isPrint at s)
then
error
{ pos = slice (s, s + 1)
, what = "Invalid character."
, explain = SOME
"There is an invalid byte here which may or may not be \
\visible. This byte is invalid because it is not a printable \
\character (visible or whitespace)."
}

else
SOME (EndOfChar (s + 1))

Expand Down Expand Up @@ -597,7 +653,7 @@ struct
end


fun tokens src =
fun tokens allows src =
let
val startOffset = Source.absoluteStartOffset src
val endOffset = Source.absoluteEndOffset src
Expand All @@ -613,7 +669,7 @@ struct
if offset >= endOffset then
finish acc
else
case next (Source.drop src offset) of
case next allows (Source.drop src offset) of
NONE => finish acc
| SOME tok => loop (tok :: acc) (tokEndOffset tok)
in
Expand Down
17 changes: 17 additions & 0 deletions src/parse-mlb/ParseAnnotations.sml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ struct
, topExp = AstAllows.topExp a
, recordPun = AstAllows.recordPun a
, orPat = AstAllows.orPat a
, extendedText = AstAllows.extendedText a
}


Expand All @@ -29,6 +30,7 @@ struct
, topExp = AstAllows.topExp a
, recordPun = b
, orPat = AstAllows.orPat a
, extendedText = AstAllows.extendedText a
}


Expand All @@ -38,6 +40,17 @@ struct
, topExp = AstAllows.topExp a
, recordPun = AstAllows.recordPun a
, orPat = b
, extendedText = AstAllows.extendedText a
}


fun allowExtendedTextConsts a b =
AstAllows.make
{ optBar = AstAllows.optBar a
, topExp = AstAllows.topExp a
, recordPun = AstAllows.recordPun a
, orPat = AstAllows.orPat a
, extendedText = b
}


Expand All @@ -59,6 +72,10 @@ struct
| ["allowOrPats", "false"] => allowOrPats allows false
| ["allowRecordPunExps", "true"] => allowRecordPunExps allows true
| ["allowRecordPunExps", "false"] => allowRecordPunExps allows false
| ["allowExtendedTextConsts", "true"] =>
allowExtendedTextConsts allows true
| ["allowExtendedTextConsts", "false"] =>
allowExtendedTextConsts allows false
| _ => allows
end

Expand Down
2 changes: 1 addition & 1 deletion src/parse/Parser.sml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ struct
fun parseWithInfdict allows infdict src =
let
(** This might raise Lexer.Error *)
val allTokens = Lexer.tokens src
val allTokens = Lexer.tokens allows src
val toks = Seq.filter (not o Token.isCommentOrWhitespace) allTokens
val numToks = Seq.length toks
fun tok i = Seq.nth toks i
Expand Down
10 changes: 10 additions & 0 deletions src/smlfmt.sml
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,13 @@ val optionalArgDesc =
\ Valid options are: true, false\n\
\ (default 'false')\n\
\\n\
\ [-allow-extended-text-consts B]\n\
\ Enable/disable SuccessorML extended text\n\
\ constants. Enable this to allow for UTF-8\n\
\ characters within strings.\n\
\ Valid options are: true, false\n\
\ (default 'false')\n\
\\n\
\ [--help] print this message\n"


Expand All @@ -73,6 +80,8 @@ val allowTopExp = CommandLineArgs.parseBool "allow-top-level-exps" true
val allowOptBar = CommandLineArgs.parseBool "allow-opt-bar" false
val allowRecordPun = CommandLineArgs.parseBool "allow-record-pun-exps" false
val allowOrPat = CommandLineArgs.parseBool "allow-or-pats" false
val allowExtendedText =
CommandLineArgs.parseBool "allow-extended-text-consts" false
val doDebug = CommandLineArgs.parseFlag "debug-engine"
val doForce = CommandLineArgs.parseFlag "force"
val doHelp = CommandLineArgs.parseFlag "help"
Expand All @@ -88,6 +97,7 @@ val allows = AstAllows.make
, optBar = allowOptBar
, recordPun = allowRecordPun
, orPat = allowOrPat
, extendedText = allowExtendedText
}

val _ =
Expand Down
16 changes: 12 additions & 4 deletions src/syntax-highlighting/SyntaxHighlighter.sml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ sig
(** Use just lexing info to color a sequence of tokens from a single source.
* Tokens must be in order as they appear in the source.
*)
val highlight: Source.t -> TerminalColorString.t
val highlight: AstAllows.t -> Source.t -> TerminalColorString.t

(** Similar to above, but always succeeds by skipping over characters as
* necessary.
Expand Down Expand Up @@ -92,9 +92,9 @@ struct
end


fun highlight source =
fun highlight allows source =
let
val toks = Lexer.tokens source
val toks = Lexer.tokens allows source
val startOffset = Source.absoluteStartOffset source
val endOffset = Source.absoluteEndOffset source
val wholeSrc = Source.wholeFile source
Expand All @@ -105,6 +105,14 @@ struct

fun fuzzyTokens src =
let
val smlLexerAllows = AstAllows.make
{ topExp = true
, optBar = true
, recordPun = true
, orPat = true
, extendedText = true
}

val originalSrc = src
val startOffset = Source.absoluteStartOffset src
val endOffset = Source.absoluteEndOffset src
Expand All @@ -120,7 +128,7 @@ struct
if offset >= endOffset then
finish acc
else
((case Lexer.next (Source.drop src offset) of
((case Lexer.next smlLexerAllows (Source.drop src offset) of
NONE => finish acc
| SOME tok => loop (tok :: acc) (tokEndOffset tok))
handle _ => loop acc (offset + 1))
Expand Down
4 changes: 4 additions & 0 deletions test/succeed/extended-text.mlb
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
$(SML_LIB)/basis/basis.mlb
ann "allowExtendedTextConsts true" in
successor-ml/extended-text.sml
end
2 changes: 2 additions & 0 deletions test/succeed/successor-ml/extended-text.sml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
val a = "🍰"
val x = print "🂡\n"

0 comments on commit 5188a13

Please sign in to comment.