Skip to content

Commit

Permalink
Basic f-strings (interpolated strings) (#258)
Browse files Browse the repository at this point in the history
* Squashed commit of the following:

commit 1dbafe3
Author: Mikhail Yakshin <[email protected]>
Date:   Thu Feb 29 15:08:24 2024 +0000

    ExpressionsSpec: f-strings: added test with newline in the middle, fixed name of test with double quote in the middle

commit 8605f3f
Author: Mikhail Yakshin <[email protected]>
Date:   Thu Feb 29 15:04:40 2024 +0000

    Lexical: moved fstringItem + fstringChar from Expressions; Expressions: fixed rep -> repX to avoid whitespace problem

commit 17f9e40
Author: Mikhail Yakshin <[email protected]>
Date:   Sat Oct 14 21:07:58 2023 +0100

    Added similar escaped quote+space test for double-quoted string

commit c0083fb
Author: Mikhail Yakshin <[email protected]>
Date:   Sat Oct 14 20:48:19 2023 +0100

    Added tests suggested in PR review: quote in f-string, regular string in f-string, f-string in f-string

commit 1b22258
Author: Mikhail Yakshin <[email protected]>
Date:   Sat Oct 14 20:38:19 2023 +0100

    Apply suggestions from code review

    Co-authored-by: Petr Pučil <[email protected]>

commit 97ffceb
Author: Mikhail Yakshin <[email protected]>
Date:   Sat Oct 14 08:50:41 2023 +0100

    Added basic interpolated string (f-string) translation into target languages, mostly working off concatenation and existing other-types-to-string conversions. Supports only integers and strings now. Added basic unit tests.

    * GoTranslator: given non-string nature, added custom implementation using `fmt.Sprintf`
    * CommonLiterals: split generation of string "body" (without quotes) and adding quotes to it
    * CommonMethods: param name cleanup

commit 1c7c759
Author: Mikhail Yakshin <[email protected]>
Date:   Sat Oct 14 08:23:12 2023 +0100

    Implemented basic f-string parsing in expression language

* Adjust to adhere to Scala 2.13 + FastParse 2.3.3

* GoTranslator: fixed as per discussion in PR to replace everything with just `%v`

* Expressions: used raw sequence operators which bypass whitespace allowance, added lots of tests to ensure that it works as intended

* GoTranslator: fix problem with `%` in the raw string segment in f-strings; modified common test to accommodate this pattern

* ExpressionValidator: allow validation of Ast.expr.InterpolatedStr

* doInterpolatedStringLiteral: handle empty string special case
  • Loading branch information
GreyCat authored Mar 2, 2024
1 parent 9f26830 commit b47078c
Show file tree
Hide file tree
Showing 13 changed files with 273 additions and 63 deletions.
201 changes: 151 additions & 50 deletions jvm/src/test/scala/io/kaitai/struct/exprlang/ExpressionsSpec.scala
Original file line number Diff line number Diff line change
Expand Up @@ -189,76 +189,91 @@ class ExpressionsSpec extends AnyFunSpec {
)
}

// Boolean literals
it("parses true") {
Expressions.parse("true") should be (Bool(true))
}
describe("boolean literals") {
it("parses true") {
Expressions.parse("true") should be(Bool(true))
}

it("parses false") {
Expressions.parse("false") should be (Bool(false))
}
it("parses false") {
Expressions.parse("false") should be(Bool(false))
}

it("parses truer") {
Expressions.parse("truer") should be (Name(identifier("truer")))
it("parses truer") {
Expressions.parse("truer") should be(Name(identifier("truer")))
}
}

// Boolean operations
it("parses not foo") {
Expressions.parse("not foo") should be (
UnaryOp(
Ast.unaryop.Not,
Name(identifier("foo"))
describe("boolean operations") {
it("parses not foo") {
Expressions.parse("not foo") should be(
UnaryOp(
Ast.unaryop.Not,
Name(identifier("foo"))
)
)
)
}
}

it("parses note_len") {
Expressions.parse("note_len") should be (Name(identifier("note_len")))
}
it("parses note_len") {
Expressions.parse("note_len") should be(Name(identifier("note_len")))
}

it("parses notnot") {
Expressions.parse("notnot") should be (Name(identifier("notnot")))
}
it("parses notnot") {
Expressions.parse("notnot") should be(Name(identifier("notnot")))
}

it("parses not not true") {
Expressions.parse("not not true") should be (
UnaryOp(
Ast.unaryop.Not,
it("parses not not true") {
Expressions.parse("not not true") should be(
UnaryOp(
Ast.unaryop.Not,
Bool(true)
UnaryOp(
Ast.unaryop.Not,
Bool(true)
)
)
)
)
}
}

// String literals
it("parses simple string") {
Expressions.parse("\"abc\"") should be (Str("abc"))
}
describe("strings literals") {
it("parses simple string") {
Expressions.parse("\"abc\"") should be(Str("abc"))
}

it("parses interpolated string with newline") {
Expressions.parse("\"abc\\ndef\"") should be (Str("abc\ndef"))
}
it("parses simple string with space at the start") {
Expressions.parse("\" abc\"") should be(Str(" abc"))
}

it("parses non-interpolated string with newline") {
Expressions.parse("'abc\\ndef'") should be (Str("abc\\ndef"))
}
it("parses simple string with space at the end") {
Expressions.parse("\"abc \"") should be(Str("abc "))
}

it("parses interpolated string with zero char") {
Expressions.parse("\"abc\\0def\"") should be (Str("abc\u0000def"))
}
it("parses interpolated string with newline") {
Expressions.parse("\"abc\\ndef\"") should be(Str("abc\ndef"))
}

it("parses non-interpolated string with zero char") {
Expressions.parse("'abc\\0def'") should be (Str("abc\\0def"))
}
it("parses non-interpolated string with newline") {
Expressions.parse("'abc\\ndef'") should be(Str("abc\\ndef"))
}

it("parses interpolated string with octal char") {
Expressions.parse("\"abc\\75def\"") should be (Str("abc=def"))
}
it("parses interpolated string with zero char") {
Expressions.parse("\"abc\\0def\"") should be(Str("abc\u0000def"))
}

it("parses interpolated string with hex unicode char") {
Expressions.parse("\"abc\\u21bbdef\"") should be (Str("abc\u21bbdef"))
it("parses non-interpolated string with zero char") {
Expressions.parse("'abc\\0def'") should be(Str("abc\\0def"))
}

it("parses interpolated string with octal char") {
Expressions.parse("\"abc\\75def\"") should be(Str("abc=def"))
}

it("parses interpolated string with hex unicode char") {
Expressions.parse("\"abc\\u21bbdef\"") should be(Str("abc\u21bbdef"))
}

it("parses double-quoted string with double quote") {
Expressions.parse("\"this \\\" is a quote\"") should be(Str("this \" is a quote"))
}
}

// Casts
Expand Down Expand Up @@ -388,5 +403,91 @@ class ExpressionsSpec extends AnyFunSpec {
it("parses foo.bar") {
  // Attribute access: the receiver is the name `foo`, the member is the identifier `bar`.
  val receiver = Name(identifier("foo"))
  val expected = Attribute(receiver, identifier("bar"))
  Expressions.parse("foo.bar") should be(expected)
}

describe("f-strings") {
  // Literal text only: the whole body collapses into a single Str element.
  it("parses f-string with just a string") {
    Expressions.parse("f\"abc\"") should be(InterpolatedStr(Seq(
      Str("abc")
    )))
  }

  // A lone `{...}` placeholder yields just the embedded expression.
  it("parses f-string with just one expression") {
    Expressions.parse("f\"{123}\"") should be(InterpolatedStr(Seq(
      IntNum(123)
    )))
  }

  it("parses f-string with string + expression") {
    Expressions.parse("f\"foo={123}\"") should be(InterpolatedStr(Seq(
      Str("foo="),
      IntNum(123)
    )))
  }

  it("parses f-string with expression + string") {
    Expressions.parse("f\"{123}=abc\"") should be(InterpolatedStr(Seq(
      IntNum(123),
      Str("=abc")
    )))
  }

  it("parses f-string with str + expression + str") {
    Expressions.parse("f\"abc={123}=def\"") should be(InterpolatedStr(Seq(
      Str("abc="),
      IntNum(123),
      Str("=def")
    )))
  }

  // Escape sequences are decoded inside the literal part: `\n` in the
  // expression source becomes a real newline character in the Str.
  it("parses f-string with newline in the middle") {
    Expressions.parse("f\"abc\\ndef\"") should be(InterpolatedStr(Seq(Str("abc\ndef"))))
  }

  // An escaped double quote must not terminate the f-string.
  it("parses f-string with double quote in the middle") {
    Expressions.parse("f\"this \\\" is a quote\"") should be(InterpolatedStr(Seq(
      Str("this \" is a quote")
    )))
  }

  // A regular double-quoted string is a valid expression inside `{...}`.
  it("parses f-string with string in it") {
    Expressions.parse("f\"abc{\"def\"}ghi\"") should be(InterpolatedStr(Seq(
      Str("abc"),
      Str("def"),
      Str("ghi"),
    )))
  }

  // Leading/trailing whitespace belongs to the literal and must be kept.
  it("parses f-string with space at the start") {
    Expressions.parse("f\" foo\"") should be(InterpolatedStr(Seq(
      Str(" foo")
    )))
  }

  it("parses f-string with space at the end") {
    Expressions.parse("f\"foo \"") should be(InterpolatedStr(Seq(
      Str("foo ")
    )))
  }

  it("parses f-string with double quote at the start") {
    Expressions.parse("f\"\\\" is a quote\"") should be(InterpolatedStr(Seq(
      Str("\" is a quote")
    )))
  }

  it("parses f-string with space and double quote at the start") {
    Expressions.parse("f\" \\\" is a quote\"") should be(InterpolatedStr(Seq(
      Str(" \" is a quote")
    )))
  }

  // f-strings nest: the inner one is parsed as an ordinary expression.
  it("parses f-string with f-string in it") {
    Expressions.parse("f\"abc{f\"def\"}ghi\"") should be(InterpolatedStr(Seq(
      Str("abc"),
      InterpolatedStr(Seq(Str("def"))),
      Str("ghi"),
    )))
  }
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,13 @@ package io.kaitai.struct.translators
import io.kaitai.struct.datatype.DataType
import io.kaitai.struct.datatype.DataType._
import io.kaitai.struct.exprlang.{Ast, Expressions}
import io.kaitai.struct.format.{ClassSpec, FixedSized}
import io.kaitai.struct.format.{ClassSpec, FixedSized, Identifier}
import io.kaitai.struct.languages._
import io.kaitai.struct.languages.components.{CppImportList, LanguageCompilerStatic}
import io.kaitai.struct.{ImportList, RuntimeConfig, StringLanguageOutputWriter}
import org.scalatest.Tag
import org.scalatest.funsuite.AnyFunSuite
import org.scalatest.matchers.should.Matchers._
import io.kaitai.struct.format.Identifier

class TranslatorSpec extends AnyFunSuite {

Expand Down Expand Up @@ -677,6 +676,25 @@ class TranslatorSpec extends AnyFunSuite {
// sizeof of fixed user type
everybody("bitsizeof<block>", "56", CalcIntType)

// f-strings
// Literal-only f-string: the expected translation is a plain string literal;
// C++ and Python are listed separately because they wrap/prefix the literal.
// NOTE(review): presumably `everybodyExcept` applies the second argument to
// every target language not present in the map - confirm against the helper.
everybodyExcept("f\"abc\"", "\"abc\"", Map(
CppCompiler -> "std::string(\"abc\")",
PythonCompiler -> "u\"abc\""
), CalcStrType)

// f-string mixing literal text, an integer placeholder and a raw `%` character.
// The `%` is there on purpose: it must survive in targets that build the result
// via a format string (see the Go and Perl expectations below).
full("f\"abc{1}%def\"", CalcIntType, CalcStrType, Map[LanguageCompilerStatic, String](
CppCompiler -> "std::string(\"abc\") + kaitai::kstream::to_string(1) + std::string(\"%def\")",
CSharpCompiler -> "\"abc\" + Convert.ToString((long) (1), 10) + \"%def\"",
// Go emits a single fmt.Sprintf call: placeholders become `%v` and a literal `%` is doubled to `%%`.
GoCompiler -> "fmt.Sprintf(\"abc%v%%def\", 1)",
JavaCompiler -> "\"abc\" + Long.toString(1, 10) + \"%def\"",
JavaScriptCompiler -> "\"abc\" + (1).toString(10) + \"%def\"",
// NOTE(review): Lua concatenates strings with `..`, so `+` in the expected output looks
// suspicious - confirm whether the Lua translator should override the joining operator.
LuaCompiler -> "\"abc\" + tostring(1) + \"%def\"",
// Perl expectation has the `%` backslash-escaped inside the double-quoted literal.
PerlCompiler -> "\"abc\" . sprintf('%d', 1) . \"\\%def\"",
PHPCompiler -> "\"abc\" . strval(1) . \"%def\"",
PythonCompiler -> "u\"abc\" + str(1) + u\"%def\"",
RubyCompiler -> "\"abc\" + 1.to_s(10) + \"%def\"",
))

/**
* Checks translation of expression `src` into target languages
*
Expand Down
1 change: 1 addition & 0 deletions shared/src/main/scala/io/kaitai/struct/exprlang/Ast.scala
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ object Ast {
/** For internal use in the compiler. It cannot appear in an AST parsed from a user-supplied string. */
case class InternalName(id: Identifier) extends expr
/** Expression node holding a sequence of element expressions in `elts` (presumably a list/array literal - confirm against the parser). */
case class List(elts: Seq[expr]) extends expr
/**
 * Interpolated string ("f-string") literal, e.g. `f"abc={123}"`.
 * @param elts parts of the string in order of appearance: literal text is
 *   represented by `Str` nodes, `{...}` placeholders by the expression they contain
 */
case class InterpolatedStr(elts: Seq[expr]) extends expr

/**
* Implicit declaration of ordering, so expressions can be used for ordering operations, e.g.
Expand Down
10 changes: 10 additions & 0 deletions shared/src/main/scala/io/kaitai/struct/exprlang/Expressions.scala
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,15 @@ object Expressions {
/** Floating point number token; delegates to the `Lexical.floatnumber` rule. */
def FLOAT_NUMBER[$: P] = Lexical.floatnumber
/** String literal token; delegates to the `Lexical.stringliteral` rule and yields the string value. */
def STRING[$: P]: P[String] = Lexical.stringliteral

/**
 * Interpolated string literal: `f"` opener, any number of elements, closing `"`.
 *
 * The raw sequence operators (`~~/`, `~~`) and `repX` are used instead of `~` / `rep`
 * so that no whitespace is skipped between the pieces: spaces inside the f-string are
 * part of its literal text and must not be swallowed. The `/` after the opener is a cut:
 * once `f"` has been seen, the parser commits to this rule.
 */
def fstring[$: P]: P[Ast.expr.InterpolatedStr] = P("f\"" ~~/ fstringElement.repX ~~ "\"").map(Ast.expr.InterpolatedStr)
/**
 * Single element of an f-string: either a `{...}` placeholder or a run of literal text.
 */
def fstringElement[$: P]: P[Ast.expr] = P(
// placeholder is tried first; literal text never starts with `{` (see Lexical.fstringChar)
formatExpr |
// one or more adjacent literal items (plain chars / escape sequences) are glued into one Str
Lexical.fstringItem.repX(1).
map(_.mkString).
map(Ast.expr.Str)
)
/**
 * Placeholder inside an f-string: an arbitrary expression (`test`) wrapped in curly braces.
 * Regular `~` operators are used here, so whitespace handling inside the braces follows the
 * normal expression rules (whatever implicit whitespace is in scope for this object).
 */
def formatExpr[$: P]: P[Ast.expr] = P("{" ~/ test ~ "}")

def test[$: P]: P[Ast.expr] = P( or_test ~ ("?" ~ test ~ ":" ~ test).? ).map {
case (x, None) => x
case (condition, Some((ifTrue, ifFalse))) => Ast.expr.IfExp(condition, ifTrue, ifFalse)
Expand Down Expand Up @@ -119,6 +128,7 @@ object Expressions {
enumByName |
byteSizeOfType |
bitSizeOfType |
fstring |
STRING.rep(1).map(_.mkString).map(Ast.expr.Str) |
NAME.map((x) => x.name match {
case "true" => Ast.expr.Bool(true)
Expand Down
4 changes: 4 additions & 0 deletions shared/src/main/scala/io/kaitai/struct/exprlang/Lexical.scala
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,10 @@ object Lexical {
/** Double-quoted string literal; yields the concatenation of its parsed items (without the quotes). */
def doublestring[$: P]: P[String] = P("\"" ~/ doublestringitem.rep ~ "\"").map(_.mkString)
/** One item of a double-quoted string: a captured run of ordinary characters or an escape sequence. */
def doublestringitem[$: P] = P( doublestringchar.! | escapeseq )
/** Run of characters that are neither a backslash nor a double quote. */
def doublestringchar[$: P] = P( CharsWhile(!"\\\"".contains(_)) )

/**
 * One item of the literal part of an f-string: a captured run of ordinary
 * characters or an escape sequence (same escapes as in double-quoted strings).
 */
def fstringItem[$: P] = P(fstringChar.! | escapeseq)
/**
 * Run of characters that can appear verbatim in an f-string: anything except
 * `{` (starts a placeholder), backslash (starts an escape) and `"` (ends the string).
 */
def fstringChar[$: P] = P(CharsWhile(!"{\\\"".contains(_)))

/**
 * Escape sequence: a backslash followed by one of `quotedchar`, `quotedoctal` or `quotedhex`.
 * The cut (`~/`) after the backslash commits the parser to this rule once a backslash is seen.
 */
def escapeseq[$: P] = P( "\\" ~/ (quotedchar | quotedoctal | quotedhex) )

val QUOTED_CC = Map(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,8 @@ abstract class BaseTranslator(val provider: TypeProvider)
doFloatLiteral(n)
case Ast.expr.Str(s) =>
doStringLiteral(s)
case Ast.expr.InterpolatedStr(s) =>
doInterpolatedStringLiteral(s)
case Ast.expr.Bool(n) =>
doBoolLiteral(n)
case Ast.expr.EnumById(enumType, id, inType) =>
Expand Down Expand Up @@ -204,4 +206,23 @@ abstract class BaseTranslator(val provider: TypeProvider)
// for the language
/**
 * Renders access to a field of a translated expression using dot notation.
 * @param value expression whose field is accessed
 * @param attrName name of the field, passed through `doName`
 * @return `<translated value>.<translated name>`
 */
def anyField(value: Ast.expr, attrName: String): String = {
  val target = translate(value)
  val field = doName(attrName)
  s"$target.$field"
}

// f-strings
/**
 * Default rendering of an interpolated string: every part is converted to a
 * string expression and the parts are joined with ` + `.
 * @param exprs parts of the interpolated string, in order
 * @return target language expression; an empty string literal if there are no parts
 */
def doInterpolatedStringLiteral(exprs: Seq[Ast.expr]): String =
  exprs match {
    // nothing to join: emit an empty string literal instead of an empty expression
    case Seq() => doStringLiteral("")
    case parts => parts.map(anyToStr).mkString(" + ")
  }

/**
 * Renders an expression so that it evaluates to a string in the target language.
 * Integers are converted via `intToStr` with base 10, strings are translated as is.
 * @param value expression to convert
 * @return target language expression of string type
 * @throws UnsupportedOperationException if the expression is neither an integer nor a string
 */
def anyToStr(value: Ast.expr): String = {
  val valueType = detectType(value)
  valueType match {
    case _: IntType =>
      val decimalBase = Ast.expr.IntNum(10)
      intToStr(value, decimalBase)
    case _: StrType =>
      translate(value)
    case _ =>
      val message = s"unable to convert $valueType to string in format string (only integers and strings are supported)"
      throw new UnsupportedOperationException(message)
  }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,27 @@ trait CommonLiterals {
/** Renders an integer literal using the value's `toString` representation. */
def doIntLiteral(n: BigInt): String = n.toString
/** Renders a floating point literal using the value's `toString` representation. */
def doFloatLiteral(n: Any): String = n.toString

/**
 * Generates string literal enclosed in double quotes.
 * Characters with codes up to 0xff go through `strLiteralAsciiChar`,
 * all other characters through `strLiteralUnicode`.
 */
def doStringLiteral(s: String): String = {
  val body = new StringBuilder
  s.foreach { ch =>
    body.append(if (ch <= 0xff) strLiteralAsciiChar(ch) else strLiteralUnicode(ch))
  }
  "\"" + body.toString + "\""
}
/**
 * Renders a string as a literal wrapped in double quotes.
 * @param s string to render
 * @return body produced by `doStringLiteralBody`, enclosed in `"`
 */
def doStringLiteral(s: String): String = {
  val quote = "\""
  quote + doStringLiteralBody(s) + quote
}

/**
 * Renders the inside of a string literal (no surrounding quotes).
 * Each character is encoded on its own: codes up to 0xff are handled by
 * `strLiteralAsciiChar`, everything above by `strLiteralUnicode`.
 * @param s string to render
 * @return encoded characters joined together
 */
def doStringLiteralBody(s: String): String =
  s.iterator.map {
    case ch if ch <= 0xff => strLiteralAsciiChar(ch)
    case ch => strLiteralUnicode(ch)
  }.mkString

/** Renders a boolean literal using `Boolean.toString`, i.e. `true` or `false`. */
def doBoolLiteral(n: Boolean): String = n.toString

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ abstract trait CommonMethods[T] extends TypeDetector {

def bytesToStr(value: Ast.expr, encoding: String): T

def intToStr(value: Ast.expr, num: Ast.expr): T
def intToStr(value: Ast.expr, base: Ast.expr): T

def floatToInt(value: Ast.expr): T

Expand Down
Loading

0 comments on commit b47078c

Please sign in to comment.