From 97268e9e0efdb97cd4c0aa83f3f998a5267dbff4 Mon Sep 17 00:00:00 2001 From: Hank Donnay Date: Thu, 8 Feb 2024 09:27:25 -0600 Subject: [PATCH] dockerfile: implement additional expansions Signed-off-by: Hank Donnay --- rhel/dockerfile/generate.go | 5 + rhel/dockerfile/lex.go | 3 - rhel/dockerfile/testdata/Colon.txtar | 3 + rhel/dockerfile/testdata/Default.txtar | 15 ++ rhel/dockerfile/testdata/Error.txtar | 3 + rhel/dockerfile/testdata/Quoting.txtar | 11 ++ rhel/dockerfile/testdata/Vars.txtar | 37 ++++ rhel/dockerfile/varexpand_string.go | 35 ++++ rhel/dockerfile/vars.go | 244 +++++++++++++++++++++---- 9 files changed, 314 insertions(+), 42 deletions(-) create mode 100644 rhel/dockerfile/generate.go create mode 100644 rhel/dockerfile/testdata/Colon.txtar create mode 100644 rhel/dockerfile/testdata/Default.txtar create mode 100644 rhel/dockerfile/testdata/Error.txtar create mode 100644 rhel/dockerfile/testdata/Quoting.txtar create mode 100644 rhel/dockerfile/testdata/Vars.txtar create mode 100644 rhel/dockerfile/varexpand_string.go diff --git a/rhel/dockerfile/generate.go b/rhel/dockerfile/generate.go new file mode 100644 index 000000000..1828cfc68 --- /dev/null +++ b/rhel/dockerfile/generate.go @@ -0,0 +1,5 @@ +package dockerfile + +//go:generate -command stringer go run golang.org/x/tools/cmd/stringer +//go:generate stringer -type itemKind +//go:generate stringer -type varExpand -linecomment diff --git a/rhel/dockerfile/lex.go b/rhel/dockerfile/lex.go index 7182ed725..aabd68186 100644 --- a/rhel/dockerfile/lex.go +++ b/rhel/dockerfile/lex.go @@ -58,9 +58,6 @@ type item struct { type itemKind int -//go:generate -command stringer go run golang.org/x/tools/cmd/stringer -//go:generate stringer -type itemKind - const ( itemError itemKind = iota itemComment diff --git a/rhel/dockerfile/testdata/Colon.txtar b/rhel/dockerfile/testdata/Colon.txtar new file mode 100644 index 000000000..9b04b91da --- /dev/null +++ b/rhel/dockerfile/testdata/Colon.txtar @@ -0,0 +1,3 @@ +error: 
dockerfile: bad expansion of "error": rogue colon +-- Dockerfile -- +LABEL a ${error::} diff --git a/rhel/dockerfile/testdata/Default.txtar b/rhel/dockerfile/testdata/Default.txtar new file mode 100644 index 000000000..7beec5608 --- /dev/null +++ b/rhel/dockerfile/testdata/Default.txtar @@ -0,0 +1,15 @@ +-- Dockerfile -- +LABEL a ${unset} +LABEL b ${unset=set} +LABEL c ${unset} + +LABEL null= +LABEL d ${null:=reset} +-- Want -- +{ + "a": "", + "b": "set", + "c": "set", + "d": "reset", + "null": "" +} diff --git a/rhel/dockerfile/testdata/Error.txtar b/rhel/dockerfile/testdata/Error.txtar new file mode 100644 index 000000000..27ce45e82 --- /dev/null +++ b/rhel/dockerfile/testdata/Error.txtar @@ -0,0 +1,3 @@ +error: dockerfile: bad expansion of "unset": should error (error if unset or null) +-- Dockerfile -- +LABEL a ${unset:?should error} diff --git a/rhel/dockerfile/testdata/Quoting.txtar b/rhel/dockerfile/testdata/Quoting.txtar new file mode 100644 index 000000000..2beece49f --- /dev/null +++ b/rhel/dockerfile/testdata/Quoting.txtar @@ -0,0 +1,11 @@ +-- Dockerfile -- +ARG a=\" +ARG a=\' +ARG a=\\ +ARG a=\b +LABEL s='single\' quote: \\\b' d="double\" quote: \\\b\e" +-- Want -- +{ + "s": "single' quote: \\\\b", + "d": "double\" quote: \\\b\\e" +} diff --git a/rhel/dockerfile/testdata/Vars.txtar b/rhel/dockerfile/testdata/Vars.txtar new file mode 100644 index 000000000..90e117747 --- /dev/null +++ b/rhel/dockerfile/testdata/Vars.txtar @@ -0,0 +1,37 @@ +-- Dockerfile -- +ARG null='' +LABEL dash_a ${unset-default} +LABEL dash_b ${unset:-default} +LABEL dash_c ${null-default} +LABEL dash_d ${null:-default} +LABEL plus_a ${unset+default} +LABEL plus_b ${unset:+default} +LABEL plus_c ${null+default} +LABEL plus_d ${null:+default} + +ARG var=some-pattern. 
+LABEL prefix ${var#*e} +LABEL greedyprefix ${var##*e} +LABEL suffix ${var%e*.} +LABEL greedysuffix ${var%%e*.} +LABEL singlechar ${var%?} +LABEL greedysinglechar ${var%%?} +LABEL noreplace ${var#\?} +-- Want -- +{ + "dash_a": "default", + "dash_b": "default", + "dash_c": "", + "dash_d": "default", + "plus_a": "", + "plus_b": "", + "plus_c": "default", + "plus_d": "", + "prefix": "-pattern.", + "greedyprefix": "rn.", + "suffix": "some-patt", + "greedysuffix": "som", + "singlechar": "some-pattern", + "greedysinglechar": "some-pattern", + "noreplace": "some-pattern." +} diff --git a/rhel/dockerfile/varexpand_string.go b/rhel/dockerfile/varexpand_string.go new file mode 100644 index 000000000..dd382fa3f --- /dev/null +++ b/rhel/dockerfile/varexpand_string.go @@ -0,0 +1,35 @@ +// Code generated by "stringer -type varExpand -linecomment"; DO NOT EDIT. + +package dockerfile + +import "strconv" + +func _() { + // An "invalid array index" compiler error signifies that the constant values have changed. + // Re-run the stringer command to generate them again. + var x [1]struct{} + _ = x[varExpandSimple-0] + _ = x[varExpandDefault-1] + _ = x[varExpandDefaultNull-2] + _ = x[varSetDefault-3] + _ = x[varSetDefaultNull-4] + _ = x[varExpandAlternate-5] + _ = x[varExpandAlternateNull-6] + _ = x[varErrIfUnset-7] + _ = x[varErrIfUnsetNull-8] + _ = x[varTrimSuffix-9] + _ = x[varTrimSuffixGreedy-10] + _ = x[varTrimPrefix-11] + _ = x[varTrimPrefixGreedy-12] +} + +const _varExpand_name = "simple expansiondefault expansiondefault+null expansionset defaultset default, incl. nullalternate expansionalternate expanxion, incl. 
nullerror if unseterror if unset or nulltrim suffixgreedy trim suffixtrim prefixgreedy trim prefix" + +var _varExpand_index = [...]uint8{0, 16, 33, 55, 66, 89, 108, 139, 153, 175, 186, 204, 215, 233} + +func (i varExpand) String() string { + if i >= varExpand(len(_varExpand_index)-1) { + return "varExpand(" + strconv.FormatInt(int64(i), 10) + ")" + } + return _varExpand_name[_varExpand_index[i]:_varExpand_index[i+1]] +} diff --git a/rhel/dockerfile/vars.go b/rhel/dockerfile/vars.go index ba9cbc18b..3308c68ee 100644 --- a/rhel/dockerfile/vars.go +++ b/rhel/dockerfile/vars.go @@ -4,6 +4,7 @@ import ( "bytes" "errors" "fmt" + "regexp" "strings" "unicode" "unicode/utf8" @@ -27,6 +28,7 @@ type Vars struct { state varState expand varExpand esc bool + nullMod bool varName strings.Builder varExpand strings.Builder } @@ -68,15 +70,15 @@ var _ transform.Transformer = (*Vars)(nil) // expansions. func (v *Vars) Reset() { v.state = varConsume - v.expand = varExNone + v.expand = varExpandSimple v.esc = false + v.nullMod = false v.varName.Reset() v.varExpand.Reset() } // Transform implements transform.Transformer. func (v *Vars) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { - varStart := -1 r, sz := rune(0), 0 if v.state == varEmit { // If we're here, we need to emit first thing. @@ -112,18 +114,16 @@ func (v *Vars) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error v.esc = false nDst += utf8.EncodeRune(dst[nDst:], v.escchar) case r == VarMeta: - // Record current position in case the destination is too small - // and the process backs out. - varStart = nSrc + sz v.varName.Reset() v.varExpand.Reset() v.state = varBegin + v.nullMod = false continue } nDst += utf8.EncodeRune(dst[nDst:], r) case varBegin: // This arm is one rune beyond the metacharacter. 
- v.expand = varExNone + v.expand = varExpandSimple if r == '{' { v.state = varBraceName continue @@ -150,21 +150,32 @@ func (v *Vars) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error // This arm begins on the rune after the opening brace. switch r { case ':': - // POSIX variable expansion has ':' as a modifier on the forms - // of expansion ('-', '=', '+'), but the Dockerfile reference - // only mentions ':-' and ':+'. - peek, psz := utf8.DecodeRune(src[nSrc+sz:]) - switch peek { - case '-': - v.expand = varExDefault - case '+': - v.expand = varExIfSet + v.nullMod = true + continue + case '/': // Non-POSIX: substitutions + return nDst, nSrc, fmt.Errorf("dockerfile: bad expansion of %q: pattern substitution unsupported", v.varName.String()) + case '=': + v.expand = varSetDefault + case '-': + v.expand = varExpandDefault + case '+': + v.expand = varExpandAlternate + case '?': + v.expand = varErrIfUnset + case '%', '#': + switch r { + case '%': // suffix + v.expand = varTrimSuffix + case '#': // prefix + v.expand = varTrimPrefix default: - nSrc = varStart - return nDst, nSrc, fmt.Errorf("bad default spec at %d", nSrc+sz) + panic("unreachable") + } + // If doubled, consume the next rune as well and set greedy mode. + if peek, psz := utf8.DecodeRune(src[nSrc+sz:]); peek == r { + sz += psz + v.expand++ } - sz += psz - v.state = varBraceExpand case '}': n, done := v.emit(dst[nDst:]) if !done { @@ -177,6 +188,19 @@ func (v *Vars) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error default: v.varName.WriteRune(r) } + // Check if the expansion mode should have the modified null handling. + if (r == '-' || r == '+' || r == '?' || r == '=') && v.nullMod { + v.expand++ + v.nullMod = false + } + // If one of the valid expansion modifiers, jump to the next state. + if r == '-' || r == '+' || r == '?' || r == '=' || r == '%' || r == '#' { + v.state = varBraceExpand + } + // If the code ever gets here, there's a rogue colon. 
+ if v.nullMod { + return nDst, nSrc, fmt.Errorf("dockerfile: bad expansion of %q: rogue colon", v.varName.String()) + } case varBraceExpand: // This arm begins on the rune after the expansion specifier. if r != '}' { @@ -184,10 +208,17 @@ func (v *Vars) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error continue } n, done := v.emit(dst[nDst:]) - if !done { + switch { + case !done: nSrc += sz v.state = varEmit return nDst, nSrc, transform.ErrShortDst + case v.state == varError: + return nDst, nSrc, fmt.Errorf("dockerfile: bad expansion of %q: %s (%v)", + v.varName.String(), + v.varExpand.String(), + v.expand, + ) } nDst += n v.state = varConsume @@ -195,6 +226,10 @@ func (v *Vars) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error panic("state botch") } } + if v.esc { + // Ended in a "bare" escape character. Just pass it through. + nDst += utf8.EncodeRune(dst[nDst:], v.escchar) + } if v.state == varBareword && atEOF { // Hit EOF, so variable name is complete. n, done := v.emit(dst[nDst:]) @@ -217,31 +252,154 @@ func validName(r rune) bool { // enough space in dst. func (v *Vars) emit(dst []byte) (int, bool) { dstSz := len(dst) - var w string - res, ok := v.v[v.varName.String()] + // Names from the POSIX explanation of shell parameter expansion. + param := v.varName.String() + word := v.varExpand.String() + val, ok := v.v[param] switch v.expand { - case varExNone: // Use what's returned from the lookup. - w = res - case varExDefault: // Use lookup or default. - if ok { - w = res - break + case varExpandSimple: // Use what's returned from the lookup. + case varExpandDefault: // Use "parameter" if set, "word" if not. + if !ok { + val = word + } + case varExpandDefaultNull: // Use "parameter" if set, "word" if not or set to null. + if !ok || val == "" { + val = word } - w = v.varExpand.String() - case varExIfSet: // Use the expando or nothing. + case varExpandAlternate: // Use "word" if set. 
if ok { - w = v.varExpand.String() + val = word + } + case varExpandAlternateNull: // Use "word" if set and not null. + if ok && val != "" { + val = word + } + case varErrIfUnset: // Report an error if unset. + if !ok { + v.state = varError + return 0, true + } + case varErrIfUnsetNull: // Report an error if unset or null. + if !ok || val == "" { + v.state = varError + return 0, true + } + case varSetDefault, varSetDefaultNull: + switch v.expand { + case varSetDefault: // Set param if unset. + if !ok { + v.v[param] = word + } + case varSetDefaultNull: // Set "param" if unset or null. + if !ok || val == "" { + v.v[param] = word + } + default: + panic("unreachable") + } + v.expand = varExpandSimple + return v.emit(dst) + case varTrimPrefix, varTrimPrefixGreedy, varTrimSuffix, varTrimSuffixGreedy: + greedy := v.expand == varTrimPrefixGreedy || v.expand == varTrimSuffixGreedy + suffix := v.expand == varTrimSuffix || v.expand == varTrimSuffixGreedy + re, err := convertPattern([]byte(word), greedy, suffix) + if err != nil { + v.state = varError + return 0, true + } + ms := re.FindStringSubmatch(val) + switch len(ms) { + case 0, 1: + // No match, do nothing. + case 2: + if suffix { + val = strings.TrimSuffix(val, ms[1]) + } else { + val = strings.TrimPrefix(val, ms[1]) + } + default: + panic(fmt.Sprintf("pattern compiler is acting up; got: %#v", ms)) } default: panic("expand state botch") } - if dstSz < len(w) { + if dstSz < len(val) { return 0, false } - n := copy(dst, w) + n := copy(dst, val) return n, true } +// ConvertPattern transforms "pat" from (something like) the POSIX sh pattern +// language to a regular expression, then returns the compiled regexp. +// +// The resulting regexp reports the prefix/suffix to be removed as the first +// submatch when executed. +// +// This conversion is tricky, because extra hoops are needed to work around the +// leftmost-first behavior. 
+func convertPattern(pat []byte, greedy bool, suffix bool) (_ *regexp.Regexp, err error) { + var rePat strings.Builder + rePat.Grow(len(pat) * 2) // 🤷 + // This is needed to "push" a suffix pattern to the correct place. Note that + // the "greediness" is backwards: this is the input that's _not_ the + // pattern. + pad := `(?:.*)` + if greedy { + pad = `(?:.*?)` + } + + rePat.WriteByte('^') + if suffix { + rePat.WriteString(pad) + } + rePat.WriteByte('(') + off := 0 + r, sz := rune(0), 0 + for ; off < len(pat); off += sz { + r, sz = utf8.DecodeRune(pat[off:]) + if r == utf8.RuneError { + err = fmt.Errorf("dockerfile: bad pattern %q", pat) + return + } + switch r { + case '*': // Kleene star + rePat.WriteString(`.*`) + if !suffix && !greedy { + rePat.WriteByte('?') + } + case '?': // Single char + rePat.WriteByte('.') + case '\\': + peek, psz := utf8.DecodeRune(pat[off+sz:]) + switch peek { + case '*', '?', '\\': + // These are metacharacters in both languages, so the escapes should be passed through. + rePat.WriteRune(r) + rePat.WriteRune(peek) + sz += psz + case '}', '/': + // For these escapes, just skip the escape char: we want the literal. + // Handle slash-escapes, even though we don't support unanchored replacements. + default: + return nil, fmt.Errorf(`dockerfile: bad escape '\%c' in pattern %q`, peek, pat) + } + case '$', '(', ')', '+', '.', '[', ']', '^', '{', '|', '}': // Regexp metacharacters + rePat.WriteByte('\\') + fallthrough + default: + rePat.WriteRune(r) + } + } + rePat.WriteByte(')') + if !suffix { + rePat.WriteString(pad) + } + rePat.WriteByte('$') + + return regexp.Compile(rePat.String()) +} + // Assert that this is a SpanningTransformer. var _ transform.SpanningTransformer = (*Vars)(nil) @@ -303,16 +461,24 @@ const ( varBraceName varBraceExpand varEmit + varError ) // VarExpand tracks how the current brace expression expects to be expanded. type varExpand uint8 const ( - // Expand to the named variable or the empty string. 
- varExNone varExpand = iota - // Expand to the named variable or the provided word. - varExDefault - // Expand to the provided word or the empty string. - varExIfSet + varExpandSimple varExpand = iota // simple expansion + varExpandDefault // default expansion + varExpandDefaultNull // default+null expansion + varSetDefault // set default + varSetDefaultNull // set default, incl. null + varExpandAlternate // alternate expansion + varExpandAlternateNull // alternate expansion, incl. null + varErrIfUnset // error if unset + varErrIfUnsetNull // error if unset or null + varTrimSuffix // trim suffix + varTrimSuffixGreedy // greedy trim suffix + varTrimPrefix // trim prefix + varTrimPrefixGreedy // greedy trim prefix )