From 54f1120e5e241ff937dae5ed980a7b7a4b6882a1 Mon Sep 17 00:00:00 2001 From: Paul Jolly Date: Sun, 11 Feb 2024 15:05:19 +0000 Subject: [PATCH] preprocessor: add support for ellipsis sanitiser tag directive https://cuelang.org/cl/1176701 introduced support for a new ellipsis sanitiser. This CL introduces very rough support for an ellipsis sanitiser tag that can be specified as part of the doc comment for a statement in a script node. Preprocessor-No-Write-Cache: true Signed-off-by: Paul Jolly Change-Id: I2c99eeecd2bb4653f80dd1dd36205156b927d597 Dispatch-Trailer: {"type":"trybot","CL":1176702,"patchset":8,"ref":"refs/changes/02/1176702/8","targetBranch":"alpha"} --- internal/cmd/preprocessor/cmd/rootfile.go | 22 ++++++---- internal/cmd/preprocessor/cmd/sanitisers.go | 6 +++ internal/cmd/preprocessor/cmd/script_node.go | 34 ++++++++++++++++ internal/cmd/preprocessor/cmd/tag.go | 40 +++++++++++++++++++ .../testdata/execute_ellipsis_sanitiser.txtar | 12 ++++++ internal/cmd/preprocessor/cmd/txtar_node.go | 33 +++------------ 6 files changed, 113 insertions(+), 34 deletions(-) diff --git a/internal/cmd/preprocessor/cmd/rootfile.go b/internal/cmd/preprocessor/cmd/rootfile.go index 8cc0191a7..7767f0657 100644 --- a/internal/cmd/preprocessor/cmd/rootfile.go +++ b/internal/cmd/preprocessor/cmd/rootfile.go @@ -678,14 +678,22 @@ func (m *multiStepScript) run() (runerr error) { m.fatalf("%v: failed to parse exit code from %q at position %v in output: %v\n%s", m, exitCodeStr, len(out)-len(walk)-len(exitCodeStr)-1, err, out) } - for _, s := range m.page.config.Sanitisers { - matched, err := s.matches(stmt) - if err != nil { - m.fatalf("%v: failed to determine if sanitiser should apply for %q: %v", m, stmt.Cmd, err) - } - if !matched { - continue + var sans []sanitiser + if stmt.sanitisers != nil { + sans = stmt.sanitisers + } else { + for _, s := range m.page.config.Sanitisers { + matched, err := s.matches(stmt) + if err != nil { + m.fatalf("%v: failed to determine if sanitiser 
should apply for %q: %v", m, stmt.Cmd, err) + } + if !matched { + continue + } + sans = append(sans, s) } + } + for _, s := range sans { if err := s.sanitise(stmt); err != nil { m.fatalf("%v: failed to sanitise output for %q: %v", m, stmt.Cmd, err) } diff --git a/internal/cmd/preprocessor/cmd/sanitisers.go b/internal/cmd/preprocessor/cmd/sanitisers.go index ca1e3a4b8..84daeb71b 100644 --- a/internal/cmd/preprocessor/cmd/sanitisers.go +++ b/internal/cmd/preprocessor/cmd/sanitisers.go @@ -196,6 +196,8 @@ type ellipsisSanitiser struct { Start int `json:"start"` } +func (e *ellipsisSanitiser) init() error { return nil } + func (e *ellipsisSanitiser) sanitise(cmd *commandStmt) error { if strings.Count(cmd.Output, "\n") <= e.Start { return nil @@ -211,3 +213,7 @@ type ellipsisSanitiserMatcher struct { ellipsisSanitiser matchSpec } + +func (e *ellipsisSanitiserMatcher) init() error { + return e.matchSpec.init() +} diff --git a/internal/cmd/preprocessor/cmd/script_node.go b/internal/cmd/preprocessor/cmd/script_node.go index 713377e5e..1e1c09d45 100644 --- a/internal/cmd/preprocessor/cmd/script_node.go +++ b/internal/cmd/preprocessor/cmd/script_node.go @@ -19,6 +19,7 @@ import ( "encoding/base64" "fmt" "regexp" + "strconv" "strings" "mvdan.cc/sh/v3/syntax" @@ -127,15 +128,47 @@ func (s *scriptNode) validate() { var sb strings.Builder if err := s.rf.shellPrinter.Print(&sb, stmt); err != nil { s.errorf("%v: failed to print statement at %v: %v", s, stmt.Position, err) + continue } cmdStmt.Cmd = sb.String() sb.Reset() if err := s.rf.shellPrinter.Print(&sb, &doc); err != nil { s.errorf("%v: failed to print doc comment for stmt at %v: %v", s, stmt.Position, err) + continue } cmdStmt.Doc = sb.String() + // Now check if there are any known tag-based sanitiser directives + // + // TODO: we really need a different mechanism to gather the tags (and + // their args) which might appear here, and then parse them. For now this + // is a bit of a hack. 
+ args, matched, err := findTag([]byte(cmdStmt.Doc), tagEllipsis, "") + if err != nil { + s.errorf("%v: failed to search for ellipsis tag: %v", s, err) + continue + } + if matched { + // TODO: this needs a more principled approach. Very hacky for now. + + // Must be only one arg + var start int + switch len(args) { + case 0: + case 1: + start64, err := strconv.ParseInt(args[0], 10, 32) + if err != nil { + s.errorf("%v: failed to parse integer from %s argument %q: %v", s, tagEllipsis, args[0], err) + continue + } + start = int(start64) + } + cmdStmt.sanitisers = []sanitiser{&ellipsisSanitiser{ + Start: start, + }} + } + // Revert the negated state for completeness given // we set stmt as part of cmdStmt for sanitiser etc // checks where it might matter @@ -161,6 +194,7 @@ type commandStmt struct { ExitCode int `json:"exitCode"` Output string `json:"output"` outputFence string + sanitisers []sanitiser } var tagPrefix = regexp.MustCompile(`^#\S`) diff --git a/internal/cmd/preprocessor/cmd/tag.go b/internal/cmd/preprocessor/cmd/tag.go index a40420f0d..a0ac166bf 100644 --- a/internal/cmd/preprocessor/cmd/tag.go +++ b/internal/cmd/preprocessor/cmd/tag.go @@ -14,6 +14,12 @@ package cmd +import ( + "bufio" + "bytes" + "fmt" +) + const ( // tagNorun is the tag used in a txtar-based directive like code or script // to indicate that that node should not be run. For an upload directive, it @@ -40,4 +46,38 @@ const ( // #location top-left top-right bottom // tagLocation = "location" + + tagEllipsis = "ellipsis" ) + +// findTag searches for the first #$key (or #$key($arg) if arg is non empty) +// tag line in src. Tags are # prefixed lines where the # at the beginning of +// the line must be followed by a non-space character. args contains the +// contents of the quote-aware args that follow the tag name. present indicates +// whether the tag identified by key was present or not. err will be non-nil if +// there were errors in parsing the arguments to a tag. 
+// +// TODO: work out whether we want to handle comments in tag lines (which are +// themselves comments already). +// +// TODO: add an explicit test for when arg != "" +func findTag(src []byte, key, arg string) (args []string, present bool, err error) { + prefix := "#" + key + if arg != "" { + prefix += "(" + arg + ")" + } + sc := bufio.NewScanner(bytes.NewReader(src)) + lineNo := 1 + for sc.Scan() { + line := bytes.TrimSpace(sc.Bytes()) + if after, found := bytes.CutPrefix(bytes.TrimSpace(line), []byte(prefix)); found { + args, err := parseLineArgs(string(after)) + if err != nil { + err = fmt.Errorf("%d %w", lineNo, err) + } + return args, true, err + } + lineNo++ + } + return nil, false, nil +} diff --git a/internal/cmd/preprocessor/cmd/testdata/execute_ellipsis_sanitiser.txtar b/internal/cmd/preprocessor/cmd/testdata/execute_ellipsis_sanitiser.txtar index 0d60c42b2..ff7f28041 100644 --- a/internal/cmd/preprocessor/cmd/testdata/execute_ellipsis_sanitiser.txtar +++ b/internal/cmd/preprocessor/cmd/testdata/execute_ellipsis_sanitiser.txtar @@ -40,6 +40,11 @@ content: dir: page: { >{{{with script "en" "five"}}} >seq 1 10 >{{{end}}} +> +>{{{with script "en" "tag"}}} +>#ellipsis 2 +>seq 1 20 +>{{{end}}} -- golden/hugo/content/en/dir/index.md -- --- title: JSON Superset @@ -59,3 +64,10 @@ $ seq 1 10 5 ... ``` + +```text { title="TERMINAL" codeToCopy="c2VxIDEgMjAK" } +$ seq 1 20 +1 +2 +... +``` diff --git a/internal/cmd/preprocessor/cmd/txtar_node.go b/internal/cmd/preprocessor/cmd/txtar_node.go index b55ff28f7..62cadc908 100644 --- a/internal/cmd/preprocessor/cmd/txtar_node.go +++ b/internal/cmd/preprocessor/cmd/txtar_node.go @@ -15,7 +15,6 @@ package cmd import ( - "bufio" "bytes" "fmt" "io" @@ -57,39 +56,19 @@ func (t *txtarNode) writeSourceTo(b *bytes.Buffer) { p("%send%s", t.rf.page.config.LeftDelim, t.rf.page.config.RightDelim) } -// tag searches for the first #$key (or #$key($arg) if arg is non empty) tag -// line in the comment section of s's txtar archive. 
Tags are # prefixed lines -// where the # at the beginning of the line must be followed by a non-space -// character. args contains the contents of the quote-aware args that follow -// the tag name. present indicates whether the tag identified by key was -// present or not. err will be non-nil if there were errors in parsing the -// arguments to a tag. -// -// Note that this searches the sourceArchive. +// tag searches for the first tag directive named key in the sourceArchive. +// See findTag for more details on the search. // // TODO: work out whether we want to handle comments in tag lines (which are // themselves comments already). // // TODO: add an explicit test for when arg != "" func (t *txtarNode) tag(key, arg string) (args []string, present bool, err error) { - prefix := "#" + key - if arg != "" { - prefix += "(" + arg + ")" - } - sc := bufio.NewScanner(bytes.NewReader(t.sourceArchive.Comment)) - lineNo := 1 - for sc.Scan() { - line := bytes.TrimSpace(sc.Bytes()) - if after, found := bytes.CutPrefix(bytes.TrimSpace(line), []byte(prefix)); found { - args, err := parseLineArgs(string(after)) - if err != nil { - err = fmt.Errorf("%s:%d %w", t.label, lineNo, err) - } - return args, true, err - } - lineNo++ + args, present, err = findTag(t.sourceArchive.Comment, key, arg) + if err != nil { + err = fmt.Errorf("%s: %v", t.label, err) } - return nil, false, nil + return args, present, err } // parseLineArgs is factored out of the testscript code. We use the same logic