diff --git a/Makefile b/Makefile index c3dc83c77221..24180a9d54c1 100644 --- a/Makefile +++ b/Makefile @@ -45,6 +45,7 @@ makefiles += \ tests/functional/git-hashing/local.mk \ tests/functional/dyn-drv/local.mk \ tests/functional/test-libstoreconsumer/local.mk \ + tests/functional/repl_characterization/local.mk \ tests/functional/plugins/local.mk endif diff --git a/tests/functional/repl_characterization/.gitignore b/tests/functional/repl_characterization/.gitignore new file mode 100644 index 000000000000..4c6412c2f379 --- /dev/null +++ b/tests/functional/repl_characterization/.gitignore @@ -0,0 +1 @@ +test-repl-characterization diff --git a/tests/functional/repl_characterization/data/basic.ast b/tests/functional/repl_characterization/data/basic.ast new file mode 100644 index 000000000000..d494b00aa684 --- /dev/null +++ b/tests/functional/repl_characterization/data/basic.ast @@ -0,0 +1,11 @@ +Commentary "meow meow meow" +Command "command" +Output "output output one" +Output "" +Output "" +Output "output output two" +Commentary "meow meow" +Command "command two" +Output "output output output" +Commentary "commentary" +Output "output output output" diff --git a/tests/functional/repl_characterization/data/basic.test b/tests/functional/repl_characterization/data/basic.test new file mode 100644 index 000000000000..d6b8427b4e94 --- /dev/null +++ b/tests/functional/repl_characterization/data/basic.test @@ -0,0 +1,11 @@ +meow meow meow + nix-repl> command + output output one + + + output output two +meow meow + nix-repl> command two + output output output +commentary + output output output diff --git a/tests/functional/repl_characterization/data/basic_repl.test b/tests/functional/repl_characterization/data/basic_repl.test new file mode 100644 index 000000000000..a8dea6d7cbfd --- /dev/null +++ b/tests/functional/repl_characterization/data/basic_repl.test @@ -0,0 +1,60 @@ + nix-repl> 1 + 1 + 2 + + nix-repl> :doc builtins.head + Synopsis: builtins.head list + + Return the first 
element of a list; abort evaluation if + the argument isn’t a list or is an empty list. You can + test whether a list is empty by comparing it with []. + + nix-repl> f = a: "" + a + +Expect the trace to not contain any traceback: + + nix-repl> f 2 + error: + … while evaluating a path segment + at «string»:1:10: + 1| a: "" + a + | ^ + + error: cannot coerce an integer to a string: 2 + + nix-repl> :te + showing error traces + +Expect the trace to have traceback: + + nix-repl> f 2 + error: + … from call site + at «string»:1:1: + 1| f 2 + | ^ + + … while calling anonymous lambda + at «string»:1:2: + 1| a: "" + a + | ^ + + … while evaluating a path segment + at «string»:1:10: + 1| a: "" + a + | ^ + + error: cannot coerce an integer to a string: 2 + +Turning it off should also work: + + nix-repl> :te + not showing error traces + + nix-repl> f 2 + error: + … while evaluating a path segment + at «string»:1:10: + 1| a: "" + a + | ^ + + error: cannot coerce an integer to a string: 2 diff --git a/tests/functional/repl_characterization/local.mk b/tests/functional/repl_characterization/local.mk new file mode 100644 index 000000000000..e7b36c99c3b6 --- /dev/null +++ b/tests/functional/repl_characterization/local.mk @@ -0,0 +1,17 @@ +programs += test-repl-characterization + +test-repl-characterization_DIR := $(d) + +test-repl-characterization_ENV := _NIX_TEST_UNIT_DATA=$(d)/data + +# do not install +test-repl-characterization_INSTALL_DIR := + +test-repl-characterization_SOURCES := \ + $(wildcard $(d)/*.cc) \ + +test-repl-characterization_CXXFLAGS += -I src/libutil -I tests/unit/libutil-support + +test-repl-characterization_LIBS = libutil libutil-test-support + +test-repl-characterization_LDFLAGS = $(THREAD_LDFLAGS) $(SODIUM_LIBS) $(EDITLINE_LIBS) $(BOOST_LDFLAGS) $(LOWDOWN_LIBS) $(GTEST_LIBS) diff --git a/tests/functional/repl_characterization/repl_characterization.cc b/tests/functional/repl_characterization/repl_characterization.cc new file mode 100644 index 
000000000000..28e4706a304f --- /dev/null +++ b/tests/functional/repl_characterization/repl_characterization.cc @@ -0,0 +1,101 @@ +#include + +#include +#include +#include +#include + +#include "test-session.hh" +#include "file-descriptor.hh" +#include "processes.hh" +#include "source-accessor.hh" +#include "tests/characterization.hh" +#include "tests/cli-literate-parser.hh" +#include "tests/terminal-code-eater.hh" + +using namespace std::string_literals; + +namespace nix { + +static constexpr const char * REPL_PROMPT = "nix-repl> "; + +// ASCII ENQ character +static constexpr const char * AUTOMATION_PROMPT = "\x05"; + +static std::string_view trimOutLog(std::string_view outLog) +{ + const std::string trailer = "\n"s + AUTOMATION_PROMPT; + if (outLog.ends_with(trailer)) { + outLog.remove_suffix(trailer.length()); + } + return outLog; +} + +class ReplSessionTest : public CharacterizationTest +{ + Path unitTestData = getUnitTestData(); + +public: + Path goldenMaster(std::string_view testStem) const override + { + return unitTestData + "/" + testStem; + } + + void runReplTest(std::string_view const & content, std::vector extraArgs = {}) const + { + auto syntax = CLILiterateParser::parse(REPL_PROMPT, content); + + Strings args{"--quiet", "repl", "--quiet", "--extra-experimental-features", "repl-automation"}; + args.insert(args.end(), extraArgs.begin(), extraArgs.end()); + + // TODO: why the fuck does this need two --quiets + auto process = RunningProcess::start("nix", args); + auto session = TestSession{AUTOMATION_PROMPT, std::move(process)}; + + const auto expectedOutput = CLILiterateParser::unparse(REPL_PROMPT, syntax, 0); + + for (auto & bit : syntax) { + if (bit->kind() != CLILiterateParser::NodeKind::COMMAND) { + continue; + } + + if (!session.waitForPrompt()) { + ASSERT_TRUE(false); + } + session.runCommand(bit->text()); + } + if (!session.waitForPrompt()) { + ASSERT_TRUE(false); + } + session.close(); + + auto parsedOutLog = 
/** Wraps a single raw byte so that streaming it produces a debug-friendly rendering. */
struct DebugChar
{
    char c;
};

/**
 * Streams a DebugChar.
 *
 * Printable bytes are written verbatim; every other byte is written as a
 * `0x`-prefixed hexadecimal value in the range 0x0..0xff. The stream is
 * switched back to decimal afterwards.
 */
static std::ostream & operator<<(std::ostream & s, DebugChar c)
{
    // The <cctype> classification functions are only defined for EOF and
    // values representable as unsigned char; a plain (possibly signed) char
    // holding a byte >= 0x80 would be negative, so convert first.
    const auto byte = static_cast<unsigned char>(c.c);

    if (isprint(byte)) {
        s << c.c;
    } else {
        s << std::hex << "0x" << static_cast<int>(byte) << std::dec;
    }
    return s;
}
throw SysError("dupping stderr"); + execvp(executable.c_str(), stringsToCharPtrs(args).data()); + throw SysError("exec did not happen"); + }); + + procStdout.writeSide.close(); + procStdin.readSide.close(); + + return RunningProcess{ + .pid = pid, + .procStdin = std::move(procStdin), + .procStdout = std::move(procStdout), + }; +} + +static std::ostream & operator<<(std::ostream & os, ReplOutputParser::State s) +{ + switch (s) { + case ReplOutputParser::State::Prompt: + os << "prompt"; + break; + case ReplOutputParser::State::Context: + os << "context"; + break; + } + return os; +} + +void ReplOutputParser::transition(State new_state, char responsible_char, bool wasPrompt) +{ + if (DEBUG_REPL_PARSER) { + std::cerr << "transition " << new_state << " for " << DebugChar{responsible_char} + << (wasPrompt ? " [prompt]" : "") << "\n"; + } + state = new_state; + pos_in_prompt = 0; +} + +bool ReplOutputParser::feed(char c) +{ + if (c == '\n') { + transition(State::Prompt, c); + return false; + } + switch (state) { + case State::Context: + break; + case State::Prompt: + if (pos_in_prompt == prompt.length() - 1 && prompt[pos_in_prompt] == c) { + transition(State::Context, c, true); + return true; + } + if (pos_in_prompt >= prompt.length() - 1 || prompt[pos_in_prompt] != c) { + transition(State::Context, c); + break; + } + pos_in_prompt++; + break; + } + return false; +} + +/** Waits for the prompt and then returns if a prompt was found */ +bool TestSession::waitForPrompt() +{ + std::vector buf(1024); + + for (;;) { + ssize_t res = read(proc.procStdout.readSide.get(), buf.data(), buf.size()); + + if (res < 0) { + throw SysError("read"); + } + if (res == 0) { + return false; + } + + bool foundPrompt = false; + for (ssize_t i = 0; i < res; ++i) { + // foundPrompt = foundPrompt || outputParser.feed(buf[i]); + bool wasEaten = true; + eater.feed(buf[i], [&](char c) { + wasEaten = false; + foundPrompt = outputParser.feed(buf[i]) || foundPrompt; + + outLog.push_back(c); + }); + + if 
(DEBUG_REPL_PARSER) { + std::cerr << "raw " << DebugChar{buf[i]} << (wasEaten ? " [eaten]" : "") << "\n"; + } + } + + if (foundPrompt) { + return true; + } + } +} + +void TestSession::close() +{ + proc.procStdin.close(); + proc.procStdout.close(); +} + +void TestSession::runCommand(std::string command) +{ + if (DEBUG_REPL_PARSER) + std::cerr << "runCommand " << command << "\n"; + command += "\n"; + // We have to feed a newline into the output parser, since Nix might not + // give us a newline before a prompt in all cases (it might clear line + // first, e.g.) + outputParser.feed('\n'); + // Echo is disabled, so we have to make our own + outLog.append(command); + writeFull(proc.procStdin.writeSide.get(), command, false); +} + +}; diff --git a/tests/functional/repl_characterization/test-session.hh b/tests/functional/repl_characterization/test-session.hh new file mode 100644 index 000000000000..aa05b1ae24c5 --- /dev/null +++ b/tests/functional/repl_characterization/test-session.hh @@ -0,0 +1,70 @@ +#pragma once +///@file + +#include +#include + +#include "file-descriptor.hh" +#include "processes.hh" +#include "tests/terminal-code-eater.hh" + +namespace nix { + +struct RunningProcess +{ + pid_t pid; + Pipe procStdin; + Pipe procStdout; + + static RunningProcess start(std::string executable, Strings args); +}; + +/** DFA that catches repl prompts */ +class ReplOutputParser +{ +public: + ReplOutputParser(std::string prompt) + : prompt(prompt) + { + assert(!prompt.empty()); + } + /** Feeds in a character and returns whether this is an open prompt */ + bool feed(char c); + + enum class State { + Prompt, + Context, + }; + +private: + State state = State::Prompt; + size_t pos_in_prompt = 0; + std::string const prompt; + + void transition(State state, char responsible_char, bool wasPrompt = false); +}; + +struct TestSession +{ + RunningProcess proc; + ReplOutputParser outputParser; + TerminalCodeEater eater; + std::string outLog; + std::string prompt; + + 
TestSession(std::string prompt, RunningProcess && proc) + : proc(std::move(proc)) + , outputParser(prompt) + , eater({}) + , outLog({}) + , prompt(prompt) + { + } + + bool waitForPrompt(); + + void runCommand(std::string command); + + void close(); +}; +}; diff --git a/tests/unit/libutil-support/local.mk b/tests/unit/libutil-support/local.mk index 5f7835c9f612..1c0ff8f6bae6 100644 --- a/tests/unit/libutil-support/local.mk +++ b/tests/unit/libutil-support/local.mk @@ -14,6 +14,7 @@ libutil-test-support_SOURCES := $(wildcard $(d)/tests/*.cc) libutil-test-support_CXXFLAGS += $(libutil-tests_EXTRA_INCLUDES) -libutil-test-support_LIBS = libutil +# libexpr so we can steal their string printer from print.cc +libutil-test-support_LIBS = libutil libexpr libutil-test-support_LDFLAGS := $(THREAD_LDFLAGS) -lrapidcheck diff --git a/tests/unit/libutil-support/tests/cli-literate-parser.cc b/tests/unit/libutil-support/tests/cli-literate-parser.cc new file mode 100644 index 000000000000..7b601f3c1238 --- /dev/null +++ b/tests/unit/libutil-support/tests/cli-literate-parser.cc @@ -0,0 +1,238 @@ +#include "cli-literate-parser.hh" +#include "box_ptr.hh" +#include "types.hh" +#include "source-accessor.hh" +#include +#include +#include + +using namespace std::string_literals; + +namespace nix { + +static constexpr const bool DEBUG_PARSER = false; + +constexpr auto CLILiterateParser::state_debug(ParserState s) -> const char * +{ + switch (s) { + case ParserState::INDENT: + return "indent"; + case ParserState::COMMENTARY: + return "commentary"; + case ParserState::PROMPT: + return "prompt"; + case ParserState::COMMAND: + return "command"; + case ParserState::OUTPUT_LINE: + return "output_line"; + default: + assert(false); + } +} + +void PrintTo(std::vector const & nodes, std::ostream * os) +{ + for (auto & node : nodes) { + *os << node->print() << "\\n"; + } +} + +auto CLILiterateParser::parse(std::string prompt, std::string_view const & input, size_t indent) + -> std::vector +{ + 
/**
 * Creates a parser for transcripts that use `prompt` as the command prompt
 * and `indent` leading spaces in front of prompt and output lines.
 *
 * With an indent of zero the parser starts out expecting a prompt directly;
 * otherwise it starts out expecting indentation. The prompt must not be
 * empty.
 */
CLILiterateParser::CLILiterateParser(std::string prompt, size_t indent)
    : prompt_(prompt)
    , indent_(indent)
    , state_(indent != 0 ? ParserState::INDENT : ParserState::PROMPT)
    , last_was_output_(false)
    , state_pos_(0)
    , line_accumulator_()
    , syntax_()
{
    assert(!prompt_.empty());
}
+ transition(ParserState::OUTPUT_LINE); + } + + line_accumulator_.push_back(c); + break; + + case ParserState::COMMENTARY: + case ParserState::COMMAND: + case ParserState::OUTPUT_LINE: + line_accumulator_.push_back(c); + break; + } +} + +void CLILiterateParser::on_newline() +{ + ParserState last_state = state_; + bool new_last_was_output = false; + + switch (last_state) { + case ParserState::INDENT: + // a newline following output is considered part of that output + if (last_was_output_) { + new_last_was_output = true; + syntax_.push_back(make_box_ptr(std::move(line_accumulator_))); + break; + } + [[fallthrough]]; + case ParserState::COMMENTARY: + syntax_.push_back(make_box_ptr(std::move(line_accumulator_))); + break; + case ParserState::COMMAND: + syntax_.push_back(make_box_ptr(std::move(line_accumulator_))); + break; + case ParserState::OUTPUT_LINE: + new_last_was_output = true; + [[fallthrough]]; + case ParserState::PROMPT: + // INDENT followed by newline is also considered a blank output line + syntax_.push_back(make_box_ptr(std::move(line_accumulator_))); + break; + } + transition(ParserState::INDENT); + line_accumulator_.clear(); + last_was_output_ = new_last_was_output; +} + +void CLILiterateParser::feed(std::string_view s) +{ + for (char8_t ch : s) { + feed(ch); + } +} + +void CLILiterateParser::transition(ParserState new_state) +{ + // When we expect INDENT and we are parsing without indents, commentary + // cannot exist, so we want to transition directly into PROMPT before + // resuming normal processing. 
+ if (new_state == ParserState::INDENT && indent_ == 0) { + new_state = ParserState::PROMPT; + } + + state_pos_ = 0; + state_ = new_state; +} + +auto CLILiterateParser::syntax() const -> std::vector const & +{ + return syntax_; +} + +auto CLILiterateParser::unparse(const std::string & prompt, const std::vector & syntax, size_t indent) + -> std::string +{ + std::string indent_str(indent, ' '); + std::ostringstream out{}; + + for (auto & node : syntax) { + switch (node->kind()) { + case NodeKind::COMMENTARY: + // TODO: should not ignore commentary + break; + case NodeKind::COMMAND: + out << indent_str << prompt << node->text() << "\n"; + break; + case NodeKind::OUTPUT: + out << indent_str << node->text() << "\n"; + break; + } + } + + return out.str(); +} + +void CLILiterateParser::tidyOutputForComparison(std::vector & syntax) +{ + std::vector newSyntax{}; + + // Eat trailing newlines, so assume that the very end was actually a command + bool lastWasCommand = true; + bool newLastWasCommand = true; + for (auto it = std::make_move_iterator(syntax.rbegin()); it != std::make_move_iterator(syntax.rend()); ++it) { + ErasedNode item = *it; + + lastWasCommand = newLastWasCommand; + // chomp commentary + if (item->kind() == NodeKind::COMMENTARY) { + continue; + } + + if (item->kind() == NodeKind::COMMAND) { + newLastWasCommand = true; + + if (item->text() == "") { + // chomp empty commands + continue; + } + } + + if (item->kind() == NodeKind::OUTPUT) { + std::string trimmedText = boost::algorithm::trim_right_copy(item->text()); + if (lastWasCommand && trimmedText == "") { + // chomp empty text above commands + continue; + } + + // real output, stop chomping + newLastWasCommand = false; + + item = make_box_ptr(std::move(trimmedText)); + } + newSyntax.push_back(std::move(item)); + } + + std::reverse(newSyntax.begin(), newSyntax.end()); + syntax = std::move(newSyntax); +} + +}; diff --git a/tests/unit/libutil-support/tests/cli-literate-parser.hh 
b/tests/unit/libutil-support/tests/cli-literate-parser.hh new file mode 100644 index 000000000000..9f1bac6f9a90 --- /dev/null +++ b/tests/unit/libutil-support/tests/cli-literate-parser.hh @@ -0,0 +1,167 @@ +#pragma once +///@file + +#include +#include +#include +#include + +#include "box_ptr.hh" +#include "libexpr/print.hh" +#include "libutil/fmt.hh" + +namespace nix { +/* + * A DFA parser for literate test cases for CLIs. + * + * FIXME: implement merging of these, so you can auto update cases that have + * comments. + * + * Format: + * COMMENTARY + * INDENT PROMPT COMMAND + * INDENT OUTPUT + * + * e.g. + * commentary commentary commentary + * nix-repl> :t 1 + * an integer + * + * Yields: + * Commentary "commentary commentary commentary" + * Command ":t 1" + * Output "an integer" + * + * Note: one Output line is generated for each line of the sources, because + * this is effectively necessary to be able to align them in the future to + * auto-update tests. + */ +class CLILiterateParser +{ +public: + + enum class NodeKind { + COMMENTARY, + COMMAND, + OUTPUT, + }; + + class Node + { + protected: + virtual auto _equals(Node const & other) const -> bool = 0; + public: + virtual auto kind() const -> NodeKind = 0; + virtual auto text() const -> std::string = 0; + virtual auto print() const -> std::string = 0; + auto operator==(Node const & other) const -> bool + { + return _equals(other); + } + auto operator!=(Node const & other) const -> bool + { + return !(*this == other); + }; + virtual ~Node(){}; + }; + + // TODO: move out of line, it is probably an ODR violation as-is. 
/*
 * DEFINE_NODE_KIND(NAME, NAME_CAPS) declares a concrete Node subclass called
 * NAME:
 *  - it stores its text in the public const member `content`, which text()
 *    returns;
 *  - kind() returns NodeKind::NAME_CAPS;
 *  - two nodes compare equal when both kind() and text() agree;
 *  - print() yields the class name, a space, and the content as written by
 *    printLiteralString.
 *
 * The macro is expanded once per node kind below and then undefined again.
 */
#define DEFINE_NODE_KIND(NAME, NAME_CAPS) \
    class NAME : public virtual Node \
    { \
    protected: \
        virtual auto _equals(Node const & other) const -> bool override \
        { \
            return other.kind() == this->kind() && other.text() == this->text(); \
        } \
    public: \
        const std::string content; \
        NAME(std::string content) \
            : content(content) \
        { \
        } \
\
        virtual auto kind() const -> NodeKind override \
        { \
            return NodeKind::NAME_CAPS; \
        } \
        virtual auto text() const -> std::string override \
        { \
            return content; \
        } \
        virtual auto print() const -> std::string override \
        { \
            std::ostringstream s{}; \
            printLiteralString(s, this->content); \
            return fmt(#NAME " %1%", s.str()); \
        } \
\
        virtual ~NAME() override = default; \
    }

    DEFINE_NODE_KIND(Commentary, COMMENTARY);
    DEFINE_NODE_KIND(Command, COMMAND);
    DEFINE_NODE_KIND(Output, OUTPUT);
#undef DEFINE_NODE_KIND
state_debug(ParserState) -> const char *; + + const std::string prompt_; + const size_t indent_; + + /** State of the DFA */ + ParserState state_; + + /** Last line was output, so we consider a blank to be part of the output */ + bool last_was_output_; + + /** Position within the state; meaning depends on the state in question */ + size_t state_pos_; + + std::string line_accumulator_; + + std::vector syntax_; + + void transition(ParserState new_state); + void on_newline(); +}; + +// Override gtest printing for lists of nodes +void PrintTo(std::vector const & nodes, std::ostream * os); +}; diff --git a/tests/unit/libutil-support/tests/terminal-code-eater.cc b/tests/unit/libutil-support/tests/terminal-code-eater.cc new file mode 100644 index 000000000000..dfe0523a7260 --- /dev/null +++ b/tests/unit/libutil-support/tests/terminal-code-eater.cc @@ -0,0 +1,94 @@ +#include "terminal-code-eater.hh" +#include +#include +#include + +namespace nix { + +static constexpr const bool DEBUG_EATER = false; + +static void dbgChar(const char * prefix, char c) +{ + if (!DEBUG_EATER) { + return; + } + std::cerr << prefix << " "; + if (isprint(c)) { + std::cerr << static_cast(c); + } else { + std::cerr << std::hex << "0x" << (static_cast(c) & 0xff) << std::dec; + } + std::cerr << "\n"; +} + +void TerminalCodeEater::feed(char c, std::function on_char) +{ + auto isParamChar = [](char v) -> bool { return v >= 0x30 && v <= 0x3f; }; + auto isIntermediateChar = [](char v) -> bool { return v >= 0x20 && v <= 0x2f; }; + auto isFinalChar = [](char v) -> bool { return v >= 0x40 && v <= 0x7e; }; + dbgChar("eater", c); + + switch (state) { + case State::ExpectESC: + switch (c) { + case '\e': + transition(State::ExpectESCSeq); + return; + // Just eat \r, since it is part of clearing a line + case '\r': + return; + } + dbgChar("eater uneat", c); + on_char(c); + break; + case State::ExpectESCSeq: + switch (c) { + // CSI + case '[': + transition(State::InCSIParams); + return; + default: + 
transition(State::ExpectESC); + return; + } + break; + // https://en.wikipedia.org/wiki/ANSI_escape_code#CSI_(Control_Sequence_Introducer)_sequences + // A CSI sequence is: CSI [\x30-\x3f]* [\x20-\x2f]* [\x40-\x7e] + // ^ params ^ intermediates ^ final byte + case State::InCSIParams: + if (isFinalChar(c)) { + transition(State::ExpectESC); + return; + } else if (isIntermediateChar(c)) { + transition(State::InCSIIntermediates); + return; + } else if (isParamChar(c)) { + return; + } else { + // Corrupt escape sequence? Throw an assert, for now. + // transition(State::ExpectESC); + assert(false && "Corrupt terminal escape sequence"); + return; + } + break; + case State::InCSIIntermediates: + if (isFinalChar(c)) { + transition(State::ExpectESC); + return; + } else if (isIntermediateChar(c)) { + return; + } else { + // Corrupt escape sequence? Throw an assert, for now. + // transition(State::ExpectESC); + assert(false && "Corrupt terminal escape sequence in intermediates"); + return; + } + break; + } +} + +void TerminalCodeEater::transition(State new_state) +{ + state = new_state; +} +}; diff --git a/tests/unit/libutil-support/tests/terminal-code-eater.hh b/tests/unit/libutil-support/tests/terminal-code-eater.hh new file mode 100644 index 000000000000..d904bcc2069d --- /dev/null +++ b/tests/unit/libutil-support/tests/terminal-code-eater.hh @@ -0,0 +1,29 @@ +#pragma once +/// @file + +#include + +namespace nix { + +/** DFA that eats terminal escapes + * + * See: https://invisible-island.net/xterm/ctlseqs/ctlseqs.html + */ +class TerminalCodeEater +{ +public: + void feed(char c, std::function on_char); + +private: + enum class State { + ExpectESC, + ExpectESCSeq, + InCSIParams, + InCSIIntermediates, + }; + + State state = State::ExpectESC; + + void transition(State new_state); +}; +};