From 203e46d0d1a7255d9f0922ca494df07ecac4f73a Mon Sep 17 00:00:00 2001 From: adamska <2639980868@qq.com> Date: Wed, 28 Aug 2024 00:26:24 +0800 Subject: [PATCH 1/5] feat: define error type --- .gitmodules | 3 +++ CMakeLists.txt | 3 ++- deps/fmt | 1 + src/CMakeLists.txt | 2 ++ src/error.cpp | 36 ++++++++++++++++++++++++++++++++++ src/error.hpp | 49 ++++++++++++++++++++++++++++++++++++++++++++++ src/parser.cpp | 6 ++++-- src/parser.hpp | 11 +++++------ 8 files changed, 102 insertions(+), 9 deletions(-) create mode 160000 deps/fmt create mode 100644 src/error.cpp create mode 100644 src/error.hpp diff --git a/.gitmodules b/.gitmodules index 00cc27e..cf0d9f5 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,6 @@ [submodule "deps/Catch2"] path = deps/Catch2 url = https://github.com/catchorg/Catch2.git +[submodule "deps/fmt"] + path = deps/fmt + url = https://github.com/fmtlib/fmt diff --git a/CMakeLists.txt b/CMakeLists.txt index d7c3147..27bbaf2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -18,4 +18,5 @@ set(CMAKE_CXX_STANDARD_REQUIRED on) add_subdirectory(src) add_subdirectory(tests) -add_subdirectory(deps/Catch2) \ No newline at end of file +add_subdirectory(deps/Catch2) +add_subdirectory(deps/fmt) \ No newline at end of file diff --git a/deps/fmt b/deps/fmt new file mode 160000 index 0000000..0379bf3 --- /dev/null +++ b/deps/fmt @@ -0,0 +1 @@ +Subproject commit 0379bf3a5d52d8542aec1874677c9df5ff9ba5f9 diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 74a5c81..e5fe42c 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -3,3 +3,5 @@ file(GLOB sources "*.cpp") add_library(myxml ${sources}) target_include_directories(myxml PUBLIC ".") + +target_link_libraries(myxml fmt::fmt) \ No newline at end of file diff --git a/src/error.cpp b/src/error.cpp new file mode 100644 index 0000000..465dbf3 --- /dev/null +++ b/src/error.cpp @@ -0,0 +1,36 @@ +#include "error.hpp" + +namespace myxml +{ + ParseError::ParseError(std::string message) + : message(message) + { + } + + const char *ParseError::what() const noexcept + { + return this->message.c_str(); + } + + SyntaxError::SyntaxError(std::string message) + : ParseError(message) + { + } + + const char *SyntaxError::what() const noexcept + { + this->fullMessage = "SyntaxError: " + std::string(ParseError::what()); + return this->fullMessage.c_str(); + } + + UnexpectedEndOfInput::UnexpectedEndOfInput() + : ParseError("End of input") + { + } + + const char *UnexpectedEndOfInput::what() const noexcept + { + this->fullMessage = "UnexpectedEndOfInput: " + std::string(ParseError::what()); + return this->fullMessage.c_str(); + } +} \ No newline at end of file diff --git a/src/error.hpp b/src/error.hpp new file mode 100644 index 0000000..8e72f65 --- /dev/null +++ b/src/error.hpp @@ -0,0 +1,49 @@ +#pragma once +#include + +namespace myxml +{ + class ParseError : public std::exception + { + protected: + std::string message; + + public: + ParseError(std::string message); + + virtual const char *what() const noexcept override; + }; + + /** + * The input data do not conform to the expected grammar rule. Including: + * 1. Missing or mismatch symbols. For example, missing a '>' in the end of a tag. + * 2. Unexpected token. Encounter a token that is not expected in the context. For example: extra semicolon. + * ... + */ + class SyntaxError : public ParseError + { + private: + // store message after being concated + mutable std::string fullMessage; + + public: + SyntaxError(std::string); + + virtual const char *what() const noexcept override; + }; + + /** + * e.g. EOF + */ + class UnexpectedEndOfInput : public ParseError + { + private: + // store message after being concated + mutable std::string fullMessage; + + public: + UnexpectedEndOfInput(); + + virtual const char *what() const noexcept override; + }; +} \ No newline at end of file diff --git a/src/parser.cpp b/src/parser.cpp index 19ee987..b091c13 100644 --- a/src/parser.cpp +++ b/src/parser.cpp @@ -1,4 +1,6 @@ +#include #include "parser.hpp" +#include "error.hpp" namespace myxml { @@ -55,12 +57,12 @@ namespace myxml std::optional Parser::parseIdent() { if (this->peekChar() == std::nullopt) - return std::nullopt; + throw UnexpectedEndOfInput(); std::size_t begin = this->offset; // validate heading character if (auto head = this->peekChar(); !head || (!std::isalpha(*head) && head != '_')) { - return std::nullopt; + throw SyntaxError(fmt::format("element name which starts with {} is invalid.", *head)); } std::size_t len = 0; while (begin + len < this->buffer.length() && diff --git a/src/parser.hpp b/src/parser.hpp index 16d3bff..1d5bfc3 100644 --- a/src/parser.hpp +++ b/src/parser.hpp @@ -41,16 +41,15 @@ namespace myxml */ void skipWhiteSpaces(); - // return and not consume current character std::optional peekChar(); - // return and not consume next n characters std::optional peekNextNChars(int); - // return and consume current character std::optional nextChar(); - std::optional nextNChars(int); - // return and consume a ident - // will not consume ident if failed + + /** + * Parse an identity. + * @return if find no identity, return `std::nullptr` + */ std::optional parseIdent(); // return and consume a string `"..."` // will not consume string if failed From d190c2db01dfd3803412bf995e2617ab33e61033 Mon Sep 17 00:00:00 2001 From: adamska <2639980868@qq.com> Date: Wed, 28 Aug 2024 01:05:02 +0800 Subject: [PATCH 2/5] feat: apply error type to parser --- src/error.cpp | 27 ++++--- src/error.hpp | 25 +++++-- src/parser.cpp | 188 +++++++++++++++++++++++++------------------------ src/parser.hpp | 43 ++++++----- 4 files changed, 158 insertions(+), 125 deletions(-) diff --git a/src/error.cpp b/src/error.cpp index 465dbf3..1a3e413 100644 --- a/src/error.cpp +++ b/src/error.cpp @@ -9,7 +9,13 @@ namespace myxml const char *ParseError::what() const noexcept { - return this->message.c_str(); + this->fullMessage = this->prefix() + this->message; + return this->fullMessage.c_str(); + } + + const char *SyntaxError::prefix() const + { + return "Syntax Error: "; } SyntaxError::SyntaxError(std::string message) @@ -17,20 +23,23 @@ namespace myxml { } - const char *SyntaxError::what() const noexcept + const char *SemanticError::prefix() const { - this->fullMessage = "SyntaxError: " + std::string(ParseError::what()); - return this->fullMessage.c_str(); + return "Sematic Error: "; } - UnexpectedEndOfInput::UnexpectedEndOfInput() - : ParseError("End of input") + SemanticError::SemanticError(std::string message) + : ParseError(message) { } - const char *UnexpectedEndOfInput::what() const noexcept + const char *UnexpectedEndOfInput::prefix() const + { + return "Unexpected End of Input: "; + } + + UnexpectedEndOfInput::UnexpectedEndOfInput() + : ParseError("End of input") { - this->fullMessage = "UnexpectedEndOfInput: " + std::string(ParseError::what()); - return this->fullMessage.c_str(); } } \ No newline at end of file diff --git a/src/error.hpp b/src/error.hpp index 8e72f65..36a44aa 100644 --- a/src/error.hpp +++ b/src/error.hpp @@ -5,8 +5,13 @@ namespace myxml { class ParseError : public std::exception { + private: + virtual const char *prefix() const = 0; + protected: std::string message; + // store message after being concated with prefix + mutable std::string fullMessage; public: ParseError(std::string message); @@ -23,13 +28,22 @@ namespace myxml class SyntaxError : public ParseError { private: - // store message after being concated - mutable std::string fullMessage; + virtual const char *prefix() const; public: SyntaxError(std::string); + }; - virtual const char *what() const noexcept override; + /** + * + */ + class SemanticError : public ParseError + { + private: + virtual const char *prefix() const; + + public: + SemanticError(std::string); }; /** @@ -38,12 +52,9 @@ namespace myxml class UnexpectedEndOfInput : public ParseError { private: - // store message after being concated - mutable std::string fullMessage; + virtual const char *prefix() const; public: UnexpectedEndOfInput(); - - virtual const char *what() const noexcept override; }; } \ No newline at end of file diff --git a/src/parser.cpp b/src/parser.cpp index b091c13..698e963 100644 --- a/src/parser.cpp +++ b/src/parser.cpp @@ -1,3 +1,4 @@ +#include #include #include "parser.hpp" #include "error.hpp" @@ -54,7 +55,7 @@ namespace myxml return nchars; } - std::optional Parser::parseIdent() + std::string Parser::parseIdent() { if (this->peekChar() == std::nullopt) throw UnexpectedEndOfInput(); @@ -74,11 +75,15 @@ namespace myxml return this->buffer.substr(begin, len); } - std::optional Parser::parseStringLiteral() + std::string Parser::parseStringLiteral() { + if (!this->peekChar()) + { + throw UnexpectedEndOfInput(); + } if (this->peekChar() != '"') { - return std::nullopt; + throw SyntaxError(fmt::format("expected '\"' at the beginning of string literal, find {}", this->peekChar())); } std::size_t cur = this->offset; // this->offset points to `"` while (cur + 1 < this->buffer.length() && this->buffer[cur + 1] != '"') @@ -87,7 +92,7 @@ namespace myxml } if (cur + 1 >= this->buffer.length()) { // if jump out due to length limit - return std::nullopt; + throw SyntaxError(fmt::format("missing closing double quote for string literal")); } auto literal = this->buffer.substr(this->offset + 1, cur - this->offset); this->offset = cur + 2; // cur + 1 -> `"` @@ -97,24 +102,34 @@ namespace myxml std::optional> Parser::parseAttribute() { this->skipWhiteSpaces(); - std::pair attri; - if (auto key = this->parseIdent(); key && this->nextChar() == '=') + std::pair attr; + try { - attri.first = *key; - if (auto value = this->parseStringLiteral(); value) + auto key = this->parseIdent(); + if (this->nextChar() != '=') { - attri.second = *value; - return attri; + throw SyntaxError(fmt::format("expected '=' after attribute name")); } + attr.first = key; + } + catch (SyntaxError e) + { // Only SyntaxError in parseIdent is incorrect heading character + return std::nullopt; } - return std::nullopt; + catch (UnexpectedEndOfInput e) + { // There must be `>` or else after all attributes + throw e; + } + auto value = this->parseStringLiteral(); + attr.second = value; + return attr; } - std::optional> Parser::parseText() + std::shared_ptr Parser::parseText() { if (!this->peekChar()) { - return std::nullopt; + throw UnexpectedEndOfInput(); } std::size_t begin = this->offset; std::size_t len = 0; @@ -125,13 +140,50 @@ namespace myxml } if (this->buffer[begin + len] != '<') { // if jump out of while loop due to length limit - return std::nullopt; + throw SyntaxError(fmt::format("expected '<' after text")); } this->offset += len; return std::shared_ptr(new Text(this->buffer.substr(begin, len))); } - std::optional> Parser::parseElementWithHeader(ElementTag header) + std::optional Parser::ParseTag() + { + if (this->nextChar() != '<') + { + return std::nullopt; + } + ElementTag tag; + if (this->peekChar() == '/') + { + tag.type = ElementTag::ClosingType::Closing; + this->nextChar(); + } + this->skipWhiteSpaces(); + auto name = this->parseIdent(); + tag.name = name; + this->skipWhiteSpaces(); + while (auto attr = this->parseAttribute()) + { + tag.attris.insert(*attr); + } + this->skipWhiteSpaces(); + if (this->peekChar() == '/') + { + if (tag.type != ElementTag::ClosingType::Open) + { + throw SyntaxError(fmt::format("unexpected ending '/' found in closing tag")); + } + tag.type = ElementTag::ClosingType::Closed; + this->nextChar(); + } + if (this->nextChar() != '>') + { + throw SyntaxError(fmt::format("expected '>' at the end of the tag")); + } + return tag; + } + + std::shared_ptr Parser::parseElementWithHeader(ElementTag header) { auto elem = Element::New(); elem->SetName(header.name); @@ -141,19 +193,16 @@ namespace myxml { case '<': { - auto tag = this->ParseTag(); + auto tag = this->ParseTag(); // impossible to be std::nullopt + assert(tag); switch (tag->type) { case ElementTag::ClosingType::Open: - if (auto child = this->parseElementWithHeader(*tag); child) - { - elem->InsertAtEnd(*child); - } - else - { - return std::nullopt; - } + { + auto child = this->parseElementWithHeader(*tag); + elem->InsertAtEnd(child); break; + } case ElementTag::ClosingType::Closed: { auto child = Element::New(); @@ -168,7 +217,7 @@ namespace myxml case ElementTag::ClosingType::Closing: if (tag->name != elem->GetName()) { - return std::nullopt; + throw SyntaxError(fmt::format("")); } if (!header.attris.empty()) { @@ -176,50 +225,17 @@ namespace myxml } return elem; default: - return std::nullopt; + assert(false && "Invalid ElementTag Type"); } break; } default: - if (auto text = this->parseText(); text) - { - elem->InsertAtEnd(*text); - } - else - { - return std::nullopt; - } + auto text = this->parseText(); + elem->InsertAtEnd(text); break; } } - return std::nullopt; - } - - std::optional Parser::parseDeclaration() - { - if (this->peekNextNChars(5) != "nextNChars(5); - std::map attrs; - while (auto attr = this->parseAttribute()) - { - attrs.insert(*attr); - } - this->skipWhiteSpaces(); - if (this->nextNChars(2) != "?>") - { - return std::nullopt; - } - if (auto decl = Declaration::BuildFromAttrs(attrs); decl) - { - return decl; - } - else - { - return std::nullopt; - } + throw UnexpectedEndOfInput(); } std::optional> Parser::ParseElement() @@ -242,52 +258,40 @@ namespace myxml return this->parseElementWithHeader(*tag); } } - return std::nullopt; + else + { + return std::nullopt; + } } - std::optional Parser::ParseTag() + std::optional Parser::parseDeclaration() { - if (this->nextChar() != '<') + if (this->peekNextNChars(5) != "peekChar() == '/') - { - tag.type = ElementTag::ClosingType::Closing; - this->nextChar(); - } - this->skipWhiteSpaces(); - if (auto name = this->parseIdent(); name) - { - tag.name = *name; - } - else + this->nextNChars(5); + std::map attrs; + while (auto attr = this->parseAttribute()) { - return std::nullopt; + attrs.insert(*attr); } this->skipWhiteSpaces(); - while (auto attri = this->parseAttribute()) + if (this->nextNChars(2) != "?>") { - tag.attris.insert(*attri); + throw SyntaxError(fmt::format("expected \"?>\" at end of xml declaration")); } - if (this->peekChar() == '/') + if (auto decl = Declaration::BuildFromAttrs(attrs); decl) { - if (tag.type != ElementTag::ClosingType::Open) - { - return std::nullopt; - } - tag.type = ElementTag::ClosingType::Closed; - this->nextChar(); + return decl; } - if (this->nextChar() != '>') + else { - return std::nullopt; + throw SemanticError(fmt::format("declaration has incorrect attributes")); } - return tag; } - std::optional Parser::ParseDocument() + Document Parser::ParseDocument() { Document document; if (auto decl = this->parseDeclaration(); decl) @@ -300,7 +304,7 @@ namespace myxml } else { - return std::nullopt; + throw SemanticError(fmt::format("missing root element in xml document")); } return document; } diff --git a/src/parser.hpp b/src/parser.hpp index 1d5bfc3..46a48f5 100644 --- a/src/parser.hpp +++ b/src/parser.hpp @@ -48,34 +48,43 @@ namespace myxml /** * Parse an identity. - * @return if find no identity, return `std::nullptr` + * @throws `UnexpectedEndOfInput` + * @throws `SyntaxError` if an invalid character occurs. + */ + std::string parseIdent(); + /** + * Parse a string literal + * @throws `UnexpectedEndOfInput` + * @throws `SyntaxError` if missing any of `"` + */ + std::string parseStringLiteral(); + /** + * @returns std::nullopt if find no attribute + * @throws `UnexpectedEndOfInput` + * @throws `SyntaxError` if the following chars do not confront to `key="value"` format */ - std::optional parseIdent(); - // return and consume a string `"..."` - // will not consume string if failed - std::optional parseStringLiteral(); - // return and consume an attribute `key="value"` std::optional> parseAttribute(); - // return and consume pcdata - // will not consume pcdate if failed - std::optional> parseText(); + std::shared_ptr parseText(); // return the entire element // will consume buffer if failed - std::optional> parseElementWithHeader(ElementTag header); - // return the declartion - // will not consume buffer if failed + std::shared_ptr parseElementWithHeader(ElementTag header); + /** + * @returns std::nullopt if not starts with ` parseDeclaration(); public: // return and consume current element // will consume buffer if failed std::optional> ParseElement(); - // return and consume current tag - // will consume buffer if failed + /** + * @returns std::nullopt if no heading `<` + * @throws `SyntaxError` if the heading character is `<` and the trailing characters are in incorrect format + */ std::optional ParseTag(); - // return and consume whole document - // will consume buffer if failed - std::optional ParseDocument(); + Document ParseDocument(); Parser() = delete; explicit Parser(std::string_view); }; From ab2512ba6a7d8b9fbf2d0245d8532d89dd7340ca Mon Sep 17 00:00:00 2001 From: adamska <2639980868@qq.com> Date: Wed, 28 Aug 2024 13:05:35 +0800 Subject: [PATCH 3/5] fix(parser.cpp): throw error in try block --- src/parser.cpp | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/parser.cpp b/src/parser.cpp index 698e963..08dac60 100644 --- a/src/parser.cpp +++ b/src/parser.cpp @@ -103,14 +103,10 @@ namespace myxml { this->skipWhiteSpaces(); std::pair attr; + std::string key; try { - auto key = this->parseIdent(); - if (this->nextChar() != '=') - { - throw SyntaxError(fmt::format("expected '=' after attribute name")); - } - attr.first = key; + key = this->parseIdent(); } catch (SyntaxError e) { // Only SyntaxError in parseIdent is incorrect heading character @@ -120,6 +116,11 @@ namespace myxml { // There must be `>` or else after all attributes throw e; } + if (this->nextChar() != '=') + { + throw SyntaxError(fmt::format("expected '=' after attribute name")); + } + attr.first = key; auto value = this->parseStringLiteral(); attr.second = value; return attr; From 015c119f8b967c498a177ce64af06bef8a4f8f07 Mon Sep 17 00:00:00 2001 From: adamska <2639980868@qq.com> Date: Wed, 28 Aug 2024 13:09:31 +0800 Subject: [PATCH 4/5] fix(parser.cpp): add unexpected closing tag error --- src/parser.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/parser.cpp b/src/parser.cpp index 08dac60..0c4de46 100644 --- a/src/parser.cpp +++ b/src/parser.cpp @@ -83,7 +83,7 @@ namespace myxml } if (this->peekChar() != '"') { - throw SyntaxError(fmt::format("expected '\"' at the beginning of string literal, find {}", this->peekChar())); + throw SyntaxError(fmt::format("expected '\"' at the beginning of string literal, find {}", *this->peekChar())); } std::size_t cur = this->offset; // this->offset points to `"` while (cur + 1 < this->buffer.length() && this->buffer[cur + 1] != '"') @@ -258,6 +258,10 @@ namespace myxml { return this->parseElementWithHeader(*tag); } + else // Closing + { + throw SyntaxError(fmt::format("unexpected closing tag")); + } } else { From c8b5d672b0f771fa8cf430ceafef73f0bc6bc772 Mon Sep 17 00:00:00 2001 From: adamska <2639980868@qq.com> Date: Wed, 28 Aug 2024 13:19:56 +0800 Subject: [PATCH 5/5] chore(paser.hpp): add method comment --- src/parser.hpp | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/src/parser.hpp b/src/parser.hpp index 46a48f5..75501df 100644 --- a/src/parser.hpp +++ b/src/parser.hpp @@ -33,19 +33,18 @@ namespace myxml std::string buffer; std::size_t offset; - /** - * TODO: - * Define Exceptions , so for all parsing method, - * return std::nullopt means `not this one`, - * and throw exception means `parsing error` - */ - void skipWhiteSpaces(); std::optional peekChar(); std::optional peekNextNChars(int); std::optional nextChar(); std::optional nextNChars(int); + /** + * For all parsing method, + * return std::nullopt means `not this one` and will not consume buffer. + * Throw exception means `parsing error` and should stop immediately + */ + /** * Parse an identity. * @throws `UnexpectedEndOfInput` @@ -59,31 +58,39 @@ namespace myxml */ std::string parseStringLiteral(); /** - * @returns std::nullopt if find no attribute + * @returns std::nullopt if find no attribute * @throws `UnexpectedEndOfInput` * @throws `SyntaxError` if the following chars do not confront to `key="value"` format */ std::optional> parseAttribute(); std::shared_ptr parseText(); - // return the entire element - // will consume buffer if failed + /** + * @throws `UnexpectedEndOfInput` + * @throws `SyntaxError` + * @throws `SemanticError` + */ std::shared_ptr parseElementWithHeader(ElementTag header); /** * @returns std::nullopt if not starts with ` parseDeclaration(); public: - // return and consume current element - // will consume buffer if failed std::optional> ParseElement(); /** * @returns std::nullopt if no heading `<` - * @throws `SyntaxError` if the heading character is `<` and the trailing characters are in incorrect format + * @throws `SyntaxError` if the heading character is `<` and the trailing characters are in incorrect format + * @throws `UnexpectedEndOfInput` if missing name */ std::optional ParseTag(); + /** + * @throws `UnexpectedEndOfInput` + * @throws `SyntaxError` + * @throws `SemanticError` + */ Document ParseDocument(); Parser() = delete; explicit Parser(std::string_view);