diff --git a/.gitmodules b/.gitmodules index 00cc27e..cf0d9f5 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,6 @@ [submodule "deps/Catch2"] path = deps/Catch2 url = https://github.com/catchorg/Catch2.git +[submodule "deps/fmt"] + path = deps/fmt + url = https://github.com/fmtlib/fmt diff --git a/CMakeLists.txt b/CMakeLists.txt index d7c3147..27bbaf2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -18,4 +18,5 @@ set(CMAKE_CXX_STANDARD_REQUIRED on) add_subdirectory(src) add_subdirectory(tests) -add_subdirectory(deps/Catch2) \ No newline at end of file +add_subdirectory(deps/Catch2) +add_subdirectory(deps/fmt) \ No newline at end of file diff --git a/deps/fmt b/deps/fmt new file mode 160000 index 0000000..0379bf3 --- /dev/null +++ b/deps/fmt @@ -0,0 +1 @@ +Subproject commit 0379bf3a5d52d8542aec1874677c9df5ff9ba5f9 diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 74a5c81..e5fe42c 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -3,3 +3,5 @@ file(GLOB sources "*.cpp") add_library(myxml ${sources}) target_include_directories(myxml PUBLIC ".") + +target_link_libraries(myxml fmt::fmt) \ No newline at end of file diff --git a/src/error.cpp b/src/error.cpp new file mode 100644 index 0000000..1a3e413 --- /dev/null +++ b/src/error.cpp @@ -0,0 +1,45 @@ +#include "error.hpp" + +namespace myxml +{ + ParseError::ParseError(std::string message) + : message(message) + { + } + + const char *ParseError::what() const noexcept + { + this->fullMessage = this->prefix() + this->message; + return this->fullMessage.c_str(); + } + + const char *SyntaxError::prefix() const + { + return "Syntax Error: "; + } + + SyntaxError::SyntaxError(std::string message) + : ParseError(message) + { + } + + const char *SemanticError::prefix() const + { + return "Sematic Error: "; + } + + SemanticError::SemanticError(std::string message) + : ParseError(message) + { + } + + const char *UnexpectedEndOfInput::prefix() const + { + return "Unexpected End of Input: "; + } + + UnexpectedEndOfInput::UnexpectedEndOfInput() + : ParseError("End of input") + { + } +} \ No newline at end of file diff --git a/src/error.hpp b/src/error.hpp new file mode 100644 index 0000000..36a44aa --- /dev/null +++ b/src/error.hpp @@ -0,0 +1,60 @@ +#pragma once +#include + +namespace myxml +{ + class ParseError : public std::exception + { + private: + virtual const char *prefix() const = 0; + + protected: + std::string message; + // store message after being concated with prefix + mutable std::string fullMessage; + + public: + ParseError(std::string message); + + virtual const char *what() const noexcept override; + }; + + /** + * The input data do not conform to the expected grammar rule. Including: + * 1. Missing or mismatch symbols. For example, missing a '>' in the end of a tag. + * 2. Unexpected token. Encounter a token that is not expected in the context. For example: extra semicolon. + * ... + */ + class SyntaxError : public ParseError + { + private: + virtual const char *prefix() const; + + public: + SyntaxError(std::string); + }; + + /** + * + */ + class SemanticError : public ParseError + { + private: + virtual const char *prefix() const; + + public: + SemanticError(std::string); + }; + + /** + * e.g. EOF + */ + class UnexpectedEndOfInput : public ParseError + { + private: + virtual const char *prefix() const; + + public: + UnexpectedEndOfInput(); + }; +} \ No newline at end of file diff --git a/src/parser.cpp b/src/parser.cpp index 19ee987..0c4de46 100644 --- a/src/parser.cpp +++ b/src/parser.cpp @@ -1,4 +1,7 @@ +#include +#include #include "parser.hpp" +#include "error.hpp" namespace myxml { @@ -52,15 +55,15 @@ namespace myxml return nchars; } - std::optional Parser::parseIdent() + std::string Parser::parseIdent() { if (this->peekChar() == std::nullopt) - return std::nullopt; + throw UnexpectedEndOfInput(); std::size_t begin = this->offset; // validate heading character if (auto head = this->peekChar(); !head || (!std::isalpha(*head) && head != '_')) { - return std::nullopt; + throw SyntaxError(fmt::format("element name which starts with {} is invalid.", *head)); } std::size_t len = 0; while (begin + len < this->buffer.length() && @@ -72,11 +75,15 @@ namespace myxml return this->buffer.substr(begin, len); } - std::optional Parser::parseStringLiteral() + std::string Parser::parseStringLiteral() { + if (!this->peekChar()) + { + throw UnexpectedEndOfInput(); + } if (this->peekChar() != '"') { - return std::nullopt; + throw SyntaxError(fmt::format("expected '\"' at the beginning of string literal, find {}", *this->peekChar())); } std::size_t cur = this->offset; // this->offset points to `"` while (cur + 1 < this->buffer.length() && this->buffer[cur + 1] != '"') @@ -85,7 +92,7 @@ namespace myxml } if (cur + 1 >= this->buffer.length()) { // if jump out due to length limit - return std::nullopt; + throw SyntaxError(fmt::format("missing closing double quote for string literal")); } auto literal = this->buffer.substr(this->offset + 1, cur - this->offset); this->offset = cur + 2; // cur + 1 -> `"` @@ -95,24 +102,35 @@ namespace myxml std::optional> Parser::parseAttribute() { this->skipWhiteSpaces(); - std::pair attri; - if (auto key = this->parseIdent(); key && this->nextChar() == '=') + std::pair attr; + std::string key; + try { - attri.first = *key; - if (auto value = this->parseStringLiteral(); value) - { - attri.second = *value; - return attri; - } + key = this->parseIdent(); + } + catch (SyntaxError e) + { // Only SyntaxError in parseIdent is incorrect heading character + return std::nullopt; + } + catch (UnexpectedEndOfInput e) + { // There must be `>` or else after all attributes + throw e; } - return std::nullopt; + if (this->nextChar() != '=') + { + throw SyntaxError(fmt::format("expected '=' after attribute name")); + } + attr.first = key; + auto value = this->parseStringLiteral(); + attr.second = value; + return attr; } - std::optional> Parser::parseText() + std::shared_ptr Parser::parseText() { if (!this->peekChar()) { - return std::nullopt; + throw UnexpectedEndOfInput(); } std::size_t begin = this->offset; std::size_t len = 0; @@ -123,13 +141,50 @@ namespace myxml } if (this->buffer[begin + len] != '<') { // if jump out of while loop due to length limit - return std::nullopt; + throw SyntaxError(fmt::format("expected '<' after text")); } this->offset += len; return std::shared_ptr(new Text(this->buffer.substr(begin, len))); } - std::optional> Parser::parseElementWithHeader(ElementTag header) + std::optional Parser::ParseTag() + { + if (this->nextChar() != '<') + { + return std::nullopt; + } + ElementTag tag; + if (this->peekChar() == '/') + { + tag.type = ElementTag::ClosingType::Closing; + this->nextChar(); + } + this->skipWhiteSpaces(); + auto name = this->parseIdent(); + tag.name = name; + this->skipWhiteSpaces(); + while (auto attr = this->parseAttribute()) + { + tag.attris.insert(*attr); + } + this->skipWhiteSpaces(); + if (this->peekChar() == '/') + { + if (tag.type != ElementTag::ClosingType::Open) + { + throw SyntaxError(fmt::format("unexpected ending '/' found in closing tag")); + } + tag.type = ElementTag::ClosingType::Closed; + this->nextChar(); + } + if (this->nextChar() != '>') + { + throw SyntaxError(fmt::format("expected '>' at the end of the tag")); + } + return tag; + } + + std::shared_ptr Parser::parseElementWithHeader(ElementTag header) { auto elem = Element::New(); elem->SetName(header.name); @@ -139,19 +194,16 @@ namespace myxml { case '<': { - auto tag = this->ParseTag(); + auto tag = this->ParseTag(); // impossible to be std::nullopt + assert(tag); switch (tag->type) { case ElementTag::ClosingType::Open: - if (auto child = this->parseElementWithHeader(*tag); child) - { - elem->InsertAtEnd(*child); - } - else - { - return std::nullopt; - } + { + auto child = this->parseElementWithHeader(*tag); + elem->InsertAtEnd(child); break; + } case ElementTag::ClosingType::Closed: { auto child = Element::New(); @@ -166,7 +218,7 @@ namespace myxml case ElementTag::ClosingType::Closing: if (tag->name != elem->GetName()) { - return std::nullopt; + throw SyntaxError(fmt::format("")); } if (!header.attris.empty()) { @@ -174,50 +226,17 @@ namespace myxml } return elem; default: - return std::nullopt; + assert(false && "Invalid ElementTag Type"); } break; } default: - if (auto text = this->parseText(); text) - { - elem->InsertAtEnd(*text); - } - else - { - return std::nullopt; - } + auto text = this->parseText(); + elem->InsertAtEnd(text); break; } } - return std::nullopt; - } - - std::optional Parser::parseDeclaration() - { - if (this->peekNextNChars(5) != "nextNChars(5); - std::map attrs; - while (auto attr = this->parseAttribute()) - { - attrs.insert(*attr); - } - this->skipWhiteSpaces(); - if (this->nextNChars(2) != "?>") - { - return std::nullopt; - } - if (auto decl = Declaration::BuildFromAttrs(attrs); decl) - { - return decl; - } - else - { - return std::nullopt; - } + throw UnexpectedEndOfInput(); } std::optional> Parser::ParseElement() @@ -239,53 +258,45 @@ namespace myxml { return this->parseElementWithHeader(*tag); } + else // Closing + { + throw SyntaxError(fmt::format("unexpected closing tag")); + } + } + else + { + return std::nullopt; } - return std::nullopt; } - std::optional Parser::ParseTag() + std::optional Parser::parseDeclaration() { - if (this->nextChar() != '<') + if (this->peekNextNChars(5) != "peekChar() == '/') - { - tag.type = ElementTag::ClosingType::Closing; - this->nextChar(); - } - this->skipWhiteSpaces(); - if (auto name = this->parseIdent(); name) - { - tag.name = *name; - } - else + this->nextNChars(5); + std::map attrs; + while (auto attr = this->parseAttribute()) { - return std::nullopt; + attrs.insert(*attr); } this->skipWhiteSpaces(); - while (auto attri = this->parseAttribute()) + if (this->nextNChars(2) != "?>") { - tag.attris.insert(*attri); + throw SyntaxError(fmt::format("expected \"?>\" at end of xml declaration")); } - if (this->peekChar() == '/') + if (auto decl = Declaration::BuildFromAttrs(attrs); decl) { - if (tag.type != ElementTag::ClosingType::Open) - { - return std::nullopt; - } - tag.type = ElementTag::ClosingType::Closed; - this->nextChar(); + return decl; } - if (this->nextChar() != '>') + else { - return std::nullopt; + throw SemanticError(fmt::format("declaration has incorrect attributes")); } - return tag; } - std::optional Parser::ParseDocument() + Document Parser::ParseDocument() { Document document; if (auto decl = this->parseDeclaration(); decl) @@ -298,7 +309,7 @@ namespace myxml } else { - return std::nullopt; + throw SemanticError(fmt::format("missing root element in xml document")); } return document; } diff --git a/src/parser.hpp b/src/parser.hpp index 16d3bff..75501df 100644 --- a/src/parser.hpp +++ b/src/parser.hpp @@ -33,50 +33,65 @@ namespace myxml std::string buffer; std::size_t offset; - /** - * TODO: - * Define Exceptions , so for all parsing method, - * return std::nullopt means `not this one`, - * and throw exception means `parsing error` - */ - void skipWhiteSpaces(); - // return and not consume current character std::optional peekChar(); - // return and not consume next n characters std::optional peekNextNChars(int); - // return and consume current character std::optional nextChar(); - std::optional nextNChars(int); - // return and consume a ident - // will not consume ident if failed - std::optional parseIdent(); - // return and consume a string `"..."` - // will not consume string if failed - std::optional parseStringLiteral(); - // return and consume an attribute `key="value"` + + /** + * For all parsing method, + * return std::nullopt means `not this one` and will not consume buffer. + * Throw exception means `parsing error` and should stop immediately + */ + + /** + * Parse an identity. + * @throws `UnexpectedEndOfInput` + * @throws `SyntaxError` if an invalid character occurs. + */ + std::string parseIdent(); + /** + * Parse a string literal + * @throws `UnexpectedEndOfInput` + * @throws `SyntaxError` if missing any of `"` + */ + std::string parseStringLiteral(); + /** + * @returns std::nullopt if find no attribute + * @throws `UnexpectedEndOfInput` + * @throws `SyntaxError` if the following chars do not confront to `key="value"` format + */ std::optional> parseAttribute(); - // return and consume pcdata - // will not consume pcdate if failed - std::optional> parseText(); - // return the entire element - // will consume buffer if failed - std::optional> parseElementWithHeader(ElementTag header); - // return the declartion - // will not consume buffer if failed + std::shared_ptr parseText(); + /** + * @throws `UnexpectedEndOfInput` + * @throws `SyntaxError` + * @throws `SemanticError` + */ + std::shared_ptr parseElementWithHeader(ElementTag header); + /** + * @returns std::nullopt if not starts with ` parseDeclaration(); public: - // return and consume current element - // will consume buffer if failed std::optional> ParseElement(); - // return and consume current tag - // will consume buffer if failed + /** + * @returns std::nullopt if no heading `<` + * @throws `SyntaxError` if the heading character is `<` and the trailing characters are in incorrect format + * @throws `UnexpectedEndOfInput` if missing name + */ std::optional ParseTag(); - // return and consume whole document - // will consume buffer if failed - std::optional ParseDocument(); + /** + * @throws `UnexpectedEndOfInput` + * @throws `SyntaxError` + * @throws `SemanticError` + */ + Document ParseDocument(); Parser() = delete; explicit Parser(std::string_view); };