diff --git a/src/document.cpp b/src/document.cpp new file mode 100644 index 0000000..7fb7fba --- /dev/null +++ b/src/document.cpp @@ -0,0 +1,95 @@ +#include +#include "document.hpp" +#include "parser.hpp" + +namespace myxml +{ + void Document::SetDeclaration(const Declaration &declaration) + { + this->declaration = declaration; + } + + void Document::SetRoot(std::shared_ptr root) + { + this->root = root; + } + + const Declaration &Document::GetDeclartion() const + { + return this->declaration; + } + + Declaration &Document::GetDeclartion() + { + return this->declaration; + } + + const std::shared_ptr &Document::GetRoot() const + { + return this->root; + } + + std::shared_ptr Document::GetRoot() + { + return this->root; + } + + std::optional Document::Parse(std::string input) + { + return Parser(input).ParseDocument(); + } + + std::optional Declaration::BuildFromAttrs(std::map attrs) + { + if (!attrs.count("version") || !util::isValidXmlVersion(attrs["version"])) + { + return std::nullopt; + } + Declaration declaration; + declaration.version = attrs["version"]; + if (attrs.count("encoding")) + { + auto encoding = attrs["encoding"]; + if (!util::isValidXmlEncoding(encoding)) + { + return std::nullopt; + } + declaration.encoding = encoding; + } + if (attrs.count("standalone")) + { + auto standalone = attrs["standalone"]; + if (!util::isValidXmlStandalone(standalone)) + { + return std::nullopt; + } + declaration.standalone = standalone; + } + return declaration; + } + + namespace util + { + bool isValidXmlVersion(std::string_view version) + { + return version == "1.0" || version == "1.1"; + } + + bool isValidXmlEncoding(std::string_view encoding) + { + // FIXME: not cover all valid encoding + static std::set> valid{ + "UTF-8", + "UTF-16", + "UTF-32", + "GBK", + }; + return valid.count(encoding); + } + + bool isValidXmlStandalone(std::string_view standalone) + { + return standalone == "yes" || standalone == "no"; + } + } +} diff --git a/src/document.hpp b/src/document.hpp index 8d453ec..5968fe0 100644 --- a/src/document.hpp +++ b/src/document.hpp @@ -1,5 +1,45 @@ #pragma once +#include +#include "node.hpp" -class XMLDocument +// Declaration and Documant are both NOT Node +namespace myxml { -}; \ No newline at end of file + struct Declaration + { + std::string version; + std::optional encoding; + std::optional standalone; + + // return `std::nullopt` if declartion is in bad format + // TODO: use exception to distinguish each of bad format + static std::optional BuildFromAttrs(std::map attrs); + }; + + class Document + { + private: + Declaration declaration; + std::shared_ptr root; + + public: + /* Manipulate */ + void SetDeclaration(const Declaration &); + void SetRoot(std::shared_ptr root); + + /* Query */ + const Declaration &GetDeclartion() const; + Declaration &GetDeclartion(); + const std::shared_ptr &GetRoot() const; + std::shared_ptr GetRoot(); + + static std::optional Parse(std::string); + }; + + namespace util + { + bool isValidXmlVersion(std::string_view); + bool isValidXmlEncoding(std::string_view); + bool isValidXmlStandalone(std::string_view); + } +} diff --git a/src/element.cpp b/src/element.cpp index 5aa68af..cbd84cd 100644 --- a/src/element.cpp +++ b/src/element.cpp @@ -21,41 +21,6 @@ namespace myxml return Parser(buf).ParseElement().value(); } - std::shared_ptr Element::FirstChild() - { - return this->firstChild; - } - - std::shared_ptr Element::Elem(std::string_view name) - { - if (auto buf = this->nameToElemBuffer.find(name); buf != this->nameToElemBuffer.end()) - { - std::weak_ptr ptr = buf->second; - if (auto child = ptr.lock(); child != nullptr) - { - return child; - } - else - { - this->nameToElemBuffer.erase(buf); - } - } - for (auto child = this->firstChild; child != nullptr; child = child->next) - { - if (auto elem = child->AsElement(); elem && (*elem)->name == name) - { - this->nameToElemBuffer.emplace(name, *elem); - return *elem; - } - } - return nullptr; - } - - std::shared_ptr Element::LastChild() - { - return this->lastChild; - } - std::optional Element::GetAttribute(std::string_view name) { if (auto attr = this->attributes.find(name); attr != this->attributes.end()) @@ -73,75 +38,6 @@ namespace myxml return this->name; } - std::shared_ptr Element::InsertAtFront(const std::shared_ptr &elem) - { - if (elem->parent != nullptr) - { - elem->parent->Unlink(elem); - } - elem->parent = this->shared_from_this(); - if (this->firstChild == nullptr) - { - this->firstChild = elem; - this->lastChild = elem; - } - else - { - this->firstChild->prev = elem; - elem->next = this->firstChild; - this->firstChild = elem; - } - return elem; - } - - std::shared_ptr Element::InsertAtEnd(const std::shared_ptr &elem) - { - if (elem->parent != nullptr) - { - elem->parent->Unlink(elem); - } - elem->parent = this->shared_from_this(); - if (this->firstChild == nullptr) - { - this->firstChild = elem; - this->lastChild = elem; - } - else - { - this->lastChild->next = elem; - elem->prev = this->lastChild; - this->lastChild = elem; - } - return elem; - } - - void Element::Unlink(const std::shared_ptr &elem) - { - if (elem->parent.get() != this) - { - return; - } - if (elem == this->firstChild) - { - this->firstChild = this->firstChild->next; - } - if (elem == this->lastChild) - { - this->lastChild = this->lastChild->prev; - } - if (elem->prev != nullptr) - { - elem->prev->next = elem->next; - } - if (elem->next != nullptr) - { - elem->next->prev = elem->prev; - } - elem->next = nullptr; - elem->prev = nullptr; - elem->parent = nullptr; - } - void Element::SetName(std::string_view name) { this->name = name; @@ -185,13 +81,13 @@ namespace myxml { builder += "" + key + "=\"" + value + "\""; } - if (this->firstChild == nullptr) + if (this->FirstChild() == nullptr) { builder += " />"; return builder; } builder += ">"; - for (auto node = this->firstChild; node != nullptr; node = node->next) + for (auto node = this->FirstChild(); node != nullptr; node = node->next) { builder += node->ExportRaw(); } @@ -207,13 +103,13 @@ namespace myxml { builder += "" + key + "=\"" + value + "\""; } - if (this->firstChild == nullptr) + if (this->FirstChild() == nullptr) { builder += " />\n"; return builder; } builder += ">\n"; - for (auto node = this->firstChild; node != nullptr; node = node->next) + for (auto node = this->FirstChild(); node != nullptr; node = node->next) { builder += node->ExportFormatted(indentLevel + 1, indentSize); } diff --git a/src/element.hpp b/src/element.hpp index a9eeba9..440ff42 100644 --- a/src/element.hpp +++ b/src/element.hpp @@ -9,7 +9,7 @@ namespace myxml { - class Element : public std::enable_shared_from_this, public Node + class Element : public CompositeNode // public std::enable_shared_from_this, public Node { public: enum class ClosingType @@ -19,19 +19,13 @@ namespace myxml }; private: - // list node - // std::shared_ptr parent; - // std::shared_ptr next; - // std::shared_ptr prev; - - // element - std::shared_ptr firstChild; - std::shared_ptr lastChild; + // std::shared_ptr firstChild; + // std::shared_ptr lastChild; std::string name; std::map> attributes; - std::map, std::less<>> nameToElemBuffer; + // std::map, std::less<>> nameToElemBuffer; - // Initializer + /* Set nitializer as private to avoid using Element without share_ptr*/ Element(std::string_view name); Element() = default; @@ -43,16 +37,10 @@ namespace myxml static std::shared_ptr Parse(std::string_view buf); /* Query */ - std::shared_ptr FirstChild(); - std::shared_ptr LastChild(); - std::shared_ptr Elem(std::string_view name); std::optional GetAttribute(std::string_view name); std::string_view GetName() const; /* Manipulate */ - std::shared_ptr InsertAtFront(const std::shared_ptr &); - std::shared_ptr InsertAtEnd(const std::shared_ptr &); - void Unlink(const std::shared_ptr &); void SetName(std::string_view); void SetAttribute(std::string key, std::string value); void ExtendAttributes(std::map); diff --git a/src/node.cpp b/src/node.cpp new file mode 100644 index 0000000..904fe6e --- /dev/null +++ b/src/node.cpp @@ -0,0 +1,121 @@ +#include "node.hpp" +#include "element.hpp" + +namespace myxml +{ + + std::shared_ptr CompositeNode::LastChild() + { + return this->lastChild; + } + + const std::shared_ptr &CompositeNode::LastChild() const + { + return this->lastChild; + } + + std::shared_ptr CompositeNode::FirstChild() + { + return this->firstChild; + } + + const std::shared_ptr &CompositeNode::FirstChild() const + { + return this->firstChild; + } + + std::shared_ptr CompositeNode::Elem(std::string_view name) + { + if (auto buf = this->nameToElemBuffer.find(name); buf != this->nameToElemBuffer.end()) + { + std::weak_ptr ptr = buf->second; + if (auto child = ptr.lock(); child != nullptr) + { + return child; + } + else + { + this->nameToElemBuffer.erase(buf); + } + } + for (auto child = this->firstChild; child != nullptr; child = child->next) + { + if (auto elem = child->AsElement(); elem && (*elem)->GetName() == name) + { + this->nameToElemBuffer.emplace(name, *elem); + return *elem; + } + } + return nullptr; + } + + std::shared_ptr CompositeNode::InsertAtFront(const std::shared_ptr &elem) + { + if (elem->parent != nullptr) + { + elem->parent->Unlink(elem); + } + elem->parent = this->shared_from_this(); + if (this->firstChild == nullptr) + { + this->firstChild = elem; + this->lastChild = elem; + } + else + { + this->firstChild->prev = elem; + elem->next = this->firstChild; + this->firstChild = elem; + } + return elem; + } + + std::shared_ptr CompositeNode::InsertAtEnd(const std::shared_ptr &elem) + { + if (elem->parent != nullptr) + { + elem->parent->Unlink(elem); + } + elem->parent = this->shared_from_this(); + if (this->firstChild == nullptr) + { + this->firstChild = elem; + this->lastChild = elem; + } + else + { + this->lastChild->next = elem; + elem->prev = this->lastChild; + this->lastChild = elem; + } + return elem; + } + + void CompositeNode::Unlink(const std::shared_ptr &elem) + { + if (elem->parent.get() != this) + { + return; + } + if (elem == this->firstChild) + { + this->firstChild = this->firstChild->next; + } + if (elem == this->lastChild) + { + this->lastChild = this->lastChild->prev; + } + if (elem->prev != nullptr) + { + elem->prev->next = elem->next; + } + if (elem->next != nullptr) + { + elem->next->prev = elem->prev; + } + elem->next = nullptr; + elem->prev = nullptr; + elem->parent = nullptr; + } + +} diff --git a/src/node.hpp b/src/node.hpp index 9dcb92b..91919c9 100644 --- a/src/node.hpp +++ b/src/node.hpp @@ -1,6 +1,7 @@ #pragma once #include #include +#include #include "exportable.hpp" @@ -10,19 +11,23 @@ namespace myxml class Element; // defined in text.hpp class Text; + // defined below + class CompositeNode; enum class NodeType { Text, Element, + Declaration, }; + // Element, Text are Node. class Node : public Exportable { public: virtual ~Node() = default; - std::shared_ptr parent; + std::shared_ptr parent; std::shared_ptr prev; std::shared_ptr next; @@ -31,4 +36,28 @@ namespace myxml virtual std::optional> AsElement() = 0; virtual std::optional> AsText() = 0; }; + + // Element are Composite Node. + class CompositeNode : public Node, public std::enable_shared_from_this + { + private: + std::shared_ptr firstChild; + std::shared_ptr lastChild; + std::map, std::less<>> nameToElemBuffer; + + public: + virtual ~CompositeNode() = default; + + /* Query */ + std::shared_ptr FirstChild(); + const std::shared_ptr &FirstChild() const; + std::shared_ptr LastChild(); + const std::shared_ptr &LastChild() const; + std::shared_ptr Elem(std::string_view name); + + /* Manipulate */ + std::shared_ptr InsertAtFront(const std::shared_ptr &); + std::shared_ptr InsertAtEnd(const std::shared_ptr &); + void Unlink(const std::shared_ptr &); + }; } diff --git a/src/parser.cpp b/src/parser.cpp index 5272daf..19ee987 100644 --- a/src/parser.cpp +++ b/src/parser.cpp @@ -22,6 +22,18 @@ namespace myxml } } + std::optional Parser::peekNextNChars(int n) + { + if (this->offset + n - 1 < this->buffer.length()) + { + return this->buffer.substr(this->offset, n); + } + else + { + return std::nullopt; + } + } + std::optional Parser::nextChar() { if (auto peek = this->peekChar(); peek.has_value()) @@ -33,6 +45,13 @@ namespace myxml return std::nullopt; } + std::optional Parser::nextNChars(int n) + { + auto nchars = this->buffer.substr(this->offset, n); + this->offset += n; + return nchars; + } + std::optional Parser::parseIdent() { if (this->peekChar() == std::nullopt) @@ -110,7 +129,7 @@ namespace myxml return std::shared_ptr(new Text(this->buffer.substr(begin, len))); } - std::optional> Parser::parseElementWithHeader(Tag header) + std::optional> Parser::parseElementWithHeader(ElementTag header) { auto elem = Element::New(); elem->SetName(header.name); @@ -123,7 +142,7 @@ namespace myxml auto tag = this->ParseTag(); switch (tag->type) { - case Tag::ClosingType::Open: + case ElementTag::ClosingType::Open: if (auto child = this->parseElementWithHeader(*tag); child) { elem->InsertAtEnd(*child); @@ -133,7 +152,7 @@ namespace myxml return std::nullopt; } break; - case Tag::ClosingType::Closed: + case ElementTag::ClosingType::Closed: { auto child = Element::New(); child->SetName(tag->name); @@ -144,7 +163,7 @@ namespace myxml elem->InsertAtEnd(child); break; } - case Tag::ClosingType::Closing: + case ElementTag::ClosingType::Closing: if (tag->name != elem->GetName()) { return std::nullopt; @@ -174,12 +193,39 @@ namespace myxml return std::nullopt; } + std::optional Parser::parseDeclaration() + { + if (this->peekNextNChars(5) != "nextNChars(5); + std::map attrs; + while (auto attr = this->parseAttribute()) + { + attrs.insert(*attr); + } + this->skipWhiteSpaces(); + if (this->nextNChars(2) != "?>") + { + return std::nullopt; + } + if (auto decl = Declaration::BuildFromAttrs(attrs); decl) + { + return decl; + } + else + { + return std::nullopt; + } + } + std::optional> Parser::ParseElement() { this->skipWhiteSpaces(); if (auto tag = this->ParseTag(); tag) { - if (tag->type == Tag::ClosingType::Closed) + if (tag->type == ElementTag::ClosingType::Closed) { auto elem = Element::New(); elem->SetName(tag->name); @@ -189,7 +235,7 @@ namespace myxml } return elem; } - else if (tag->type == Tag::ClosingType::Open) + else if (tag->type == ElementTag::ClosingType::Open) { return this->parseElementWithHeader(*tag); } @@ -197,16 +243,16 @@ namespace myxml return std::nullopt; } - std::optional Parser::ParseTag() + std::optional Parser::ParseTag() { if (this->nextChar() != '<') { return std::nullopt; } - Tag tag; + ElementTag tag; if (this->peekChar() == '/') { - tag.type = Tag::ClosingType::Closing; + tag.type = ElementTag::ClosingType::Closing; this->nextChar(); } this->skipWhiteSpaces(); @@ -225,11 +271,11 @@ namespace myxml } if (this->peekChar() == '/') { - if (tag.type != Tag::ClosingType::Open) + if (tag.type != ElementTag::ClosingType::Open) { return std::nullopt; } - tag.type = Tag::ClosingType::Closed; + tag.type = ElementTag::ClosingType::Closed; this->nextChar(); } if (this->nextChar() != '>') @@ -239,6 +285,24 @@ namespace myxml return tag; } + std::optional Parser::ParseDocument() + { + Document document; + if (auto decl = this->parseDeclaration(); decl) + { + document.SetDeclaration(*decl); + } + if (auto root = this->ParseElement(); root) + { + document.SetRoot(*root); + } + else + { + return std::nullopt; + } + return document; + } + Parser::Parser(std::string_view buffer) : buffer(buffer), offset(0) {} diff --git a/src/parser.hpp b/src/parser.hpp index ba6c344..16d3bff 100644 --- a/src/parser.hpp +++ b/src/parser.hpp @@ -1,9 +1,15 @@ #pragma once #include "element.hpp" +#include "document.hpp" namespace myxml { + // No effect currently. Just use it to mark what is a tag. struct Tag + { + }; + + struct ElementTag { enum class ClosingType { @@ -13,21 +19,36 @@ namespace myxml }; std::string name; - Tag::ClosingType type = ClosingType::Open; + ElementTag::ClosingType type = ClosingType::Open; std::map attris; }; + struct ProcessingInstruction + { + }; + class Parser { private: std::string buffer; std::size_t offset; + /** + * TODO: + * Define Exceptions , so for all parsing method, + * return std::nullopt means `not this one`, + * and throw exception means `parsing error` + */ + void skipWhiteSpaces(); // return and not consume current character std::optional peekChar(); + // return and not consume next n characters + std::optional peekNextNChars(int); // return and consume current character std::optional nextChar(); + + std::optional nextNChars(int); // return and consume a ident // will not consume ident if failed std::optional parseIdent(); @@ -41,7 +62,10 @@ namespace myxml std::optional> parseText(); // return the entire element // will consume buffer if failed - std::optional> parseElementWithHeader(Tag header); + std::optional> parseElementWithHeader(ElementTag header); + // return the declartion + // will not consume buffer if failed + std::optional parseDeclaration(); public: // return and consume current element @@ -49,7 +73,10 @@ namespace myxml std::optional> ParseElement(); // return and consume current tag // will consume buffer if failed - std::optional ParseTag(); + std::optional ParseTag(); + // return and consume whole document + // will consume buffer if failed + std::optional ParseDocument(); Parser() = delete; explicit Parser(std::string_view); }; diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 76beb38..f8e2d17 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -3,15 +3,19 @@ include(CTest) add_executable(element_test element_test.cpp) add_executable(parser_test parser_test.cpp) add_executable(exportable_test exportable_test.cpp) +add_executable(document_test document_test.cpp) target_link_libraries(element_test Catch2::Catch2WithMain myxml) target_link_libraries(parser_test Catch2::Catch2WithMain myxml) target_link_libraries(exportable_test Catch2::Catch2WithMain myxml) +target_link_libraries(document_test Catch2::Catch2WithMain myxml) target_compile_features(element_test PRIVATE cxx_std_17) target_compile_features(parser_test PRIVATE cxx_std_17) target_compile_features(exportable_test PRIVATE cxx_std_17) +target_compile_features(document_test PRIVATE cxx_std_17) add_test(NAME element_test COMMAND element_test) add_test(NAME parser_test COMMAND parser_test) -add_test(NAME exportable_test COMMAND exportable_test) \ No newline at end of file +add_test(NAME exportable_test COMMAND exportable_test) +add_test(NAME document_test COMMAND document_test) \ No newline at end of file diff --git a/tests/document_test.cpp b/tests/document_test.cpp new file mode 100644 index 0000000..6b124bf --- /dev/null +++ b/tests/document_test.cpp @@ -0,0 +1,28 @@ +#include +#include "document.cpp" + +TEST_CASE("Simple document", "[document]") +{ + SECTION("No decl") + { + std::string input = R"( + Value +)"; + auto doc = myxml::Document::Parse(input); + REQUIRE(doc->GetRoot()->GetName() == "root"); + REQUIRE(doc->GetRoot()->Elem("child")->GetName() == "child"); + REQUIRE(doc->GetRoot()->Elem("child")->FirstChild()->AsText().value()->ExportRaw() == "Value"); + } + + SECTION("With decl") + { + std::string input = R"( + + Value + +)"; + auto doc = myxml::Document::Parse(input); + REQUIRE(doc->GetDeclartion().version == "1.0"); + REQUIRE(doc->GetDeclartion().encoding == "UTF-8"); + } +} \ No newline at end of file diff --git a/tests/parser_test.cpp b/tests/parser_test.cpp index 0a46c22..beac330 100644 --- a/tests/parser_test.cpp +++ b/tests/parser_test.cpp @@ -7,17 +7,17 @@ TEST_CASE("Parsing tag", "parser") std::string open = ""; auto tag = myxml::Parser(open).ParseTag().value(); REQUIRE(tag.name == "tag"); - REQUIRE(tag.type == myxml::Tag::ClosingType::Open); + REQUIRE(tag.type == myxml::ElementTag::ClosingType::Open); std::string closed = ""; tag = myxml::Parser(closed).ParseTag().value(); REQUIRE(tag.name == "tag"); - REQUIRE(tag.type == myxml::Tag::ClosingType::Closed); + REQUIRE(tag.type == myxml::ElementTag::ClosingType::Closed); std::string closing = ""; tag = myxml::Parser(closing).ParseTag().value(); REQUIRE(tag.name == "tag"); - REQUIRE(tag.type == myxml::Tag::ClosingType::Closing); + REQUIRE(tag.type == myxml::ElementTag::ClosingType::Closing); } TEST_CASE("Parsing simple xml elements", "[parser]")