From e4d0cf7cf4e5533f25f60e904e83d56827c8096c Mon Sep 17 00:00:00 2001 From: adamska <2639980868@qq.com> Date: Thu, 22 Aug 2024 23:59:17 +0800 Subject: [PATCH 1/5] feat(document.hpp): defines xml-declaration --- src/document.cpp | 34 ++++++++++++++++++++++++++++++++++ src/document.hpp | 32 ++++++++++++++++++++++++++++++-- src/node.hpp | 1 + 3 files changed, 65 insertions(+), 2 deletions(-) create mode 100644 src/document.cpp diff --git a/src/document.cpp b/src/document.cpp new file mode 100644 index 0000000..de77470 --- /dev/null +++ b/src/document.cpp @@ -0,0 +1,34 @@ +#include "document.hpp" + +namespace myxml +{ + NodeType Declaration::Type() + { + return NodeType::Declaration; + } + + bool Declaration::isType(NodeType type) + { + return type == NodeType::Declaration; + } + + std::optional> Declaration::AsElement() + { + return std::nullopt; + } + + std::optional> Declaration::AsText() + { + return std::nullopt; + } + + std::string Declaration::ExportRaw() const + { + return "version + "\" encoding=\"" + this->encoding + "\"?>"; + } + + std::string Declaration::ExportFormatted(int indentLevel, int indentSize) const + { + return std::string(indentLevel * indentSize, ' ') + this->ExportRaw(); + } +} diff --git a/src/document.hpp b/src/document.hpp index 8d453ec..1aafaf9 100644 --- a/src/document.hpp +++ b/src/document.hpp @@ -1,5 +1,33 @@ #pragma once +#include "node.hpp" -class XMLDocument +namespace myxml { -}; \ No newline at end of file + class Declaration : public Node + { + private: + std::string version; + std::string encoding; + + public: + /* Implement Node*/ + virtual NodeType Type(); + virtual bool isType(NodeType); + virtual std::optional> AsElement(); + virtual std::optional> AsText(); + + /* Implement Exportable */ + virtual std::string ExportRaw() const; + virtual std::string ExportFormatted(int indentLevel = 0, int indentSize = 4) const; + }; + + class Document : public Node + { + private: + Declaration declaration; + std::shared_ptr root; + + public: + + }; +} diff --git a/src/node.hpp b/src/node.hpp index 9dcb92b..2581fae 100644 --- a/src/node.hpp +++ b/src/node.hpp @@ -15,6 +15,7 @@ namespace myxml { Text, Element, + Declaration, }; class Node : public Exportable From db41792a3ae16a31275c25fb7c0d263e562abbaa Mon Sep 17 00:00:00 2001 From: adamska <2639980868@qq.com> Date: Fri, 23 Aug 2024 00:31:55 +0800 Subject: [PATCH 2/5] feat(node.hpp): add class Composite Node --- src/element.cpp | 112 ++------------------------------------------ src/element.hpp | 22 ++------- src/node.cpp | 121 ++++++++++++++++++++++++++++++++++++++++++++++++ src/node.hpp | 30 +++++++++++- 4 files changed, 159 insertions(+), 126 deletions(-) create mode 100644 src/node.cpp diff --git a/src/element.cpp b/src/element.cpp index 5aa68af..cbd84cd 100644 --- a/src/element.cpp +++ b/src/element.cpp @@ -21,41 +21,6 @@ namespace myxml return Parser(buf).ParseElement().value(); } - std::shared_ptr Element::FirstChild() - { - return this->firstChild; - } - - std::shared_ptr Element::Elem(std::string_view name) - { - if (auto buf = this->nameToElemBuffer.find(name); buf != this->nameToElemBuffer.end()) - { - std::weak_ptr ptr = buf->second; - if (auto child = ptr.lock(); child != nullptr) - { - return child; - } - else - { - this->nameToElemBuffer.erase(buf); - } - } - for (auto child = this->firstChild; child != nullptr; child = child->next) - { - if (auto elem = child->AsElement(); elem && (*elem)->name == name) - { - this->nameToElemBuffer.emplace(name, *elem); - return *elem; - } - } - return nullptr; - } - - std::shared_ptr Element::LastChild() - { - return this->lastChild; - } - std::optional Element::GetAttribute(std::string_view name) { if (auto attr = this->attributes.find(name); attr != this->attributes.end()) @@ -73,75 +38,6 @@ namespace myxml return this->name; } - std::shared_ptr Element::InsertAtFront(const std::shared_ptr &elem) - { - if (elem->parent != nullptr) - { - elem->parent->Unlink(elem); - } - elem->parent = this->shared_from_this(); - if (this->firstChild == nullptr) - { - this->firstChild = elem; - this->lastChild = elem; - } - else - { - this->firstChild->prev = elem; - elem->next = this->firstChild; - this->firstChild = elem; - } - return elem; - } - - std::shared_ptr Element::InsertAtEnd(const std::shared_ptr &elem) - { - if (elem->parent != nullptr) - { - elem->parent->Unlink(elem); - } - elem->parent = this->shared_from_this(); - if (this->firstChild == nullptr) - { - this->firstChild = elem; - this->lastChild = elem; - } - else - { - this->lastChild->next = elem; - elem->prev = this->lastChild; - this->lastChild = elem; - } - return elem; - } - - void Element::Unlink(const std::shared_ptr &elem) - { - if (elem->parent.get() != this) - { - return; - } - if (elem == this->firstChild) - { - this->firstChild = this->firstChild->next; - } - if (elem == this->lastChild) - { - this->lastChild = this->lastChild->prev; - } - if (elem->prev != nullptr) - { - elem->prev->next = elem->next; - } - if (elem->next != nullptr) - { - elem->next->prev = elem->prev; - } - elem->next = nullptr; - elem->prev = nullptr; - elem->parent = nullptr; - } - void Element::SetName(std::string_view name) { this->name = name; @@ -185,13 +81,13 @@ namespace myxml { builder += "" + key + "=\"" + value + "\""; } - if (this->firstChild == nullptr) + if (this->FirstChild() == nullptr) { builder += " />"; return builder; } builder += ">"; - for (auto node = this->firstChild; node != nullptr; node = node->next) + for (auto node = this->FirstChild(); node != nullptr; node = node->next) { builder += node->ExportRaw(); } @@ -207,13 +103,13 @@ namespace myxml { builder += "" + key + "=\"" + value + "\""; } - if (this->firstChild == nullptr) + if (this->FirstChild() == nullptr) { builder += " />\n"; return builder; } builder += ">\n"; - for (auto node = this->firstChild; node != nullptr; node = node->next) + for (auto node = this->FirstChild(); node != nullptr; node = node->next) { builder += node->ExportFormatted(indentLevel + 1, indentSize); } diff --git a/src/element.hpp b/src/element.hpp index a9eeba9..440ff42 100644 --- a/src/element.hpp +++ b/src/element.hpp @@ -9,7 +9,7 @@ namespace myxml { - class Element : public std::enable_shared_from_this, public Node + class Element : public CompositeNode // public std::enable_shared_from_this, public Node { public: enum class ClosingType @@ -19,19 +19,13 @@ namespace myxml }; private: - // list node - // std::shared_ptr parent; - // std::shared_ptr next; - // std::shared_ptr prev; - - // element - std::shared_ptr firstChild; - std::shared_ptr lastChild; + // std::shared_ptr firstChild; + // std::shared_ptr lastChild; std::string name; std::map> attributes; - std::map, std::less<>> nameToElemBuffer; + // std::map, std::less<>> nameToElemBuffer; - // Initializer + /* Set nitializer as private to avoid using Element without share_ptr*/ Element(std::string_view name); Element() = default; @@ -43,16 +37,10 @@ namespace myxml static std::shared_ptr Parse(std::string_view buf); /* Query */ - std::shared_ptr FirstChild(); - std::shared_ptr LastChild(); - std::shared_ptr Elem(std::string_view name); std::optional GetAttribute(std::string_view name); std::string_view GetName() const; /* Manipulate */ - std::shared_ptr InsertAtFront(const std::shared_ptr &); - std::shared_ptr InsertAtEnd(const std::shared_ptr &); - void Unlink(const std::shared_ptr &); void SetName(std::string_view); void SetAttribute(std::string key, std::string value); void ExtendAttributes(std::map); diff --git a/src/node.cpp b/src/node.cpp new file mode 100644 index 0000000..904fe6e --- /dev/null +++ b/src/node.cpp @@ -0,0 +1,121 @@ +#include "node.hpp" +#include "element.hpp" + +namespace myxml +{ + + std::shared_ptr CompositeNode::LastChild() + { + return this->lastChild; + } + + const std::shared_ptr &CompositeNode::LastChild() const + { + return this->lastChild; + } + + std::shared_ptr CompositeNode::FirstChild() + { + return this->firstChild; + } + + const std::shared_ptr &CompositeNode::FirstChild() const + { + return this->firstChild; + } + + std::shared_ptr CompositeNode::Elem(std::string_view name) + { + if (auto buf = this->nameToElemBuffer.find(name); buf != this->nameToElemBuffer.end()) + { + std::weak_ptr ptr = buf->second; + if (auto child = ptr.lock(); child != nullptr) + { + return child; + } + else + { + this->nameToElemBuffer.erase(buf); + } + } + for (auto child = this->firstChild; child != nullptr; child = child->next) + { + if (auto elem = child->AsElement(); elem && (*elem)->GetName() == name) + { + this->nameToElemBuffer.emplace(name, *elem); + return *elem; + } + } + return nullptr; + } + + std::shared_ptr CompositeNode::InsertAtFront(const std::shared_ptr &elem) + { + if (elem->parent != nullptr) + { + elem->parent->Unlink(elem); + } + elem->parent = this->shared_from_this(); + if (this->firstChild == nullptr) + { + this->firstChild = elem; + this->lastChild = elem; + } + else + { + this->firstChild->prev = elem; + elem->next = this->firstChild; + this->firstChild = elem; + } + return elem; + } + + std::shared_ptr CompositeNode::InsertAtEnd(const std::shared_ptr &elem) + { + if (elem->parent != nullptr) + { + elem->parent->Unlink(elem); + } + elem->parent = this->shared_from_this(); + if (this->firstChild == nullptr) + { + this->firstChild = elem; + this->lastChild = elem; + } + else + { + this->lastChild->next = elem; + elem->prev = this->lastChild; + this->lastChild = elem; + } + return elem; + } + + void CompositeNode::Unlink(const std::shared_ptr &elem) + { + if (elem->parent.get() != this) + { + return; + } + if (elem == this->firstChild) + { + this->firstChild = this->firstChild->next; + } + if (elem == this->lastChild) + { + this->lastChild = this->lastChild->prev; + } + if (elem->prev != nullptr) + { + elem->prev->next = elem->next; + } + if (elem->next != nullptr) + { + elem->next->prev = elem->prev; + } + elem->next = nullptr; + elem->prev = nullptr; + elem->parent = nullptr; + } + +} diff --git a/src/node.hpp b/src/node.hpp index 2581fae..a5c5fbd 100644 --- a/src/node.hpp +++ b/src/node.hpp @@ -1,6 +1,7 @@ #pragma once #include #include +#include #include "exportable.hpp" @@ -10,6 +11,8 @@ namespace myxml class Element; // defined in text.hpp class Text; + // defined below + class CompositeNode; enum class NodeType { @@ -18,12 +21,13 @@ namespace myxml Declaration, }; + // Document, Element, Text, Declaration are Node. class Node : public Exportable { public: virtual ~Node() = default; - std::shared_ptr parent; + std::shared_ptr parent; std::shared_ptr prev; std::shared_ptr next; @@ -32,4 +36,28 @@ namespace myxml virtual std::optional> AsElement() = 0; virtual std::optional> AsText() = 0; }; + + // Doucment and Element are Composite Node. + class CompositeNode : public Node, public std::enable_shared_from_this + { + private: + std::shared_ptr firstChild; + std::shared_ptr lastChild; + std::map, std::less<>> nameToElemBuffer; + + public: + virtual ~CompositeNode() = default; + + /* Query */ + std::shared_ptr FirstChild(); + const std::shared_ptr &FirstChild() const; + std::shared_ptr LastChild(); + const std::shared_ptr &LastChild() const; + std::shared_ptr Elem(std::string_view name); + + /* Manipulate */ + std::shared_ptr InsertAtFront(const std::shared_ptr &); + std::shared_ptr InsertAtEnd(const std::shared_ptr &); + void Unlink(const std::shared_ptr &); + }; } From 5a56b328585207837a276e3c4d76eed54d2ff92a Mon Sep 17 00:00:00 2001 From: adamska <2639980868@qq.com> Date: Sun, 25 Aug 2024 22:36:32 +0800 Subject: [PATCH 3/5] feat(document.hpp): Declaration::BuildFromAttrs --- src/document.cpp | 87 +++++++++++++++++++++++++++++++++++-------- src/document.hpp | 34 +++++++++-------- src/node.hpp | 4 +- src/parser.cpp | 27 ++++++++------ src/parser.hpp | 18 +++++++-- tests/parser_test.cpp | 6 +-- 6 files changed, 126 insertions(+), 50 deletions(-) diff --git a/src/document.cpp b/src/document.cpp index de77470..e55a083 100644 --- a/src/document.cpp +++ b/src/document.cpp @@ -1,34 +1,89 @@ +#include #include "document.hpp" namespace myxml { - NodeType Declaration::Type() + void Document::SetDeclaration(const Declaration &declaration) { - return NodeType::Declaration; + this->declaration = declaration; } - bool Declaration::isType(NodeType type) + const Declaration &Document::GetDeclartion() const { - return type == NodeType::Declaration; + return this->declaration; } - std::optional> Declaration::AsElement() + std::optional Declaration::BuildFromAttrs(std::map attrs) { - return std::nullopt; + if (!attrs.count("version") || !util::isValidXmlVersion(attrs["version"])) + { + return std::nullopt; + } + Declaration declaration; + declaration.version = attrs["version"]; + if (attrs.count("encoding")) + { + auto encoding = attrs["encoding"]; + if (!util::isValidXmlEncoding(encoding)) + { + return std::nullopt; + } + declaration.encoding = encoding; + } + if (attrs.count("standalone")) + { + auto standalone = attrs["standalone"]; + if (!util::isValidXmlStandalone(standalone)) + { + return std::nullopt; + } + declaration.standalone = standalone; + } + return declaration; } - std::optional> Declaration::AsText() + namespace util { - return std::nullopt; - } + bool isValidXmlVersion(std::string_view version) + { + return version == "1.0" || version == "1.1"; + } - std::string Declaration::ExportRaw() const - { - return "version + "\" encoding=\"" + this->encoding + "\"?>"; - } + bool isValidXmlEncoding(std::string_view encoding) + { + // Registered at iana.org + static std::set> valid{ + "US-ASCII", + "ISO-8859-1", + "ISO-8859-2", + "ISO-8859-3", + "ISO-8859-4", + "ISO-8859-5", + "ISO-8859-6", + "ISO-8859-7", + "ISO-8859-8", + "ISO-8859-9", + "ISO-8859-10", + "Shift_JIS", + "EUC-JP", + "ISO-2022-KR", + "EUC-KR", + "ISO-2022-JP", + "ISO-2022-JP-2", + "ISO-8859-6-E", + "ISO-8859-6-I", + "ISO-8859-8-E", + "ISO-8859-8-I", + "GB2312", + "Big5", + "KOI8-R", + }; + return valid.count(encoding); + } - std::string Declaration::ExportFormatted(int indentLevel, int indentSize) const - { - return std::string(indentLevel * indentSize, ' ') + this->ExportRaw(); + bool isValidXmlStandalone(std::string_view standalone) + { + return standalone == "yes" || standalone == "no"; + } } } diff --git a/src/document.hpp b/src/document.hpp index 1aafaf9..d8eb40e 100644 --- a/src/document.hpp +++ b/src/document.hpp @@ -1,33 +1,37 @@ #pragma once +#include #include "node.hpp" +// Declaration and Documant are both NOT Node namespace myxml { - class Declaration : public Node + struct Declaration { - private: std::string version; - std::string encoding; - - public: - /* Implement Node*/ - virtual NodeType Type(); - virtual bool isType(NodeType); - virtual std::optional> AsElement(); - virtual std::optional> AsText(); + std::optional encoding; + std::optional standalone; - /* Implement Exportable */ - virtual std::string ExportRaw() const; - virtual std::string ExportFormatted(int indentLevel = 0, int indentSize = 4) const; + // return `std::nullopt` if declartion is in bad format + // TODO: use exception to distinguish each of bad format + static std::optional BuildFromAttrs(std::map attrs); }; - class Document : public Node + class Document { private: Declaration declaration; std::shared_ptr root; public: - + /* Manipulate */ + void SetDeclaration(const Declaration &); + const Declaration &GetDeclartion() const; }; + + namespace util + { + bool isValidXmlVersion(std::string_view); + bool isValidXmlEncoding(std::string_view); + bool isValidXmlStandalone(std::string_view); + } } diff --git a/src/node.hpp b/src/node.hpp index a5c5fbd..91919c9 100644 --- a/src/node.hpp +++ b/src/node.hpp @@ -21,7 +21,7 @@ namespace myxml Declaration, }; - // Document, Element, Text, Declaration are Node. + // Element, Text are Node. class Node : public Exportable { public: @@ -37,7 +37,7 @@ namespace myxml virtual std::optional> AsText() = 0; }; - // Doucment and Element are Composite Node. + // Element are Composite Node. class CompositeNode : public Node, public std::enable_shared_from_this { private: diff --git a/src/parser.cpp b/src/parser.cpp index 5272daf..34067be 100644 --- a/src/parser.cpp +++ b/src/parser.cpp @@ -110,7 +110,7 @@ namespace myxml return std::shared_ptr(new Text(this->buffer.substr(begin, len))); } - std::optional> Parser::parseElementWithHeader(Tag header) + std::optional> Parser::parseElementWithHeader(ElementTag header) { auto elem = Element::New(); elem->SetName(header.name); @@ -123,7 +123,7 @@ namespace myxml auto tag = this->ParseTag(); switch (tag->type) { - case Tag::ClosingType::Open: + case ElementTag::ClosingType::Open: if (auto child = this->parseElementWithHeader(*tag); child) { elem->InsertAtEnd(*child); @@ -133,7 +133,7 @@ namespace myxml return std::nullopt; } break; - case Tag::ClosingType::Closed: + case ElementTag::ClosingType::Closed: { auto child = Element::New(); child->SetName(tag->name); @@ -144,7 +144,7 @@ namespace myxml elem->InsertAtEnd(child); break; } - case Tag::ClosingType::Closing: + case ElementTag::ClosingType::Closing: if (tag->name != elem->GetName()) { return std::nullopt; @@ -179,7 +179,7 @@ namespace myxml this->skipWhiteSpaces(); if (auto tag = this->ParseTag(); tag) { - if (tag->type == Tag::ClosingType::Closed) + if (tag->type == ElementTag::ClosingType::Closed) { auto elem = Element::New(); elem->SetName(tag->name); @@ -189,7 +189,7 @@ namespace myxml } return elem; } - else if (tag->type == Tag::ClosingType::Open) + else if (tag->type == ElementTag::ClosingType::Open) { return this->parseElementWithHeader(*tag); } @@ -197,16 +197,16 @@ namespace myxml return std::nullopt; } - std::optional Parser::ParseTag() + std::optional Parser::ParseTag() { if (this->nextChar() != '<') { return std::nullopt; } - Tag tag; + ElementTag tag; if (this->peekChar() == '/') { - tag.type = Tag::ClosingType::Closing; + tag.type = ElementTag::ClosingType::Closing; this->nextChar(); } this->skipWhiteSpaces(); @@ -225,11 +225,11 @@ namespace myxml } if (this->peekChar() == '/') { - if (tag.type != Tag::ClosingType::Open) + if (tag.type != ElementTag::ClosingType::Open) { return std::nullopt; } - tag.type = Tag::ClosingType::Closed; + tag.type = ElementTag::ClosingType::Closed; this->nextChar(); } if (this->nextChar() != '>') @@ -239,6 +239,11 @@ namespace myxml return tag; } + std::optional Parser::ParseDocument() + { + return std::optional(); + } + Parser::Parser(std::string_view buffer) : buffer(buffer), offset(0) {} diff --git a/src/parser.hpp b/src/parser.hpp index ba6c344..ab4334b 100644 --- a/src/parser.hpp +++ b/src/parser.hpp @@ -1,9 +1,15 @@ #pragma once #include "element.hpp" +#include "document.hpp" namespace myxml { + // No effect currently. Just use it to mark what is a tag. struct Tag + { + }; + + struct ElementTag { enum class ClosingType { @@ -13,10 +19,14 @@ namespace myxml }; std::string name; - Tag::ClosingType type = ClosingType::Open; + ElementTag::ClosingType type = ClosingType::Open; std::map attris; }; + struct ProcessingInstruction + { + }; + class Parser { private: @@ -41,7 +51,7 @@ namespace myxml std::optional> parseText(); // return the entire element // will consume buffer if failed - std::optional> parseElementWithHeader(Tag header); + std::optional> parseElementWithHeader(ElementTag header); public: // return and consume current element @@ -49,7 +59,9 @@ namespace myxml std::optional> ParseElement(); // return and consume current tag // will consume buffer if failed - std::optional ParseTag(); + std::optional ParseTag(); + + std::optional ParseDocument(); Parser() = delete; explicit Parser(std::string_view); }; diff --git a/tests/parser_test.cpp b/tests/parser_test.cpp index 0a46c22..beac330 100644 --- a/tests/parser_test.cpp +++ b/tests/parser_test.cpp @@ -7,17 +7,17 @@ TEST_CASE("Parsing tag", "parser") std::string open = ""; auto tag = myxml::Parser(open).ParseTag().value(); REQUIRE(tag.name == "tag"); - REQUIRE(tag.type == myxml::Tag::ClosingType::Open); + REQUIRE(tag.type == myxml::ElementTag::ClosingType::Open); std::string closed = ""; tag = myxml::Parser(closed).ParseTag().value(); REQUIRE(tag.name == "tag"); - REQUIRE(tag.type == myxml::Tag::ClosingType::Closed); + REQUIRE(tag.type == myxml::ElementTag::ClosingType::Closed); std::string closing = ""; tag = myxml::Parser(closing).ParseTag().value(); REQUIRE(tag.name == "tag"); - REQUIRE(tag.type == myxml::Tag::ClosingType::Closing); + REQUIRE(tag.type == myxml::ElementTag::ClosingType::Closing); } TEST_CASE("Parsing simple xml elements", "[parser]") From 67674082bafd551e85c71a3dd40cfecd55c32691 Mon Sep 17 00:00:00 2001 From: adamska <2639980868@qq.com> Date: Sun, 25 Aug 2024 23:23:10 +0800 Subject: [PATCH 4/5] feat(parser.hpp): parse document --- src/document.cpp | 20 ++++++++++++++++ src/document.hpp | 6 +++++ src/parser.cpp | 60 +++++++++++++++++++++++++++++++++++++++++++++++- src/parser.hpp | 17 +++++++++++++- 4 files changed, 101 insertions(+), 2 deletions(-) diff --git a/src/document.cpp b/src/document.cpp index e55a083..e2a192d 100644 --- a/src/document.cpp +++ b/src/document.cpp @@ -8,11 +8,31 @@ namespace myxml this->declaration = declaration; } + void Document::SetRoot(std::shared_ptr root) + { + this->root = root; + } + const Declaration &Document::GetDeclartion() const { return this->declaration; } + Declaration &Document::GetDeclartion() + { + return this->declaration; + } + + const std::shared_ptr &Document::GetRoot() const + { + return this->root; + } + + std::shared_ptr Document::GetRoot() + { + return this->root; + } + std::optional Declaration::BuildFromAttrs(std::map attrs) { if (!attrs.count("version") || !util::isValidXmlVersion(attrs["version"])) diff --git a/src/document.hpp b/src/document.hpp index d8eb40e..7e1103d 100644 --- a/src/document.hpp +++ b/src/document.hpp @@ -25,7 +25,13 @@ namespace myxml public: /* Manipulate */ void SetDeclaration(const Declaration &); + void SetRoot(std::shared_ptr root); + + /* Query */ const Declaration &GetDeclartion() const; + Declaration &GetDeclartion(); + const std::shared_ptr &GetRoot() const; + std::shared_ptr GetRoot(); }; namespace util diff --git a/src/parser.cpp b/src/parser.cpp index 34067be..6d2499c 100644 --- a/src/parser.cpp +++ b/src/parser.cpp @@ -22,6 +22,18 @@ namespace myxml } } + std::optional Parser::peekNextNChars(int n) + { + if (this->offset + n - 1 < this->buffer.length()) + { + return this->buffer.substr(this->offset, n); + } + else + { + return std::nullopt; + } + } + std::optional Parser::nextChar() { if (auto peek = this->peekChar(); peek.has_value()) @@ -33,6 +45,13 @@ namespace myxml return std::nullopt; } + std::optional Parser::nextNChars(int n) + { + std::string_view nchars = this->buffer.substr(this->offset, this->offset + n); + this->offset += n; + return nchars; + } + std::optional Parser::parseIdent() { if (this->peekChar() == std::nullopt) @@ -174,6 +193,32 @@ namespace myxml return std::nullopt; } + std::optional Parser::parseDeclaration() + { + if (this->peekNextNChars(2) != "nextNChars(2); + std::map attrs; + while (auto attr = this->parseAttribute()) + { + attrs.insert(*attr); + } + if (this->nextNChars(2) != "?>") + { + return std::nullopt; + } + if (auto decl = Declaration::BuildFromAttrs(attrs); decl) + { + return decl; + } + else + { + return std::nullopt; + } + } + std::optional> Parser::ParseElement() { this->skipWhiteSpaces(); @@ -241,7 +286,20 @@ namespace myxml std::optional Parser::ParseDocument() { - return std::optional(); + Document document; + if (auto decl = this->parseDeclaration(); decl) + { + document.SetDeclaration(*decl); + } + if (auto root = this->ParseElement(); root) + { + document.SetRoot(*root); + } + else + { + return std::nullopt; + } + return document; } Parser::Parser(std::string_view buffer) diff --git a/src/parser.hpp b/src/parser.hpp index ab4334b..9c3374d 100644 --- a/src/parser.hpp +++ b/src/parser.hpp @@ -33,11 +33,22 @@ namespace myxml std::string buffer; std::size_t offset; + /** + * TODO: + * Define Exceptions , so for all parsing method, + * return std::nullopt means `not this one`, + * and throw exception means `parsing error` + */ + void skipWhiteSpaces(); // return and not consume current character std::optional peekChar(); + // return and not consume next n characters + std::optional peekNextNChars(int); // return and consume current character std::optional nextChar(); + + std::optional nextNChars(int); // return and consume a ident // will not consume ident if failed std::optional parseIdent(); @@ -52,6 +63,9 @@ namespace myxml // return the entire element // will consume buffer if failed std::optional> parseElementWithHeader(ElementTag header); + // return the declartion + // will not consume buffer if failed + std::optional parseDeclaration(); public: // return and consume current element @@ -60,7 +74,8 @@ namespace myxml // return and consume current tag // will consume buffer if failed std::optional ParseTag(); - + // return and consume whole document + // will consume buffer if failed std::optional ParseDocument(); Parser() = delete; explicit Parser(std::string_view); From ada40cf4e903f8507fa55a5a7813b4de23d475e3 Mon Sep 17 00:00:00 2001 From: adamska <2639980868@qq.com> Date: Mon, 26 Aug 2024 20:38:29 +0800 Subject: [PATCH 5/5] fix(parser.hpp): some bugs on parsing doc --- src/document.cpp | 36 +++++++++++------------------------- src/document.hpp | 2 ++ src/parser.cpp | 11 ++++++----- src/parser.hpp | 4 ++-- tests/CMakeLists.txt | 6 +++++- tests/document_test.cpp | 28 ++++++++++++++++++++++++++++ 6 files changed, 54 insertions(+), 33 deletions(-) create mode 100644 tests/document_test.cpp diff --git a/src/document.cpp b/src/document.cpp index e2a192d..7fb7fba 100644 --- a/src/document.cpp +++ b/src/document.cpp @@ -1,5 +1,6 @@ #include #include "document.hpp" +#include "parser.hpp" namespace myxml { @@ -33,6 +34,11 @@ namespace myxml return this->root; } + std::optional Document::Parse(std::string input) + { + return Parser(input).ParseDocument(); + } + std::optional Declaration::BuildFromAttrs(std::map attrs) { if (!attrs.count("version") || !util::isValidXmlVersion(attrs["version"])) @@ -71,32 +77,12 @@ namespace myxml bool isValidXmlEncoding(std::string_view encoding) { - // Registered at iana.org + // FIXME: not cover all valid encoding static std::set> valid{ - "US-ASCII", - "ISO-8859-1", - "ISO-8859-2", - "ISO-8859-3", - "ISO-8859-4", - "ISO-8859-5", - "ISO-8859-6", - "ISO-8859-7", - "ISO-8859-8", - "ISO-8859-9", - "ISO-8859-10", - "Shift_JIS", - "EUC-JP", - "ISO-2022-KR", - "EUC-KR", - "ISO-2022-JP", - "ISO-2022-JP-2", - "ISO-8859-6-E", - "ISO-8859-6-I", - "ISO-8859-8-E", - "ISO-8859-8-I", - "GB2312", - "Big5", - "KOI8-R", + "UTF-8", + "UTF-16", + "UTF-32", + "GBK", }; return valid.count(encoding); } diff --git a/src/document.hpp b/src/document.hpp index 7e1103d..5968fe0 100644 --- a/src/document.hpp +++ b/src/document.hpp @@ -32,6 +32,8 @@ namespace myxml Declaration &GetDeclartion(); const std::shared_ptr &GetRoot() const; std::shared_ptr GetRoot(); + + static std::optional Parse(std::string); }; namespace util diff --git a/src/parser.cpp b/src/parser.cpp index 6d2499c..19ee987 100644 --- a/src/parser.cpp +++ b/src/parser.cpp @@ -22,7 +22,7 @@ namespace myxml } } - std::optional Parser::peekNextNChars(int n) + std::optional Parser::peekNextNChars(int n) { if (this->offset + n - 1 < this->buffer.length()) { @@ -45,9 +45,9 @@ namespace myxml return std::nullopt; } - std::optional Parser::nextNChars(int n) + std::optional Parser::nextNChars(int n) { - std::string_view nchars = this->buffer.substr(this->offset, this->offset + n); + auto nchars = this->buffer.substr(this->offset, n); this->offset += n; return nchars; } @@ -195,16 +195,17 @@ namespace myxml std::optional Parser::parseDeclaration() { - if (this->peekNextNChars(2) != "peekNextNChars(5) != "nextNChars(2); + this->nextNChars(5); std::map attrs; while (auto attr = this->parseAttribute()) { attrs.insert(*attr); } + this->skipWhiteSpaces(); if (this->nextNChars(2) != "?>") { return std::nullopt; diff --git a/src/parser.hpp b/src/parser.hpp index 9c3374d..16d3bff 100644 --- a/src/parser.hpp +++ b/src/parser.hpp @@ -44,11 +44,11 @@ namespace myxml // return and not consume current character std::optional peekChar(); // return and not consume next n characters - std::optional peekNextNChars(int); + std::optional peekNextNChars(int); // return and consume current character std::optional nextChar(); - std::optional nextNChars(int); + std::optional nextNChars(int); // return and consume a ident // will not consume ident if failed std::optional parseIdent(); diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 76beb38..f8e2d17 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -3,15 +3,19 @@ include(CTest) add_executable(element_test element_test.cpp) add_executable(parser_test parser_test.cpp) add_executable(exportable_test exportable_test.cpp) +add_executable(document_test document_test.cpp) target_link_libraries(element_test Catch2::Catch2WithMain myxml) target_link_libraries(parser_test Catch2::Catch2WithMain myxml) target_link_libraries(exportable_test Catch2::Catch2WithMain myxml) +target_link_libraries(document_test Catch2::Catch2WithMain myxml) target_compile_features(element_test PRIVATE cxx_std_17) target_compile_features(parser_test PRIVATE cxx_std_17) target_compile_features(exportable_test PRIVATE cxx_std_17) +target_compile_features(document_test PRIVATE cxx_std_17) add_test(NAME element_test COMMAND element_test) add_test(NAME parser_test COMMAND parser_test) -add_test(NAME exportable_test COMMAND exportable_test) \ No newline at end of file +add_test(NAME exportable_test COMMAND exportable_test) +add_test(NAME document_test COMMAND document_test) \ No newline at end of file diff --git a/tests/document_test.cpp b/tests/document_test.cpp new file mode 100644 index 0000000..6b124bf --- /dev/null +++ b/tests/document_test.cpp @@ -0,0 +1,28 @@ +#include +#include "document.cpp" + +TEST_CASE("Simple document", "[document]") +{ + SECTION("No decl") + { + std::string input = R"( + Value +)"; + auto doc = myxml::Document::Parse(input); + REQUIRE(doc->GetRoot()->GetName() == "root"); + REQUIRE(doc->GetRoot()->Elem("child")->GetName() == "child"); + REQUIRE(doc->GetRoot()->Elem("child")->FirstChild()->AsText().value()->ExportRaw() == "Value"); + } + + SECTION("With decl") + { + std::string input = R"( + + Value + +)"; + auto doc = myxml::Document::Parse(input); + REQUIRE(doc->GetDeclartion().version == "1.0"); + REQUIRE(doc->GetDeclartion().encoding == "UTF-8"); + } +} \ No newline at end of file