Skip to content

Commit

Permalink
Merge pull request #10 from Adamska1008/9-handle-special-character-es…
Browse files Browse the repository at this point in the history
…caping-for-xml-import-and-export

9 handle special character escaping for xml import and export
  • Loading branch information
Adamska1008 authored Aug 28, 2024
2 parents f2d9306 + 61b470e commit a6dbb51
Showing 9 changed files with 152 additions and 14 deletions.
39 changes: 39 additions & 0 deletions src/document.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#include <set>
#include <fmt/core.h>
#include "document.hpp"
#include "parser.hpp"

@@ -39,6 +40,21 @@ namespace myxml
return Parser(input).ParseDocument();
}

// Serializes the document with no added indentation: the declaration's raw
// form immediately followed by the root element's raw form.
std::string Document::ExportRaw() const
{
    std::string output = this->declaration.ExportRaw();
    output += this->root->ExportRaw();
    return output;
}

// Serializes the document in indented form.
//
// indentLevel: nesting depth at which the document itself is written.
// indentSize:  number of spaces per nesting level.
//
// The declaration and the root element are both top-level items of the
// document, so they are written at `indentLevel` itself rather than one level
// deeper. This matters for the default call (indentLevel == 0): an XML
// declaration is only valid when nothing, not even whitespace, precedes it.
std::string Document::ExportFormatted(int indentLevel, int indentSize) const
{
    std::string output = this->declaration.ExportFormatted(indentLevel, indentSize);
    output += this->root->ExportFormatted(indentLevel, indentSize);
    return output;
}

void Document::SetEntityEncoding(bool flag)
{
this->root->SetEntityEncoding(flag);
}

std::optional<Declaration> Declaration::BuildFromAttrs(std::map<std::string, std::string> attrs)
{
if (!attrs.count("version") || !util::isValidXmlVersion(attrs["version"]))
@@ -68,6 +84,29 @@ namespace myxml
return declaration;
}

// Renders the XML declaration, e.g. `<?xml version="1.0" encoding="UTF-8"?>`,
// terminated by a newline.
//
// `version` is always written; `encoding` and `standalone` are written only
// when they hold a value. Every pseudo-attribute value is wrapped in double
// quotes, as the XML grammar requires (an unquoted `version=1.0` is rejected
// by conforming parsers).
// NOTE(review): assumes the stored values are the bare text without
// surrounding quotes and that `standalone` holds a string — confirm against
// BuildFromAttrs / the parser.
std::string Declaration::ExportRaw() const
{
    std::string builder = "<?xml version=\"";
    builder.append(this->version).append("\"");
    if (this->encoding)
    {
        builder.append(" encoding=\"").append(*this->encoding).append("\"");
    }
    if (this->standalone)
    {
        builder.append(" standalone=\"").append(*this->standalone).append("\"");
    }
    builder.append("?>\n");
    return builder;
}

// Renders the declaration prefixed by `indentLevel * indentSize` spaces.
//
// indentLevel: nesting depth of the declaration.
// indentSize:  number of spaces per nesting level.
//
// std::string's fill constructor takes (count, character) in that order;
// passing (' ', n) would instead yield 32 copies of the character with code n.
std::string Declaration::ExportFormatted(int indentLevel, int indentSize) const
{
    return std::string(indentLevel * indentSize, ' ') + this->ExportRaw();
}

// No-op: Declaration::ExportRaw() builds its output from version, encoding and
// standalone only and never consults an entity-encoding flag, so there is
// nothing to record here. The override exists because Exportable declares
// SetEntityEncoding as pure virtual.
void Declaration::SetEntityEncoding(bool flag)
{
}

namespace util
{
bool isValidXmlVersion(std::string_view version)
14 changes: 12 additions & 2 deletions src/document.hpp
Original file line number Diff line number Diff line change
@@ -5,7 +5,7 @@
// Declaration and Document are both NOT Node
namespace myxml
{
struct Declaration
struct Declaration : public Exportable
{
std::string version;
std::optional<std::string> encoding;
@@ -14,9 +14,14 @@ namespace myxml
// return `std::nullopt` if declaration is in bad format
// TODO: use exception to distinguish each of bad format
static std::optional<Declaration> BuildFromAttrs(std::map<std::string, std::string> attrs);

/* Exportable */
virtual std::string ExportRaw() const;
virtual std::string ExportFormatted(int indentLevel = 0, int indentSize = 4) const;
virtual void SetEntityEncoding(bool flag) override;
};

class Document
class Document : public Exportable
{
private:
Declaration declaration;
@@ -34,6 +39,11 @@ namespace myxml
std::shared_ptr<Element> GetRoot();

static std::optional<Document> Parse(std::string);

/* Exportable */
virtual std::string ExportRaw() const;
virtual std::string ExportFormatted(int indentLevel = 0, int indentSize = 4) const;
virtual void SetEntityEncoding(bool flag) override;
};

namespace util
10 changes: 9 additions & 1 deletion src/element.cpp
Original file line number Diff line number Diff line change
@@ -58,7 +58,7 @@ namespace myxml
return NodeType::Element;
}

bool Element::isType(NodeType type)
// Reports whether `type` names the Element node kind.
bool Element::IsType(NodeType type)
{
    return NodeType::Element == type;
}
@@ -116,4 +116,12 @@ namespace myxml
builder += indent + "</" + std::string(this->GetName()) + ">\n";
return builder;
}

// Applies the entity-encoding switch to every direct child, walking the
// sibling chain from FirstChild() through each node's `next` link. Each child
// decides for itself what to do with the flag.
void Element::SetEntityEncoding(bool flag)
{
    auto child = this->FirstChild();
    while (child != nullptr)
    {
        child->SetEntityEncoding(flag);
        child = child->next;
    }
}
}
3 changes: 2 additions & 1 deletion src/element.hpp
Original file line number Diff line number Diff line change
@@ -47,12 +47,13 @@ namespace myxml

/* Implement Node */
virtual NodeType Type() override;
virtual bool isType(NodeType) override;
virtual bool IsType(NodeType) override;
virtual std::optional<std::shared_ptr<Element>> AsElement() override;
virtual std::optional<std::shared_ptr<Text>> AsText() override;

/* Implement Exportable */
virtual std::string ExportRaw() const override;
virtual std::string ExportFormatted(int indentLevel = 0, int indentSize = 4) const override;
virtual void SetEntityEncoding(bool) override;
};
}
1 change: 1 addition & 0 deletions src/exportable.hpp
Original file line number Diff line number Diff line change
@@ -9,5 +9,6 @@ namespace myxml
virtual ~Exportable() = default;
virtual std::string ExportRaw() const = 0;
virtual std::string ExportFormatted(int indentLevel = 0, int indentSize = 4) const = 0;
virtual void SetEntityEncoding(bool) = 0;
};
}
2 changes: 1 addition & 1 deletion src/node.hpp
Original file line number Diff line number Diff line change
@@ -32,7 +32,7 @@ namespace myxml
std::shared_ptr<Node> next;

virtual NodeType Type() = 0;
virtual bool isType(NodeType) = 0;
virtual bool IsType(NodeType) = 0;
virtual std::optional<std::shared_ptr<Element>> AsElement() = 0;
virtual std::optional<std::shared_ptr<Text>> AsText() = 0;
};
74 changes: 68 additions & 6 deletions src/text.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#include <algorithm>
#include <cctype>
#include <unordered_map>
#include "text.hpp"

namespace myxml
@@ -8,7 +9,7 @@ namespace myxml
return NodeType::Text;
}

bool Text::isType(NodeType type)
// Reports whether `type` names the Text node kind.
bool Text::IsType(NodeType type)
{
    return NodeType::Text == type;
}
@@ -24,23 +25,84 @@ namespace myxml
return std::dynamic_pointer_cast<Text>(this->shared_from_this());
}

Text::Text(std::string_view str)
: inner(str) {}
// Builds a text node from raw XML character data, replacing each of the five
// predefined entity references (&lt; &gt; &amp; &quot; &apos;) with the
// character it stands for. Any other '&' sequence is copied through verbatim.
// Entity encoding on export starts out enabled.
Text::Text(std::string_view input)
    : encodeOnExport(true)
{
    // Entity reference (including '&' and ';') -> decoded character.
    static const std::map<std::string, char, std::less<>> decodeTable = {
        {"&lt;", '<'},
        {"&gt;", '>'},
        {"&amp;", '&'},
        {"&quot;", '"'},
        {"&apos;", '\''},
    };
    constexpr auto npos = std::string_view::npos;

    std::size_t copied = 0;            // everything before this offset is already in `inner`
    std::size_t amp = input.find('&'); // candidate start of an entity reference
    while (amp != npos)
    {
        const std::size_t semi = input.find(';', amp);
        if (semi == npos)
        {
            // No terminator anywhere ahead, so no later '&' can form an entity either.
            break;
        }
        const auto hit = decodeTable.find(input.substr(amp, semi - amp + 1));
        if (hit == decodeTable.end())
        {
            // Not a known entity: leave it in place and look for the next '&'.
            amp = input.find('&', amp + 1);
            continue;
        }
        this->inner.append(input.substr(copied, amp - copied)); // literal run before the entity
        this->inner.push_back(hit->second);                     // decoded character
        copied = semi + 1;
        // Resume after the ';' so the decoded character is never re-examined.
        amp = input.find('&', copied);
    }
    this->inner.append(input.substr(copied)); // trailing literal run
}

bool Text::isAllSpace() const
// Returns true when every character of the stored text is whitespace as
// classified by std::isspace; an empty text also yields true.
//
// Each character is received as unsigned char before classification: calling
// std::isspace with a negative value (any non-ASCII byte where plain char is
// signed) is undefined behaviour.
bool Text::IsAllSpace() const
{
    return std::all_of(this->inner.begin(), this->inner.end(),
                       [](unsigned char ch) { return std::isspace(ch) != 0; });
}

void Text::SetEntityEncoding(bool flag)
{
this->encodeOnExport = flag;
}

// Returns the text content for export.
//
// With entity encoding disabled the stored text is returned unchanged.
// Otherwise each of the characters < > & " ' is replaced by its predefined
// entity reference and every other character is copied through as-is.
std::string Text::ExportRaw() const
{
    if (!this->encodeOnExport)
    {
        return this->inner;
    }

    // Special character -> entity reference.
    static const std::unordered_map<char, std::string> entityMap = {
        {'<', "&lt;"},
        {'>', "&gt;"},
        {'&', "&amp;"},
        {'"', "&quot;"},
        {'\'', "&apos;"},
    };

    std::string builder;
    builder.reserve(this->inner.size()); // lower bound; grows only if something is escaped
    for (const char ch : this->inner)
    {
        if (const auto it = entityMap.find(ch); it != entityMap.end())
        {
            builder += it->second;
        }
        else
        {
            builder += ch;
        }
    }
    return builder;
}

// Renders the text on its own line, prefixed by `indentLevel * indentSize`
// spaces and followed by a newline.
//
// indentLevel: nesting depth of this text node.
// indentSize:  number of spaces per nesting level.
//
// The content comes from ExportRaw() so the formatted output honours the
// entity-encoding switch the same way the raw output does; emitting the stored
// text directly would leak unescaped '<' and '&' into the document.
std::string Text::ExportFormatted(int indentLevel, int indentSize) const
{
    // TODO: better implementation
    return std::string(indentLevel * indentSize, ' ') + this->ExportRaw() + '\n';
}

}
8 changes: 5 additions & 3 deletions src/text.hpp
Original file line number Diff line number Diff line change
@@ -8,21 +8,23 @@ namespace myxml
{
private:
std::string inner;
bool encodeOnExport;

public:
explicit Text(std::string_view str);

/* Used in Export*/
bool isAllSpace() const;
// may be used in Export
bool IsAllSpace() const;

/* implement Node */
virtual NodeType Type() override;
virtual bool isType(NodeType) override;
virtual bool IsType(NodeType) override;
virtual std::optional<std::shared_ptr<Element>> AsElement() override;
virtual std::optional<std::shared_ptr<Text>> AsText() override;

/* Implement Exportable */
virtual std::string ExportRaw() const override;
virtual std::string ExportFormatted(int indentLevel = 0, int indentSize = 4) const override;
virtual void SetEntityEncoding(bool) override;
};
}
15 changes: 15 additions & 0 deletions tests/parser_test.cpp
Original file line number Diff line number Diff line change
@@ -185,4 +185,19 @@ TEST_CASE("Parsing simple xml elements", "[parser]")
// Check the text content of the child element
REQUIRE(child->FirstChild()->AsText().value()->ExportRaw() == "Text");
}

// Checks both directions of entity handling on a parsed element: references
// in the input are decoded when the text node is built, and are re-encoded on
// export only while entity encoding is switched on.
SECTION("Decoding entity")
{
// Input whose text child contains the escaped forms of '<' and '>'.
std::string root = R"(<root>
&lt;&gt;
</root>)";
auto elem = myxml::Element::Parse(root);

// Encoding off: ExportRaw() yields the decoded characters.
elem->SetEntityEncoding(false);
REQUIRE(elem->GetName() == "root");
REQUIRE(elem->FirstChild()->AsText().value()->ExportRaw() == "\n <>\n");

// Encoding on: the same text node exports the entity references again.
elem->SetEntityEncoding(true);
REQUIRE(elem->FirstChild()->AsText().value()->ExportRaw() == "\n &lt;&gt;\n");
}
}

0 comments on commit a6dbb51

Please sign in to comment.