Skip to content

Commit

Permalink
feat(text.hpp): newline normalization
Browse files Browse the repository at this point in the history
  • Loading branch information
Adamska1008 committed Aug 30, 2024
1 parent 52cf642 commit 4f3852b
Show file tree
Hide file tree
Showing 11 changed files with 143 additions and 48 deletions.
1 change: 0 additions & 1 deletion include/myxml/document.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ namespace myxml
/* Exportable */
virtual std::string ExportRaw() const;
virtual std::string ExportFormatted(int indentLevel = 0, int indentSize = 4) const;
virtual void SetEntityEncoding(bool flag) override;
};

class Document : public Exportable
Expand Down
1 change: 0 additions & 1 deletion include/myxml/element.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,5 @@ namespace myxml
/* Implement Exportable */
virtual std::string ExportRaw() const override;
virtual std::string ExportFormatted(int indentLevel = 0, int indentSize = 4) const override;
virtual void SetEntityEncoding(bool) override;
};
}
15 changes: 14 additions & 1 deletion include/myxml/exportable.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,25 @@

namespace myxml
{
// Options consulted when a node is exported to a string.
class ExportConfig
{
public:
// When true, Text::ExportRaw() runs its entity-substitution pass over the text.
// Initialised to true by the constructor.
bool EntityEncoding;
// When true, Text::ExportRaw() replaces each '\n' with util::platformSpecificNewline().
// Initialised to false by the constructor.
bool PlatformSpecificNewline;

// Defined in src/exportable.cpp; sets the defaults noted above.
ExportConfig();
};

// Interface for objects that can be exported as a std::string.
class Exportable
{
protected:
// Export options for this object; Node and CompositeNode write to it from
// their Set* overrides.
ExportConfig config;

public:
virtual ~Exportable() = default;
// Returns the exported string.
virtual std::string ExportRaw() const = 0;
// Returns the exported string for the given indentation level and width
// (Text prefixes indentLevel * indentSize spaces).
virtual std::string ExportFormatted(int indentLevel = 0, int indentSize = 4) const = 0;
// NOTE(review): both a pure-virtual and an empty-bodied SetEntityEncoding are
// listed here; this looks like the before/after pair of the change, so confirm
// that only one of them belongs in the header.
virtual void SetEntityEncoding(bool) = 0;
// Default implementation ignores the flag; overriders decide how to apply it.
virtual void SetEntityEncoding(bool) {};
// Default implementation ignores the flag; overriders decide how to apply it.
virtual void SetPlatformSpecificNewline(bool) {};
};
}
8 changes: 8 additions & 0 deletions include/myxml/node.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,10 @@ namespace myxml
std::enable_if_t<std::is_base_of_v<Node, T>,
std::optional<std::shared_ptr<T>>>
As();

/** Implement Export */
virtual void SetEntityEncoding(bool) override;
virtual void SetPlatformSpecificNewline(bool) override;
};

// Elements are composite nodes.
Expand All @@ -52,6 +56,10 @@ namespace myxml
std::shared_ptr<Node> InsertAtFront(const std::shared_ptr<Node> &);
std::shared_ptr<Node> InsertAtEnd(const std::shared_ptr<Node> &);
void Unlink(const std::shared_ptr<Node> &);

/** Implement Export */
virtual void SetEntityEncoding(bool) override;
virtual void SetPlatformSpecificNewline(bool) override;
};

template <typename T>
Expand Down
7 changes: 5 additions & 2 deletions include/myxml/text.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ namespace myxml
{
private:
std::string inner;
bool encodeOnExport;

public:
explicit Text(std::string_view str);
Expand All @@ -21,6 +20,10 @@ namespace myxml
/* Implement Exportable */
virtual std::string ExportRaw() const override;
virtual std::string ExportFormatted(int indentLevel = 0, int indentSize = 4) const override;
virtual void SetEntityEncoding(bool) override;
};

namespace util
{
// Returns the newline sequence chosen at compile time: "\r\n" when _WIN32 is
// defined, "\n" otherwise.
const char *const platformSpecificNewline();
};
}
4 changes: 0 additions & 4 deletions src/document.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -103,10 +103,6 @@ namespace myxml
return std::string(' ', indentLevel * indentSize) + this->ExportRaw();
}

// No-op: the flag is ignored and no state is changed.
void Declaration::SetEntityEncoding(bool /*flag*/)
{
}

namespace util
{
bool isValidXmlVersion(std::string_view version)
Expand Down
8 changes: 0 additions & 8 deletions src/element.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -96,12 +96,4 @@ namespace myxml
builder += indent + "</" + std::string(this->GetName()) + ">\n";
return builder;
}

void Element::SetEntityEncoding(bool flag)
{
for (auto it = this->FirstChild(); it != nullptr; it = it->next)
{
it->SetEntityEncoding(flag);
}
}
}
10 changes: 10 additions & 0 deletions src/exportable.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#include "myxml/exportable.hpp"

namespace myxml
{
ExportConfig::ExportConfig()
: EntityEncoding(true),
PlatformSpecificNewline(false)
{
}
}
25 changes: 25 additions & 0 deletions src/node.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,15 @@

namespace myxml
{
void Node::SetEntityEncoding(bool flag)
{
this->config.EntityEncoding = flag;
}

void Node::SetPlatformSpecificNewline(bool flag)
{
this->config.PlatformSpecificNewline = flag;
}

std::shared_ptr<Node> CompositeNode::LastChild()
{
Expand Down Expand Up @@ -120,4 +128,21 @@ namespace myxml
elem->parent = nullptr;
}

void CompositeNode::SetEntityEncoding(bool flag)
{
this->config.EntityEncoding = flag;
for (auto it = this->FirstChild(); it != nullptr; it = it->next)
{
it->SetEntityEncoding(flag);
}
}

void CompositeNode::SetPlatformSpecificNewline(bool flag)
{
this->config.PlatformSpecificNewline = flag;
for (auto it = this->FirstChild(); it != nullptr; it = it->next)
{
it->SetPlatformSpecificNewline(flag);
}
}
}
100 changes: 69 additions & 31 deletions src/text.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,51 +6,65 @@ namespace myxml
{

// Builds the node's stored text (`inner`) from `input`.
//
// NOTE(review): this span appears to interleave the pre-change and post-change
// bodies of the constructor (diff view without +/- markers), so it does not
// read as a single coherent function; the comments below describe the
// individual steps that are visible.
//
// Visible steps:
//   * "\r\n" and a lone '\r' are each collapsed to a single '\n';
//   * the five predefined XML entities (&lt; &gt; &amp; &quot; &apos;) are
//     replaced by the character they stand for;
//   * an "&...;" run that is not in the table is copied through unchanged.
//
// NOTE(review): `config` is default-constructed before this body runs and
// ExportConfig's constructor sets EntityEncoding to true, so the `else` branch
// looks unreachable at construction time, and newline normalization is tied to
// the entity flag; confirm this is intended.
Text::Text(std::string_view input)
: encodeOnExport(true)
{
// entity decoding table: entity text -> decoded character
static std::map<std::string, char, std::less<>> entityMap = {
{"&lt;", '<'},
{"&gt;", '>'},
{"&amp;", '&'},
{"&quot;", '"'},
{"&apos;", '\''},
};
std::size_t len = input.length();
std::size_t start = 0; // start of current segment
for (std::size_t i = 0; i < len; i++)
if (config.EntityEncoding)
{
if (input[i] == '&')
// entity decoding table: entity text -> decoded character
static std::map<std::string, char, std::less<>> entityMap = {
{"&lt;", '<'},
{"&gt;", '>'},
{"&amp;", '&'},
{"&quot;", '"'},
{"&apos;", '\''},
};
std::size_t len = input.length();
std::size_t start = 0; // start of current segment

for (std::size_t i = 0; i < len; i++)
{
if (auto semicolonPos = input.find(';', i); semicolonPos != std::string::npos)
// Newline normalization: flush the pending segment, emit one '\n', and
// swallow the '\n' of a "\r\n" pair.
if (input[i] == '\r')
{
std::string_view entity = input.substr(i, semicolonPos - i + 1);
if (auto it = entityMap.find(entity); it != entityMap.end())
this->inner += input.substr(start, i - start);
if (i + 1 < len && input[i + 1] == '\n')
{
this->inner += input.substr(start, i - start); // append unmodified segment
this->inner += it->second; // append decoded character
i = semicolonPos; // skip past the entity
start = semicolonPos + 1; // next unappended segment starts after the entity
i += 1;
}
this->inner += '\n';
start = i + 1;
}
// Entity decoding: the candidate runs from '&' to the next ';' inclusive.
if (input[i] == '&')
{
if (auto semicolonPos = input.find(';', i); semicolonPos != std::string::npos)
{
std::string_view entity = input.substr(i, semicolonPos - i + 1);
if (auto it = entityMap.find(entity); it != entityMap.end())
{
this->inner += input.substr(start, i - start); // append unmodified segment
this->inner += it->second; // append decoded character
i = semicolonPos; // skip past the entity
start = semicolonPos + 1; // next unappended segment starts after the entity
}
}
}
}
this->inner += input.substr(start, len - start); // append the remaining tail
}
else
{
this->inner = input;
}
this->inner += input.substr(start, len - start); // append the remaining tail
}

// Reports whether every character of the stored text is whitespace as
// classified by isspace(). An empty text yields true.
bool Text::IsAllSpace() const
{
    // isspace() is undefined for negative arguments other than EOF, and plain
    // char may be signed, so bytes >= 0x80 (e.g. UTF-8 content) have to be
    // passed as unsigned char rather than handed to isspace() directly.
    return std::all_of(this->inner.begin(), this->inner.end(),
                       [](unsigned char ch)
                       { return isspace(ch) != 0; });
}

void Text::SetEntityEncoding(bool flag)
{
this->encodeOnExport = flag;
}

std::string Text::ExportRaw() const
{
if (!this->encodeOnExport)
if (!this->config.EntityEncoding && !this->config.PlatformSpecificNewline)
{
return this->inner;
}
Expand All @@ -68,11 +82,23 @@ namespace myxml
std::string builder;
for (std::size_t i = 0; i < len; i++)
{
if (auto it = entityMap.find(this->inner[i]); it != entityMap.end())
if (this->config.EntityEncoding)
{
builder += this->inner.substr(start, i - start);
builder += it->second;
start = i + 1;
if (auto it = entityMap.find(this->inner[i]); it != entityMap.end())
{
builder += this->inner.substr(start, i - start);
builder += it->second;
start = i + 1;
}
}
if (this->config.PlatformSpecificNewline)
{
if (this->inner[i] == '\n')
{
builder += this->inner.substr(start, i - start);
builder += util::platformSpecificNewline();
start = i + 1;
}
}
}
builder += this->inner.substr(start, len - start);
Expand All @@ -85,4 +111,16 @@ namespace myxml
// TODO: better implementation
return std::string(indentLevel * indentSize, ' ') + this->inner + '\n';
}

namespace util
{
    // Newline sequence selected at compile time: CRLF when _WIN32 is defined,
    // LF otherwise. The returned pointer refers to a string literal.
    const char *const platformSpecificNewline()
    {
#if defined(_WIN32)
        constexpr const char *newline = "\r\n";
#else
        constexpr const char *newline = "\n";
#endif
        return newline;
    }
}
}
12 changes: 12 additions & 0 deletions tests/parser_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -208,4 +208,16 @@ TEST_CASE("Parsing simple xml elements", "[parser]")

REQUIRE(elem->FirstChild()->As<myxml::CData>().value()->ExportRaw() == "<![CDATA[Hello!]]>\n");
}

SECTION("Newline Normalization")
{
    // Parses `xml` and returns the raw export of the root's first child,
    // which is expected to be a Text node.
    const auto exportedText = [](std::string xml)
    {
        auto root = myxml::Element::Parse(xml);
        return root->FirstChild()->As<myxml::Text>().value()->ExportRaw();
    };

    // CRLF collapses to a single LF.
    REQUIRE(exportedText("<root>hello\r\n</root>") == "hello\n");

    // A lone CR becomes LF as well.
    REQUIRE(exportedText("<root>hello\r</root>") == "hello\n");
}
}

0 comments on commit 4f3852b

Please sign in to comment.