Skip to content

Commit

Permalink
Change N-Triples literal output encoding to limit the number of ECHAR…
Browse files Browse the repository at this point in the history
… escapes used based on [Canonical form of N-Triples](https://www.w3.org/TR/n-triples/#canonical-ntriples):

> Within STRING_LITERAL_QUOTE, only the characters `U+0022`, `U+005C`, `U+000A`, `U+000D` are encoded using `ECHAR`. `ECHAR` **must not** be used for characters that are allowed directly in STRING_LITERAL_QUOTE.
  • Loading branch information
gkellogg committed Jan 20, 2019
1 parent e753d0c commit f859e03
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 14 deletions.
7 changes: 2 additions & 5 deletions lib/rdf/ntriples/writer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,8 @@ def self.escape_unicode(u, encoding)
# sequences, otherwise, assume the test-cases escape sequences. Otherwise,
# the N-Triples recommendation includes `\b` and `\f` escape sequences.
#
# Within STRING_LITERAL_QUOTE, only the characters `U+0022`, `U+005C`, `U+000A`, `U+000D` are encoded using `ECHAR`. `ECHAR` must not be used for characters that are allowed directly in STRING_LITERAL_QUOTE.
#
# @param [Integer, #ord] u
# @return [String]
# @raise [ArgumentError] if `u` is not a valid Unicode codepoint
Expand All @@ -124,15 +126,10 @@ def self.escape_unicode(u, encoding)
def self.escape_ascii(u, encoding)
case (u = u.ord)
when (0x00..0x07) then escape_utf16(u)
when (0x08) then (encoding && encoding == Encoding::ASCII ? escape_utf16(u) : "\\b")
when (0x09) then "\\t"
when (0x0A) then "\\n"
when (0x0B) then escape_utf16(u)
when (0x0C) then (encoding && encoding == Encoding::ASCII ? escape_utf16(u) : "\\f")
when (0x0D) then "\\r"
when (0x0E..0x1F) then escape_utf16(u)
when (0x22) then "\\\""
when (0x27) then "\\'"
when (0x5C) then "\\\\"
when (0x7F) then escape_utf16(u)
when (0x00..0x7F) then u.chr
Expand Down
20 changes: 11 additions & 9 deletions spec/ntriples_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -672,16 +672,18 @@

# @see http://www.w3.org/TR/rdf-testcases/#ntrip_strings
it "should correctly escape ASCII characters (#x0-#x7F)" do
(0x00..0x08).each { |u| expect(writer.escape(u.chr, encoding)).to eq "\\u#{u.to_s(16).upcase.rjust(4, '0')}" }
expect(writer.escape(0x09.chr, encoding)).to eq "\\t"
(0x00..0x07).each { |u| expect(writer.escape(u.chr, encoding)).to eq "\\u#{u.to_s(16).upcase.rjust(4, '0')}" }
expect(writer.escape(0x08.chr, encoding)).to eq "\b"
expect(writer.escape(0x09.chr, encoding)).to eq "\t"
expect(writer.escape(0x0A.chr, encoding)).to eq "\\n"
(0x0B..0x0C).each { |u| expect(writer.escape(u.chr, encoding)).to eq "\\u#{u.to_s(16).upcase.rjust(4, '0')}" }
expect(writer.escape(0x0B.chr, encoding)).to eq "\v"
expect(writer.escape(0x0C.chr, encoding)).to eq "\f"
expect(writer.escape(0x0D.chr, encoding)).to eq "\\r"
(0x0E..0x1F).each { |u| expect(writer.escape(u.chr, encoding)).to eq "\\u#{u.to_s(16).upcase.rjust(4, '0')}" }
(0x20..0x21).each { |u| expect(writer.escape(u.chr, encoding)).to eq u.chr }
expect(writer.escape(0x22.chr, encoding)).to eq "\\\""
(0x23..0x26).each { |u| expect(writer.escape(u.chr, encoding)).to eq u.chr }
expect(writer.escape(0x27.chr, encoding)).to eq "\\'"
expect(writer.escape(0x27.chr, encoding)).to eq "'"
(0x28..0x5B).each { |u| expect(writer.escape(u.chr, encoding)).to eq u.chr }
expect(writer.escape(0x5C.chr, encoding)).to eq "\\\\"
(0x5D..0x7E).each { |u| expect(writer.escape(u.chr, encoding)).to eq u.chr }
Expand Down Expand Up @@ -733,17 +735,17 @@
# @see http://www.w3.org/TR/rdf-testcases/#ntrip_strings
it "should correctly escape ASCII characters (#x0-#x7F)" do
(0x00..0x07).each { |u| expect(writer.escape(u.chr, encoding)).to eq "\\u#{u.to_s(16).upcase.rjust(4, '0')}" }
expect(writer.escape(0x08.chr, encoding)).to eq (encoding ? "\\b" : "\\u0008")
expect(writer.escape(0x09.chr, encoding)).to eq "\\t"
expect(writer.escape(0x08.chr, encoding)).to eq "\b"
expect(writer.escape(0x09.chr, encoding)).to eq "\t"
expect(writer.escape(0x0A.chr, encoding)).to eq "\\n"
(0x0B..0x0B).each { |u| expect(writer.escape(u.chr, encoding)).to eq "\\u#{u.to_s(16).upcase.rjust(4, '0')}" }
expect(writer.escape(0x0C.chr, encoding)).to eq (encoding ? "\\f" : "\\u000C")
expect(writer.escape(0x0B.chr, encoding)).to eq "\v"
expect(writer.escape(0x0C.chr, encoding)).to eq "\f"
expect(writer.escape(0x0D.chr, encoding)).to eq "\\r"
(0x0E..0x1F).each { |u| expect(writer.escape(u.chr, encoding)).to eq "\\u#{u.to_s(16).upcase.rjust(4, '0')}" }
(0x20..0x21).each { |u| expect(writer.escape(u.chr, encoding)).to eq u.chr }
expect(writer.escape(0x22.chr, encoding)).to eq "\\\""
(0x23..0x26).each { |u| expect(writer.escape(u.chr, encoding)).to eq u.chr }
expect(writer.escape(0x27.chr, encoding)).to eq "\\'"
expect(writer.escape(0x27.chr, encoding)).to eq "'"
(0x28..0x5B).each { |u| expect(writer.escape(u.chr, encoding)).to eq u.chr }
expect(writer.escape(0x5C.chr, encoding)).to eq "\\\\"
(0x5D..0x7E).each { |u| expect(writer.escape(u.chr, encoding)).to eq u.chr }
Expand Down

0 comments on commit f859e03

Please sign in to comment.