Change N-Triples literal output encoding to limit the number of ECHAR…

… escapes used based on [Canonical form of N-Triples](https://www.w3.org/TR/n-triples/#canonical-ntriples): > Within STRING_LITERAL_QUOTE, only the characters `U+0022`, `U+005C`, `U+000A`, `U+000D` are encoded using `ECHAR`. `ECHAR` **must not** be used for characters that are allowed directly in STRING_LITERAL_QUOTE.
ruby-rdf · Jan 20, 2019 · f859e03 · f859e03
1 parent e753d0c
commit f859e03
Show file tree

Hide file tree

Showing 2 changed files with 13 additions and 14 deletions.
diff --git a/lib/rdf/ntriples/writer.rb b/lib/rdf/ntriples/writer.rb
@@ -116,6 +116,8 @@ def self.escape_unicode(u, encoding)
     # sequences, otherwise, assume the test-cases escape sequences. Otherwise,
     # the N-Triples recommendation includes `\b` and `\f` escape sequences.
     #
+    # Within STRING_LITERAL_QUOTE, only the characters `U+0022`, `U+005C`, `U+000A`, `U+000D` are encoded using `ECHAR`. `ECHAR` must not be used for characters that are allowed directly in STRING_LITERAL_QUOTE.
+    #
     # @param  [Integer, #ord] u
     # @return [String]
     # @raise  [ArgumentError] if `u` is not a valid Unicode codepoint
@@ -124,15 +126,10 @@ def self.escape_unicode(u, encoding)
     def self.escape_ascii(u, encoding)
       case (u = u.ord)
         when (0x00..0x07) then escape_utf16(u)
-        when (0x08)       then (encoding && encoding == Encoding::ASCII ? escape_utf16(u) : "\\b")
-        when (0x09)       then "\\t"
         when (0x0A)       then "\\n"
-        when (0x0B)       then escape_utf16(u)
-        when (0x0C)       then (encoding && encoding == Encoding::ASCII ? escape_utf16(u) : "\\f")
         when (0x0D)       then "\\r"
         when (0x0E..0x1F) then escape_utf16(u)
         when (0x22)       then "\\\""
-        when (0x27)       then "\\'"
         when (0x5C)       then "\\\\"
         when (0x7F)       then escape_utf16(u)
         when (0x00..0x7F) then u.chr

diff --git a/spec/ntriples_spec.rb b/spec/ntriples_spec.rb
@@ -672,16 +672,18 @@
 
       # @see http://www.w3.org/TR/rdf-testcases/#ntrip_strings
       it "should correctly escape ASCII characters (#x0-#x7F)" do
-        (0x00..0x08).each { |u| expect(writer.escape(u.chr, encoding)).to eq "\\u#{u.to_s(16).upcase.rjust(4, '0')}" }
-        expect(writer.escape(0x09.chr, encoding)).to eq "\\t"
+        (0x00..0x07).each { |u| expect(writer.escape(u.chr, encoding)).to eq "\\u#{u.to_s(16).upcase.rjust(4, '0')}" }
+        expect(writer.escape(0x08.chr, encoding)).to eq "\b"
+        expect(writer.escape(0x09.chr, encoding)).to eq "\t"
         expect(writer.escape(0x0A.chr, encoding)).to eq "\\n"
-        (0x0B..0x0C).each { |u| expect(writer.escape(u.chr, encoding)).to eq "\\u#{u.to_s(16).upcase.rjust(4, '0')}" }
+        expect(writer.escape(0x0B.chr, encoding)).to eq "\v"
+        expect(writer.escape(0x0C.chr, encoding)).to eq "\f"
         expect(writer.escape(0x0D.chr, encoding)).to eq "\\r"
         (0x0E..0x1F).each { |u| expect(writer.escape(u.chr, encoding)).to eq "\\u#{u.to_s(16).upcase.rjust(4, '0')}" }
         (0x20..0x21).each { |u| expect(writer.escape(u.chr, encoding)).to eq u.chr }
         expect(writer.escape(0x22.chr, encoding)).to eq "\\\""
         (0x23..0x26).each { |u| expect(writer.escape(u.chr, encoding)).to eq u.chr }
-        expect(writer.escape(0x27.chr, encoding)).to eq "\\'"
+        expect(writer.escape(0x27.chr, encoding)).to eq "'"
         (0x28..0x5B).each { |u| expect(writer.escape(u.chr, encoding)).to eq u.chr }
         expect(writer.escape(0x5C.chr, encoding)).to eq "\\\\"
         (0x5D..0x7E).each { |u| expect(writer.escape(u.chr, encoding)).to eq u.chr }
@@ -733,17 +735,17 @@
       # @see http://www.w3.org/TR/rdf-testcases/#ntrip_strings
       it "should correctly escape ASCII characters (#x0-#x7F)" do
         (0x00..0x07).each { |u| expect(writer.escape(u.chr, encoding)).to eq "\\u#{u.to_s(16).upcase.rjust(4, '0')}" }
-        expect(writer.escape(0x08.chr, encoding)).to eq (encoding ? "\\b" : "\\u0008")
-        expect(writer.escape(0x09.chr, encoding)).to eq "\\t"
+        expect(writer.escape(0x08.chr, encoding)).to eq "\b"
+        expect(writer.escape(0x09.chr, encoding)).to eq "\t"
         expect(writer.escape(0x0A.chr, encoding)).to eq "\\n"
-        (0x0B..0x0B).each { |u| expect(writer.escape(u.chr, encoding)).to eq "\\u#{u.to_s(16).upcase.rjust(4, '0')}" }
-        expect(writer.escape(0x0C.chr, encoding)).to eq (encoding ? "\\f" : "\\u000C")
+        expect(writer.escape(0x0B.chr, encoding)).to eq "\v"
+        expect(writer.escape(0x0C.chr, encoding)).to eq "\f"
         expect(writer.escape(0x0D.chr, encoding)).to eq "\\r"
         (0x0E..0x1F).each { |u| expect(writer.escape(u.chr, encoding)).to eq "\\u#{u.to_s(16).upcase.rjust(4, '0')}" }
         (0x20..0x21).each { |u| expect(writer.escape(u.chr, encoding)).to eq u.chr }
         expect(writer.escape(0x22.chr, encoding)).to eq "\\\""
         (0x23..0x26).each { |u| expect(writer.escape(u.chr, encoding)).to eq u.chr }
-        expect(writer.escape(0x27.chr, encoding)).to eq "\\'"
+        expect(writer.escape(0x27.chr, encoding)).to eq "'"
         (0x28..0x5B).each { |u| expect(writer.escape(u.chr, encoding)).to eq u.chr }
         expect(writer.escape(0x5C.chr, encoding)).to eq "\\\\"
         (0x5D..0x7E).each { |u| expect(writer.escape(u.chr, encoding)).to eq u.chr }