From fcde80061971184bf48b85659be3b800f11280e4 Mon Sep 17 00:00:00 2001 From: Gregg Kellogg Date: Tue, 24 Oct 2023 11:22:38 -0700 Subject: [PATCH] Don't normalize literal direction so that validation can work. --- lib/rdf/model/literal.rb | 2 +- lib/rdf/ntriples/reader.rb | 16 ++++++++-------- spec/ntriples_spec.rb | 5 +++++ 3 files changed, 14 insertions(+), 9 deletions(-) diff --git a/lib/rdf/model/literal.rb b/lib/rdf/model/literal.rb index 28e06f04..302b0c6c 100644 --- a/lib/rdf/model/literal.rb +++ b/lib/rdf/model/literal.rb @@ -194,7 +194,7 @@ def initialize(value, language: nil, datatype: nil, direction: nil, lexical: nil @string = @string.encode(Encoding::UTF_8).freeze if instance_variable_defined?(:@string) @object = @string if instance_variable_defined?(:@string) && @object.is_a?(String) @language = language.to_s.downcase.to_sym if language - @direction = direction.to_s.downcase.to_sym if direction + @direction = direction.to_s.to_sym if direction @datatype = RDF::URI(datatype).freeze if datatype @datatype ||= self.class.const_get(:DATATYPE) if self.class.const_defined?(:DATATYPE) @datatype ||= if instance_variable_defined?(:@language) && @language && diff --git a/lib/rdf/ntriples/reader.rb b/lib/rdf/ntriples/reader.rb index 832f8942..4fbb21a1 100644 --- a/lib/rdf/ntriples/reader.rb +++ b/lib/rdf/ntriples/reader.rb @@ -51,14 +51,14 @@ class Reader < RDF::Reader # @see http://www.w3.org/TR/turtle/ ## # Unicode regular expressions. - U_CHARS1 = Regexp.compile(<<-EOS.gsub(/\s+/, '')) - [\\u00C0-\\u00D6]|[\\u00D8-\\u00F6]|[\\u00F8-\\u02FF]| - [\\u0370-\\u037D]|[\\u037F-\\u1FFF]|[\\u200C-\\u200D]| - [\\u2070-\\u218F]|[\\u2C00-\\u2FEF]|[\\u3001-\\uD7FF]| - [\\uF900-\\uFDCF]|[\\uFDF0-\\uFFFD]|[\\u{10000}-\\u{EFFFF}] - EOS - U_CHARS2 = Regexp.compile("\\u00B7|[\\u0300-\\u036F]|[\\u203F-\\u2040]").freeze - IRI_RANGE = Regexp.compile("[[^<>\"{}\|\^`\\\\]&&[^\\x00-\\x20]]").freeze + U_CHARS1 = Regexp.compile(<<-EOS.gsub(/\s+/, '')) + [\\u00C0-\\u00D6]|[\\u00D8-\\u00F6]|[\\u00F8-\\u02FF]| + [\\u0370-\\u037D]|[\\u037F-\\u1FFF]|[\\u200C-\\u200D]| + [\\u2070-\\u218F]|[\\u2C00-\\u2FEF]|[\\u3001-\\uD7FF]| + [\\uF900-\\uFDCF]|[\\uFDF0-\\uFFFD]|[\\u{10000}-\\u{EFFFF}] + EOS + U_CHARS2 = Regexp.compile("\\u00B7|[\\u0300-\\u036F]|[\\u203F-\\u2040]").freeze + IRI_RANGE = Regexp.compile("[[^<>\"{}\|\^`\\\\]&&[^\\x00-\\x20]]").freeze PN_CHARS_BASE = /[A-Z]|[a-z]|#{U_CHARS1}/.freeze PN_CHARS_U = /_|#{PN_CHARS_BASE}/.freeze diff --git a/spec/ntriples_spec.rb b/spec/ntriples_spec.rb index f2828aff..0d42ff09 100644 --- a/spec/ntriples_spec.rb +++ b/spec/ntriples_spec.rb @@ -322,6 +322,7 @@ { "language" => ' "Hello"@en .', "direction" => ' "Hello"@en--ltr .', + "direction2" => ' "Hello"@en--rtl .', }.each_pair do |name, triple| specify "test #{name}" do stmt = reader.new(triple, rdfstar: true).first @@ -518,6 +519,10 @@ %q( "string"@--ltr .), %r(Expected end of statement) ], + "xx bad dir 3" => [ + %q( "string"@en--LTR .), + %r(Invalid Literal) + ], "nt-syntax-bad-string-05" => [ %q( """abc""" .), %r(Expected end of statement \(found: .* \."\))