Skip to content
Open
60 changes: 55 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,18 +31,68 @@ when it's needed.
Typecasting
-----------

Besides regular typecasting, Nori features somewhat "advanced" typecasting:

* "true" and "false" String values are converted to `TrueClass` and `FalseClass`.
* String values matching xs:time, xs:date and xs:dateTime are converted
to `Time`, `Date` and `DateTime` objects.
### Auto-Detection / Advanced Typecasting

You can disable this feature via:
In addition to the regular typecasting mechanism, which uses the type attribute for conversion,
Nori supports auto-detection for text nodes that have no defined type.
This option is called **:advanced_typecasting** and can currently detect and cast:

* "true" and "false" values
* XMLSchema types: time, date, dateTime

It is enabled by default and must be disabled explicitly:

``` ruby
Nori.new(:advanced_typecasting => false)
```

see [Nori::TypeConverter::Autodetect](lib/nori/type_converter.rb)


### Custom Type Conversion

Custom types can be converted easily with custom conversions.

For example, to convert a range of integers:

#### XML

``` xml
<?xml version="1.0" encoding="UTF-8"?>
<officeHours type="interval">8..17</officeHours>
```

#### Custom Conversion Class

```ruby
# Converts a string such as "8..17" into an Integer range (8..17).
class ToIntRange
  # Returns nil for nil or empty input; otherwise splits on '..' and
  # builds an inclusive range from the first and last parts.
  def self.convert(value)
    return nil if value.nil? || value.empty?

    bounds = value.split('..')
    Range.new(bounds.first.to_i, bounds.last.to_i)
  end
end

type_converter = Nori::TypeConverter.new('interval' => ToIntRange)
nori = Nori.new(:type_converter => type_converter)
nori.parse(xml)
```

### Namespace Prefix Detection

**Nori::TypeConverter** performs type conversion based on the type attribute.
By default, the namespace prefix for both the type attribute and the type value is empty.
To work with non-empty namespace prefixes, it provides built-in prefix detection for the
**XMLSchema / XMLSchema-instance** namespaces:

```ruby
xml = request.body.read
type_converter = Nori::DEFAULT_TYPE_CONVERTER.tap {|c| c.detect_namespace_prefixes!(xml)}
nori = Nori.new(:type_converter => type_converter)
```

* You can also use custom namespaces - see [TypeConverter spec](spec/nori/type_converter_spec.rb)

Namespaces
----------
Expand Down
12 changes: 12 additions & 0 deletions lib/nori.rb
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,24 @@ def self.hash_key(name, options = {})

PARSERS = { :rexml => "REXML", :nokogiri => "Nokogiri" }

# Built-in TypeConverter, used as the default value of the :type_converter option.
# Each key is a type-name pattern (TypeConverter#conversion interpolates it into a
# regular expression) and each value is the converter class used for matching types.
# Entries are tried in insertion order and the first matching pattern wins.
DEFAULT_TYPE_CONVERTER = TypeConverter.new(
'int|integer' => TypeConverter::ToInteger,
'boolean' => TypeConverter::ToBoolean,
'date[Tt]ime' => TypeConverter::ToTime,
'date' => TypeConverter::ToDate,
'decimal' => TypeConverter::ToDecimal,
'double|float' => TypeConverter::ToFloat,
'string' => TypeConverter::ToString,
'base64Binary' => TypeConverter::Base64ToBinary
)

def initialize(options = {})
defaults = {
:strip_namespaces => false,
:delete_namespace_attributes => false,
:convert_tags_to => nil,
:advanced_typecasting => true,
:type_converter => DEFAULT_TYPE_CONVERTER,
:parser => :nokogiri
}

Expand Down
5 changes: 4 additions & 1 deletion lib/nori/string_with_attributes.rb
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
class Nori
  # A String subclass that carries an extra `attributes` value next to its text.
  class StringWithAttributes < String
    # The attributes object handed to the constructor; readable and writable.
    attr_accessor :attributes

    # value      - the text content, passed on to String#initialize.
    # attributes - stored as-is and exposed through #attributes.
    def initialize(value, attributes)
      super(value)
      self.attributes = attributes
    end
  end
end
192 changes: 192 additions & 0 deletions lib/nori/type_converter.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,192 @@
require 'rexml/document'

class Nori
  # Looks up a converter for an XML node based on the value of its type attribute.
  #
  # A converter is any object responding to `convert(value)`. Converters are
  # registered as a Hash of `type-name pattern => converter`, where the pattern
  # is regular-expression source that must match the whole (prefix-stripped)
  # type name.
  class TypeConverter
    attr_accessor :attribute_prefix, :type_attribute_name, :type_prefix, :conversions

    # conversions - Hash of type-name pattern => converter (default: empty).
    #
    # The attribute and type prefixes start out as nil (no namespace prefix)
    # and the type attribute is called 'type'.
    def initialize(conversions = {})
      @attribute_prefix = nil
      @type_attribute_name = 'type'
      @type_prefix = nil
      @conversions = conversions
    end

    # Returns the name of the type attribute including its namespace prefix,
    # e.g. 'type' or 'xsi:type'.
    #
    # Deliberately not memoized: the prefix and the attribute name are writable
    # (and rewritten by #detect_namespace_prefixes!), so a cached value would
    # go stale after the first call.
    def namespaced_type_attribute
      @attribute_prefix.nil? ? @type_attribute_name : "#{@attribute_prefix}:#{@type_attribute_name}"
    end

    # Returns the converter registered for +type+, or nil when +type+ is nil,
    # carries a namespace prefix other than the configured type prefix, or
    # matches none of the registered patterns. Patterns are tried in Hash
    # order; the first match wins.
    def conversion(type)
      return nil if type.nil? || !type_namespace_prefix_matches?(type)

      stripped_type = strip_namespace(type)
      @conversions.each_pair do |type_pattern, type_converter|
        # The pattern is wrapped in a group so that alternations such as
        # 'int|integer' are anchored as a whole; without the group the regexp
        # would read (^int)|(integer$) and also match e.g. 'interval'.
        return type_converter if stripped_type =~ /\A(?:#{type_pattern})\z/
      end
      nil
    end

    # Returns the value stored under the (namespaced) type attribute in
    # +attributes+, or nil when it is absent.
    def type(attributes)
      attributes[namespaced_type_attribute]
    end

    # True when +attribute+ carries the configured attribute namespace prefix.
    def attribute_namespace_prefix_matches?(attribute)
      TypeConverter.namespace_prefix_matches?(@attribute_prefix, attribute)
    end

    # True when +type+ carries the configured type namespace prefix.
    def type_namespace_prefix_matches?(type)
      TypeConverter.namespace_prefix_matches?(@type_prefix, type)
    end

    # With a nil or empty +namespace_prefix+ the name must not contain any
    # ':' at all; otherwise the name must start with "<prefix>:".
    def self.namespace_prefix_matches?(namespace_prefix, attribute_name)
      if namespace_prefix.nil? || namespace_prefix.empty?
        !attribute_name.include?(':')
      else
        attribute_name.start_with?("#{namespace_prefix}:")
      end
    end

    # Removes a leading "<type_prefix>:" from +type+. Returns +type+ itself
    # when no type prefix is configured. The prefix is escaped so that it is
    # always treated as literal text, never as regexp syntax.
    def strip_namespace(type)
      return type if @type_prefix.nil?

      type.sub(/\A#{Regexp.escape(@type_prefix.to_s)}:/, '')
    end

    # Parses +xml+ with REXML and, if it has a root element, sets
    # attribute_prefix / type_prefix to the prefixes the root element declares
    # for the given namespace URIs (each becomes nil when not declared there).
    #
    # opts - :attribute_namespace (default: XmlNamespace::XML_SCHEMA_INSTANCE)
    #        :type_namespace      (default: XmlNamespace::XML_SCHEMA)
    #
    # Only namespaces reported by the root element are considered.
    def detect_namespace_prefixes!(xml, opts = {})
      root_node = REXML::Document.new(xml).root
      return nil unless root_node

      namespaces = root_node.namespaces
      @attribute_prefix = namespaces.key(opts[:attribute_namespace] || XmlNamespace::XML_SCHEMA_INSTANCE)
      @type_prefix = namespaces.key(opts[:type_namespace] || XmlNamespace::XML_SCHEMA)
    end

    # Namespace URIs used as defaults by #detect_namespace_prefixes!.
    module XmlNamespace
      XML_SCHEMA = 'http://www.w3.org/2001/XMLSchema'
      XML_SCHEMA_INSTANCE = 'http://www.w3.org/2001/XMLSchema-instance'
    end

    # -- Type Converter
    #
    # Unless noted otherwise, the converters below return nil for nil or
    # empty input.

    # Returns the value unchanged.
    class NoConvert
      def self.convert(value)
        value
      end
    end

    # Converts via String#to_i.
    class ToInteger
      def self.convert(value)
        value.to_i unless value.nil? || value.empty?
      end
    end

    # Converts via String#to_f.
    class ToFloat
      def self.convert(value)
        value.to_f unless value.nil? || value.empty?
      end
    end

    # Converts the listed literals (surrounding whitespace ignored) to
    # true / false; nil and any other value yield nil.
    class ToBoolean
      TRUE_VALUES = %w[true 1 yes on t].freeze
      FALSE_VALUES = %w[false 0 no off f].freeze

      def self.convert(value)
        # nil is treated like the other converters treat it instead of raising.
        return nil if value.nil?

        normalized = value.strip
        return true if TRUE_VALUES.include?(normalized)
        return false if FALSE_VALUES.include?(normalized)

        nil
      end
    end

    # Converts to BigDecimal.
    class ToDecimal
      def self.convert(value)
        BigDecimal(value.to_s) unless value.nil? || value.empty?
      end
    end

    # Parses with Time.parse and converts the result to UTC.
    class ToTime
      def self.convert(value)
        Time.parse(value).utc unless value.nil? || value.empty?
      end
    end

    # Parses with Date.parse.
    class ToDate
      def self.convert(value)
        Date.parse(value) unless value.nil? || value.empty?
      end
    end

    # Converts via #to_s; only nil yields nil (an empty string stays empty).
    class ToString
      def self.convert(value)
        value.nil? ? nil : value.to_s
      end
    end

    # Decodes Base64 text into the raw string.
    class Base64ToBinary
      def self.convert(value)
        value.unpack('m').first unless value.nil? || value.empty?
      end
    end

    # Guesses the type of an untyped text value: "true"/"false" and values
    # that look like xs:dateTime, xs:date or xs:time are converted, everything
    # else is returned unchanged.
    class Autodetect

      # Simple xs:time Regexp.
      # Valid xs:time formats
      # 13:20:00          1:20 PM
      # 13:20:30.5555     1:20 PM and 30.5555 seconds
      # 13:20:00-05:00    1:20 PM, US Eastern Standard Time
      # 13:20:00+02:00    1:20 PM, Central European Standard Time
      # 13:20:00Z         1:20 PM, Coordinated Universal Time (UTC)
      # 00:00:00          midnight
      # 24:00:00          midnight

      XS_TIME = /^\d{2}:\d{2}:\d{2}[Z\.\-\+]?\d*:?\d*$/

      # Simple xs:date Regexp.
      # Valid xs:date formats
      # 2004-04-12          April 12, 2004
      # -0045-01-01         January 1, 45 BC
      # 12004-04-12         April 12, 12004
      # 2004-04-12-05:00    April 12, 2004, US Eastern Standard Time, which is 5 hours behind Coordinated Universal Time (UTC)
      # 2004-04-12+02:00    April 12, 2004, Central European Summer Time, which is 2 hours ahead of Coordinated Universal Time (UTC)
      # 2004-04-12Z         April 12, 2004, Coordinated Universal Time (UTC)
      #
      # NOTE: years with more than four digits (such as 12004-04-12) are valid
      # xs:date values but are NOT matched by this simple pattern.

      XS_DATE = /^[-]?\d{4}-\d{2}-\d{2}[Z\-\+]?\d*:?\d*$/

      # Simple xs:dateTime Regexp.
      # Valid xs:dateTime formats
      # 2004-04-12T13:20:00           1:20 pm on April 12, 2004
      # 2004-04-12T13:20:15.5         1:20 pm and 15.5 seconds on April 12, 2004
      # 2004-04-12T13:20:00-05:00     1:20 pm on April 12, 2004, US Eastern Standard Time
      # 2004-04-12T13:20:00+02:00     1:20 pm on April 12, 2004, Central European Summer Time
      # 2004-04-12T13:20:15.5-05:00   1:20 pm and 15.5 seconds on April 12, 2004, US Eastern Standard Time
      # 2004-04-12T13:20:00Z          1:20 pm on April 12, 2004, Coordinated Universal Time (UTC)

      XS_DATE_TIME = /^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}[\.Z]?\d*[\-\+]?\d*:?\d*$/

      # Returns true/false, a DateTime, a Date or a Time when the single
      # whitespace-delimited token in +value+ looks like one; otherwise
      # returns +value+ unchanged (including nil, multi-word text and values
      # that match a pattern but fail to parse).
      def self.convert(value)
        return value if value.nil?

        tokens = value.split
        return value if tokens.size > 1

        case tokens.first
        when "true" then true
        when "false" then false
        when XS_DATE_TIME then try_to_convert(value) { |x| DateTime.parse(x) }
        when XS_DATE then try_to_convert(value) { |x| Date.parse(x) }
        when XS_TIME then try_to_convert(value) { |x| Time.parse(x) }
        else value
        end
      end

      # Yields +value+ to the block and returns the block's result; falls
      # back to +value+ itself when the block raises ArgumentError (which is
      # how the parsers used above report unparsable input).
      def self.try_to_convert(value)
        yield value
      rescue ArgumentError
        value
      end
    end
  end
end
Loading