From 8a9a75909f0938d04aab6784c7520c8e8991a9cf Mon Sep 17 00:00:00 2001 From: enigmatic00 Date: Wed, 24 Oct 2012 14:03:42 -0500 Subject: [PATCH 1/4] adding gemspec --- calais-full-relations.gemspec | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 calais-full-relations.gemspec diff --git a/calais-full-relations.gemspec b/calais-full-relations.gemspec new file mode 100644 index 0000000..ad9983f --- /dev/null +++ b/calais-full-relations.gemspec @@ -0,0 +1,29 @@ +# -*- encoding: utf-8 -*- + +require File.expand_path("../lib/calais/version", __FILE__) + +Gem::Specification.new do |gem| + gem.name = 'calais-full-relations' + gem.version = Calais::VERSION + gem.date = Date.today.to_s + + gem.summary = 'A Ruby interface to the Calais Web Service' + gem.description = 'A Ruby interface to the Calais Web Service' + + gem.authors = ['enigmatic00'] + gem.homepage = 'https://github.com/enigmatic00/calais' + + gem.add_dependency("nokogiri", ">= 1.3.3") + gem.add_dependency("json", ">= 1.1.3") + + gem.add_development_dependency("rspec", ">= 2.9.0") + + gem.files = Dir[ + "CHANGELOG.markdown", + "Gemfile", + "MIT-LICENSE", + "README.markdown", + "Rakefile", + "{bin,lib,man,test,spec}/**/*" + ] & `git ls-files`.split("\n") +end From b565e5f0f3f22b974ca50e2618e700235349296a Mon Sep 17 00:00:00 2001 From: enigmatic00 Date: Wed, 24 Oct 2012 14:06:36 -0500 Subject: [PATCH 2/4] collect full opencalais hash urls, update merge function to convert value into array if there are multiple values with same key on relations. 
--- lib/calais/response.rb | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/lib/calais/response.rb b/lib/calais/response.rb index 49db0fe..e255d82 100644 --- a/lib/calais/response.rb +++ b/lib/calais/response.rb @@ -131,7 +131,7 @@ def extract_data end @relevances = doc.root.xpath("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:relevances]}')]/..").inject({}) do |acc, node| - subject_hash = node.xpath("c:subject[1]").first[:resource].split('/')[-1] + subject_hash = node.xpath("c:subject[1]").first[:resource]#.split('/')[-1] acc[subject_hash] = node.xpath("c:relevance[1]").first.content.to_f node.remove @@ -139,7 +139,7 @@ def extract_data end @entities = doc.root.xpath("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:entities]}')]/..").map do |node| - extracted_hash = node['about'].split('/')[-1] rescue nil + extracted_hash = node['about']#.split('/')[-1] rescue nil entity = Entity.new entity.calais_hash = CalaisHash.find_or_create(extracted_hash, @hashes) @@ -152,20 +152,17 @@ def extract_data node.remove entity end - @relations = doc.root.xpath("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:relations]}')]/..").map do |node| - extracted_hash = node['about'].split('/')[-1] rescue nil + extracted_hash = node['about']#.split('/')[-1] rescue nil relation = Relation.new relation.calais_hash = CalaisHash.find_or_create(extracted_hash, @hashes) relation.type = extract_type(node) relation.attributes = extract_attributes(node.xpath("*[contains(name(), 'c:')]")) relation.instances = extract_instances(doc, extracted_hash) - node.remove relation end - @geographies = doc.root.xpath("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:geographies]}')]/..").map do |node| attributes = extract_attributes(node.xpath("*[contains(name(), 'c:')]")) @@ -187,7 +184,7 @@ def extract_data def extract_instances(doc, hash) doc.root.xpath("rdf:Description/rdf:type[contains(@rdf:resource, 
'#{MATCHERS[:instances]}')]/..").select do |instance_node| - instance_node.xpath("c:subject[1]").first[:resource].split("/")[-1] == hash + instance_node.xpath("c:subject[1]").first[:resource]#.split("/")[-1] == hash end.map do |instance_node| instance = Instance.from_node(instance_node) instance_node.remove @@ -197,7 +194,7 @@ def extract_instances(doc, hash) end def extract_type(node) - node.xpath("*[name()='rdf:type']")[0]['resource'].split('/')[-1] + node.xpath("*[name()='rdf:type']")[0]['resource']#.split('/')[-1] rescue nil end @@ -205,12 +202,12 @@ def extract_type(node) def extract_attributes(nodes) nodes.inject({}) do |hsh, node| value = if node['resource'] - extracted_hash = node['resource'].split('/')[-1] rescue nil + extracted_hash = node['resource'] rescue nil CalaisHash.find_or_create(extracted_hash, @hashes) else node.content end - hsh.merge(node.name => value) + hsh.merge(node.name => value){|k,o,n| [o,n].flatten} end end def extract_relevance(value) From de0c811e2f6383ecbea8dd9ddc29351e765b1e4e Mon Sep 17 00:00:00 2001 From: Daniele Bonadiman Date: Thu, 10 Jan 2013 17:21:47 +0100 Subject: [PATCH 3/4] * fix problem with Nokogiri 1.5.6 --- CHANGELOG.markdown | 2 ++ calais.gemspec | 30 ------------------------------ lib/calais/response.rb | 22 ++++++++++------------ 3 files changed, 12 insertions(+), 42 deletions(-) delete mode 100644 calais.gemspec diff --git a/CHANGELOG.markdown b/CHANGELOG.markdown index e039fb5..8a6b4e3 100644 --- a/CHANGELOG.markdown +++ b/CHANGELOG.markdown @@ -1,4 +1,6 @@ # Changes +## 0.0.14 +* fix problem with Nokogiri 1.5.6 ## 0.0.13 diff --git a/calais.gemspec b/calais.gemspec deleted file mode 100644 index 42f7d51..0000000 --- a/calais.gemspec +++ /dev/null @@ -1,30 +0,0 @@ -# -*- encoding: utf-8 -*- - -require File.expand_path("../lib/calais/version", __FILE__) - -Gem::Specification.new do |gem| - gem.name = 'calais' - gem.version = Calais::VERSION - gem.date = Date.today.to_s - - gem.summary = 'A Ruby interface to the 
Calais Web Service' - gem.description = 'A Ruby interface to the Calais Web Service' - - gem.authors = ['Abhay Kumar'] - gem.email = 'info@opensynapse.net' - gem.homepage = 'http://github.com/abhay/calais' - - gem.add_dependency("nokogiri", ">= 1.3.3") - gem.add_dependency("json", ">= 1.1.3") - - gem.add_development_dependency("rspec", ">= 2.9.0") - - gem.files = Dir[ - "CHANGELOG.markdown", - "Gemfile", - "MIT-LICENSE", - "README.markdown", - "Rakefile", - "{bin,lib,man,test,spec}/**/*" - ] & `git ls-files`.split("\n") -end diff --git a/lib/calais/response.rb b/lib/calais/response.rb index 49db0fe..c2a5572 100644 --- a/lib/calais/response.rb +++ b/lib/calais/response.rb @@ -60,7 +60,6 @@ def self.from_node(node) instance.suffix = node.xpath("c:suffix[1]").first.content instance.offset = node.xpath("c:offset[1]").first.content.to_i instance.length = node.xpath("c:length[1]").first.content.to_i - instance end end @@ -82,14 +81,13 @@ def self.find_or_create(hash, hashes) private def extract_data doc = Nokogiri::XML(@raw_response) - if doc.root.xpath("/Error[1]").first raise Calais::Error, doc.root.xpath("/Error/Exception").first.content end doc.root.xpath("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:docinfometa]}')]/..").each do |node| - @language = node['language'] - @submission_date = DateTime.parse node['submissionDate'] + @language = node['c:language'] + @submission_date = DateTime.parse node['c:submissionDate'] attributes = extract_attributes(node.xpath("*[contains(name(), 'c:')]")) @@ -100,7 +98,7 @@ def extract_data end doc.root.xpath("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:docinfo]}')]/..").each do |node| - @request_id = node['calaisRequestID'] + @request_id = node['c:calaisRequestID'] attributes = extract_attributes(node.xpath("*[contains(name(), 'c:')]")) @@ -131,7 +129,7 @@ def extract_data end @relevances = doc.root.xpath("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:relevances]}')]/..").inject({}) 
do |acc, node| - subject_hash = node.xpath("c:subject[1]").first[:resource].split('/')[-1] + subject_hash = node.xpath("c:subject[1]/@rdf:resource").first.content.split('/')[-1] acc[subject_hash] = node.xpath("c:relevance[1]").first.content.to_f node.remove @@ -139,7 +137,7 @@ def extract_data end @entities = doc.root.xpath("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:entities]}')]/..").map do |node| - extracted_hash = node['about'].split('/')[-1] rescue nil + extracted_hash = node['rdf:about'].split('/')[-1] rescue nil entity = Entity.new entity.calais_hash = CalaisHash.find_or_create(extracted_hash, @hashes) @@ -154,7 +152,7 @@ def extract_data end @relations = doc.root.xpath("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:relations]}')]/..").map do |node| - extracted_hash = node['about'].split('/')[-1] rescue nil + extracted_hash = node['rdf:about'].split('/')[-1] rescue nil relation = Relation.new relation.calais_hash = CalaisHash.find_or_create(extracted_hash, @hashes) @@ -187,7 +185,7 @@ def extract_data def extract_instances(doc, hash) doc.root.xpath("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:instances]}')]/..").select do |instance_node| - instance_node.xpath("c:subject[1]").first[:resource].split("/")[-1] == hash + instance_node.xpath("c:subject[1]/@rdf:resource").first.content.split("/")[-1] == hash end.map do |instance_node| instance = Instance.from_node(instance_node) instance_node.remove @@ -197,15 +195,15 @@ def extract_instances(doc, hash) end def extract_type(node) - node.xpath("*[name()='rdf:type']")[0]['resource'].split('/')[-1] + node.xpath("*[name()='rdf:type']")[0]['rdf:resource'].split('/')[-1] rescue nil end def extract_attributes(nodes) nodes.inject({}) do |hsh, node| - value = if node['resource'] - extracted_hash = node['resource'].split('/')[-1] rescue nil + value = if node['rdf:resource'] + extracted_hash = node['rdf:resource'].split('/')[-1] rescue nil 
CalaisHash.find_or_create(extracted_hash, @hashes) else node.content From 25a2692452f1ee524e2ff7bf8ec779a217d961e4 Mon Sep 17 00:00:00 2001 From: Daniele Bonadiman Date: Fri, 11 Jan 2013 13:37:40 +0100 Subject: [PATCH 4/4] remove iconv warning --- lib/calais.rb | 1 - lib/calais/client.rb | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/lib/calais.rb b/lib/calais.rb index d9455b0..6b525e3 100644 --- a/lib/calais.rb +++ b/lib/calais.rb @@ -2,7 +2,6 @@ require 'net/http' require 'uri' require 'cgi' -require 'iconv' require 'set' require 'date' diff --git a/lib/calais/client.rb b/lib/calais/client.rb index 5d572a3..dda9ad6 100644 --- a/lib/calais/client.rb +++ b/lib/calais/client.rb @@ -23,7 +23,7 @@ def initialize(options={}, &block) def enlighten post_args = { "licenseID" => @license_id, - "content" => Iconv.iconv('UTF-8//IGNORE', 'UTF-8', "#{@content} ").first[0..-2], + "content" => "#{@content} ".encode(Encoding::UTF_8, :invalid => :replace, :undef => :replace, :replace => '')[0..-2], "paramsXML" => params_xml }