Skip to content
This repository has been archived by the owner on Nov 29, 2019. It is now read-only.

Commit

Permalink
add bibtex fetch and output to file using 'extract-bib'
Browse files Browse the repository at this point in the history
  • Loading branch information
jdherman committed Jul 16, 2014
1 parent 7f168af commit 2e61519
Show file tree
Hide file tree
Showing 4 changed files with 58 additions and 1 deletion.
6 changes: 6 additions & 0 deletions bin/pdf-extract
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ resolvers = {

# Builds a proc that maps an input path to a bare file name: the trailing
# alphanumeric extension is removed, directories are dropped, and the given
# suffix is appended.
derived_name = lambda do |suffix|
  proc { |f| File.basename(f.sub(/\.[a-zA-Z0-9]+\Z/, "")) + suffix }
end

# Output target per view: the XML view yields :stdout, the other views yield
# a file name derived from the input path.
outputs = {
  :xml => proc { :stdout },
  :bib => derived_name.call(".refs.bib"),
  :pdf => derived_name.call(".mask.pdf")
}

Expand All @@ -31,6 +32,11 @@ commands = [
:view => :xml,
:description => "Extract objects as XML."
},
{
:name => "extract-bib",
:view => :bib,
:description => "Extract resolved references in BibTeX format."
},
{
:name => "mark",
:view => :pdf,
Expand Down
2 changes: 2 additions & 0 deletions lib/pdf/extract.rb
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
require_relative 'extract/references/resolved_references.rb'
require_relative 'extract/view/pdf_view.rb'
require_relative 'extract/view/xml_view.rb'
require_relative 'extract/view/bib_view.rb'

module PdfExtract

Expand Down Expand Up @@ -68,6 +69,7 @@ def self.init

add_view :pdf, PdfView
add_view :xml, XmlView
add_view :bib, BibView
end

init
Expand Down
3 changes: 2 additions & 1 deletion lib/pdf/extract/references/resolve.rb
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,9 @@ def self.find ref
url = "http://search.labs.crossref.org/dois?q=#{CGI.escape(ref)}&rows=1"
query = JSON.parse(open(url).read())
unless query.nil?
resolved[:doi] = query[0]["doi"]
resolved[:doi] = query[0]["doi"].sub "http://dx.doi.org/",""
resolved[:score] = query[0]["score"]
puts "Found DOI from Text: #{resolved[:doi]} (Score: #{resolved[:score]})"
end
resolved
end
Expand Down
48 changes: 48 additions & 0 deletions lib/pdf/extract/view/bib_view.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
require 'net/http'
require 'open-uri'

require_relative 'abstract_view'
require_relative '../language'

module PdfExtract
  # View that renders resolved references as BibTeX.
  #
  # For every extracted object carrying a :doi and a :score, the BibTeX
  # record for that DOI is requested from the CrossRef API and the fetched
  # records are joined into a single string.
  class BibView < AbstractView

    # Objects whose resolution score is not strictly above this value are
    # skipped without contacting CrossRef.
    MIN_SCORE = 1

    # Renders all sufficiently-scored resolved references as BibTeX.
    #
    # options - accepted for interface compatibility; not read by this view.
    #
    # Returns a String containing the fetched BibTeX records joined by
    # newlines (empty when nothing was fetched).
    # Raises RuntimeError when an object lacks the :doi or :score key, which
    # is reported as the references not having been resolved.
    def render options={}
      bibs = []

      # NOTE(review): `objects` is presumably supplied by AbstractView as a
      # mapping of object type => list of hashes - confirm against that class.
      objects.each_pair do |_type, objs|
        objs.each do |obj|
          unless obj.key?(:doi) && obj.key?(:score)
            raise "Must run extract-bib with --resolved_references flag"
          end

          next unless obj[:score] > MIN_SCORE

          bib = fetch_bibtex obj[:doi]
          bibs << bib unless bib.nil?
        end
      end

      bibs.join("\n")
    end

    # Writes an already rendered BibTeX string to +filename+, replacing any
    # existing content.
    def self.write render, filename
      File.open filename, "w" do |file|
        file.write render
      end
    end

    private

    # Fetches the BibTeX record for +doi+ from CrossRef.
    #
    # Returns the record as a String, or nil (after printing a message) when
    # the DOI does not form a valid URL or CrossRef answers with an HTTP
    # error.
    def fetch_bibtex doi
      url = "http://api.crossref.org/works/#{doi}/transform/application/x-bibtex"
      # URI.encode and URL-opening Kernel#open no longer exist on Ruby 3;
      # the parser's escape plus OpenURI::OpenRead#read perform the same
      # escaping and download on old and new Rubies, and #read also closes
      # the underlying IO.
      bib = URI.parse(URI::DEFAULT_PARSER.escape(url)).read
    rescue URI::InvalidURIError
      puts "DOI not a valid URL: #{doi}"
      nil
    rescue OpenURI::HTTPError
      puts "DOI not found on CrossRef: #{doi}"
      nil
    else
      puts "Found BibTeX from DOI: #{doi}"
      bib
    end

  end
end

0 comments on commit 2e61519

Please sign in to comment.