-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
d0e0808
commit 91b207a
Showing
5 changed files
with
99 additions
and
105 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,49 +1,44 @@ | ||
# frozen_string_literal: true | ||
require 'faraday/net_http_persistent' | ||
require 'find' | ||
require 'rsolr' | ||
require_relative 'config' | ||
|
||
module Gingr | ||
class SolrIndexer | ||
include Config | ||
|
||
attr_reader :reference_urls | ||
attr_reader :solr | ||
attr_accessor :reference_urls | ||
attr_accessor :solr | ||
|
||
def initialize(url, reference_urls = {}) | ||
@solr = RSolr.connect url:, adapter: :net_http_persistent | ||
@reference_urls = reference_urls | ||
# Builds an indexer around a Solr client.
#
# @param solr [String, Object, nil] a Solr URL, or an already-built client.
#   When nil/false, the URL is taken from ENV['SOLR_URL'] and, failing that,
#   from Gingr::Config.default_options[:solr_url].
# @param reference_urls [Hash, nil] replacement URLs keyed by reference name;
#   nil is stored as an empty Hash.
def initialize(solr = nil, reference_urls = nil)
  target = solr || ENV['SOLR_URL'] || Gingr::Config.default_options[:solr_url]

  # Only Strings are treated as URLs; anything else is kept as the client.
  @solr =
    if target.is_a?(String)
      RSolr.connect(url: target, adapter: :net_http_persistent)
    else
      target
    end
  @reference_urls = reference_urls || {}
end
|
||
def update_reference_urls? | ||
!@reference_urls.empty? | ||
# Sends one record, or a list of records, to Solr after rewriting the
# reference URLs of each record.
#
# @param doc [String, Hash, Array<Hash>] a single record, a list of records,
#   or a String, which is always treated as the path of a JSON file to load.
# @return the value returned by the client's +add+
def add(doc)
  doc = JSON.load_file(doc) if doc.is_a?(String)

  # A JSON file may contain one record or an array of records. Rewrite each
  # record on its own so that an Array is never indexed with a String key.
  [doc].flatten.each { |record| update_reference_urls!(record) }

  @solr.add doc
end
|
||
# Reads a JSON file and posts each record it contains to Solr's update
# handler, one request per record.
#
# The commitWithin value comes from ENV['SOLR_COMMIT_WITHIN'] (default 5000).
#
# @param file_path [String] path to a JSON file holding a record or an
#   array of records
def update(file_path)
  commit_within = ENV.fetch('SOLR_COMMIT_WITHIN', 5000).to_i
  records = [JSON.parse(File.read(file_path))].flatten

  records.each do |record|
    update_reference_urls!(record) if update_reference_urls?
    payload = [record].to_json
    @solr.update(
      params: { commitWithin: commit_within, overwrite: true },
      data: payload,
      headers: { 'Content-Type' => 'application/json' }
    )
  end
end
|
||
def commit | ||
@solr.commit | ||
# Walks everything Find.find yields for +directory+ and passes each path
# accepted by json_file? to #add.
#
# @param directory [String] root of the tree to scan
# @return [Array<String>] the paths that were handed to #add
def index_directory(directory)
  json_paths = Find.find(directory).select { |path| json_file?(path) }
  json_paths.each { |path| add(path) }
end
|
||
private | ||
|
||
def update_reference_urls!(record) | ||
references = record['dct_references_s'] | ||
|
||
Config.reference_urls.each do |name, from_url| | ||
# Rewrites reference URLs inside a record's +dct_references_s+ field.
#
# For every (name, from_url) pair in Gingr::Config.reference_urls, each
# occurrence of from_url is replaced with @reference_urls[name] when such a
# replacement is set. The record is updated in place.
#
# @param doc [Hash] the record to update
# @return [void]
def update_reference_urls!(doc)
  references = doc['dct_references_s']
  # A record without a references field has nothing to rewrite.
  return if references.nil?

  Gingr::Config.reference_urls.each do |name, from_url|
    to_url = @reference_urls[name]
    # Non-mutating gsub so that a frozen source string does not raise.
    references = references.gsub(from_url, to_url) if to_url
  end

  doc['dct_references_s'] = references
end
|
||
# Tells whether +filepath+ carries a ".json" extension, ignoring case.
#
# @param filepath [String]
# @return [Boolean]
def json_file?(filepath)
  extension = File.extname(filepath)
  extension.casecmp?('.json')
end
end | ||
end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,74 +1,90 @@ | ||
# frozen_string_literal: true | ||
require 'spec_helper' | ||
require 'find' | ||
require 'gingr/solr_indexer' | ||
|
||
RSpec.describe Gingr::SolrIndexer do | ||
let(:url) { 'http://solr:8983/solr/geodata-test' } | ||
let(:solr) { instance_double(RSolr::Client) } | ||
around(:each) do |test| | ||
original_solr_url = ENV['SOLR_URL'] | ||
test.run | ||
ensure | ||
ENV['SOLR_URL'] = original_solr_url | ||
end | ||
|
||
describe '#initialize' do | ||
before do | ||
allow(RSolr).to receive(:connect).and_return(solr) | ||
it 'initializes a solr client with the given url' do | ||
indexer = Gingr::SolrIndexer.new('http://solr-from-init/') | ||
expect(indexer.solr.uri.to_s).to eq 'http://solr-from-init/' | ||
end | ||
|
||
it 'falls back to ENV["SOLR_URL"] if it is set' do | ||
ENV['SOLR_URL'] = 'http://solr-from-env/' | ||
indexer = Gingr::SolrIndexer.new | ||
expect(indexer.solr.uri.to_s).to eq 'http://solr-from-env/' | ||
end | ||
|
||
it 'should initialize' do | ||
described_class.new(url) | ||
expect(RSolr).to have_received(:connect).with( | ||
url:, | ||
adapter: :net_http_persistent | ||
) | ||
it 'falls back to the config' do | ||
ENV.delete 'SOLR_URL' | ||
indexer = Gingr::SolrIndexer.new | ||
expect(indexer.solr.uri.to_s).to eq 'http://solr:8983/solr/geodata-test/' | ||
end | ||
end | ||
|
||
describe '#update' do | ||
let(:file_path) { 'spec/fixture/jsonfile/berkeley_public_pdf.json' } | ||
let(:doc) { JSON.parse(File.read(file_path)) } | ||
describe '#update_reference_urls!' do | ||
let(:document) { JSON.load_file('spec/fixture/jsonfile/berkeley_public_pdf.json') } | ||
|
||
before do | ||
allow(solr).to receive(:update) | ||
allow(RSolr).to receive(:connect).and_return(solr) | ||
solr_indexer.update(file_path) | ||
it 'does nothing if reference_urls are nil' do
  indexer = Gingr::SolrIndexer.new
  # update_reference_urls! is declared under `private`, so it cannot be
  # called with an explicit receiver; go through #send instead.
  expect { indexer.send(:update_reference_urls!, document) }.not_to change { document }
end
|
||
context 'update reference urls' do | ||
let(:solr_indexer) { described_class.new(url, reference_urls) } | ||
let(:reference_urls) do | ||
{ 'geoserver_secure' => 'http://fake_geoserver_secure:8081', | ||
'geoserver' => 'http://fake_geoserver:8080', | ||
'download' => 'https://fake_spatial.lib.berkeley.edu' } | ||
end | ||
it 'updates references if configured to do so' do
  refs = { geoserver_url: 'http://geoserver-at-init/' }
  indexer = Gingr::SolrIndexer.new(nil, refs)
  # update_reference_urls! is declared under `private`, so it cannot be
  # called with an explicit receiver; go through #send instead.
  expect { indexer.send(:update_reference_urls!, document) }.to change { document }
  expect(document['dct_references_s']).to match 'http://geoserver-at-init/'
end
end | ||
|
||
it 'should call solr' do | ||
expect(solr).to have_received(:update).with( | ||
params: { commitWithin: 5000, overwrite: true }, | ||
data: [[doc].flatten[0]].to_json, | ||
headers: { 'Content-Type' => 'application/json' } | ||
) | ||
end | ||
describe '#index_directory' do | ||
it 'adds all .json files to solr' do
  files = ['foo.xml', 'bar.json', 'baz.json'].shuffle
  expect(Find).to receive(:find).with('directory').and_return files

  indexer = Gingr::SolrIndexer.new
  # Stub #add on this instance (rather than via any_instance.stub) so that
  # have_received can verify the calls below.
  allow(indexer).to receive(:add)

  indexer.index_directory('directory')
  expect(indexer).to have_received(:add).with('bar.json')
  expect(indexer).to have_received(:add).with('baz.json')
  expect(indexer).not_to have_received(:add).with('foo.xml')
end
end | ||
|
||
context 'not update reference urls' do | ||
let(:solr_indexer) { described_class.new(url) } | ||
it 'should not call the update reference field method' do | ||
solr_indexer.update(file_path) | ||
expect(solr_indexer.update_reference_urls?).to eq(false) | ||
end | ||
describe '#add' do | ||
let(:document) { JSON.load_file document_path } | ||
let(:document_path) { 'spec/fixture/jsonfile/berkeley_public_pdf.json' } | ||
|
||
it 'passes documents to the rsolr client' do | ||
solr = spy(RSolr::Client) | ||
indexer = Gingr::SolrIndexer.new(solr) | ||
indexer.add(document) | ||
expect(solr).to have_received(:add).with(document) | ||
end | ||
end | ||
|
||
describe '#commit' do | ||
before do | ||
allow(RSolr).to receive(:connect).and_return(solr) | ||
allow(solr).to receive(:commit) | ||
it 'automatically loads filepaths as JSON' do | ||
solr = spy(RSolr::Client) | ||
indexer = Gingr::SolrIndexer.new(solr) | ||
indexer.add(document_path) | ||
expect(solr).to have_received(:add).with(document) | ||
end | ||
|
||
it 'should initialize' do | ||
solr_indexer = described_class.new(url) | ||
solr_indexer.commit | ||
expect(solr_indexer.solr).to have_received(:commit) | ||
it 'modifies reference urls' do | ||
solr = spy(RSolr::Client) | ||
refs = { geoserver_url: 'http://geoserver-from-init/' } | ||
indexer = Gingr::SolrIndexer.new(solr, refs) | ||
expect { indexer.add(document) }.to change { document } | ||
expect(document['dct_references_s']).to match('http://geoserver-from-init/') | ||
expect(solr).to have_received(:add).with(document) | ||
end | ||
end | ||
end |