diff --git a/lib/arclight/normalized_id.rb b/lib/arclight/normalized_id.rb index 1e02ae413..afc6ccb35 100644 --- a/lib/arclight/normalized_id.rb +++ b/lib/arclight/normalized_id.rb @@ -7,7 +7,10 @@ module Arclight # A simple utility class to normalize identifiers # to be used around the application for linking class NormalizedId - def initialize(id) + # Accepts (and ignores) the extra kwargs passed by the ead2_config.rb id to_field directive + # (:title and :repository) so that applications can provide a custom + # id_normalizer class to traject that forms the collection id from these attributes. + def initialize(id, **_kwargs) @id = id end diff --git a/lib/arclight/traject/ead2_config.rb b/lib/arclight/traject/ead2_config.rb index da0c37b8b..8e6d66a2f 100644 --- a/lib/arclight/traject/ead2_config.rb +++ b/lib/arclight/traject/ead2_config.rb @@ -6,6 +6,7 @@ require 'traject_plus' require 'traject_plus/macros' require 'arclight/level_label' +require 'arclight/normalized_id' require 'arclight/normalized_date' require 'arclight/normalized_title' require 'active_model/conversion' ## Needed for Arclight::Repository @@ -53,6 +54,7 @@ settings do provide 'component_traject_config', File.join(__dir__, 'ead2_component_config.rb') + provide 'id_normalizer', 'Arclight::NormalizedId' provide 'date_normalizer', 'Arclight::NormalizedDate' provide 'title_normalizer', 'Arclight::NormalizedTitle' provide 'reader_class_name', 'Arclight::Traject::NokogiriNamespacelessReader' @@ -75,7 +77,13 @@ # NOTE: All fields should be stored in Solr # ================== -to_field 'id', extract_xpath('/ead/eadheader/eadid'), strip, gsub('.', '-') +to_field 'id' do |record, accumulator| + id = record.at_xpath('/ead/eadheader/eadid')&.text + title = record.at_xpath('/ead/archdesc/did/unittitle')&.text + repository = settings['repository'] + accumulator << settings['id_normalizer'].constantize.new(id, title: title, repository: repository).to_s +end + to_field 'title_filing_ssi', 
extract_xpath('/ead/eadheader/filedesc/titlestmt/titleproper[@type="filing"]') to_field 'title_ssm', extract_xpath('/ead/archdesc/did/unittitle') to_field 'title_tesim', extract_xpath('/ead/archdesc/did/unittitle') diff --git a/spec/lib/arclight/normalized_id_spec.rb b/spec/lib/arclight/normalized_id_spec.rb index 0c51be683..abcb99e3c 100644 --- a/spec/lib/arclight/normalized_id_spec.rb +++ b/spec/lib/arclight/normalized_id_spec.rb @@ -31,4 +31,12 @@ ) end end + + context 'when additional keyword arguments are supplied' do + subject(:normalized_id) { described_class.new('abc123.xml', title: 'a title', repository: 'repo').to_s } + + it 'accepts the additional arguments without changing the output' do + expect(normalized_id).to eq 'abc123-xml' + end + end end