Skip to content

Commit

Permalink
Replicate the configurable date & title normalizer classes from component traject config over to collection. Closes #1434.
Browse files Browse the repository at this point in the history

- Implementers using these options will need to set them at both the component and collection levels
- Reorders related settings so it is clearer what collection & component configs have in common
  • Loading branch information
seanaery committed Dec 12, 2023
1 parent 91b5efd commit 7470b30
Showing 1 changed file with 9 additions and 11 deletions.
20 changes: 9 additions & 11 deletions lib/arclight/traject/ead2_config.rb
Original file line number Diff line number Diff line change
Expand Up @@ -51,11 +51,13 @@
].freeze

settings do
provide 'component_traject_config', File.join(__dir__, 'ead2_component_config.rb')
provide 'date_normalizer', 'Arclight::NormalizedDate'
provide 'title_normalizer', 'Arclight::NormalizedTitle'
provide 'reader_class_name', 'Arclight::Traject::NokogiriNamespacelessReader'
provide 'solr_writer.commit_on_close', 'true'
provide 'repository', ENV.fetch('REPOSITORY_ID', nil)
provide 'logger', Logger.new($stderr)
provide 'component_traject_config', File.join(__dir__, 'ead2_component_config.rb')
end

each_record do |_record, context|
Expand Down Expand Up @@ -100,22 +102,18 @@
to_field 'unitid_ssm', extract_xpath('/ead/archdesc/did/unitid')
to_field 'unitid_tesim', extract_xpath('/ead/archdesc/did/unitid')

to_field 'normalized_title_ssm' do |_record, accumulator, context|
dates = Arclight::NormalizedDate.new(
to_field 'normalized_date_ssm' do |_record, accumulator, context|
accumulator << settings['date_normalizer'].constantize.new(
context.output_hash['unitdate_inclusive_ssm'],
context.output_hash['unitdate_bulk_ssim'],
context.output_hash['unitdate_other_ssim']
).to_s
title = context.output_hash['title_ssm'].first
accumulator << Arclight::NormalizedTitle.new(title, dates).to_s
end

to_field 'normalized_date_ssm' do |_record, accumulator, context|
accumulator << Arclight::NormalizedDate.new(
context.output_hash['unitdate_inclusive_ssm'],
context.output_hash['unitdate_bulk_ssim'],
context.output_hash['unitdate_other_ssim']
).to_s
to_field 'normalized_title_ssm' do |_record, accumulator, context|
title = context.output_hash['title_ssm']&.first
date = context.output_hash['normalized_date_ssm']&.first
accumulator << settings['title_normalizer'].constantize.new(title, date).to_s
end

to_field 'collection_title_tesim' do |_record, accumulator, context|
Expand Down

0 comments on commit 7470b30

Please sign in to comment.