# Builds one display string from an ordered list of `unitdate` values,
# e.g., "1990-2000, bulk 1990-1999"
# @see http://www2.archivists.org/standards/DACS/part_I/chapter_2/4_date
class NormalizedDate
  # @param unitdates [Array<String>, nil] unitdate strings, in document order
  # @param unitdate_labels [Array<String, nil>, nil] the type label for the
  #   unitdate at the same index; a blank or missing label means "no type"
  def initialize(unitdates, unitdate_labels)
    # Array() turns nil into [], so absent dates or labels need no special case.
    labels = Array(unitdate_labels)
    @date_accumulator = Array(unitdates).each_with_index.map do |unitdate, index|
      # to_s guards against a labels array that is shorter than the dates
      # array or that contains nil entries.
      label = labels[index].to_s
      # Only bulk dates are prefixed; the label is emitted exactly as supplied.
      bulk_label?(label) ? "#{label} #{unitdate}" : unitdate
    end
  end

  # @return [String] the normalized title/date: every date joined with ", "
  def to_s
    @date_accumulator.join(', ')
  end

  private

  # @param label [String]
  # @return [Boolean] true when the label contains "bulk", ignoring case
  def bulk_label?(label)
    label.downcase.include?('bulk')
  end

  # NOTE(review): nothing in this class calls this helper any more; it is
  # retained unchanged in case subclasses depend on it -- confirm before removing.
  def year_range(date_array)
    YearRange.new(date_array.include?('/') ? date_array : date_array.map { |v| v.tr('-', '/') }).to_s
  end
end
to_field 'title_ssm', extract_xpath('./did/unittitle')
to_field 'title_tesim', extract_xpath('./did/unittitle')

to_field 'unitdates_ssm', extract_xpath('./did/unitdate')

# One label per unitdate, in the same order as the unitdate nodes selected
# for `unitdates_ssm`, so the date normalizer can pair them up by index.
# The XPath is deliberately the same child-axis expression used for
# `unitdates_ssm`; a descendant search would also pick up the unitdates of
# nested components.
to_field 'unitdates_labels_ssm' do |record, accumulator|
  record.xpath('./did/unitdate').each do |unitdate|
    # A unitdate without a @type still contributes an empty label.
    accumulator << unitdate['type'].to_s
  end
end

# Hands the collected dates and their labels to the configured normalizer
# class and stores its string form.
to_field 'normalized_date_ssm' do |_record, accumulator, context|
  accumulator << settings['date_normalizer'].constantize.new(
    context.output_hash['unitdates_ssm'],
    context.output_hash['unitdates_labels_ssm']
  ).to_s
end
to_field 'unitdates_ssm', extract_xpath('/ead/archdesc/did/unitdate')

# Emits one label per collection-level unitdate: the value of its @type
# attribute, or an empty string when the attribute is absent.
to_field 'unitdates_labels_ssm' do |record, accumulator|
  record.xpath('/ead/archdesc/did/unitdate').each do |unitdate|
    type_attribute = unitdate.attribute('type')
    accumulator << (type_attribute ? type_attribute.value : "")
  end
end

# Builds the normalized date string from the unitdates and labels that were
# indexed for this record.
to_field 'normalized_date_ssm' do |_record, accumulator, context|
  normalizer_class = settings['date_normalizer'].constantize
  normalizer = normalizer_class.new(
    context.output_hash['unitdates_ssm'],
    context.output_hash['unitdates_labels_ssm']
  )
  accumulator << normalizer.to_s
end

Administrative records include details materials directly related to the history and diff --git a/spec/lib/arclight/normalized_date_spec.rb b/spec/lib/arclight/normalized_date_spec.rb index aea180d2d..80b1ed4a5 100644 --- a/spec/lib/arclight/normalized_date_spec.rb +++ b/spec/lib/arclight/normalized_date_spec.rb @@ -3,22 +3,22 @@ require 'spec_helper' RSpec.describe Arclight::NormalizedDate do - subject(:normalized_date) { described_class.new(date_inclusive, date_bulk, date_other).to_s } + subject(:normalized_date) { described_class.new(unitdates, unitdate_labels).to_s } - let(:date_inclusive) { ['1990-2000'] } - let(:date_bulk) { '1999-2005' } - let(:date_other) { 'Undated' } + let(:unitdates) { ['1905', '1927-2000', '1982-1995'] } + let(:unitdate_labels) { ['', 'inclusive', 'bulk'] } context 'under normal conditions' do it 'joins dates' do - expect(normalized_date).to eq '1990-2000, Undated, bulk 1999-2005' + expect(normalized_date).to eq '1905, 1927-2000, bulk 1982-1995' end context 'multiple normalized dates' do - let(:date_inclusive) { %w[1990 1992] } + let(:unitdates) { %w[1990 1992] } + let(:unitdate_labels) { %w[inclusive inclusive] } it 'are joined w/ a comma' do - expect(normalized_date).to eq '1990, 1992, Undated, bulk 1999-2005' + expect(normalized_date).to eq '1990, 1992' end end end @@ -27,61 +27,59 @@ # NOTE: This test is the only place where the code that exercises this is routable # This has to be a multidimensional array, and the resulting XML nodes sent in are always flat context 'multiples' do - let(:date_inclusive) { [%w[1990-2000 2001-2002 2004]] } - let(:date_bulk) { '1990-2004' } + let(:unitdates) { ['1990-2000', '2001-2002', '2004', '1990-2004'] } + let(:unitdate_labels) { ['inclusive', 'inclusive', 'INCLUSIVE', 'bulk'] } it 'uses compressed joined years' do - expect(normalized_date).to eq '1990-2002, 2004, Undated, bulk 1990-2004' + expect(normalized_date).to eq '1990-2000, 2001-2002, 2004, bulk 1990-2004' end end context 'undated' do - 
let(:date_bulk) { 'n.d.' } + let(:unitdates) { ['1905', '1927-2000', 'n.d.'] } it 'do not normalized term "undated"' do - expect(normalized_date).to eq '1990-2000, Undated, bulk n.d.' + expect(normalized_date).to eq '1905, 1927-2000, bulk n.d.' end end - context 'circa' do - let(:date_bulk) { 'c.1995' } + context 'circa and mixed case' do + let(:unitdates) { ['1990-2000', 'c.1995'] } + let(:unitdate_labels) { ['', 'BuLk'] } it 'do not normalized term "circa"' do - expect(normalized_date).to eq '1990-2000, Undated, bulk c.1995' + expect(normalized_date).to eq '1990-2000, BuLk c.1995' end end context 'no bulk' do - let(:date_bulk) { nil } - let(:date_other) { nil } + let(:unitdate_labels) { ['', 'inclusive', ''] } it 'uses inclusive date only' do - expect(normalized_date).to eq '1990-2000' + expect(normalized_date).to eq '1905, 1927-2000, 1982-1995' end end context 'no inclusive or bulk but other' do - let(:date_inclusive) { nil } - let(:date_bulk) { nil } - let(:date_other) { 'n.d.' } + let(:unitdates) { %w[1963 1954] } + let(:unitdate_labels) { ['', ''] } it 'uses other' do - expect(normalized_date).to eq 'n.d.' + expect(normalized_date).to eq '1963, 1954' end end context 'no inclusive but bulk' do - let(:date_inclusive) { nil } - - it 'uses other and bulk' do - expect(normalized_date).to eq 'Undated, bulk 1999-2005' + let(:unitdates) { %w[1963 1954-1990] } + let(:unitdate_labels) { ['bulk', ''] } + it 'does not know what to do' do + expect(normalized_date).to eq 'bulk 1963, 1954-1990' end end context 'no information' do - let(:date_inclusive) { nil } - let(:date_bulk) { nil } - let(:date_other) { nil } + let(:unitdates) { nil } + let(:unitdate_labels) { nil } it 'does not know what to do' do expect(normalized_date).to eq ''