Skip to content

Commit

Permalink
Adding missing elements from physdesc
Browse files Browse the repository at this point in the history
  • Loading branch information
randalldfloyd committed Dec 13, 2023
1 parent 878dd3e commit f3c30db
Show file tree
Hide file tree
Showing 5 changed files with 133 additions and 16 deletions.
25 changes: 24 additions & 1 deletion lib/arclight/traject/ead2_component_config.rb
Original file line number Diff line number Diff line change
Expand Up @@ -155,12 +155,29 @@
accumulator.concat settings[:root].output_hash['normalized_title_ssm']
end

# This accumulates direct text from a physdesc, ignoring child elements handled elsewhere
# Index the text written directly inside each component-level <physdesc>.
# Each matched <physdesc> node is replaced in the accumulator by a single string
# made of its stripped, non-blank direct text fragments joined with a space,
# or by nil when the node has no direct text of its own.
to_field 'physdesc_ssm', extract_xpath('./did/physdesc', to_text: false) do |_record, accumulator|
  accumulator.map! do |element|
    direct_text = element.children.each_with_object([]) do |child, fragments|
      # Child elements are handled by their own fields; comments and processing
      # instructions are markup rather than descriptive text.
      next if child.element? || child.comment? || child.processing_instruction?

      fragment = child.text.to_s.strip
      fragments << fragment unless fragment.empty?
    end

    direct_text.join(' ') unless direct_text.empty?
  end
end

# Copy whatever is currently stored under 'physdesc_ssm' in the output hash
# into this field; nothing is added when that key has no value.
to_field 'physdesc_tesim' do |_record, accumulator, context|
  direct_text = context.output_hash['physdesc_ssm']
  accumulator.concat(direct_text) if direct_text
end

to_field 'extent_ssm' do |record, accumulator|
physdescs = record.xpath('./did/physdesc')
extents_per_physdesc = physdescs.map do |physdesc|
extents = physdesc.xpath('./extent').map { |e| e.text.strip }
# Join extents within the same physdesc with a single space
extents.join(' ')
extents.join(' ') unless extents.empty?
end

# Add each physdesc separately to the accumulator
Expand All @@ -171,6 +188,12 @@
accumulator.concat context.output_hash['extent_ssm'] || []
end

# Index each <physfacet> and <dimensions> found under ./did/physdesc into both
# an _ssm and a _tesim field named after the element.
%w[physfacet dimensions].each do |element_name|
  %w[ssm tesim].each do |suffix|
    to_field "#{element_name}_#{suffix}", extract_xpath("./did/physdesc/#{element_name}")
  end
end

# All three creator fields are populated from the same ./did/origination nodes.
%w[creator_ssm creator_ssim creators_ssim].each do |creator_field|
  to_field creator_field, extract_xpath('./did/origination')
end
Expand Down
25 changes: 24 additions & 1 deletion lib/arclight/traject/ead2_config.rb
Original file line number Diff line number Diff line change
Expand Up @@ -183,12 +183,29 @@
end
end

# This accumulates direct text from a physdesc, ignoring child elements handled elsewhere
# Index the text written directly inside each collection-level <physdesc>.
# Each matched <physdesc> node is replaced in the accumulator by a single string
# made of its stripped, non-blank direct text fragments joined with a space,
# or by nil when the node has no direct text of its own.
to_field 'physdesc_ssm', extract_xpath('/ead/archdesc/did/physdesc', to_text: false) do |_record, accumulator|
  accumulator.map! do |element|
    direct_text = element.children.each_with_object([]) do |child, fragments|
      # Child elements are handled by their own fields; comments and processing
      # instructions are markup rather than descriptive text.
      next if child.element? || child.comment? || child.processing_instruction?

      fragment = child.text.to_s.strip
      fragments << fragment unless fragment.empty?
    end

    direct_text.join(' ') unless direct_text.empty?
  end
end

# Copy whatever is currently stored under 'physdesc_ssm' in the output hash
# into this field; nothing is added when that key has no value.
to_field 'physdesc_tesim' do |_record, accumulator, context|
  direct_text = context.output_hash['physdesc_ssm']
  accumulator.concat(direct_text) if direct_text
end

to_field 'extent_ssm' do |record, accumulator|
physdescs = record.xpath('/ead/archdesc/did/physdesc')
extents_per_physdesc = physdescs.map do |physdesc|
extents = physdesc.xpath('./extent').map { |e| e.text.strip }
# Join extents within the same physdesc with a single space
extents.join(' ')
extents.join(' ') unless extents.empty?
end

# Add each physdesc separately to the accumulator
Expand All @@ -199,6 +216,12 @@
accumulator.concat context.output_hash['extent_ssm'] || []
end

# Index each <physfacet> and <dimensions> found under the collection-level
# physdesc into both an _ssm and a _tesim field named after the element.
%w[physfacet dimensions].each do |element_name|
  %w[ssm tesim].each do |suffix|
    to_field "#{element_name}_#{suffix}", extract_xpath("/ead/archdesc/did/physdesc/#{element_name}")
  end
end

# Index every <genreform> listed under the collection-level <controlaccess>.
to_field('genreform_ssim', extract_xpath('/ead/archdesc/controlaccess/genreform'))

to_field 'date_range_isim', extract_xpath('/ead/archdesc/did/unitdate/@normal', to_text: false) do |_record, accumulator|
Expand Down
67 changes: 56 additions & 11 deletions spec/features/traject/ead2_indexing_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -120,8 +120,22 @@ def components(result)
expect(result['places_ssim']).to equal_array_ignoring_whitespace ['Yosemite National Park (Calif.)']
end

it 'physdesc' do
expect(result['extent_ssm']).to equal_array_ignoring_whitespace ['1.25 Linear Feet (1 volume)', '1 document case', '16 DVDRs']
# One example per physdesc-derived field: example name => [indexed field, expected values].
describe 'physdesc' do
  {
    'direct text' => ['physdesc_ssm', ['Photographic album', 'Single bound volume']],
    'extent' => ['extent_ssm', ['1.25 Linear Feet (1 volume)', '1 document case', '16 DVDRs']],
    'physfacet' => ['physfacet_ssm', ['Printed material', 'Digital Video Disc']],
    'dimensions' => ['dimensions_ssm', ['20 x 20 in.', '7.5 x 5.5 in.']]
  }.each do |example_name, (field, expected)|
    it example_name do
      expect(result[field]).to equal_array_ignoring_whitespace(expected)
    end
  end
end

it 'has_online_content' do
Expand Down Expand Up @@ -295,18 +309,49 @@ def components(result)
).to_a.first
end

it 'extent at the collection level' do
%w[extent_ssm extent_tesim].each do |field|
expect(result[field]).to equal_array_ignoring_whitespace(['15.0 linear feet (36 boxes + oversize folder)', '3 CDs'])
# Checks the physdesc-derived fields on the collection document.
describe 'physdesc at the collection level' do
  it 'direct text' do
    expected = ['Boxes and folders', 'Compact discs']
    expect(result['physdesc_ssm']).to equal_array_ignoring_whitespace(expected)
  end

  it 'extent' do
    expected = ['15.0 linear feet (36 boxes + oversize folder)', '3 CDs']
    # The stored and the searchable field must carry the same values.
    expect(result['extent_ssm']).to equal_array_ignoring_whitespace(expected)
    expect(result['extent_tesim']).to equal_array_ignoring_whitespace(expected)
  end

  it 'physfacet' do
    expected = ['Compact digital disc']
    expect(result['physfacet_ssm']).to equal_array_ignoring_whitespace(expected)
  end

  it 'dimensions' do
    expected = ['7.5 x 5.5 in.']
    expect(result['dimensions_ssm']).to equal_array_ignoring_whitespace(expected)
  end
end

it 'extent at the component level' do
component = all_components.find { |c| c['ref_ssi'] == ['aspace_a951375d104030369a993ff943f61a77'] }
%w[extent_ssm extent_tesim].each do |field|
expect(component[field]).to equal_array_ignoring_whitespace(
['1.5 Linear Feet']
)
# Checks the physdesc-derived fields on a single component document.
describe 'physdesc at the component level' do
  # The component under test, looked up by its ref_ssi value.
  let(:component) do
    all_components.find { |doc| doc['ref_ssi'] == ['aspace_a951375d104030369a993ff943f61a77'] }
  end

  it 'direct text' do
    expected = ['Cards and sheets of various sizes']
    expect(component['physdesc_ssm']).to equal_array_ignoring_whitespace(expected)
  end

  it 'extent' do
    expected = ['1.5 Linear Feet']
    # The stored and the searchable field must carry the same values.
    expect(component['extent_ssm']).to equal_array_ignoring_whitespace(expected)
    expect(component['extent_tesim']).to equal_array_ignoring_whitespace(expected)
  end

  it 'physfacet' do
    expected = ['Informational cards']
    expect(component['physfacet_ssm']).to equal_array_ignoring_whitespace(expected)
  end

  it 'dimensions' do
    expected = ['various']
    expect(component['dimensions_ssm']).to equal_array_ignoring_whitespace(expected)
  end
end

Expand Down
22 changes: 19 additions & 3 deletions spec/fixtures/ead/nlm/alphaomegaalpha.xml
Original file line number Diff line number Diff line change
Expand Up @@ -41,11 +41,19 @@
<corpname source="ingest">Alpha Omega Alpha</corpname>
</origination>
<unitid>MS C 271</unitid>
<physdesc>
Boxes and folders
</physdesc>
<physdesc>
Compact discs
</physdesc>
<physdesc altrender="part">
<extent altrender="materialtype spaceoccupied">15.0 linear feet (36 boxes + oversize
folder)</extent>
</physdesc>
<physdesc altrender="part">
<dimensions>7.5 x 5.5 in.</dimensions>
<physfacet>Compact digital disc</physfacet>
<extent altrender="materialtype spaceoccupied">3 CDs</extent>
</physdesc>
<unitdate normal="1894/1992" type="inclusive">1894-1992</unitdate>
Expand Down Expand Up @@ -393,7 +401,7 @@
<bioghist id="aspace_ff0f536406ce214f5u38e1f7c798d76e">
<head>Historical Note</head>
<p>
The Society started keeping consistent adminstrative records in 1975, the same year that E. L. Doctorow published <emph render="italic">Ragtime</emph>.
The Society started keeping consistent adminstrative records in 1975, the same year that E. L. Doctorow published <emph render="italic">Ragtime</emph>.
</p>
</bioghist>
<accessrestrict id="aspace_7bbec2cdc1e6e0cb7b2ff1acb7c9e364">
Expand Down Expand Up @@ -724,8 +732,16 @@
<did>
<unittitle>Series II: Membership,</unittitle>
<unitid>MS C 271.II</unitid>
<physdesc altrender="whole"><extent altrender="materialtype spaceoccupied">1.5 Linear
Feet</extent></physdesc>
<physdesc>
Cards and sheets of various sizes
</physdesc>
<physdesc>
<physfacet>Informational cards</physfacet>
</physdesc>
<physdesc altrender="whole">
<dimensions>various</dimensions>
<extent altrender="materialtype spaceoccupied">1.5 Linear Feet</extent>
</physdesc>
<unitdate normal="1902/1973" type="inclusive">1902-1973</unitdate>
<abstract id="aspace_e806089c6553f2132b727ead99b47a70">Contains a mixture of membership
cards and membership rosters.</abstract>
Expand Down
10 changes: 10 additions & 0 deletions spec/fixtures/ead/sul-spec/a0011.xml
Original file line number Diff line number Diff line change
Expand Up @@ -78,13 +78,23 @@
</origination>
<unitid>A0011</unitid>
<physdesc altrender="part">
Photographic album
</physdesc>
<physdesc altrender="part">
Single bound volume
</physdesc>
<physdesc altrender="part">
<physfacet>Printed material</physfacet>
<extent altrender="materialtype spaceoccupied">1.25 Linear Feet</extent>
<extent altrender="carrier">(1 volume)</extent>
</physdesc>
<physdesc altrender="part">
<dimensions>20 x 20 in.</dimensions>
<extent altrender="materialtype spaceoccupied">1 document case</extent>
</physdesc>
<physdesc altrender="part">
<dimensions>7.5 x 5.5 in.</dimensions>
<physfacet>Digital Video Disc</physfacet>
<extent altrender="materialtype spaceoccupied">16 DVDRs</extent>
</physdesc>
<unitdate normal="1900/1906" type="inclusive">circa 1900-1906
Expand Down

0 comments on commit f3c30db

Please sign in to comment.