Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding missing elements from physdesc #1482

Merged
merged 3 commits into from
Dec 14, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 17 additions & 1 deletion lib/arclight/traject/ead2_component_config.rb
Original file line number Diff line number Diff line change
Expand Up @@ -167,12 +167,25 @@
accumulator.concat settings[:root].output_hash['normalized_title_ssm']
end

# Index only the text that sits directly inside each <physdesc>, skipping child
# elements. The extent, physfacet and dimensions children are indexed into their
# own fields, so including them here would duplicate that content.
#
# Each accumulator entry (a physdesc node, since to_text is false) is replaced
# with its whitespace-stripped direct text fragments joined by a single space,
# or with nil when the physdesc contains no direct text at all.
to_field 'physdesc_tesim', extract_xpath('./did/physdesc', to_text: false) do |_record, accumulator|
  accumulator.map! do |element|
    direct_text = element.children.each_with_object([]) do |child, fragments|
      # Child elements are handled by their own to_field rules.
      next if child.instance_of?(Nokogiri::XML::Element)

      # Strip once and reuse; whitespace-only nodes between elements are dropped.
      text = child.text.to_s.strip
      fragments << text unless text.empty?
    end

    direct_text.join(' ') unless direct_text.empty?
  end
end

to_field 'extent_ssm' do |record, accumulator|
physdescs = record.xpath('./did/physdesc')
extents_per_physdesc = physdescs.map do |physdesc|
extents = physdesc.xpath('./extent').map { |e| e.text.strip }
# Join extents within the same physdesc with a single space
extents.join(' ')
extents.join(' ') unless extents.empty?
end

# Add each physdesc separately to the accumulator
Expand All @@ -183,6 +196,9 @@
accumulator.concat context.output_hash['extent_ssm'] || []
end

# Index the <physfacet> and <dimensions> children of each <physdesc> under ./did
# into their own fields, separate from the physdesc direct text and the extents.
to_field 'physfacet_tesim', extract_xpath('./did/physdesc/physfacet')
to_field 'dimensions_tesim', extract_xpath('./did/physdesc/dimensions')

to_field 'creator_ssm', extract_xpath('./did/origination')
to_field 'creator_ssim', extract_xpath('./did/origination')
to_field 'creators_ssim', extract_xpath('./did/origination')
Expand Down
18 changes: 17 additions & 1 deletion lib/arclight/traject/ead2_config.rb
Original file line number Diff line number Diff line change
Expand Up @@ -183,12 +183,25 @@
end
end

# Index only the text that sits directly inside each archdesc-level <physdesc>,
# skipping child elements. The extent, physfacet and dimensions children are
# indexed into their own fields, so including them here would duplicate that
# content.
#
# Each accumulator entry (a physdesc node, since to_text is false) is replaced
# with its whitespace-stripped direct text fragments joined by a single space,
# or with nil when the physdesc contains no direct text at all.
to_field 'physdesc_tesim', extract_xpath('/ead/archdesc/did/physdesc', to_text: false) do |_record, accumulator|
  accumulator.map! do |element|
    direct_text = element.children.each_with_object([]) do |child, fragments|
      # Child elements are handled by their own to_field rules.
      next if child.instance_of?(Nokogiri::XML::Element)

      # Strip once and reuse; whitespace-only nodes between elements are dropped.
      text = child.text.to_s.strip
      fragments << text unless text.empty?
    end

    direct_text.join(' ') unless direct_text.empty?
  end
end

to_field 'extent_ssm' do |record, accumulator|
physdescs = record.xpath('/ead/archdesc/did/physdesc')
extents_per_physdesc = physdescs.map do |physdesc|
extents = physdesc.xpath('./extent').map { |e| e.text.strip }
# Join extents within the same physdesc with a single space
extents.join(' ')
extents.join(' ') unless extents.empty?
end

# Add each physdesc separately to the accumulator
Expand All @@ -199,6 +212,9 @@
accumulator.concat context.output_hash['extent_ssm'] || []
end

# Index the <physfacet> and <dimensions> children of each archdesc-level
# <physdesc> into their own fields, separate from the physdesc direct text and
# the extents.
to_field 'physfacet_tesim', extract_xpath('/ead/archdesc/did/physdesc/physfacet')
to_field 'dimensions_tesim', extract_xpath('/ead/archdesc/did/physdesc/dimensions')

to_field 'genreform_ssim', extract_xpath('/ead/archdesc/controlaccess/genreform')

to_field 'date_range_isim', extract_xpath('/ead/archdesc/did/unitdate/@normal', to_text: false) do |_record, accumulator|
Expand Down
6 changes: 6 additions & 0 deletions lib/generators/arclight/templates/catalog_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,9 @@ class CatalogController < ApplicationController
config.add_background_field 'accruals', field: 'accruals_html_tesm', helper_method: :render_html_tags
config.add_background_field 'phystech', field: 'phystech_html_tesm', helper_method: :render_html_tags
config.add_background_field 'physloc', field: 'physloc_html_tesm', helper_method: :render_html_tags
config.add_background_field 'physdesc', field: 'physdesc_tesim', helper_method: :render_html_tags
config.add_background_field 'physfacet', field: 'physfacet_tesim', helper_method: :render_html_tags
config.add_background_field 'dimensions', field: 'dimensions_tesim', helper_method: :render_html_tags
config.add_background_field 'materialspec', field: 'materialspec_tesim', helper_method: :render_html_tags
config.add_background_field 'fileplan', field: 'fileplan_html_tesim', helper_method: :render_html_tags
config.add_background_field 'descrules', field: 'descrules_ssm', helper_method: :render_html_tags
Expand Down Expand Up @@ -335,6 +338,9 @@ class CatalogController < ApplicationController
config.add_component_field 'phystech', field: 'phystech_html_tesm', helper_method: :render_html_tags
config.add_component_field 'materialspec', field: 'materialspec_tesim', helper_method: :render_html_tags
config.add_component_field 'physloc', field: 'physloc_html_tesm', helper_method: :render_html_tags
config.add_component_field 'physdesc', field: 'physdesc_tesim', helper_method: :render_html_tags
config.add_component_field 'physfacet', field: 'physfacet_tesim', helper_method: :render_html_tags
config.add_component_field 'dimensions', field: 'dimensions_tesim', helper_method: :render_html_tags
config.add_component_field 'fileplan', field: 'fileplan_html_tesim', helper_method: :render_html_tags
config.add_component_field 'altformavail', field: 'altformavail_html_tesim', helper_method: :render_html_tags
config.add_component_field 'otherfindaid', field: 'otherfindaid_html_tesm', helper_method: :render_html_tags
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@ en:
accruals: Accruals
phystech: Physical / technical requirements
physloc: Physical location
physdesc: Physical description
physfacet: Physical facet
dimensions: Dimensions
descrules: Rules or conventions

relatedmaterial: Related material
Expand Down
11 changes: 10 additions & 1 deletion spec/features/collection_page_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@
end
end

it 'background has configured metadata' do
it 'background has configured metadata' do # rubocop:disable RSpec/MultipleExpectations
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm fine with this.

within '#background' do
expect(page).to have_css('dt', text: 'Scope and content')
expect(page).to have_css('dd', text: /^Correspondence, documents, records, photos/)
Expand All @@ -99,6 +99,15 @@
expect(page).to have_css('dt', text: 'Arrangement')
expect(page).to have_css('dd', text: /^Arranged into seven series\./)

expect(page).to have_css('dt', text: 'Physical description')
expect(page).to have_css('dd', text: /^Boxes and folders/)

expect(page).to have_css('dt', text: 'Physical facet')
expect(page).to have_css('dd', text: /^Compact digital disc/)

expect(page).to have_css('dt', text: 'Dimensions')
expect(page).to have_css('dd', text: /^7\.5 x 5\.5 in\./)

expect(page).to have_css('dt', text: 'Rules or conventions')
expect(page).to have_css('dd', text: /^Finding aid prepared using Rules for Archival Description/)
end
Expand Down
11 changes: 11 additions & 0 deletions spec/features/component_page_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,17 @@
expect(page).to have_css('dd', text: /^These papers were maintained by the staff/)
end

# The component page should render a dt label and a matching dd value for the
# extent and for each of the physdesc-derived fields (direct text, dimensions,
# physfacet).
it 'shows configured component fields' do
expect(page).to have_css('dt', text: 'Extent')
expect(page).to have_css('dd', text: /^2 Linear Feet/)
expect(page).to have_css('dt', text: 'Physical description')
expect(page).to have_css('dd', text: /^Mixed Materials/)
expect(page).to have_css('dt', text: 'Dimensions')
expect(page).to have_css('dd', text: /^various/)
expect(page).to have_css('dt', text: 'Physical facet')
expect(page).to have_css('dd', text: /^Boxes and folders/)
end

it 'multivalued notes are rendered as paragaphs' do
within 'dd.blacklight-appraisal' do
expect(page).to have_css('p', count: 2)
Expand Down
67 changes: 56 additions & 11 deletions spec/features/traject/ead2_indexing_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -120,8 +120,22 @@ def components(result)
expect(result['places_ssim']).to equal_array_ignoring_whitespace ['Yosemite National Park (Calif.)']
end

it 'physdesc' do
expect(result['extent_ssm']).to equal_array_ignoring_whitespace ['1.25 Linear Feet (1 volume)', '1 document case', '16 DVDRs']
describe 'physdesc' do
it 'direct text' do
expect(result['physdesc_tesim']).to equal_array_ignoring_whitespace ['Photographic album', 'Single bound volume']
end

it 'extent' do
expect(result['extent_tesim']).to equal_array_ignoring_whitespace ['1.25 Linear Feet (1 volume)', '1 document case', '16 DVDRs']
end

it 'physfacet' do
expect(result['physfacet_tesim']).to equal_array_ignoring_whitespace ['Printed material', 'Digital Video Disc']
end

it 'dimensions' do
expect(result['dimensions_tesim']).to equal_array_ignoring_whitespace ['20 x 20 in.', '7.5 x 5.5 in.']
end
end

it 'has_online_content' do
Expand Down Expand Up @@ -295,18 +309,49 @@ def components(result)
).to_a.first
end

it 'extent at the collection level' do
%w[extent_ssm extent_tesim].each do |field|
expect(result[field]).to equal_array_ignoring_whitespace(['15.0 linear feet (36 boxes + oversize folder)', '3 CDs'])
describe 'physdesc at the collection level' do
it 'direct text' do
expect(result['physdesc_tesim']).to equal_array_ignoring_whitespace ['Boxes and folders', 'Compact discs']
end

it 'extent' do
%w[extent_ssm extent_tesim].each do |field|
expect(result[field]).to equal_array_ignoring_whitespace(['15.0 linear feet (36 boxes + oversize folder)', '3 CDs'])
end
end

it 'physfacet' do
expect(result['physfacet_tesim']).to equal_array_ignoring_whitespace ['Compact digital disc']
end

it 'dimensions' do
expect(result['dimensions_tesim']).to equal_array_ignoring_whitespace ['7.5 x 5.5 in.']
end
end

it 'extent at the component level' do
component = all_components.find { |c| c['ref_ssi'] == ['aspace_a951375d104030369a993ff943f61a77'] }
%w[extent_ssm extent_tesim].each do |field|
expect(component[field]).to equal_array_ignoring_whitespace(
['1.5 Linear Feet']
)
describe 'physdesc at the component level' do
let(:component) do
all_components.find { |c| c['ref_ssi'] == ['aspace_a951375d104030369a993ff943f61a77'] }
end

it 'direct text' do
expect(component['physdesc_tesim']).to equal_array_ignoring_whitespace ['Cards and sheets of various sizes']
end

it 'extent' do
%w[extent_ssm extent_tesim].each do |field|
expect(component[field]).to equal_array_ignoring_whitespace(
['1.5 Linear Feet']
)
end
end

it 'physfacet' do
expect(component['physfacet_tesim']).to equal_array_ignoring_whitespace ['Informational cards']
end

it 'dimensions' do
expect(component['dimensions_tesim']).to equal_array_ignoring_whitespace ['various']
end
end

Expand Down
32 changes: 29 additions & 3 deletions spec/fixtures/ead/nlm/alphaomegaalpha.xml
Original file line number Diff line number Diff line change
Expand Up @@ -41,11 +41,19 @@
<corpname source="ingest">Alpha Omega Alpha</corpname>
</origination>
<unitid>MS C 271</unitid>
<physdesc>
Boxes and folders
</physdesc>
<physdesc>
Compact discs
</physdesc>
<physdesc altrender="part">
<extent altrender="materialtype spaceoccupied">15.0 linear feet (36 boxes + oversize
folder)</extent>
</physdesc>
<physdesc altrender="part">
<dimensions>7.5 x 5.5 in.</dimensions>
<physfacet>Compact digital disc</physfacet>
<extent altrender="materialtype spaceoccupied">3 CDs</extent>
</physdesc>
<unitdate normal="1894/1992" type="inclusive">1894-1992</unitdate>
Expand Down Expand Up @@ -393,7 +401,7 @@
<bioghist id="aspace_ff0f536406ce214f5u38e1f7c798d76e">
<head>Historical Note</head>
<p>
The Society started keeping consistent adminstrative records in 1975, the same year that E. L. Doctorow published <emph render="italic">Ragtime</emph>.
The Society started keeping consistent adminstrative records in 1975, the same year that E. L. Doctorow published <emph render="italic">Ragtime</emph>.
</p>
</bioghist>
<accessrestrict id="aspace_7bbec2cdc1e6e0cb7b2ff1acb7c9e364">
Expand Down Expand Up @@ -466,6 +474,16 @@
<did>
<unittitle>Constitution and by-laws - drafts,</unittitle>
<unitdate normal="1902/1904" type="inclusive">1902-1904</unitdate>
<physdesc>
Mixed Materials
</physdesc>
<physdesc>
<physfacet>Boxes and folders</physfacet>
</physdesc>
<physdesc altrender="whole">
<dimensions>various</dimensions>
<extent altrender="materialtype spaceoccupied">2 Linear Feet</extent>
</physdesc>
<container id="aspace_19171bacef2f302c352195eaafca6b75" label="Mixed Materials"
type="box">1</container>
<container id="aspace_4cceb568b39913f2342e43dd3d54b772"
Expand Down Expand Up @@ -724,8 +742,16 @@
<did>
<unittitle>Series II: Membership,</unittitle>
<unitid>MS C 271.II</unitid>
<physdesc altrender="whole"><extent altrender="materialtype spaceoccupied">1.5 Linear
Feet</extent></physdesc>
<physdesc>
Cards and sheets of various sizes
</physdesc>
<physdesc>
<physfacet>Informational cards</physfacet>
</physdesc>
<physdesc altrender="whole">
<dimensions>various</dimensions>
<extent altrender="materialtype spaceoccupied">1.5 Linear Feet</extent>
</physdesc>
<unitdate normal="1902/1973" type="inclusive">1902-1973</unitdate>
<abstract id="aspace_e806089c6553f2132b727ead99b47a70">Contains a mixture of membership
cards and membership rosters.</abstract>
Expand Down
10 changes: 10 additions & 0 deletions spec/fixtures/ead/sul-spec/a0011.xml
Original file line number Diff line number Diff line change
Expand Up @@ -78,13 +78,23 @@
</origination>
<unitid>A0011</unitid>
<physdesc altrender="part">
Photographic album
</physdesc>
<physdesc altrender="part">
Single bound volume
</physdesc>
<physdesc altrender="part">
<physfacet>Printed material</physfacet>
<extent altrender="materialtype spaceoccupied">1.25 Linear Feet</extent>
<extent altrender="carrier">(1 volume)</extent>
</physdesc>
<physdesc altrender="part">
<dimensions>20 x 20 in.</dimensions>
<extent altrender="materialtype spaceoccupied">1 document case</extent>
</physdesc>
<physdesc altrender="part">
<dimensions>7.5 x 5.5 in.</dimensions>
<physfacet>Digital Video Disc</physfacet>
<extent altrender="materialtype spaceoccupied">16 DVDRs</extent>
</physdesc>
<unitdate normal="1900/1906" type="inclusive">circa 1900-1906
Expand Down