diff --git a/app/models/concerns/digital_object/assets/file_import.rb b/app/models/concerns/digital_object/assets/file_import.rb index 779dc4066..16620a35a 100644 --- a/app/models/concerns/digital_object/assets/file_import.rb +++ b/app/models/concerns/digital_object/assets/file_import.rb @@ -5,14 +5,9 @@ module DigitalObject::Assets::FileImport def do_file_import convert_upload_import_to_internal! - case @import_file_import_type - when DigitalObject::Asset::IMPORT_TYPE_INTERNAL, DigitalObject::Asset::IMPORT_TYPE_POST_DATA - final_save_location_uri, checksum_hexdigest_uri, import_file_size = handle_internal_import(@import_file_import_path) - when DigitalObject::Asset::IMPORT_TYPE_EXTERNAL - final_save_location_uri, checksum_hexdigest_uri, import_file_size = handle_external_import(@import_file_import_path) - else - raise "Unexpected @import_file_import_type: #{@import_file_import_type.inspect}" - end + final_save_location_uri, checksum_hexdigest_uri, import_file_size = handle_import_based_on_import_type( + @import_file_import_type, @import_file_import_path + ) original_filename = File.basename(@import_file_original_file_path || @import_file_import_path) @@ -49,12 +44,24 @@ def do_file_import self.original_file_path = (@import_file_original_file_path || @import_file_import_path) # This also updates the 'content' datastream label end + def handle_import_based_on_import_type(import_file_import_type, import_file_import_path) + case @import_file_import_type + when DigitalObject::Asset::IMPORT_TYPE_INTERNAL, DigitalObject::Asset::IMPORT_TYPE_POST_DATA + final_save_location_uri, checksum_hexdigest_uri, import_file_size = handle_internal_import(@import_file_import_path) + when DigitalObject::Asset::IMPORT_TYPE_EXTERNAL + final_save_location_uri, checksum_hexdigest_uri, import_file_size = handle_external_import(@import_file_import_path) + else + raise "Unexpected @import_file_import_type: #{@import_file_import_type.inspect}" + end + end + # Verifies the existence of the source file and collects its whole file checksum def handle_external_import(import_file_import_path) import_file_import_path = "file://#{Addressable::URI.encode(import_file_import_path)}" if import_file_import_path.start_with?('/') storage_object = Hyacinth::Storage.storage_object_for(import_file_import_path) raise "External file not found at: #{storage_object.path}" unless storage_object.exist? + raise Hyacinth::Exceptions::ZeroByteFileError, 'Original file file size is 0 bytes. File must contain data.' if storage_object.size == 0 case storage_object when Hyacinth::Storage::FileObject diff --git a/lib/hyacinth/utils/csv_import_export_utils.rb b/lib/hyacinth/utils/csv_import_export_utils.rb index 3fa67fa64..b183be5c2 100644 --- a/lib/hyacinth/utils/csv_import_export_utils.rb +++ b/lib/hyacinth/utils/csv_import_export_utils.rb @@ -118,28 +118,27 @@ def self.validate_import_job_csv_data(csv_data_string, user, import_job) end def self.validate_import_file_import_path_if_present(digital_object_data, csv_row_number, import_job) - if digital_object_data['import_file'].present? && digital_object_data['import_file']['import_path'].present? + return unless digital_object_data.dig('import_file', 'import_path').present? - import_file_type = digital_object_data['import_file']['import_type'] - import_file_path = digital_object_data['import_file']['import_path'] + import_file_type = digital_object_data['import_file']['import_type'] + import_file_path = digital_object_data['import_file']['import_path'] - # Concatenate upload directory path with import_file_path if this file comes from the upload directory - if import_file_type == DigitalObject::Asset::IMPORT_TYPE_UPLOAD_DIRECTORY - import_file_path = File.join(HYACINTH[:upload_directory], import_file_path) - end + # Concatenate upload directory path with import_file_path if this file comes from the upload directory + if import_file_type == DigitalObject::Asset::IMPORT_TYPE_UPLOAD_DIRECTORY + import_file_path = File.join(HYACINTH[:upload_directory], import_file_path) + end - # Check to see if a file exists at import_file_path - if import_file_path.start_with?('/') - import_job.errors.add( - :file_not_found, - "For CSV row #{csv_row_number}, could not find file at _import_file.import_path => " + import_file_path - ) unless File.file?(import_file_path) - elsif !import_file_path.start_with?('s3://') - import_job.errors.add( - :unhandled_import_path, - "For CSV row #{csv_row_number}, unable to handle value for _import_file.import_path => " + import_file_path - ) - end + # Check to see if a file exists at import_file_path + if import_file_path.start_with?('/') + import_job.errors.add( + :file_not_found, + "For CSV row #{csv_row_number}, could not find file at _import_file.import_path => " + import_file_path + ) unless File.file?(import_file_path) + elsif !import_file_path.start_with?('s3://') + import_job.errors.add( + :unhandled_import_path, + "For CSV row #{csv_row_number}, unable to handle value for _import_file.import_path => " + import_file_path + ) end end diff --git a/spec/models/digital_object/asset_spec.rb b/spec/models/digital_object/asset_spec.rb index 80edf6985..ec508e65a 100644 --- a/spec/models/digital_object/asset_spec.rb +++ b/spec/models/digital_object/asset_spec.rb @@ -25,7 +25,7 @@ expect(save_result).to eq(false) end - it "stores an asset's checksum in :has_message_digest relationship on the 'content' datastream, and can retrieve that checksum using the #checksum method", focus: true do + it "stores an asset's checksum in :has_message_digest relationship on the 'content' datastream, and can retrieve that checksum using the #checksum method" do asset = DigitalObject::Asset.new allow(asset).to receive(:allowed_publish_targets).and_return([]) allow(asset).to receive(:next_pid).and_return('some:pid') diff --git a/spec/rails_helper.rb b/spec/rails_helper.rb index e4dd3861c..331cf4a51 100644 --- a/spec/rails_helper.rb +++ b/spec/rails_helper.rb @@ -70,6 +70,7 @@ def sign_in_admin_user_controller_spec() def feature_spec_sign_in_admin_user visit '/users/sign_in' + sleep 1 # Sleeping for a second because this test occasionally times out within("#new_user") do fill_in 'user_email', :with => 'hyacinth-test@library.columbia.edu' fill_in 'user_password', :with => 'iamthetest' diff --git a/spec/requests/digital_objects_spec.rb b/spec/requests/digital_objects_spec.rb index 6a406ecab..4cb471e44 100644 --- a/spec/requests/digital_objects_spec.rb +++ b/spec/requests/digital_objects_spec.rb @@ -101,7 +101,7 @@ expect(response_json['pid'].length).not_to eq(0) end - it "works via filesystem upload (upload type: internal), copying the target file to the Hyacinth internal data store", focus: true do + it "works via filesystem upload (upload type: internal), copying the target file to the Hyacinth internal data store" do asset_digital_object_data = sample_asset_digital_object_data path_to_fixture_file = fixture('files/lincoln.jpg').path @@ -123,11 +123,11 @@ # Make sure that path to DigitalObject::Asset is internal, and stored within the hyacinth asset directory digital_object = DigitalObject::Base.find(response_json['pid']) - expect(digital_object.filesystem_location).to start_with(HYACINTH[:default_asset_home]) + expect(digital_object.filesystem_location).to start_with("file://#{HYACINTH[:default_asset_home]}") expect(digital_object.original_file_path).to eq(path_to_fixture_file) end - it "works via filesystem upload (upload type: external), referencing the target external file instead of copying the file to the Hyacinth internal data store", focus: true do + it "works via filesystem upload (upload type: external), referencing the target external file instead of copying the file to the Hyacinth internal data store" do asset_digital_object_data = sample_asset_digital_object_data path_to_fixture_file = fixture('files/lincoln.jpg').path @@ -149,7 +149,7 @@ # Make sure that path to DigitalObject::Asset is external, and continues to exist in the external directory referenced during the upload digital_object = DigitalObject::Base.find(response_json['pid']) - expect(digital_object.filesystem_location).to eq(path_to_fixture_file) + expect(digital_object.filesystem_location).to eq("file://#{path_to_fixture_file}") expect(digital_object.original_file_path).to eq(path_to_fixture_file) end