From 1f755699702822040e4b504d14583a773a0590ac Mon Sep 17 00:00:00 2001 From: krwong <69482343+krwong@users.noreply.github.com> Date: Thu, 15 Aug 2024 08:41:21 -0400 Subject: [PATCH] BXC-4652 implement index --from-csv (#104) * implement index --from-csv * fix javadoc, csvFile path, removed unused code, readjust logic, parse generic csv when indexing, fix/add tests, dmrecord field * move force flag to CdmIndexOptions, remove ExportObjectsService from CdmIndexService, remove csv-specific methods, simplify code, rename assertCsvImportExists * remove conditional, add MIGRATION_FIELDS to exportFields in indexAllFromCsv * text primary key not null * text primary key not null if indexing from csv file * text primary key not null, fix test --- .../boxc/migration/cdm/CdmIndexCommand.java | 30 ++- .../migration/cdm/ExportObjectsCommand.java | 2 - .../cdm/model/ExportObjectsInfo.java | 2 +- .../cdm/options/CdmIndexOptions.java | 35 ++++ .../cdm/services/CdmFieldService.java | 36 +++- .../cdm/services/CdmIndexService.java | 67 +++++- .../boxc/migration/cdm/CdmIndexCommandIT.java | 19 ++ .../cdm/services/CdmFieldServiceTest.java | 22 ++ .../cdm/services/CdmIndexServiceTest.java | 195 ++++++++++++++---- .../migration/cdm/test/SipServiceHelper.java | 9 +- src/test/resources/files/exported_objects.csv | 4 + src/test/resources/files/more_fields.csv | 4 + 12 files changed, 368 insertions(+), 57 deletions(-) create mode 100644 src/main/java/edu/unc/lib/boxc/migration/cdm/options/CdmIndexOptions.java create mode 100644 src/test/resources/files/exported_objects.csv create mode 100644 src/test/resources/files/more_fields.csv diff --git a/src/main/java/edu/unc/lib/boxc/migration/cdm/CdmIndexCommand.java b/src/main/java/edu/unc/lib/boxc/migration/cdm/CdmIndexCommand.java index b93f9ec2..ac377d60 100644 --- a/src/main/java/edu/unc/lib/boxc/migration/cdm/CdmIndexCommand.java +++ b/src/main/java/edu/unc/lib/boxc/migration/cdm/CdmIndexCommand.java @@ -4,9 +4,13 @@ import static org.slf4j.LoggerFactory.getLogger; import java.io.IOException; +import java.nio.file.Files; import java.nio.file.Path; import java.util.concurrent.Callable; +import edu.unc.lib.boxc.migration.cdm.exceptions.MigrationException; +import edu.unc.lib.boxc.migration.cdm.model.CdmFieldInfo; +import edu.unc.lib.boxc.migration.cdm.options.CdmIndexOptions; import org.slf4j.Logger; import edu.unc.lib.boxc.migration.cdm.exceptions.StateAlreadyExistsException; @@ -14,28 +18,28 @@ import edu.unc.lib.boxc.migration.cdm.services.CdmFieldService; import edu.unc.lib.boxc.migration.cdm.services.CdmIndexService; import edu.unc.lib.boxc.migration.cdm.services.MigrationProjectFactory; +import picocli.CommandLine.Mixin; import picocli.CommandLine.Command; -import picocli.CommandLine.Option; import picocli.CommandLine.ParentCommand; /** * @author bbpennel */ @Command(name = "index", - description = "Index the exported CDM records for this project. Must be run after a complete export.") + description = "Populate the index of object records for this project. Must be run after " + + "exporting source metadata or providing a CSV file.") public class CdmIndexCommand implements Callable { private static final Logger log = getLogger(CdmIndexCommand.class); @ParentCommand private CLIMain parentCommand; - @Option(names = { "-f", "--force"}, - description = "Overwrite index if one already exists") - private boolean force; - private CdmFieldService fieldService; private CdmIndexService indexService; private MigrationProject project; + @Mixin + private CdmIndexOptions options; + @Override public Integer call() throws Exception { long start = System.nanoTime(); @@ -43,8 +47,18 @@ public Integer call() throws Exception { try { initialize(); - indexService.createDatabase(force); - indexService.indexAll(); + // if user provides csv, check that it exists + if (options.getCsvFile() != null) { + if (Files.exists(options.getCsvFile())) { + CdmFieldInfo csvExportFields = fieldService.retrieveFieldsFromCsv(options.getCsvFile()); + fieldService.persistFieldsToProject(project, csvExportFields); + } else { + throw new MigrationException("No csv file exists in " + options.getCsvFile()); + } + } + + indexService.createDatabase(options); + indexService.index(options); // Display any warning messages to user if (!indexService.getIndexingWarnings().isEmpty()) { indexService.getIndexingWarnings().forEach(msg -> outputLogger.info(msg)); diff --git a/src/main/java/edu/unc/lib/boxc/migration/cdm/ExportObjectsCommand.java b/src/main/java/edu/unc/lib/boxc/migration/cdm/ExportObjectsCommand.java index 4b9ba956..630d3ffa 100644 --- a/src/main/java/edu/unc/lib/boxc/migration/cdm/ExportObjectsCommand.java +++ b/src/main/java/edu/unc/lib/boxc/migration/cdm/ExportObjectsCommand.java @@ -1,6 +1,5 @@ package edu.unc.lib.boxc.migration.cdm; -import edu.unc.lib.boxc.migration.cdm.exceptions.InvalidProjectStateException; import edu.unc.lib.boxc.migration.cdm.model.MigrationProject; import edu.unc.lib.boxc.migration.cdm.services.ExportObjectsService; import edu.unc.lib.boxc.migration.cdm.services.MigrationProjectFactory; @@ -8,7 +7,6 @@ import picocli.CommandLine.Command; import picocli.CommandLine.ParentCommand; -import java.nio.file.NoSuchFileException; import java.nio.file.Path; import java.util.concurrent.Callable; diff --git a/src/main/java/edu/unc/lib/boxc/migration/cdm/model/ExportObjectsInfo.java b/src/main/java/edu/unc/lib/boxc/migration/cdm/model/ExportObjectsInfo.java index 9d493b37..ef4493de 100644 --- a/src/main/java/edu/unc/lib/boxc/migration/cdm/model/ExportObjectsInfo.java +++ b/src/main/java/edu/unc/lib/boxc/migration/cdm/model/ExportObjectsInfo.java @@ -8,7 +8,7 @@ * @author krwong */ public class ExportObjectsInfo { - public static final String RECORD_ID = "record_id"; + public static final String RECORD_ID = CdmFieldInfo.CDM_ID; public static final String FILE_PATH = "file_path"; public static final String FILENAME = "filename"; public static final String[] CSV_HEADERS = new String[] {RECORD_ID, FILE_PATH, FILENAME}; diff --git a/src/main/java/edu/unc/lib/boxc/migration/cdm/options/CdmIndexOptions.java b/src/main/java/edu/unc/lib/boxc/migration/cdm/options/CdmIndexOptions.java new file mode 100644 index 00000000..209d0c46 --- /dev/null +++ b/src/main/java/edu/unc/lib/boxc/migration/cdm/options/CdmIndexOptions.java @@ -0,0 +1,35 @@ +package edu.unc.lib.boxc.migration.cdm.options; + +import picocli.CommandLine.Option; + +import java.nio.file.Path; + +/** + * Options for indexing object records + * @author krwong + */ +public class CdmIndexOptions { + @Option(names = {"-c", "--from-csv"}, + description = {"Export objects CSV file used as source for populating sqlite database."}) + private Path csvFile; + + @Option(names = { "-f", "--force"}, + description = "Overwrite index if one already exists") + private boolean force; + + public Path getCsvFile() { + return csvFile; + } + + public void setCsvFile(Path csvFile) { + this.csvFile = csvFile; + } + + public boolean getForce() { + return force; + } + + public void setForce(boolean force) { + this.force = force; + } +} diff --git a/src/main/java/edu/unc/lib/boxc/migration/cdm/services/CdmFieldService.java b/src/main/java/edu/unc/lib/boxc/migration/cdm/services/CdmFieldService.java index f95fef5e..e70b6717 100644 --- a/src/main/java/edu/unc/lib/boxc/migration/cdm/services/CdmFieldService.java +++ b/src/main/java/edu/unc/lib/boxc/migration/cdm/services/CdmFieldService.java @@ -42,6 +42,7 @@ public class CdmFieldService { private CloseableHttpClient httpClient; private String cdmBaseUri; + private MigrationProject project; private static final String CDM_NICK_FIELD = "nick"; private static final String CDM_NAME_FIELD = "name"; @@ -69,7 +70,6 @@ public CdmFieldService() { /** * Get the URL for retrieving field info for the given collection - * @param cdmBaseUri * @param collectionId * @return */ @@ -129,7 +129,7 @@ public CdmFieldInfo retrieveFieldsForCollection(String collectionId) throws IOEx } /** - * Persist the field information out to the project project + * Persist the field information out to the project * @param project * @param fieldInfo * @throws IOException @@ -234,6 +234,34 @@ private void validateFieldName(String field, String headerField, int line, Set headers = parser.getRecords().get(0).toList(); + for (String header : headers) { + CdmFieldEntry fieldEntry = new CdmFieldEntry(); + fieldEntry.setNickName(header); + fieldEntry.setExportAs(header); + fieldEntry.setDescription(header); + fieldEntry.setSkipExport(false); + fieldInfo.getFields().add(fieldEntry); + } + } catch (Exception e) { + throw new MigrationException("Failed to parse exported objects path " + exportedObjectsPath, e); + } + return fieldInfo; + } + private String booleanToString(boolean bool) { return bool ? "y" : "n"; } @@ -245,4 +273,8 @@ public void setHttpClient(CloseableHttpClient httpClient) { public void setCdmBaseUri(String cdmBaseUri) { this.cdmBaseUri = cdmBaseUri; } + + public void setProject(MigrationProject project) { + this.project = project; + } } diff --git a/src/main/java/edu/unc/lib/boxc/migration/cdm/services/CdmIndexService.java b/src/main/java/edu/unc/lib/boxc/migration/cdm/services/CdmIndexService.java index 4618a0c5..ffdb1acc 100644 --- a/src/main/java/edu/unc/lib/boxc/migration/cdm/services/CdmIndexService.java +++ b/src/main/java/edu/unc/lib/boxc/migration/cdm/services/CdmIndexService.java @@ -6,7 +6,11 @@ import edu.unc.lib.boxc.migration.cdm.exceptions.StateAlreadyExistsException; import edu.unc.lib.boxc.migration.cdm.model.CdmFieldInfo; import edu.unc.lib.boxc.migration.cdm.model.MigrationProject; +import edu.unc.lib.boxc.migration.cdm.options.CdmIndexOptions; import edu.unc.lib.boxc.migration.cdm.util.ProjectPropertiesSerialization; +import org.apache.commons.csv.CSVFormat; +import org.apache.commons.csv.CSVParser; +import org.apache.commons.csv.CSVRecord; import org.apache.commons.lang3.StringUtils; import org.jdom2.Document; import org.jdom2.Element; @@ -16,6 +20,7 @@ import java.io.FileNotFoundException; import java.io.IOException; +import java.io.Reader; import java.nio.file.Files; import java.nio.file.NoSuchFileException; import java.sql.Connection; @@ -59,6 +64,14 @@ public class CdmIndexService { private String recordInsertSqlTemplate; private List indexingWarnings = new ArrayList<>(); + public void index(CdmIndexOptions options) throws Exception { + if (options.getCsvFile() != null) { + indexAllFromCsv(options); + } else { + indexAll(); + } + } + /** * Indexes all exported CDM records for this project * @throws IOException @@ -292,15 +305,16 @@ private void indexObject(Connection conn, List exportFieldValues) /** * Create the index database with all cdm and migration fields - * @param force + * @param options * @throws IOException */ - public void createDatabase(boolean force) throws IOException { - ensureDatabaseState(force); + public void createDatabase(CdmIndexOptions options) throws IOException { + ensureDatabaseState(options.getForce()); CdmFieldInfo fieldInfo = fieldService.loadFieldsFromProject(project); - List exportFields = new ArrayList<>(fieldInfo.listAllExportFields()); + List exportFields = fieldInfo.listAllExportFields(); exportFields.addAll(MIGRATION_FIELDS); + StringBuilder queryBuilder = new StringBuilder("CREATE TABLE " + TB_NAME + " (\n"); for (int i = 0; i < exportFields.size(); i++) { String field = exportFields.get(i); @@ -327,7 +341,7 @@ public void createDatabase(boolean force) throws IOException { private String indexFieldType(String exportField) { if (CdmFieldInfo.CDM_ID.equals(exportField)) { - return "INT PRIMARY KEY NOT NULL"; + return "TEXT PRIMARY KEY NOT NULL"; } else if (CHILD_ORDER_FIELD.equals(exportField)) { return "INT"; } else { @@ -335,6 +349,49 @@ private String indexFieldType(String exportField) { } } + /** + * Indexes all exported objects for this project + * @param options + * @throws IOException + */ + public void indexAllFromCsv(CdmIndexOptions options) throws IOException { + assertCsvImportExists(options); + + CdmFieldInfo fieldInfo = fieldService.loadFieldsFromProject(project); + List exportFields = fieldInfo.listAllExportFields(); + exportFields.addAll(MIGRATION_FIELDS); + recordInsertSqlTemplate = makeInsertTemplate(exportFields); + + try ( + var conn = openDbConnection(); + Reader reader = Files.newBufferedReader(options.getCsvFile()); + CSVParser csvParser = new CSVParser(reader, CSVFormat.DEFAULT + .withFirstRecordAsHeader() + .withHeader(String.valueOf(exportFields)) + .withTrim()); + ) { + for (CSVRecord csvRecord : csvParser) { + if (!csvRecord.get(0).isEmpty()) { + List fieldValues = csvRecord.toList(); + indexObject(conn, fieldValues); + } + } + } catch (IOException e) { + throw new MigrationException("Failed to read export files", e); + } catch (SQLException e) { + throw new MigrationException("Failed to update database", e); + } + + project.getProjectProperties().setIndexedDate(Instant.now()); + ProjectPropertiesSerialization.write(project); + } + + private void assertCsvImportExists(CdmIndexOptions options) { + if (Files.notExists(options.getCsvFile())) { + throw new InvalidProjectStateException("User provided csv must exist prior to indexing"); + } + } + private void ensureDatabaseState(boolean force) { if (Files.exists(project.getIndexPath())) { if (force) { diff --git a/src/test/java/edu/unc/lib/boxc/migration/cdm/CdmIndexCommandIT.java b/src/test/java/edu/unc/lib/boxc/migration/cdm/CdmIndexCommandIT.java index 22d99ce4..c1970f3b 100644 --- a/src/test/java/edu/unc/lib/boxc/migration/cdm/CdmIndexCommandIT.java +++ b/src/test/java/edu/unc/lib/boxc/migration/cdm/CdmIndexCommandIT.java @@ -121,6 +121,25 @@ public void indexWithWarningsTest() throws Exception { assertOutputContains("CPD file referenced by object 604 in desc.all was not found"); } + @Test + public void indexFromCsvTest() throws Exception { + initProject(); + Files.createDirectories(project.getExportPath()); + + Files.copy(Paths.get("src/test/resources/files/exported_objects.csv"), project.getExportObjectsPath()); + setExportedDate(); + + String[] args = new String[] { + "-w", project.getProjectPath().toString(), + "index", + "-c", "src/test/resources/files/exported_objects.csv"}; + executeExpectSuccess(args); + + assertTrue(Files.exists(project.getIndexPath())); + assertTrue(Files.exists(project.getFieldsPath())); + assertDateIndexedPresent(); + } + private void setExportedDate() throws Exception { project.getProjectProperties().setExportedDate(Instant.now()); ProjectPropertiesSerialization.write(project); diff --git a/src/test/java/edu/unc/lib/boxc/migration/cdm/services/CdmFieldServiceTest.java b/src/test/java/edu/unc/lib/boxc/migration/cdm/services/CdmFieldServiceTest.java index d15a5108..3f348f90 100644 --- a/src/test/java/edu/unc/lib/boxc/migration/cdm/services/CdmFieldServiceTest.java +++ b/src/test/java/edu/unc/lib/boxc/migration/cdm/services/CdmFieldServiceTest.java @@ -12,9 +12,11 @@ import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Path; +import java.nio.file.Paths; import java.util.List; import java.util.Optional; +import edu.unc.lib.boxc.migration.cdm.model.ExportObjectsInfo; import edu.unc.lib.boxc.migration.cdm.test.BxcEnvironmentHelper; import edu.unc.lib.boxc.migration.cdm.test.CdmEnvironmentHelper; import org.apache.commons.io.FileUtils; @@ -63,6 +65,7 @@ public void setup() throws Exception { service = new CdmFieldService(); service.setHttpClient(httpClient); service.setCdmBaseUri(CDM_BASE_URL); + service.setProject(project); when(httpClient.execute(any(HttpGet.class))).thenReturn(httpResp); when(httpResp.getEntity()).thenReturn(respEntity); @@ -342,6 +345,25 @@ public void retrieveValidateAndReloadRoundTripTest() throws Exception { "BLANK", fieldsLoaded); } + @Test + public void retrieveCdmFieldsFromCsvTest() throws Exception { + Files.copy(Paths.get("src/test/resources/files/exported_objects.csv"), + project.getExportObjectsPath()); + + CdmFieldInfo fieldInfo = service.retrieveFieldsFromCsv(Paths.get("src/test/resources/files/exported_objects.csv")); + List fields = fieldInfo.getFields(); + + assertHasFieldWithValue(ExportObjectsInfo.RECORD_ID, ExportObjectsInfo.RECORD_ID, ExportObjectsInfo.RECORD_ID, + false, null, null, null, + null, null, fields); + assertHasFieldWithValue(ExportObjectsInfo.FILE_PATH, ExportObjectsInfo.FILE_PATH, ExportObjectsInfo.FILE_PATH, + false, null, null, null, + null, null, fields); + assertHasFieldWithValue(ExportObjectsInfo.FILENAME, ExportObjectsInfo.FILENAME, ExportObjectsInfo.FILENAME, + false, null, null, null, + null, null, fields); + } + private void assertHasFieldWithValue(String nick, String expectedExport, String expectedDesc, boolean expectedSkip, String expectedCdmRequired, String expectedCdmSearchable, String expectedCdmHidden, String expectedCdmVocab, String expectedCdmDcMapping, List fields) { diff --git a/src/test/java/edu/unc/lib/boxc/migration/cdm/services/CdmIndexServiceTest.java b/src/test/java/edu/unc/lib/boxc/migration/cdm/services/CdmIndexServiceTest.java index 78288b15..46d5b55e 100644 --- a/src/test/java/edu/unc/lib/boxc/migration/cdm/services/CdmIndexServiceTest.java +++ b/src/test/java/edu/unc/lib/boxc/migration/cdm/services/CdmIndexServiceTest.java @@ -3,8 +3,10 @@ import edu.unc.lib.boxc.migration.cdm.exceptions.MigrationException; import edu.unc.lib.boxc.migration.cdm.exceptions.StateAlreadyExistsException; import edu.unc.lib.boxc.migration.cdm.model.CdmFieldInfo; +import edu.unc.lib.boxc.migration.cdm.model.ExportObjectsInfo; import edu.unc.lib.boxc.migration.cdm.model.MigrationProject; import edu.unc.lib.boxc.migration.cdm.model.MigrationProjectProperties; +import edu.unc.lib.boxc.migration.cdm.options.CdmIndexOptions; import edu.unc.lib.boxc.migration.cdm.test.BxcEnvironmentHelper; import edu.unc.lib.boxc.migration.cdm.test.CdmEnvironmentHelper; import edu.unc.lib.boxc.migration.cdm.util.ProjectPropertiesSerialization; @@ -64,8 +66,10 @@ public void indexExportOneFileTest() throws Exception { CdmFileRetrievalService.getDescAllPath(project)); Files.copy(Paths.get("src/test/resources/gilmer_fields.csv"), project.getFieldsPath()); setExportedDate(); + CdmIndexOptions options = new CdmIndexOptions(); + options.setForce(false); - service.createDatabase(false); + service.createDatabase(options); service.indexAll(); assertDateIndexedPresent(); @@ -79,32 +83,35 @@ public void indexExportOneFileTest() throws Exception { Statement stmt = conn.createStatement(); ResultSet rs = stmt.executeQuery("select " + String.join(",", exportFields) + " from " + CdmIndexService.TB_NAME + " order by " + CdmFieldInfo.CDM_ID + " asc"); - rs.next(); - assertEquals(25, rs.getInt(CdmFieldInfo.CDM_ID)); - assertEquals("2005-11-23", rs.getString(CdmFieldInfo.CDM_CREATED)); - assertEquals("Redoubt C", rs.getString("title")); - assertEquals("Paper is discolored.", rs.getString("notes")); - assertEquals("276_182_E.tif", rs.getString("file")); - try { - rs.getString("search"); - fail("Skipped field must not be indexed"); - } catch (SQLException e) { + while (rs.next()) { + int id = rs.getInt(CdmFieldInfo.CDM_ID); + if (id == 25) { + assertEquals(25, rs.getInt(CdmFieldInfo.CDM_ID)); + assertEquals("2005-11-23", rs.getString(CdmFieldInfo.CDM_CREATED)); + assertEquals("Redoubt C", rs.getString("title")); + assertEquals("Paper is discolored.", rs.getString("notes")); + assertEquals("276_182_E.tif", rs.getString("file")); + try { + rs.getString("search"); + fail("Skipped field must not be indexed"); + } catch (SQLException e) { + } + } + if (id == 26) { + assertEquals(26, rs.getInt(CdmFieldInfo.CDM_ID)); + assertEquals("2005-11-24", rs.getString(CdmFieldInfo.CDM_CREATED)); + assertEquals("Plan of Battery McIntosh", rs.getString("title")); + assertEquals("Paper", rs.getString("medium")); + assertEquals("276_183_E.tif", rs.getString("file")); + } + if (id == 27) { + assertEquals(27, rs.getInt(CdmFieldInfo.CDM_ID)); + assertEquals("2005-12-08", rs.getString(CdmFieldInfo.CDM_CREATED)); + assertEquals("Fort DeRussy on Red River, Louisiana", rs.getString("title")); + assertEquals("Bill Richards", rs.getString("creatb")); + assertEquals("276_203_E.tif", rs.getString("file")); + } } - - rs.next(); - assertEquals(26, rs.getInt(CdmFieldInfo.CDM_ID)); - assertEquals("2005-11-24", rs.getString(CdmFieldInfo.CDM_CREATED)); - assertEquals("Plan of Battery McIntosh", rs.getString("title")); - assertEquals("Paper", rs.getString("medium")); - assertEquals("276_183_E.tif", rs.getString("file")); - - rs.next(); - assertEquals(27, rs.getInt(CdmFieldInfo.CDM_ID)); - assertEquals("2005-12-08", rs.getString(CdmFieldInfo.CDM_CREATED)); - assertEquals("Fort DeRussy on Red River, Louisiana", rs.getString("title")); - assertEquals("Bill Richards", rs.getString("creatb")); - assertEquals("276_203_E.tif", rs.getString("file")); - } finally { CdmIndexService.closeDbConnection(conn); } @@ -116,10 +123,12 @@ public void indexAlreadyExistsTest() throws Exception { CdmFileRetrievalService.getDescAllPath(project)); Files.copy(Paths.get("src/test/resources/gilmer_fields.csv"), project.getFieldsPath()); setExportedDate(); + CdmIndexOptions options = new CdmIndexOptions(); + options.setForce(false); - service.createDatabase(false); + service.createDatabase(options); try { - service.createDatabase(false); + service.createDatabase(options); fail(); } catch (StateAlreadyExistsException e) { assertTrue(e.getMessage().contains("Cannot create index, an index file already exists")); @@ -133,15 +142,18 @@ public void indexAlreadyExistsForceFlagTest() throws Exception { CdmFileRetrievalService.getDescAllPath(project)); Files.copy(Paths.get("src/test/resources/gilmer_fields.csv"), project.getFieldsPath()); setExportedDate(); + CdmIndexOptions options = new CdmIndexOptions(); + options.setForce(false); - service.createDatabase(false); + service.createDatabase(options); service.indexAll(); assertRowCount(3); // Switch desc to full set and force a reindex Files.copy(Paths.get("src/test/resources/descriptions/gilmer/index/description/desc.all"), CdmFileRetrievalService.getDescAllPath(project), StandardCopyOption.REPLACE_EXISTING); - service.createDatabase(true); + options.setForce(true); + service.createDatabase(options); service.indexAll(); assertRowCount(161); @@ -154,8 +166,10 @@ public void removeIndexTest() throws Exception { CdmFileRetrievalService.getDescAllPath(project)); Files.copy(Paths.get("src/test/resources/gilmer_fields.csv"), project.getFieldsPath()); setExportedDate(); + CdmIndexOptions options = new CdmIndexOptions(); + options.setForce(false); - service.createDatabase(false); + service.createDatabase(options); service.indexAll(); assertRowCount(3); @@ -170,8 +184,10 @@ public void invalidExportFileTest() throws Exception { FileUtils.write(CdmFileRetrievalService.getDescAllPath(project).toFile(), "uh oh", ISO_8859_1); Files.copy(Paths.get("src/test/resources/gilmer_fields.csv"), project.getFieldsPath()); setExportedDate(); + CdmIndexOptions options = new CdmIndexOptions(); + options.setForce(false); - service.createDatabase(false); + service.createDatabase(options); try { service.indexAll(); fail(); @@ -188,8 +204,10 @@ public void missingConfiguredFieldTest() throws Exception { fieldString += "\nmystery,mystery,Mysterious,false,0,0,0,0,mystery"; FileUtils.writeStringToFile(project.getFieldsPath().toFile(), fieldString, ISO_8859_1); setExportedDate(); + CdmIndexOptions options = new CdmIndexOptions(); + options.setForce(false); - service.createDatabase(false); + service.createDatabase(options); service.indexAll(); CdmFieldInfo fieldInfo = fieldService.loadFieldsFromProject(project); @@ -220,8 +238,10 @@ public void indexExportWithCompoundObjectsTest() throws Exception { CdmFileRetrievalService.getExportedCpdsPath(project).resolve("620.cpd")); Files.copy(Paths.get("src/test/resources/keepsakes_fields.csv"), project.getFieldsPath()); setExportedDate(); + CdmIndexOptions options = new CdmIndexOptions(); + options.setForce(false); - service.createDatabase(false); + service.createDatabase(options); service.indexAll(); assertDateIndexedPresent(); @@ -312,8 +332,10 @@ public void indexExportWithMissingCompoundObjectTest() throws Exception { CdmFileRetrievalService.getExportedCpdsPath(project).resolve("620.cpd")); Files.copy(Paths.get("src/test/resources/keepsakes_fields.csv"), project.getFieldsPath()); setExportedDate(); + CdmIndexOptions options = new CdmIndexOptions(); + options.setForce(false); - service.createDatabase(false); + service.createDatabase(options); service.indexAll(); assertDateIndexedPresent(); @@ -380,8 +402,10 @@ public void indexExportFieldContainsNewlinesTest() throws Exception { CdmFileRetrievalService.getDescAllPath(project)); Files.copy(Paths.get("src/test/resources/descriptions/plantations/cdm_fields.csv"), project.getFieldsPath()); setExportedDate(); + CdmIndexOptions options = new CdmIndexOptions(); + options.setForce(false); - service.createDatabase(false); + service.createDatabase(options); service.indexAll(); assertDateIndexedPresent(); @@ -424,8 +448,10 @@ public void indexExportWithMonographCompoundObjectsTest() throws Exception { CdmFileRetrievalService.getExportedCpdsPath(project).resolve("196.cpd")); Files.copy(Paths.get("src/test/resources/monograph_fields.csv"), project.getFieldsPath()); setExportedDate(); + CdmIndexOptions options = new CdmIndexOptions(); + options.setForce(false); - service.createDatabase(false); + service.createDatabase(options); service.indexAll(); assertDateIndexedPresent(); @@ -486,8 +512,10 @@ public void indexExportReservedWordFieldTest() throws Exception { CdmFileRetrievalService.getDescAllPath(project)); Files.copy(Paths.get("src/test/resources/roy_brown/cdm_fields.csv"), project.getFieldsPath()); setExportedDate(); + CdmIndexOptions options = new CdmIndexOptions(); + options.setForce(false); - service.createDatabase(false); + service.createDatabase(options); service.indexAll(); assertDateIndexedPresent(); @@ -629,6 +657,97 @@ public void buildDocumentWithLongFieldTest() throws Exception { assertEquals("7", rootEl.getChildText("dmrecord")); } + @Test + public void indexFromCsvTest() throws Exception { + CdmFieldInfo csvExportFields = fieldService.retrieveFieldsFromCsv(Paths.get("src/test/resources/files/exported_objects.csv")); + fieldService.persistFieldsToProject(project, csvExportFields); + setExportedDate(); + CdmIndexOptions options = new CdmIndexOptions(); + options.setCsvFile(Paths.get("src/test/resources/files/exported_objects.csv")); + options.setForce(false); + + service.createDatabase(options); + service.indexAllFromCsv(options); + + assertDateIndexedPresent(); + assertRowCount(3); + + CdmFieldInfo fieldInfo = fieldService.loadFieldsFromProject(project); + List exportFields = fieldInfo.listAllExportFields(); + + Connection conn = service.openDbConnection(); + try { + Statement stmt = conn.createStatement(); + ResultSet rs = stmt.executeQuery("select " + String.join(",", exportFields) + + " from " + CdmIndexService.TB_NAME + " order by " + ExportObjectsInfo.RECORD_ID + " asc"); + rs.next(); + assertEquals("test-00001", rs.getString(ExportObjectsInfo.RECORD_ID)); + assertEquals("src/test/resources/files/D2_035_Varners_DrugStore_interior.tif", + rs.getString(ExportObjectsInfo.FILE_PATH)); + assertEquals("D2_035_Varners_DrugStore_interior.tif", rs.getString(ExportObjectsInfo.FILENAME)); + + rs.next(); + assertEquals("test-00002", rs.getString(ExportObjectsInfo.RECORD_ID)); + assertEquals("src/test/resources/files/MJM_7_016_LumberMills_IndianCreekTrestle.tif", + rs.getString(ExportObjectsInfo.FILE_PATH)); + assertEquals("MJM_7_016_LumberMills_IndianCreekTrestle.tif", rs.getString(ExportObjectsInfo.FILENAME)); + + rs.next(); + assertEquals("test-00003", rs.getString(ExportObjectsInfo.RECORD_ID)); + assertEquals("src/test/resources/files/IMG_2377.jpeg", rs.getString(ExportObjectsInfo.FILE_PATH)); + assertEquals("IMG_2377.jpeg", rs.getString(ExportObjectsInfo.FILENAME)); + } finally { + CdmIndexService.closeDbConnection(conn); + } + } + + @Test + public void indexFromCsvMoreFieldsTest() throws Exception { + CdmFieldInfo csvExportFields = fieldService.retrieveFieldsFromCsv(Paths.get("src/test/resources/files/more_fields.csv")); + fieldService.persistFieldsToProject(project, csvExportFields); + setExportedDate(); + CdmIndexOptions options = new CdmIndexOptions(); + options.setCsvFile(Paths.get("src/test/resources/files/more_fields.csv")); + options.setForce(false); + + service.createDatabase(options); + service.indexAllFromCsv(options); + + assertDateIndexedPresent(); + assertRowCount(3); + + CdmFieldInfo fieldInfo = fieldService.loadFieldsFromProject(project); + List exportFields = fieldInfo.listAllExportFields(); + + Connection conn = service.openDbConnection(); + try { + Statement stmt = conn.createStatement(); + ResultSet rs = stmt.executeQuery("select " + String.join(",", exportFields) + + " from " + CdmIndexService.TB_NAME + " order by " + ExportObjectsInfo.RECORD_ID + " asc"); + rs.next(); + assertEquals("test-00001", rs.getString(ExportObjectsInfo.RECORD_ID)); + assertEquals("src/test/resources/files/D2_035_Varners_DrugStore_interior.tif", + rs.getString(ExportObjectsInfo.FILE_PATH)); + assertEquals("D2_035_Varners_DrugStore_interior.tif", rs.getString(ExportObjectsInfo.FILENAME)); + assertEquals("tif", rs.getString("file_type")); + + rs.next(); + assertEquals("test-00002", rs.getString(ExportObjectsInfo.RECORD_ID)); + assertEquals("src/test/resources/files/MJM_7_016_LumberMills_IndianCreekTrestle.tif", + rs.getString(ExportObjectsInfo.FILE_PATH)); + assertEquals("MJM_7_016_LumberMills_IndianCreekTrestle.tif", rs.getString(ExportObjectsInfo.FILENAME)); + assertEquals("tif", rs.getString("file_type")); + + rs.next(); + assertEquals("test-00003", rs.getString(ExportObjectsInfo.RECORD_ID)); + assertEquals("src/test/resources/files/IMG_2377.jpeg", rs.getString(ExportObjectsInfo.FILE_PATH)); + assertEquals("IMG_2377.jpeg", rs.getString(ExportObjectsInfo.FILENAME)); + assertEquals("jpeg", rs.getString("file_type")); + } finally { + CdmIndexService.closeDbConnection(conn); + } + } + private void assertDateIndexedPresent() throws Exception { MigrationProjectProperties props = ProjectPropertiesSerialization.read(project.getProjectPropertiesPath()); assertNotNull(props.getIndexedDate()); diff --git a/src/test/java/edu/unc/lib/boxc/migration/cdm/test/SipServiceHelper.java b/src/test/java/edu/unc/lib/boxc/migration/cdm/test/SipServiceHelper.java index c5963a3a..556e4262 100644 --- a/src/test/java/edu/unc/lib/boxc/migration/cdm/test/SipServiceHelper.java +++ b/src/test/java/edu/unc/lib/boxc/migration/cdm/test/SipServiceHelper.java @@ -21,12 +21,14 @@ import java.util.stream.Collectors; import edu.unc.lib.boxc.auth.api.UserRole; +import edu.unc.lib.boxc.migration.cdm.options.CdmIndexOptions; import edu.unc.lib.boxc.migration.cdm.options.GenerateSourceFileMappingOptions; import edu.unc.lib.boxc.migration.cdm.options.PermissionMappingOptions; import edu.unc.lib.boxc.migration.cdm.services.AggregateFileMappingService; import edu.unc.lib.boxc.migration.cdm.services.ArchivalDestinationsService; import edu.unc.lib.boxc.migration.cdm.services.CdmFileRetrievalService; import edu.unc.lib.boxc.migration.cdm.services.ChompbConfigService; +import edu.unc.lib.boxc.migration.cdm.services.ExportObjectsService; import edu.unc.lib.boxc.migration.cdm.services.GroupMappingService; import edu.unc.lib.boxc.migration.cdm.services.PermissionsService; import edu.unc.lib.boxc.migration.cdm.services.StreamingMetadataService; @@ -97,6 +99,7 @@ public class SipServiceHelper { private PIDMinter pidMinter; private PremisLoggerFactoryImpl premisLoggerFactory; private ChompbConfigService.ChompbConfig chompbConfig; + private ExportObjectsService exportObjectsService; public SipServiceHelper(MigrationProject project, Path filesBasePath) throws IOException { this.sourceFilesBasePath = new File(filesBasePath.toFile(), "source").toPath(); @@ -109,6 +112,8 @@ public SipServiceHelper(MigrationProject project, Path filesBasePath) throws IOE premisLoggerFactory = new PremisLoggerFactoryImpl(); premisLoggerFactory.setPidMinter(pidMinter); fieldService = new CdmFieldService(); + exportObjectsService = new ExportObjectsService(); + exportObjectsService.setProject(project); indexService = new CdmIndexService(); indexService.setProject(project); indexService.setFieldService(fieldService); @@ -291,6 +296,8 @@ public void indexExportData(String descPath) throws Exception { } public void indexExportData(Path fieldsPath, String descPath) throws Exception { + CdmIndexOptions options = new CdmIndexOptions(); + options.setForce(true); Files.copy(fieldsPath, project.getFieldsPath(), REPLACE_EXISTING); Files.copy(Paths.get("src/test/resources/descriptions/" + descPath + "/index/description/desc.all"), CdmFileRetrievalService.getDescAllPath(project), REPLACE_EXISTING); @@ -308,7 +315,7 @@ public void indexExportData(Path fieldsPath, String descPath) throws Exception { }); } project.getProjectProperties().setExportedDate(Instant.now()); - indexService.createDatabase(true); + indexService.createDatabase(options); indexService.indexAll(); ProjectPropertiesSerialization.write(project); } diff --git a/src/test/resources/files/exported_objects.csv b/src/test/resources/files/exported_objects.csv new file mode 100644 index 00000000..19b0a037 --- /dev/null +++ b/src/test/resources/files/exported_objects.csv @@ -0,0 +1,4 @@ +dmrecord,file_path,filename +test-00001,src/test/resources/files/D2_035_Varners_DrugStore_interior.tif,D2_035_Varners_DrugStore_interior.tif +test-00002,src/test/resources/files/MJM_7_016_LumberMills_IndianCreekTrestle.tif,MJM_7_016_LumberMills_IndianCreekTrestle.tif +test-00003,src/test/resources/files/IMG_2377.jpeg,IMG_2377.jpeg \ No newline at end of file diff --git a/src/test/resources/files/more_fields.csv b/src/test/resources/files/more_fields.csv new file mode 100644 index 00000000..53a159fb --- /dev/null +++ b/src/test/resources/files/more_fields.csv @@ -0,0 +1,4 @@ +dmrecord,file_path,filename,file_type +test-00001,src/test/resources/files/D2_035_Varners_DrugStore_interior.tif,D2_035_Varners_DrugStore_interior.tif,tif +test-00002,src/test/resources/files/MJM_7_016_LumberMills_IndianCreekTrestle.tif,MJM_7_016_LumberMills_IndianCreekTrestle.tif,tif +test-00003,src/test/resources/files/IMG_2377.jpeg,IMG_2377.jpeg,jpeg \ No newline at end of file