From f66c92d5fffc60fc0fb594877e7eaa24cccfd3a8 Mon Sep 17 00:00:00 2001
From: krwong
Date: Wed, 31 Jul 2024 12:33:02 -0400
Subject: [PATCH] export_objects cmd and tests

---
Review notes (ignored by git am, not part of the commit message):
 - ExportObjectsCommand.call() no longer dereferences a null project when
   init() fails before the project is loaded.
 - ExportObjectsServiceTest.exportObjectsNoSourceFileTest now uses
   assertThrows, so it fails if no exception is thrown.
 - openMappingsPrinter Javadoc no longer claims null handling the code
   does not implement.
 - Generic type arguments restored (Callable<Integer>, List<ExportedObject>,
   List<CSVRecord>).
 - Blob ids on the index lines of the modified files predate these changes;
   regenerate the patch with git format-patch before publishing.

 .../unc/lib/boxc/migration/cdm/CLIMain.java   |   3 +-
 .../migration/cdm/ExportObjectsCommand.java   |  62 ++++++++++
 .../cdm/model/ExportObjectsInfo.java          |  59 +++++++++
 .../migration/cdm/model/MigrationProject.java |   8 ++
 .../cdm/services/ExportObjectsService.java    |  76 ++++++++++++
 .../migration/cdm/ExportObjectsCommandIT.java | 108 ++++++++++++++++
 .../services/ExportObjectsServiceTest.java    | 117 ++++++++++++++++++
 7 files changed, 432 insertions(+), 1 deletion(-)
 create mode 100644 src/main/java/edu/unc/lib/boxc/migration/cdm/ExportObjectsCommand.java
 create mode 100644 src/main/java/edu/unc/lib/boxc/migration/cdm/model/ExportObjectsInfo.java
 create mode 100644 src/main/java/edu/unc/lib/boxc/migration/cdm/services/ExportObjectsService.java
 create mode 100644 src/test/java/edu/unc/lib/boxc/migration/cdm/ExportObjectsCommandIT.java
 create mode 100644 src/test/java/edu/unc/lib/boxc/migration/cdm/services/ExportObjectsServiceTest.java

diff --git a/src/main/java/edu/unc/lib/boxc/migration/cdm/CLIMain.java b/src/main/java/edu/unc/lib/boxc/migration/cdm/CLIMain.java
index 677a7e12..8eef1f1d 100644
--- a/src/main/java/edu/unc/lib/boxc/migration/cdm/CLIMain.java
+++ b/src/main/java/edu/unc/lib/boxc/migration/cdm/CLIMain.java
@@ -40,7 +40,8 @@
         MigrationTypeReportCommand.class,
         FilterIndexCommand.class,
         AggregateFilesCommand.class,
-        PermissionsCommand.class
+        PermissionsCommand.class,
+        ExportObjectsCommand.class
     })
 public class CLIMain implements Callable<Integer> {
     @Option(names = { "-w", "--work-dir" },
diff --git a/src/main/java/edu/unc/lib/boxc/migration/cdm/ExportObjectsCommand.java b/src/main/java/edu/unc/lib/boxc/migration/cdm/ExportObjectsCommand.java
new file mode 100644
index 00000000..cd852672
--- /dev/null
+++ b/src/main/java/edu/unc/lib/boxc/migration/cdm/ExportObjectsCommand.java
@@ -0,0 +1,62 @@
+package edu.unc.lib.boxc.migration.cdm;
+
+import edu.unc.lib.boxc.migration.cdm.model.MigrationProject;
+import edu.unc.lib.boxc.migration.cdm.services.ExportObjectsService;
+import edu.unc.lib.boxc.migration.cdm.services.MigrationProjectFactory;
+import org.slf4j.Logger;
+import picocli.CommandLine.Command;
+import picocli.CommandLine.ParentCommand;
+
+import java.nio.file.Path;
+import java.util.concurrent.Callable;
+
+import static edu.unc.lib.boxc.migration.cdm.util.CLIConstants.outputLogger;
+import static org.slf4j.LoggerFactory.getLogger;
+
+/**
+ * Command which exports record ids, file paths and filenames from a project's source files mapping
+ * @author krwong
+ */
+@Command(name = "export_objects",
+        description = "Export record ids and filenames from a source_files.csv mapping.")
+public class ExportObjectsCommand implements Callable<Integer> {
+    private static final Logger log = getLogger(ExportObjectsCommand.class);
+
+    @ParentCommand
+    private CLIMain parentCommand;
+
+    private MigrationProject project;
+    private ExportObjectsService exportObjectsService;
+
+    /**
+     * Load the project from the parent command's working directory and set up the export service
+     * @throws Exception if the project cannot be loaded
+     */
+    public void init() throws Exception {
+        Path currentPath = parentCommand.getWorkingDirectory();
+        project = MigrationProjectFactory.loadMigrationProject(currentPath);
+        exportObjectsService = new ExportObjectsService();
+        exportObjectsService.setProject(project);
+    }
+
+    /**
+     * @return 0 if the export completed, 1 if it failed
+     */
+    @Override
+    public Integer call() {
+        long start = System.nanoTime();
+        try {
+            init();
+            exportObjectsService.exportFilesystemObjects();
+            outputLogger.info("Export objects in project {} in {}s", project.getProjectName(),
+                    (System.nanoTime() - start) / 1e9);
+            return 0;
+        } catch (Exception e) {
+            // init() may fail before the project is loaded, so project can still be null here
+            String projectName = project == null ? "(project not loaded)" : project.getProjectName();
+            log.error("Failed to export objects in {}", projectName, e);
+            outputLogger.info("Failed to export objects in {}: {}", projectName, e.getMessage());
+            return 1;
+        }
+    }
+}
diff --git a/src/main/java/edu/unc/lib/boxc/migration/cdm/model/ExportObjectsInfo.java b/src/main/java/edu/unc/lib/boxc/migration/cdm/model/ExportObjectsInfo.java
new file mode 100644
index 00000000..9d493b37
--- /dev/null
+++ b/src/main/java/edu/unc/lib/boxc/migration/cdm/model/ExportObjectsInfo.java
@@ -0,0 +1,59 @@
+package edu.unc.lib.boxc.migration.cdm.model;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Exported objects info for a project
+ * @author krwong
+ */
+public class ExportObjectsInfo {
+    public static final String RECORD_ID = "record_id";
+    public static final String FILE_PATH = "file_path";
+    public static final String FILENAME = "filename";
+    public static final String[] CSV_HEADERS = new String[] {RECORD_ID, FILE_PATH, FILENAME};
+
+    private List<ExportedObject> objects;
+
+    public ExportObjectsInfo() {
+        objects = new ArrayList<>();
+    }
+
+    public List<ExportedObject> getObjects() {
+        return objects;
+    }
+
+    public void setObjects(List<ExportedObject> objects) {
+        this.objects = objects;
+    }
+
+    public static class ExportedObject {
+        private String recordId;
+        private String filePath;
+        private String filename;
+
+        public String getRecordId() {
+            return recordId;
+        }
+
+        public void setRecordId(String recordId) {
+            this.recordId = recordId;
+        }
+
+        public String getFilePath() {
+            return filePath;
+        }
+
+        public void setFilePath(String filePath) {
+            this.filePath = filePath;
+        }
+
+        public String getFilename() {
+            return filename;
+        }
+
+        public void setFilename(String filename) {
+            this.filename = filename;
+        }
+    }
+}
diff --git a/src/main/java/edu/unc/lib/boxc/migration/cdm/model/MigrationProject.java b/src/main/java/edu/unc/lib/boxc/migration/cdm/model/MigrationProject.java
index 694e87c4..52941a77 100644
--- a/src/main/java/edu/unc/lib/boxc/migration/cdm/model/MigrationProject.java
+++ b/src/main/java/edu/unc/lib/boxc/migration/cdm/model/MigrationProject.java
@@ -27,6 +27,7 @@ public class MigrationProject {
     public static final String PERMISSIONS_FILENAME = "patron_permissions.csv";
     public static final String PROJECT_SOURCE_CDM = "cdm";
     public static final String PROJECT_SOURCE_FILES = "files";
+    public static final String EXPORT_OBJECTS_FILENAME = "exported_objects.csv";
 
     private Path projectPath;
     private MigrationProjectProperties properties;
@@ -172,4 +173,11 @@ public Path getPostMigrationReportPath() {
     public Path getPermissionsPath() {
         return projectPath.resolve(PERMISSIONS_FILENAME);
     }
+
+    /**
+     * @return Path of the exported objects file
+     */
+    public Path getExportObjectsPath() {
+        return projectPath.resolve(EXPORT_OBJECTS_FILENAME);
+    }
 }
diff --git a/src/main/java/edu/unc/lib/boxc/migration/cdm/services/ExportObjectsService.java b/src/main/java/edu/unc/lib/boxc/migration/cdm/services/ExportObjectsService.java
new file mode 100644
index 00000000..f80fae0d
--- /dev/null
+++ b/src/main/java/edu/unc/lib/boxc/migration/cdm/services/ExportObjectsService.java
@@ -0,0 +1,76 @@
+package edu.unc.lib.boxc.migration.cdm.services;
+
+import edu.unc.lib.boxc.migration.cdm.exceptions.InvalidProjectStateException;
+import edu.unc.lib.boxc.migration.cdm.model.ExportObjectsInfo;
+import edu.unc.lib.boxc.migration.cdm.model.MigrationProject;
+import edu.unc.lib.boxc.migration.cdm.model.MigrationProjectProperties;
+import org.apache.commons.csv.CSVFormat;
+import org.apache.commons.csv.CSVPrinter;
+import org.apache.commons.csv.CSVRecord;
+import org.apache.commons.io.FilenameUtils;
+import org.slf4j.Logger;
+
+import java.io.BufferedWriter;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+
+import static org.slf4j.LoggerFactory.getLogger;
+
+/**
+ * Service for generating exported_objects.csv
+ * @author krwong
+ */
+public class ExportObjectsService {
+    private static final Logger log = getLogger(ExportObjectsService.class);
+
+    private MigrationProject project;
+
+    /**
+     * Export objects from filesystem source_files.csv mapping
+     * @throws Exception if the project state is invalid or the mappings cannot be read or written
+     */
+    public void exportFilesystemObjects() throws Exception {
+        validateProjectState();
+        var sourcePath = project.getSourceFilesMappingPath();
+        var exportObjectPath = getExportedObjectsPath();
+
+        // Simultaneously read from the source_files mapping and write to the exported_objects.csv
+        try (
+                var sourceFilesParser = SourceFileService.openMappingsParser(sourcePath);
+                var exportObjectsPrinter = openMappingsPrinter(exportObjectPath);
+        ) {
+            for (CSVRecord sourceFileRecord : sourceFilesParser) {
+                String id = sourceFileRecord.get(0);
+                String filePath = sourceFileRecord.get(2);
+                String filename = FilenameUtils.getName(filePath);
+                exportObjectsPrinter.printRecord(id, filePath, filename);
+            }
+        }
+    }
+
+    private void validateProjectState() {
+        MigrationProjectProperties props = project.getProjectProperties();
+        if (props.getSourceFilesUpdatedDate() == null) {
+            throw new InvalidProjectStateException("Source files must be mapped");
+        }
+    }
+
+    /**
+     * @param mappingPath Path the CSV will be written to, must not be null
+     * @return CSVPrinter for writing to specified destination
+     * @throws IOException if the file cannot be opened for writing
+     */
+    public static CSVPrinter openMappingsPrinter(Path mappingPath) throws IOException {
+        BufferedWriter writer = Files.newBufferedWriter(mappingPath);
+        return new CSVPrinter(writer, CSVFormat.DEFAULT.withHeader(ExportObjectsInfo.CSV_HEADERS));
+    }
+
+    public Path getExportedObjectsPath() {
+        return project.getExportObjectsPath();
+    }
+
+    public void setProject(MigrationProject project) {
+        this.project = project;
+    }
+}
diff --git a/src/test/java/edu/unc/lib/boxc/migration/cdm/ExportObjectsCommandIT.java b/src/test/java/edu/unc/lib/boxc/migration/cdm/ExportObjectsCommandIT.java
new file mode 100644
index 00000000..b3c89f6e
--- /dev/null
+++ b/src/test/java/edu/unc/lib/boxc/migration/cdm/ExportObjectsCommandIT.java
@@ -0,0 +1,108 @@
+package edu.unc.lib.boxc.migration.cdm;
+
+import edu.unc.lib.boxc.migration.cdm.model.ExportObjectsInfo;
+import edu.unc.lib.boxc.migration.cdm.model.SourceFilesInfo;
+import edu.unc.lib.boxc.migration.cdm.services.ExportObjectsService;
+import edu.unc.lib.boxc.migration.cdm.services.MigrationProjectFactory;
+import edu.unc.lib.boxc.migration.cdm.test.BxcEnvironmentHelper;
+import edu.unc.lib.boxc.migration.cdm.util.ProjectPropertiesSerialization;
+import org.apache.commons.csv.CSVFormat;
+import org.apache.commons.csv.CSVParser;
+import org.apache.commons.csv.CSVRecord;
+import org.apache.commons.io.FileUtils;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+
+import java.io.IOException;
+import java.io.Reader;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.time.Instant;
+import java.util.Arrays;
+import java.util.List;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertIterableEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+public class ExportObjectsCommandIT extends AbstractCommandIT {
+    private static final String PROJECT_NAME = "proj";
+    private ExportObjectsService exportObjectsService;
+
+    @BeforeEach
+    public void setup() throws Exception {
+        project = MigrationProjectFactory.createCdmMigrationProject(
+                tmpFolder, PROJECT_NAME, null, "user",
+                null, BxcEnvironmentHelper.DEFAULT_ENV_ID);
+        exportObjectsService = new ExportObjectsService();
+    }
+
+    @Test
+    public void exportObjectsNoSourceFileTest() throws Exception {
+        String[] args = new String[] {
+                "-w", project.getProjectPath().toString(),
+                "export_objects"
+        };
+
+        executeExpectFailure(args);
+        assertOutputContains("Failed to export objects in proj");
+    }
+
+    @Test
+    public void exportObjectsTest() throws Exception {
+        writeSourceCsv(sourceMappingBody("testid,," + filesystemSourceFile("IMG_2377.jpeg") + ",",
+                "test-00001,," + filesystemSourceFile("D2_035_Varners_DrugStore_interior.tif") + ",",
+                "test-00002,," + filesystemSourceFile("MJM_7_016_LumberMills_IndianCreekTrestle.tif") + ","));
+        project.getProjectProperties().setSourceFilesUpdatedDate(Instant.now());
+        ProjectPropertiesSerialization.write(project);
+
+        String[] args = new String[] {
+                "-w", project.getProjectPath().toString(),
+                "export_objects"
+        };
+        executeExpectSuccess(args);
+
+        Path exportedObjectsPath = project.getExportObjectsPath();
+        assertTrue(Files.exists(exportedObjectsPath));
+        List<CSVRecord> rows = listCsvRecords(exportedObjectsPath);
+        assertEquals(3, rows.size());
+        assertIterableEquals(Arrays.asList("testid", "src/test/resources/files/IMG_2377.jpeg",
+                "IMG_2377.jpeg"), rows.get(0));
+        assertIterableEquals(Arrays.asList("test-00001",
+                "src/test/resources/files/D2_035_Varners_DrugStore_interior.tif",
+                "D2_035_Varners_DrugStore_interior.tif"), rows.get(1));
+        assertIterableEquals(Arrays.asList("test-00002",
+                "src/test/resources/files/MJM_7_016_LumberMills_IndianCreekTrestle.tif",
+                "MJM_7_016_LumberMills_IndianCreekTrestle.tif"), rows.get(2));
+    }
+
+    private String sourceMappingBody(String... rows) {
+        return String.join(",", SourceFilesInfo.CSV_HEADERS) + "\n"
+                + String.join("\n", rows);
+    }
+
+    private void writeSourceCsv(String mappingBody) throws IOException {
+        FileUtils.write(project.getSourceFilesMappingPath().toFile(),
+                mappingBody, StandardCharsets.UTF_8);
+    }
+
+    private Path filesystemSourceFile(String relPath) {
+        Path basePath = Path.of("src/test/resources/files");
+        return basePath.resolve(relPath);
+    }
+
+    private List<CSVRecord> listCsvRecords(Path exportedObjectsPath) throws Exception {
+        List<CSVRecord> rows;
+        try (
+                Reader reader = Files.newBufferedReader(exportedObjectsPath);
+                CSVParser csvParser = new CSVParser(reader, CSVFormat.DEFAULT
+                        .withFirstRecordAsHeader()
+                        .withHeader(ExportObjectsInfo.CSV_HEADERS)
+                        .withTrim());
+        ) {
+            rows = csvParser.getRecords();
+        }
+        return rows;
+    }
+}
diff --git a/src/test/java/edu/unc/lib/boxc/migration/cdm/services/ExportObjectsServiceTest.java b/src/test/java/edu/unc/lib/boxc/migration/cdm/services/ExportObjectsServiceTest.java
new file mode 100644
index 00000000..6262f602
--- /dev/null
+++ b/src/test/java/edu/unc/lib/boxc/migration/cdm/services/ExportObjectsServiceTest.java
@@ -0,0 +1,117 @@
+package edu.unc.lib.boxc.migration.cdm.services;
+
+import edu.unc.lib.boxc.migration.cdm.exceptions.InvalidProjectStateException;
+import edu.unc.lib.boxc.migration.cdm.model.ExportObjectsInfo;
+import edu.unc.lib.boxc.migration.cdm.model.MigrationProject;
+import edu.unc.lib.boxc.migration.cdm.model.SourceFilesInfo;
+import edu.unc.lib.boxc.migration.cdm.test.BxcEnvironmentHelper;
+import edu.unc.lib.boxc.migration.cdm.util.ProjectPropertiesSerialization;
+import org.apache.commons.csv.CSVFormat;
+import org.apache.commons.csv.CSVParser;
+import org.apache.commons.csv.CSVRecord;
+import org.apache.commons.io.FileUtils;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.io.TempDir;
+
+import java.io.IOException;
+import java.io.Reader;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.time.Instant;
+import java.util.Arrays;
+import java.util.List;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertIterableEquals;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+import static org.mockito.MockitoAnnotations.openMocks;
+
+public class ExportObjectsServiceTest {
+    private static final String PROJECT_NAME = "proj";
+    private AutoCloseable closeable;
+    @TempDir
+    public Path tmpFolder;
+
+    private MigrationProject project;
+    private ExportObjectsService service;
+
+    @BeforeEach
+    public void setup() throws Exception {
+        closeable = openMocks(this);
+        project = MigrationProjectFactory.createCdmMigrationProject(
+                tmpFolder, PROJECT_NAME, null, "user",
+                null, BxcEnvironmentHelper.DEFAULT_ENV_ID);
+        service = new ExportObjectsService();
+        service.setProject(project);
+    }
+
+    @AfterEach
+    void closeService() throws Exception {
+        closeable.close();
+    }
+
+    @Test
+    public void exportObjectsNoSourceFileTest() throws Exception {
+        // Must fail: the source files updated date has not been set on this project
+        InvalidProjectStateException e = assertThrows(InvalidProjectStateException.class,
+                () -> service.exportFilesystemObjects());
+        assertTrue(e.getMessage().contains("Source files must be mapped"));
+    }
+
+    @Test
+    public void exportObjectsTest() throws Exception {
+        writeSourceCsv(sourceMappingBody("testid,," + filesystemSourceFile("IMG_2377.jpeg") + ",",
+                "test-00001,," + filesystemSourceFile("D2_035_Varners_DrugStore_interior.tif") + ",",
+                "test-00002,," + filesystemSourceFile("MJM_7_016_LumberMills_IndianCreekTrestle.tif") + ","));
+        project.getProjectProperties().setSourceFilesUpdatedDate(Instant.now());
+        ProjectPropertiesSerialization.write(project);
+        Path exportedObjectsPath = project.getExportObjectsPath();
+
+        service.exportFilesystemObjects();
+
+        assertTrue(Files.exists(exportedObjectsPath));
+        List<CSVRecord> rows = listCsvRecords(exportedObjectsPath);
+        assertEquals(3, rows.size());
+        assertIterableEquals(Arrays.asList("testid", "src/test/resources/files/IMG_2377.jpeg",
+                "IMG_2377.jpeg"), rows.get(0));
+        assertIterableEquals(Arrays.asList("test-00001",
+                "src/test/resources/files/D2_035_Varners_DrugStore_interior.tif",
+                "D2_035_Varners_DrugStore_interior.tif"), rows.get(1));
+        assertIterableEquals(Arrays.asList("test-00002",
+                "src/test/resources/files/MJM_7_016_LumberMills_IndianCreekTrestle.tif",
+                "MJM_7_016_LumberMills_IndianCreekTrestle.tif"), rows.get(2));
+    }
+
+    private String sourceMappingBody(String... rows) {
+        return String.join(",", SourceFilesInfo.CSV_HEADERS) + "\n"
+                + String.join("\n", rows);
+    }
+
+    private void writeSourceCsv(String mappingBody) throws IOException {
+        FileUtils.write(project.getSourceFilesMappingPath().toFile(),
+                mappingBody, StandardCharsets.UTF_8);
+    }
+
+    private Path filesystemSourceFile(String relPath) {
+        Path basePath = Path.of("src/test/resources/files");
+        return basePath.resolve(relPath);
+    }
+
+    private List<CSVRecord> listCsvRecords(Path exportedObjectsPath) throws Exception {
+        List<CSVRecord> rows;
+        try (
+                Reader reader = Files.newBufferedReader(exportedObjectsPath);
+                CSVParser csvParser = new CSVParser(reader, CSVFormat.DEFAULT
+                        .withFirstRecordAsHeader()
+                        .withHeader(ExportObjectsInfo.CSV_HEADERS)
+                        .withTrim());
+        ) {
+            rows = csvParser.getRecords();
+        }
+        return rows;
+    }
+}