diff --git a/opencga-app/src/main/java/org/opencb/opencga/app/migrations/v3/v3_5_0/MinimiseFileDataInJob_TASK_7358.java b/opencga-app/src/main/java/org/opencb/opencga/app/migrations/v3/v3_5_0/MinimiseFileDataInJob_TASK_7358.java
new file mode 100644
index 0000000000..0726092abb
--- /dev/null
+++ b/opencga-app/src/main/java/org/opencb/opencga/app/migrations/v3/v3_5_0/MinimiseFileDataInJob_TASK_7358.java
@@ -0,0 +1,93 @@
+package org.opencb.opencga.app.migrations.v3.v3_5_0;
+
+import com.mongodb.client.model.Filters;
+import com.mongodb.client.model.Projections;
+import com.mongodb.client.model.UpdateOneModel;
+import org.bson.Document;
+import org.bson.conversions.Bson;
+import org.opencb.opencga.catalog.db.api.FileDBAdaptor;
+import org.opencb.opencga.catalog.db.mongodb.MongoDBAdaptor;
+import org.opencb.opencga.catalog.db.mongodb.OrganizationMongoDBAdaptorFactory;
+import org.opencb.opencga.catalog.migration.Migration;
+import org.opencb.opencga.catalog.migration.MigrationTool;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+/**
+ * Catalog migration that rewrites the file documents stored under
+ * {@code attributes._opencga.deletedInputFiles} and {@code attributes._opencga.deletedOutputFiles}
+ * of job documents, keeping only a fixed subset of fields of each file (see getReducedFileDocument).
+ * <p>
+ * NOTE(review): JobMongoDBAdaptor#removeFileReferences also stores a file document under
+ * Constants.JOB_DELETED_OUTPUT_DIRECTORY; this migration leaves that entry untouched - confirm
+ * whether it should be minimised as well.
+ */
+@Migration(id = "minimize_file_data_in_job_7358",
+        description = "Minimize file data in Job #7358", version = "3.5.0",
+        language = Migration.MigrationLanguage.JAVA, domain = Migration.MigrationDomain.CATALOG, date = 20250224)
+public class MinimiseFileDataInJob_TASK_7358 extends MigrationTool {
+
+    private static final String OPENCGA_ATTRIBUTES_PREFIX = "attributes._opencga.";
+    private static final String DELETED_INPUT_FILES = "deletedInputFiles";
+    private static final String DELETED_OUTPUT_FILES = "deletedOutputFiles";
+
+    @Override
+    protected void run() throws Exception {
+        // Select only jobs where at least one stored deleted file entry still has an "attributes" field
+        Bson query = Filters.or(
+                Filters.exists(OPENCGA_ATTRIBUTES_PREFIX + DELETED_INPUT_FILES + ".attributes", true),
+                Filters.exists(OPENCGA_ATTRIBUTES_PREFIX + DELETED_OUTPUT_FILES + ".attributes", true)
+        );
+        Bson projection = Projections.include("uid", "attributes");
+        migrateCollection(Arrays.asList(OrganizationMongoDBAdaptorFactory.JOB_COLLECTION,
+                        OrganizationMongoDBAdaptorFactory.DELETED_JOB_COLLECTION),
+                query, projection, (document, bulk) -> {
+                    MongoDBAdaptor.UpdateDocument updateDocument = new MongoDBAdaptor.UpdateDocument();
+                    Document ocgaAttributes = document.get("attributes", Document.class).get("_opencga", Document.class);
+
+                    minimiseFileList(ocgaAttributes, DELETED_INPUT_FILES, updateDocument);
+                    minimiseFileList(ocgaAttributes, DELETED_OUTPUT_FILES, updateDocument);
+
+                    bulk.add(new UpdateOneModel<>(Filters.eq("_id", document.get("_id")), updateDocument.toFinalUpdateDocument()));
+                });
+    }
+
+    /**
+     * Registers the replacement of one deleted-files list by its minimised version.
+     *
+     * @param ocgaAttributes content of the job's {@code attributes._opencga} document.
+     * @param field          name of the list inside {@code ocgaAttributes}; nothing is done when it is absent.
+     * @param updateDocument update under construction; the reduced list is put into its {@code getSet()} map.
+     */
+    private void minimiseFileList(Document ocgaAttributes, String field, MongoDBAdaptor.UpdateDocument updateDocument) {
+        List<Document> files = ocgaAttributes.getList(field, Document.class);
+        if (files == null) {
+            return;
+        }
+        List<Document> reducedFiles = new ArrayList<>(files.size());
+        for (Document file : files) {
+            reducedFiles.add(getReducedFileDocument(file));
+        }
+        updateDocument.getSet().put(OPENCGA_ATTRIBUTES_PREFIX + field, reducedFiles);
+    }
+
+    /**
+     * Builds the minimal representation of a file that is kept inside a job.
+     *
+     * @param file full file document.
+     * @return new document holding only the ID, UUID, PATH, URI, TYPE, FORMAT and BIOFORMAT entries of {@code file}.
+     */
+    private Document getReducedFileDocument(Document file) {
+        return new Document()
+                .append(FileDBAdaptor.QueryParams.ID.key(), file.get(FileDBAdaptor.QueryParams.ID.key()))
+                .append(FileDBAdaptor.QueryParams.UUID.key(), file.get(FileDBAdaptor.QueryParams.UUID.key()))
+                .append(FileDBAdaptor.QueryParams.PATH.key(), file.get(FileDBAdaptor.QueryParams.PATH.key()))
+                .append(FileDBAdaptor.QueryParams.URI.key(), file.get(FileDBAdaptor.QueryParams.URI.key()))
+                .append(FileDBAdaptor.QueryParams.TYPE.key(), file.get(FileDBAdaptor.QueryParams.TYPE.key()))
+                .append(FileDBAdaptor.QueryParams.FORMAT.key(), file.get(FileDBAdaptor.QueryParams.FORMAT.key()))
+                .append(FileDBAdaptor.QueryParams.BIOFORMAT.key(), file.get(FileDBAdaptor.QueryParams.BIOFORMAT.key()));
+    }
+
+}
diff --git 
a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/mongodb/JobMongoDBAdaptor.java b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/mongodb/JobMongoDBAdaptor.java index d89e0020f9..d7dd0fea58 100644 --- a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/mongodb/JobMongoDBAdaptor.java +++ b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/mongodb/JobMongoDBAdaptor.java @@ -740,6 +740,15 @@ public void forEach(Query query, Consumer action, QueryOptions o void removeFileReferences(ClientSession clientSession, long studyUid, long fileUid, Document file) { UpdateDocument document = new UpdateDocument(); + Document fileCopy = new Document() + .append(FileDBAdaptor.QueryParams.ID.key(), file.get(FileDBAdaptor.QueryParams.ID.key())) + .append(FileDBAdaptor.QueryParams.UUID.key(), file.get(FileDBAdaptor.QueryParams.UUID.key())) + .append(FileDBAdaptor.QueryParams.PATH.key(), file.get(FileDBAdaptor.QueryParams.PATH.key())) + .append(FileDBAdaptor.QueryParams.URI.key(), file.get(FileDBAdaptor.QueryParams.URI.key())) + .append(FileDBAdaptor.QueryParams.TYPE.key(), file.get(FileDBAdaptor.QueryParams.TYPE.key())) + .append(FileDBAdaptor.QueryParams.FORMAT.key(), file.get(FileDBAdaptor.QueryParams.FORMAT.key())) + .append(FileDBAdaptor.QueryParams.BIOFORMAT.key(), file.get(FileDBAdaptor.QueryParams.BIOFORMAT.key())); + String prefix = QueryParams.ATTRIBUTES.key() + "." 
+ Constants.PRIVATE_OPENCGA_ATTRIBUTES + "."; // INPUT @@ -748,7 +757,7 @@ void removeFileReferences(ClientSession clientSession, long studyUid, long fileU .append(QueryParams.INPUT_UID.key(), fileUid); document.getPullAll().put(QueryParams.INPUT.key(), Collections.singletonList(new Document(FileDBAdaptor.QueryParams.UID.key(), fileUid))); - document.getPush().put(prefix + Constants.JOB_DELETED_INPUT_FILES, file); + document.getPush().put(prefix + Constants.JOB_DELETED_INPUT_FILES, fileCopy); Document updateDocument = document.toFinalUpdateDocument(); logger.debug("Removing file from job '{}' field. Query: {}, Update: {}", QueryParams.INPUT.key(), query.toBsonDocument(), @@ -763,7 +772,7 @@ void removeFileReferences(ClientSession clientSession, long studyUid, long fileU document = new UpdateDocument(); document.getPullAll().put(QueryParams.OUTPUT.key(), Collections.singletonList(new Document(FileDBAdaptor.QueryParams.UID.key(), fileUid))); - document.getPush().put(prefix + Constants.JOB_DELETED_OUTPUT_FILES, file); + document.getPush().put(prefix + Constants.JOB_DELETED_OUTPUT_FILES, fileCopy); updateDocument = document.toFinalUpdateDocument(); logger.debug("Removing file from job '{}' field. Query: {}, Update: {}", QueryParams.OUTPUT.key(), query.toBsonDocument(), @@ -777,7 +786,7 @@ void removeFileReferences(ClientSession clientSession, long studyUid, long fileU .append(QueryParams.OUT_DIR_UID.key(), fileUid); document = new UpdateDocument(); document.getSet().put(QueryParams.OUT_DIR.key(), new Document(FileDBAdaptor.QueryParams.UID.key(), -1)); - document.getSet().put(prefix + Constants.JOB_DELETED_OUTPUT_DIRECTORY, file); + document.getSet().put(prefix + Constants.JOB_DELETED_OUTPUT_DIRECTORY, fileCopy); updateDocument = document.toFinalUpdateDocument(); logger.debug("Removing file from job '{}' field. 
Query: {}, Update: {}", QueryParams.OUT_DIR.key(), query.toBsonDocument(),
diff --git a/opencga-catalog/src/test/java/org/opencb/opencga/catalog/managers/FileManagerTest.java b/opencga-catalog/src/test/java/org/opencb/opencga/catalog/managers/FileManagerTest.java
index 5e655998ab..a1081a6f8e 100644
--- a/opencga-catalog/src/test/java/org/opencb/opencga/catalog/managers/FileManagerTest.java
+++ b/opencga-catalog/src/test/java/org/opencb/opencga/catalog/managers/FileManagerTest.java
@@ -50,6 +50,7 @@
 import org.opencb.opencga.core.models.family.Family;
 import org.opencb.opencga.core.models.file.*;
 import org.opencb.opencga.core.models.individual.Individual;
+import org.opencb.opencga.core.models.job.Job;
 import org.opencb.opencga.core.models.panel.Panel;
 import org.opencb.opencga.core.models.sample.Sample;
 import org.opencb.opencga.core.models.study.*;
@@ -1911,6 +1912,41 @@ public void removeFileReferencesFromSamplesOnFileDeleteTest() throws CatalogExce
         fileManager.get(studyFqn, fileId, QueryOptions.empty(), ownerToken).first();
     }
 
+    /**
+     * Deleting a file used as job input must empty the job's input list and leave a single reduced
+     * copy of the file (without its "attributes" entry) under the job's private OpenCGA attributes.
+     */
+    @Test
+    public void removeFileReferencesFromJobOnFileDeleteTest() throws CatalogException {
+        String fileId = "data:test:folder:test_0.1K.png";
+        File file = fileManager.get(studyFqn, fileId, QueryOptions.empty(), ownerToken).first();
+        assertFalse(file.getSampleIds().isEmpty());
+        assertEquals(5, file.getSampleIds().size());
+
+        Job job = new Job()
+                .setId("jobId")
+                .setInput(Collections.singletonList(file));
+        job = catalogManager.getJobManager().create(studyFqn, job, INCLUDE_RESULT, ownerToken).first();
+        assertEquals(1, job.getInput().size());
+        assertEquals(fileId, job.getInput().get(0).getId());
+        assertNotNull(job.getInput().get(0).getFormat());
+        assertTrue(job.getAttributes().isEmpty());
+
+        setToPendingDelete(studyFqn, new Query(FileDBAdaptor.QueryParams.ID.key(), fileId));
+        fileManager.delete(studyFqn, Collections.singletonList(fileId), new QueryOptions(Constants.SKIP_TRASH, true), ownerToken);
+
+        Job job1 = catalogManager.getJobManager().get(studyFqn, job.getId(), QueryOptions.empty(), ownerToken).first();
+        assertEquals(0, job1.getInput().size());
+        assertFalse(job1.getAttributes().isEmpty());
+
+        Map<String, Object> opencgaAttributes = (Map<String, Object>) job1.getAttributes().get(Constants.PRIVATE_OPENCGA_ATTRIBUTES);
+        List<Map<String, Object>> deletedInputFiles = (List<Map<String, Object>>) opencgaAttributes.get(Constants.JOB_DELETED_INPUT_FILES);
+        assertEquals(1, deletedInputFiles.size());
+        assertEquals(fileId, deletedInputFiles.get(0).get("id"));
+        assertEquals(file.getFormat().name(), deletedInputFiles.get(0).get("format"));
+        assertFalse(deletedInputFiles.get(0).containsKey("attributes"));
+    }
+
     @Test
     public void testDeleteLeafFolder() throws CatalogException, IOException {