From 149577b9f6af15e8341867f6879cd7a2b05022e7 Mon Sep 17 00:00:00 2001 From: JuanfeSanahuja Date: Wed, 29 May 2024 16:33:31 +0200 Subject: [PATCH 001/122] Prepare new development version 3.0.0.2-SNAPSHOT --- opencga-analysis/pom.xml | 2 +- opencga-app/pom.xml | 2 +- opencga-catalog/pom.xml | 2 +- opencga-client/pom.xml | 2 +- opencga-clinical/pom.xml | 2 +- opencga-core/pom.xml | 2 +- opencga-master/pom.xml | 2 +- opencga-server/pom.xml | 2 +- opencga-storage/opencga-storage-app/pom.xml | 2 +- opencga-storage/opencga-storage-benchmark/pom.xml | 2 +- opencga-storage/opencga-storage-core/pom.xml | 2 +- .../opencga-storage-hadoop-compat-api/pom.xml | 2 +- .../opencga-storage-hadoop-compat-hbase2.0/pom.xml | 2 +- .../opencga-storage-hadoop-compat-hbase2.2/pom.xml | 2 +- .../opencga-storage-hadoop-compat-hbase2.4/pom.xml | 2 +- .../opencga-storage-hadoop-compat/pom.xml | 2 +- .../opencga-storage-hadoop/opencga-storage-hadoop-core/pom.xml | 2 +- .../opencga-storage-hadoop-lib-emr6.1/pom.xml | 2 +- .../opencga-storage-hadoop-lib-emr6.13/pom.xml | 2 +- .../opencga-storage-hadoop-lib-hdi5.1/pom.xml | 2 +- .../opencga-storage-hadoop-lib-hdp3.1/pom.xml | 2 +- .../opencga-storage-hadoop/opencga-storage-hadoop-lib/pom.xml | 2 +- opencga-storage/opencga-storage-hadoop/pom.xml | 2 +- opencga-storage/opencga-storage-server/pom.xml | 2 +- opencga-storage/pom.xml | 2 +- opencga-test/pom.xml | 2 +- pom.xml | 2 +- 27 files changed, 27 insertions(+), 27 deletions(-) diff --git a/opencga-analysis/pom.xml b/opencga-analysis/pom.xml index f360b51b82..eb4825ee88 100644 --- a/opencga-analysis/pom.xml +++ b/opencga-analysis/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 3.0.0.1 + 3.0.0.2-SNAPSHOT ../pom.xml diff --git a/opencga-app/pom.xml b/opencga-app/pom.xml index a211147e29..92d7e27fc3 100644 --- a/opencga-app/pom.xml +++ b/opencga-app/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 3.0.0.1 + 3.0.0.2-SNAPSHOT ../pom.xml diff --git a/opencga-catalog/pom.xml 
b/opencga-catalog/pom.xml index 4b0827a105..dba2d4ff1a 100644 --- a/opencga-catalog/pom.xml +++ b/opencga-catalog/pom.xml @@ -23,7 +23,7 @@ org.opencb.opencga opencga - 3.0.0.1 + 3.0.0.2-SNAPSHOT ../pom.xml diff --git a/opencga-client/pom.xml b/opencga-client/pom.xml index b86efb944d..8a221b55fe 100644 --- a/opencga-client/pom.xml +++ b/opencga-client/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 3.0.0.1 + 3.0.0.2-SNAPSHOT ../pom.xml diff --git a/opencga-clinical/pom.xml b/opencga-clinical/pom.xml index ef56059fcb..f2ea05a986 100644 --- a/opencga-clinical/pom.xml +++ b/opencga-clinical/pom.xml @@ -5,7 +5,7 @@ org.opencb.opencga opencga - 3.0.0.1 + 3.0.0.2-SNAPSHOT ../pom.xml 4.0.0 diff --git a/opencga-core/pom.xml b/opencga-core/pom.xml index 8942fdd2d1..acaffb174f 100644 --- a/opencga-core/pom.xml +++ b/opencga-core/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 3.0.0.1 + 3.0.0.2-SNAPSHOT ../pom.xml diff --git a/opencga-master/pom.xml b/opencga-master/pom.xml index fb06b5d2b9..d0d14e9506 100644 --- a/opencga-master/pom.xml +++ b/opencga-master/pom.xml @@ -22,7 +22,7 @@ opencga org.opencb.opencga - 3.0.0.1 + 3.0.0.2-SNAPSHOT ../pom.xml diff --git a/opencga-server/pom.xml b/opencga-server/pom.xml index 846323d417..f93f5ffd04 100644 --- a/opencga-server/pom.xml +++ b/opencga-server/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 3.0.0.1 + 3.0.0.2-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-app/pom.xml b/opencga-storage/opencga-storage-app/pom.xml index 2715a9b06a..fd9869835d 100644 --- a/opencga-storage/opencga-storage-app/pom.xml +++ b/opencga-storage/opencga-storage-app/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage - 3.0.0.1 + 3.0.0.2-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-benchmark/pom.xml b/opencga-storage/opencga-storage-benchmark/pom.xml index 896868defd..640a4a655e 100644 --- a/opencga-storage/opencga-storage-benchmark/pom.xml +++ 
b/opencga-storage/opencga-storage-benchmark/pom.xml @@ -22,7 +22,7 @@ opencga-storage org.opencb.opencga - 3.0.0.1 + 3.0.0.2-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-core/pom.xml b/opencga-storage/opencga-storage-core/pom.xml index b62ec3d581..74320f628a 100644 --- a/opencga-storage/opencga-storage-core/pom.xml +++ b/opencga-storage/opencga-storage-core/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage - 3.0.0.1 + 3.0.0.2-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-api/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-api/pom.xml index 35d022c328..e2f710f5e7 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-api/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-api/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage-hadoop-compat - 3.0.0.1 + 3.0.0.2-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.0/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.0/pom.xml index 48b48e0a40..614ad7332c 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.0/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.0/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage-hadoop-compat - 3.0.0.1 + 3.0.0.2-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.2/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.2/pom.xml index 
b2ad7976f6..f768ed47aa 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.2/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.2/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage-hadoop-compat - 3.0.0.1 + 3.0.0.2-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.4/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.4/pom.xml index e10b7910ee..75f6adc95e 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.4/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.4/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage-hadoop-compat - 3.0.0.1 + 3.0.0.2-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/pom.xml index 87a073c4d5..46993e19f5 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/pom.xml @@ -23,7 +23,7 @@ org.opencb.opencga opencga-storage-hadoop - 3.0.0.1 + 3.0.0.2-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/pom.xml index 8da62fe6c4..bf1c0dc3a0 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/pom.xml @@ -23,7 +23,7 @@ org.opencb.opencga opencga-storage-hadoop - 3.0.0.1 + 3.0.0.2-SNAPSHOT ../pom.xml diff --git 
a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-emr6.1/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-emr6.1/pom.xml index 298103a6ce..87b41711fa 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-emr6.1/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-emr6.1/pom.xml @@ -7,7 +7,7 @@ org.opencb.opencga opencga-storage-hadoop-lib - 3.0.0.1 + 3.0.0.2-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-emr6.13/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-emr6.13/pom.xml index 1062a0712d..2bb2e0fea4 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-emr6.13/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-emr6.13/pom.xml @@ -7,7 +7,7 @@ org.opencb.opencga opencga-storage-hadoop-lib - 3.0.0.1 + 3.0.0.2-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-hdi5.1/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-hdi5.1/pom.xml index 392730030f..bb429c443f 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-hdi5.1/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-hdi5.1/pom.xml @@ -7,7 +7,7 @@ org.opencb.opencga opencga-storage-hadoop-lib - 3.0.0.1 + 3.0.0.2-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-hdp3.1/pom.xml 
b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-hdp3.1/pom.xml index 608cab8886..e029d3b5bf 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-hdp3.1/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-hdp3.1/pom.xml @@ -7,7 +7,7 @@ org.opencb.opencga opencga-storage-hadoop-lib - 3.0.0.1 + 3.0.0.2-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/pom.xml index 1b0e3907b9..b2648803b3 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/pom.xml @@ -23,7 +23,7 @@ org.opencb.opencga opencga-storage-hadoop - 3.0.0.1 + 3.0.0.2-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/pom.xml b/opencga-storage/opencga-storage-hadoop/pom.xml index 440a1fec50..b4258b03d3 100644 --- a/opencga-storage/opencga-storage-hadoop/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/pom.xml @@ -23,7 +23,7 @@ org.opencb.opencga opencga-storage - 3.0.0.1 + 3.0.0.2-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-server/pom.xml b/opencga-storage/opencga-storage-server/pom.xml index 98bfb2231e..5a605a0565 100644 --- a/opencga-storage/opencga-storage-server/pom.xml +++ b/opencga-storage/opencga-storage-server/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage - 3.0.0.1 + 3.0.0.2-SNAPSHOT ../pom.xml diff --git a/opencga-storage/pom.xml b/opencga-storage/pom.xml index 0615d0daa9..33ffd81655 100644 --- a/opencga-storage/pom.xml +++ b/opencga-storage/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 3.0.0.1 + 3.0.0.2-SNAPSHOT ../pom.xml diff --git a/opencga-test/pom.xml b/opencga-test/pom.xml index d43e29d0f3..711d494413 100644 --- a/opencga-test/pom.xml +++ 
b/opencga-test/pom.xml @@ -24,7 +24,7 @@ org.opencb.opencga opencga - 3.0.0.1 + 3.0.0.2-SNAPSHOT ../pom.xml diff --git a/pom.xml b/pom.xml index efdab0d652..0c2ea552a9 100644 --- a/pom.xml +++ b/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 3.0.0.1 + 3.0.0.2-SNAPSHOT pom OpenCGA From d435ec364d156f3f0d03fcc5583c82a7ce4ac794 Mon Sep 17 00:00:00 2001 From: pfurio Date: Mon, 23 Sep 2024 14:59:16 +0200 Subject: [PATCH 002/122] catalog: create cohort does not increment sample version, #TASK-6754 --- .../db/mongodb/AnnotationMongoDBAdaptor.java | 16 +- .../ClinicalAnalysisMongoDBAdaptor.java | 14 +- .../db/mongodb/CohortMongoDBAdaptor.java | 5 +- .../db/mongodb/FamilyMongoDBAdaptor.java | 248 ++++++++++-------- .../db/mongodb/FileMongoDBAdaptor.java | 6 +- .../db/mongodb/IndividualMongoDBAdaptor.java | 175 ++++++------ .../db/mongodb/SampleMongoDBAdaptor.java | 59 +++-- .../SnapshotVersionedMongoDBAdaptor.java | 61 ++++- 8 files changed, 356 insertions(+), 228 deletions(-) diff --git a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/mongodb/AnnotationMongoDBAdaptor.java b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/mongodb/AnnotationMongoDBAdaptor.java index 02ce564972..ebede7d04f 100644 --- a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/mongodb/AnnotationMongoDBAdaptor.java +++ b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/mongodb/AnnotationMongoDBAdaptor.java @@ -70,11 +70,23 @@ public abstract class AnnotationMongoDBAdaptor extends CatalogMongoDBAdaptor protected abstract MongoDBCollection getCollection(); - abstract OpenCGAResult transactionalUpdate(ClientSession clientSession, T entry, ObjectMap parameters, + OpenCGAResult transactionalUpdate(ClientSession clientSession, T entry, ObjectMap parameters, List variableSetList, QueryOptions queryOptions) + throws CatalogParameterException, CatalogDBException, CatalogAuthorizationException { + return transactionalUpdate(clientSession, entry, parameters, 
variableSetList, queryOptions, true); + } + + abstract OpenCGAResult transactionalUpdate(ClientSession clientSession, T entry, ObjectMap parameters, + List variableSetList, QueryOptions queryOptions, boolean incrementVersion) throws CatalogParameterException, CatalogDBException, CatalogAuthorizationException; - abstract OpenCGAResult transactionalUpdate(ClientSession clientSession, long studyUid, Bson query, UpdateDocument updateDocument) + OpenCGAResult transactionalUpdate(ClientSession clientSession, long studyUid, Bson query, UpdateDocument updateDocument) + throws CatalogParameterException, CatalogDBException, CatalogAuthorizationException { + return transactionalUpdate(clientSession, studyUid, query, updateDocument, true); + } + + abstract OpenCGAResult transactionalUpdate(ClientSession clientSession, long studyUid, Bson query, UpdateDocument updateDocument, + boolean incrementVersion) throws CatalogParameterException, CatalogDBException, CatalogAuthorizationException; public enum AnnotationSetParams implements QueryParam { diff --git a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/mongodb/ClinicalAnalysisMongoDBAdaptor.java b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/mongodb/ClinicalAnalysisMongoDBAdaptor.java index 7a359e5a7e..1e24abeead 100644 --- a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/mongodb/ClinicalAnalysisMongoDBAdaptor.java +++ b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/mongodb/ClinicalAnalysisMongoDBAdaptor.java @@ -355,20 +355,21 @@ OpenCGAResult transactionalUpdate(ClientSession clientSession, ClinicalAnalysis @Override OpenCGAResult transactionalUpdate(ClientSession clientSession, ClinicalAnalysis entry, ObjectMap parameters, - List variableSetList, QueryOptions queryOptions) + List variableSetList, QueryOptions queryOptions, + boolean incrementVersion) throws CatalogParameterException, CatalogDBException, CatalogAuthorizationException { throw new 
NotImplementedException("Please call to the other transactionalUpdate method passing the ClinicalAudit list"); } @Override OpenCGAResult transactionalUpdate(ClientSession clientSession, long studyUid, Bson query, - UpdateDocument updateDocument) + UpdateDocument updateDocument, boolean incrementVersion) throws CatalogParameterException, CatalogDBException, CatalogAuthorizationException { long tmpStartTime = startQuery(); Document updateOperation = updateDocument.toFinalUpdateDocument(); if (!updateOperation.isEmpty()) { - return versionedMongoDBAdaptor.update(clientSession, query, entryList -> { + SnapshotVersionedMongoDBAdaptor.FunctionWithException updateClinicalReferences = (clinicalList) -> { logger.debug("Update clinical analysis. Query: {}, Update: {}", query.toBsonDocument(), updateDocument); DataResult update = clinicalCollection.update(clientSession, query, updateOperation, null); @@ -385,7 +386,12 @@ OpenCGAResult transactionalUpdate(ClientSession clientSession, logger.debug("{} clinical analyses successfully updated", update.getNumUpdated()); return endWrite(tmpStartTime, update.getNumMatches(), update.getNumUpdated(), Collections.emptyList()); - }, null, null); + }; + if (incrementVersion) { + return versionedMongoDBAdaptor.update(clientSession, query, null, updateClinicalReferences, null, null); + } else { + return versionedMongoDBAdaptor.updateWithoutVersionIncrement(clientSession, query, null, updateClinicalReferences); + } } else { throw new CatalogDBException("Nothing to update"); } diff --git a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/mongodb/CohortMongoDBAdaptor.java b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/mongodb/CohortMongoDBAdaptor.java index 6b948a722b..0ca4d2477e 100644 --- a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/mongodb/CohortMongoDBAdaptor.java +++ b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/mongodb/CohortMongoDBAdaptor.java @@ -286,7 +286,7 @@ public 
OpenCGAResult update(Query query, ObjectMap parameters, List @Override OpenCGAResult transactionalUpdate(ClientSession clientSession, Cohort cohort, ObjectMap parameters, - List variableSetList, QueryOptions queryOptions) + List variableSetList, QueryOptions queryOptions, boolean incrementVersion) throws CatalogDBException, CatalogParameterException, CatalogAuthorizationException { long tmpStartTime = startQuery(); Query tmpQuery = new Query() @@ -341,7 +341,8 @@ OpenCGAResult transactionalUpdate(ClientSession clientSession, Cohort co } @Override - OpenCGAResult transactionalUpdate(ClientSession clientSession, long studyUid, Bson query, UpdateDocument updateDocument) + OpenCGAResult transactionalUpdate(ClientSession clientSession, long studyUid, Bson query, UpdateDocument updateDocument, + boolean incrementVersion) throws CatalogParameterException, CatalogDBException, CatalogAuthorizationException { long tmpStartTime = startQuery(); diff --git a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/mongodb/FamilyMongoDBAdaptor.java b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/mongodb/FamilyMongoDBAdaptor.java index 97bfd92a67..5ee3d65675 100644 --- a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/mongodb/FamilyMongoDBAdaptor.java +++ b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/mongodb/FamilyMongoDBAdaptor.java @@ -378,7 +378,7 @@ public OpenCGAResult update(Query query, ObjectMap parameters, List @Override OpenCGAResult transactionalUpdate(ClientSession clientSession, Family family, ObjectMap parameters, - List variableSetList, QueryOptions queryOptions) + List variableSetList, QueryOptions queryOptions, boolean incrementVersion) throws CatalogDBException, CatalogParameterException, CatalogAuthorizationException { long tmpStartTime = startQuery(); long studyUid = family.getStudyUid(); @@ -397,145 +397,161 @@ OpenCGAResult transactionalUpdate(ClientSession clientSession, Family fa QueryParams.STUDY_UID.key(), 
QueryParams.MEMBERS.key() + "." + IndividualDBAdaptor.QueryParams.ID.key()); Bson bsonQuery = parseQuery(tmpQuery); - return versionedMongoDBAdaptor.update(clientSession, bsonQuery, fieldsToInclude, (entrylist) -> { - String familyId = entrylist.get(0).getString(QueryParams.ID.key()); - DataResult result = updateAnnotationSets(clientSession, studyUid, familyUid, parameters, - variableSetList, queryOptions, true); - List familyMemberIds = family.getMembers().stream().map(Individual::getId).collect(Collectors.toList()); - boolean updateRoles = queryOptions.getBoolean(ParamConstants.FAMILY_UPDATE_ROLES_PARAM); - boolean updatePedigree = queryOptions.getBoolean(ParamConstants.FAMILY_UPDATE_PEDIGREEE_GRAPH_PARAM); - if (CollectionUtils.isNotEmpty(parameters.getAsList(QueryParams.MEMBERS.key()))) { - List newIndividuals = parameters.getAsList(QueryParams.MEMBERS.key(), Map.class); - Set newIndividualIds = newIndividuals.stream().map(i -> (String) i.get(IndividualDBAdaptor.QueryParams.ID - .key())).collect(Collectors.toSet()); - - Set currentIndividualIds = family.getMembers().stream().map(Individual::getId).collect(Collectors.toSet()); - - // Obtain new members to be added to the family - List missingIndividualIds = new ArrayList<>(); - for (String newIndividualId : newIndividualIds) { - if (!currentIndividualIds.contains(newIndividualId)) { - missingIndividualIds.add(newIndividualId); - } - } - - // Obtain members to remove from family - List oldIndividualIds = new ArrayList<>(); - for (String currentIndividualId : currentIndividualIds) { - if (!newIndividualIds.contains(currentIndividualId)) { - oldIndividualIds.add(currentIndividualId); - } - } - updateFamilyReferenceInIndividuals(clientSession, family, missingIndividualIds, oldIndividualIds); - updateRoles = true; - familyMemberIds = new ArrayList<>(newIndividualIds); + SnapshotVersionedMongoDBAdaptor.FunctionWithException updateFamilyReferences = (familyList) -> { + String familyId = 
familyList.get(0).getString(QueryParams.ID.key()); + DataResult result = updateAnnotationSets(clientSession, studyUid, familyUid, parameters, + variableSetList, queryOptions, true); + List familyMemberIds = family.getMembers().stream().map(Individual::getId).collect(Collectors.toList()); + boolean updateRoles = queryOptions.getBoolean(ParamConstants.FAMILY_UPDATE_ROLES_PARAM); + boolean updatePedigree = queryOptions.getBoolean(ParamConstants.FAMILY_UPDATE_PEDIGREEE_GRAPH_PARAM); + if (CollectionUtils.isNotEmpty(parameters.getAsList(QueryParams.MEMBERS.key()))) { + List newIndividuals = parameters.getAsList(QueryParams.MEMBERS.key(), Map.class); + Set newIndividualIds = newIndividuals.stream().map(i -> (String) i.get(IndividualDBAdaptor.QueryParams.ID + .key())).collect(Collectors.toSet()); + + Set currentIndividualIds = family.getMembers().stream().map(Individual::getId).collect(Collectors.toSet()); + + // Obtain new members to be added to the family + List missingIndividualIds = new ArrayList<>(); + for (String newIndividualId : newIndividualIds) { + if (!currentIndividualIds.contains(newIndividualId)) { + missingIndividualIds.add(newIndividualId); } + } - if (updateRoles) { - // CALCULATE ROLES - if (!familyMemberIds.isEmpty()) { - // Fetch individuals with relevant information to guess the relationship - Query individualQuery = new Query() - .append(IndividualDBAdaptor.QueryParams.STUDY_UID.key(), studyUid) - .append(IndividualDBAdaptor.QueryParams.ID.key(), familyMemberIds); - QueryOptions relationshipOptions = dbAdaptorFactory.getCatalogIndividualDBAdaptor().fixOptionsForRelatives( - null); - OpenCGAResult memberResult = dbAdaptorFactory.getCatalogIndividualDBAdaptor().get(clientSession, - individualQuery, relationshipOptions); - family.setMembers(memberResult.getResults()); - Map> roles = calculateRoles(clientSession, studyUid, family); - parameters.put(QueryParams.ROLES.key(), roles); - } else { - parameters.put(QueryParams.ROLES.key(), 
Collections.emptyMap()); - } + // Obtain members to remove from family + List oldIndividualIds = new ArrayList<>(); + for (String currentIndividualId : currentIndividualIds) { + if (!newIndividualIds.contains(currentIndividualId)) { + oldIndividualIds.add(currentIndividualId); } + } - if (updatePedigree && !updateRoles && !parameters.containsKey(QueryParams.DISORDERS.key())) { - PedigreeGraph pedigreeGraph = computePedigreeGraph(clientSession, family); - parameters.put(QueryParams.PEDIGREE_GRAPH.key(), pedigreeGraph); - } + updateFamilyReferenceInIndividuals(clientSession, family, missingIndividualIds, oldIndividualIds); + updateRoles = true; + familyMemberIds = new ArrayList<>(newIndividualIds); + } - Document familyUpdate = parseAndValidateUpdateParams(clientSession, parameters, tmpQuery).toFinalUpdateDocument(); + if (updateRoles) { + // CALCULATE ROLES + if (!familyMemberIds.isEmpty()) { + // Fetch individuals with relevant information to guess the relationship + Query individualQuery = new Query() + .append(IndividualDBAdaptor.QueryParams.STUDY_UID.key(), studyUid) + .append(IndividualDBAdaptor.QueryParams.ID.key(), familyMemberIds); + QueryOptions relationshipOptions = dbAdaptorFactory.getCatalogIndividualDBAdaptor().fixOptionsForRelatives( + null); + OpenCGAResult memberResult = dbAdaptorFactory.getCatalogIndividualDBAdaptor().get(clientSession, + individualQuery, relationshipOptions); + family.setMembers(memberResult.getResults()); + Map> roles = calculateRoles(clientSession, studyUid, family); + parameters.put(QueryParams.ROLES.key(), roles); + } else { + parameters.put(QueryParams.ROLES.key(), Collections.emptyMap()); + } + } - if (familyUpdate.isEmpty() && result.getNumUpdated() == 0) { - if (!parameters.isEmpty()) { - logger.error("Non-processed update parameters: {}", parameters.keySet()); - } - throw new CatalogDBException("Nothing to be updated"); - } + if (updatePedigree && !updateRoles && !parameters.containsKey(QueryParams.DISORDERS.key())) { + 
PedigreeGraph pedigreeGraph = computePedigreeGraph(clientSession, family); + parameters.put(QueryParams.PEDIGREE_GRAPH.key(), pedigreeGraph); + } + + Document familyUpdate = parseAndValidateUpdateParams(clientSession, parameters, tmpQuery).toFinalUpdateDocument(); + + if (familyUpdate.isEmpty() && result.getNumUpdated() == 0) { + if (!parameters.isEmpty()) { + logger.error("Non-processed update parameters: {}", parameters.keySet()); + } + throw new CatalogDBException("Nothing to be updated"); + } - List events = new ArrayList<>(); - if (!familyUpdate.isEmpty()) { - Bson finalQuery = parseQuery(tmpQuery); + List events = new ArrayList<>(); + if (!familyUpdate.isEmpty()) { + Bson finalQuery = parseQuery(tmpQuery); - logger.debug("Family update: query : {}, update: {}", finalQuery.toBsonDocument(), familyUpdate.toBsonDocument()); - result = familyCollection.update(clientSession, finalQuery, familyUpdate, new QueryOptions("multi", true)); + logger.debug("Family update: query : {}, update: {}", finalQuery.toBsonDocument(), familyUpdate.toBsonDocument()); + result = familyCollection.update(clientSession, finalQuery, familyUpdate, new QueryOptions("multi", true)); - // Compute pedigree graph - if (updateRoles || parameters.containsKey(QueryParams.DISORDERS.key())) { - PedigreeGraph pedigreeGraph = computePedigreeGraph(clientSession, family); - Document pedigreeGraphDoc = getMongoDBDocument(pedigreeGraph, "PedigreeGraph"); + // Compute pedigree graph + if (updateRoles || parameters.containsKey(QueryParams.DISORDERS.key())) { + PedigreeGraph pedigreeGraph = computePedigreeGraph(clientSession, family); + Document pedigreeGraphDoc = getMongoDBDocument(pedigreeGraph, "PedigreeGraph"); - UpdateDocument updateDocument = new UpdateDocument() - .setSet(new Document(QueryParams.PEDIGREE_GRAPH.key(), pedigreeGraphDoc)); - familyUpdate = updateDocument.toFinalUpdateDocument(); - familyCollection.update(clientSession, finalQuery, familyUpdate, new QueryOptions("multi", true)); - } + 
UpdateDocument updateDocument = new UpdateDocument() + .setSet(new Document(QueryParams.PEDIGREE_GRAPH.key(), pedigreeGraphDoc)); + familyUpdate = updateDocument.toFinalUpdateDocument(); + familyCollection.update(clientSession, finalQuery, familyUpdate, new QueryOptions("multi", true)); + } - if (parameters.containsKey(QueryParams.ID.key())) { - String newFamilyId = parameters.getString(QueryParams.ID.key()); - - // Fetch members (we don't trust those from the Family object because they could have been updated previously) - Query query = new Query() - .append(IndividualDBAdaptor.QueryParams.FAMILY_IDS.key(), familyId) - .append(IndividualDBAdaptor.QueryParams.STUDY_UID.key(), studyUid); - OpenCGAResult individualResult = dbAdaptorFactory.getCatalogIndividualDBAdaptor().get(clientSession, - query, IndividualManager.INCLUDE_INDIVIDUAL_IDS); - List memberIds = individualResult.getResults().stream().map(Individual::getId) - .collect(Collectors.toList()); - - // Remove familyId from all members - updateFamilyReferenceInIndividuals(clientSession, family, null, memberIds); - family.setId(newFamilyId); - updateFamilyReferenceInIndividuals(clientSession, family, memberIds, null); - } + if (parameters.containsKey(QueryParams.ID.key())) { + String newFamilyId = parameters.getString(QueryParams.ID.key()); + + // Fetch members (we don't trust those from the Family object because they could have been updated previously) + Query query = new Query() + .append(IndividualDBAdaptor.QueryParams.FAMILY_IDS.key(), familyId) + .append(IndividualDBAdaptor.QueryParams.STUDY_UID.key(), studyUid); + OpenCGAResult individualResult = dbAdaptorFactory.getCatalogIndividualDBAdaptor().get(clientSession, + query, IndividualManager.INCLUDE_INDIVIDUAL_IDS); + List memberIds = individualResult.getResults().stream().map(Individual::getId) + .collect(Collectors.toList()); + + // Remove familyId from all members + updateFamilyReferenceInIndividuals(clientSession, family, null, memberIds); + 
family.setId(newFamilyId); + updateFamilyReferenceInIndividuals(clientSession, family, memberIds, null); + } - if (result.getNumMatches() == 0) { - throw new CatalogDBException("Family " + familyId + " not found"); - } - if (result.getNumUpdated() == 0) { - events.add(new Event(Event.Type.WARNING, familyId, "Family was already updated")); - } - logger.debug("Family {} successfully updated", familyId); - } + if (result.getNumMatches() == 0) { + throw new CatalogDBException("Family " + familyId + " not found"); + } + if (result.getNumUpdated() == 0) { + events.add(new Event(Event.Type.WARNING, familyId, "Family was already updated")); + } + logger.debug("Family {} successfully updated", familyId); + } + + return endWrite(tmpStartTime, 1, 1, events); + }; - return endWrite(tmpStartTime, 1, 1, events); - }, Arrays.asList(QueryParams.MEMBERS_ID.key(), QueryParams.MEMBERS_SAMPLES_ID.key()), - this::iterator, (DBIterator iterator) -> updateReferencesAfterFamilyVersionIncrement(clientSession, iterator)); + if (incrementVersion) { + return versionedMongoDBAdaptor.update(clientSession, bsonQuery, fieldsToInclude, updateFamilyReferences, + Arrays.asList(QueryParams.MEMBERS_ID.key(), QueryParams.MEMBERS_SAMPLES_ID.key()), + this::iterator, (DBIterator iterator) -> updateReferencesAfterFamilyVersionIncrement(clientSession, iterator)); + } else { + return versionedMongoDBAdaptor.updateWithoutVersionIncrement(clientSession, bsonQuery, fieldsToInclude, updateFamilyReferences); + } } @Override - OpenCGAResult transactionalUpdate(ClientSession clientSession, long studyUid, Bson query, UpdateDocument updateDocument) + OpenCGAResult transactionalUpdate(ClientSession clientSession, long studyUid, Bson query, UpdateDocument updateDocument, + boolean incrementVersion) throws CatalogParameterException, CatalogDBException, CatalogAuthorizationException { long tmpStartTime = startQuery(); List fieldsToInclude = Arrays.asList(QueryParams.ID.key(), QueryParams.UID.key(), 
QueryParams.VERSION.key(), QueryParams.STUDY_UID.key(), QueryParams.MEMBERS.key() + "." + IndividualDBAdaptor.QueryParams.ID.key()); - return versionedMongoDBAdaptor.update(clientSession, query, fieldsToInclude, (entrylist) -> { - Document familyUpdate = updateDocument.toFinalUpdateDocument(); + SnapshotVersionedMongoDBAdaptor.FunctionWithException updateFamilyReferences = (familyList) -> { + Document familyUpdate = updateDocument.toFinalUpdateDocument(); - if (familyUpdate.isEmpty()) { - throw new CatalogDBException("Nothing to be updated"); - } + if (familyUpdate.isEmpty()) { + throw new CatalogDBException("Nothing to be updated"); + } - logger.debug("Family update: query : {}, update: {}", query.toBsonDocument(), familyUpdate.toBsonDocument()); - DataResult result = familyCollection.update(clientSession, query, familyUpdate, - new QueryOptions(MongoDBCollection.MULTI, true)); - logger.debug("{} families successfully updated", result.getNumUpdated()); - return endWrite(tmpStartTime, result.getNumMatches(), result.getNumUpdated(), Collections.emptyList()); - }, Arrays.asList(QueryParams.MEMBERS_ID.key(), QueryParams.MEMBERS_SAMPLES_ID.key()), - this::iterator, (DBIterator iterator) -> updateReferencesAfterFamilyVersionIncrement(clientSession, iterator)); + logger.debug("Family update: query : {}, update: {}", query.toBsonDocument(), familyUpdate.toBsonDocument()); + DataResult result = familyCollection.update(clientSession, query, familyUpdate, + new QueryOptions(MongoDBCollection.MULTI, true)); + logger.debug("{} families successfully updated", result.getNumUpdated()); + return endWrite(tmpStartTime, result.getNumMatches(), result.getNumUpdated(), Collections.emptyList()); + }; + + if (incrementVersion) { + return versionedMongoDBAdaptor.update(clientSession, query, fieldsToInclude, updateFamilyReferences, + Arrays.asList(QueryParams.MEMBERS_ID.key(), QueryParams.MEMBERS_SAMPLES_ID.key()), + this::iterator, (DBIterator iterator) -> 
updateReferencesAfterFamilyVersionIncrement(clientSession, iterator)); + } else { + return versionedMongoDBAdaptor.updateWithoutVersionIncrement(clientSession, query, fieldsToInclude, updateFamilyReferences); + } } private PedigreeGraph computePedigreeGraph(ClientSession clientSession, Family family) diff --git a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/mongodb/FileMongoDBAdaptor.java b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/mongodb/FileMongoDBAdaptor.java index 0f8383a990..7617a49d20 100644 --- a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/mongodb/FileMongoDBAdaptor.java +++ b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/mongodb/FileMongoDBAdaptor.java @@ -380,7 +380,7 @@ public OpenCGAResult update(Query query, ObjectMap parameters, List @Override OpenCGAResult transactionalUpdate(ClientSession clientSession, File file, ObjectMap parameters, - List variableSetList, QueryOptions queryOptions) + List variableSetList, QueryOptions queryOptions, boolean incrementVersion) throws CatalogParameterException, CatalogDBException, CatalogAuthorizationException { long tmpStartTime = startQuery(); long studyUid = file.getStudyUid(); @@ -438,8 +438,8 @@ OpenCGAResult transactionalUpdate(ClientSession clientSession, File file, } @Override - OpenCGAResult transactionalUpdate(ClientSession clientSession, long studyUid, Bson query, UpdateDocument updateDocument) - throws CatalogDBException { + OpenCGAResult transactionalUpdate(ClientSession clientSession, long studyUid, Bson query, UpdateDocument updateDocument, + boolean incrementVersion) throws CatalogDBException { long tmpStartTime = startQuery(); Document fileUpdate = updateDocument.toFinalUpdateDocument(); diff --git a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/mongodb/IndividualMongoDBAdaptor.java b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/mongodb/IndividualMongoDBAdaptor.java index 03a6704af1..c3f65b924a 100644 --- 
a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/mongodb/IndividualMongoDBAdaptor.java +++ b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/mongodb/IndividualMongoDBAdaptor.java @@ -374,7 +374,7 @@ public OpenCGAResult update(Query query, ObjectMap parameters, List @Override OpenCGAResult transactionalUpdate(ClientSession clientSession, Individual individual, ObjectMap parameters, - List variableSetList, QueryOptions queryOptions) + List variableSetList, QueryOptions queryOptions, boolean incrementVersion) throws CatalogDBException, CatalogParameterException, CatalogAuthorizationException { long tmpStartTime = startQuery(); long studyUid = individual.getStudyUid(); @@ -392,106 +392,123 @@ OpenCGAResult transactionalUpdate(ClientSession clientSession, Indiv QueryParams.STUDY_UID.key()); Bson bson = parseQuery(tmpQuery); - return versionedMongoDBAdaptor.update(clientSession, bson, fieldsToInclude, (entryList) -> { - String individualId = entryList.get(0).getString(QueryParams.ID.key()); - DataResult result = updateAnnotationSets(clientSession, studyUid, individualUid, parameters, - variableSetList, queryOptions, true); - UpdateDocument updateDocument = parseAndValidateUpdateParams(clientSession, parameters, tmpQuery, queryOptions); - Document individualUpdate = updateDocument.toFinalUpdateDocument(); + SnapshotVersionedMongoDBAdaptor.FunctionWithException updateIndividualReferences = (individualList) -> { + String individualId = individualList.get(0).getString(QueryParams.ID.key()); + DataResult result = updateAnnotationSets(clientSession, studyUid, individualUid, parameters, + variableSetList, queryOptions, true); + UpdateDocument updateDocument = parseAndValidateUpdateParams(clientSession, parameters, tmpQuery, queryOptions); + Document individualUpdate = updateDocument.toFinalUpdateDocument(); + + if (individualUpdate.isEmpty() && result.getNumUpdated() == 0) { + if (!parameters.isEmpty()) { + logger.error("Non-processed update 
parameters: {}", parameters.keySet()); + } + throw new CatalogDBException("Nothing to be updated"); + } - if (individualUpdate.isEmpty() && result.getNumUpdated() == 0) { - if (!parameters.isEmpty()) { - logger.error("Non-processed update parameters: {}", parameters.keySet()); - } - throw new CatalogDBException("Nothing to be updated"); - } + List events = new ArrayList<>(); + if (!individualUpdate.isEmpty()) { + Bson finalQuery = parseQuery(tmpQuery); - List events = new ArrayList<>(); - if (!individualUpdate.isEmpty()) { - Bson finalQuery = parseQuery(tmpQuery); + logger.debug("Individual update: query : {}, update: {}", finalQuery.toBsonDocument(), + individualUpdate.toBsonDocument()); - logger.debug("Individual update: query : {}, update: {}", finalQuery.toBsonDocument(), - individualUpdate.toBsonDocument()); + result = individualCollection.update(clientSession, finalQuery, individualUpdate, new QueryOptions("multi", true)); - result = individualCollection.update(clientSession, finalQuery, individualUpdate, new QueryOptions("multi", true)); + if (result.getNumMatches() == 0) { + throw new CatalogDBException("Individual " + individualId + " not found"); + } + if (result.getNumUpdated() == 0) { + events.add(new Event(Event.Type.WARNING, individualId, "Individual was already updated")); + } - if (result.getNumMatches() == 0) { - throw new CatalogDBException("Individual " + individualId + " not found"); - } - if (result.getNumUpdated() == 0) { - events.add(new Event(Event.Type.WARNING, individualId, "Individual was already updated")); - } + if (!updateDocument.getAttributes().isEmpty()) { + List addedSamples = updateDocument.getAttributes().getAsLongList("ADDED_SAMPLES"); + List removedSamples = updateDocument.getAttributes().getAsLongList("REMOVED_SAMPLES"); - if (!updateDocument.getAttributes().isEmpty()) { - List addedSamples = updateDocument.getAttributes().getAsLongList("ADDED_SAMPLES"); - List removedSamples = 
updateDocument.getAttributes().getAsLongList("REMOVED_SAMPLES"); + // Set new individual reference + dbAdaptorFactory.getCatalogSampleDBAdaptor().updateIndividualFromSampleCollection(clientSession, + studyUid, addedSamples, individualId); - // Set new individual reference - dbAdaptorFactory.getCatalogSampleDBAdaptor().updateIndividualFromSampleCollection(clientSession, - studyUid, addedSamples, individualId); + // Set individual reference to "" + dbAdaptorFactory.getCatalogSampleDBAdaptor().updateIndividualFromSampleCollection(clientSession, + studyUid, removedSamples, ""); + } - // Set individual reference to "" - dbAdaptorFactory.getCatalogSampleDBAdaptor().updateIndividualFromSampleCollection(clientSession, - studyUid, removedSamples, ""); - } + // If the list of disorders or phenotypes is altered, we will need to update the corresponding effective lists + // of the families associated (if any) + if (parameters.containsKey(QueryParams.DISORDERS.key()) || parameters.containsKey(QueryParams.PHENOTYPES.key())) { + recalculateFamilyDisordersPhenotypes(clientSession, studyUid, individualUid); + } - // If the list of disorders or phenotypes is altered, we will need to update the corresponding effective lists - // of the families associated (if any) - if (parameters.containsKey(QueryParams.DISORDERS.key()) || parameters.containsKey(QueryParams.PHENOTYPES.key())) { - recalculateFamilyDisordersPhenotypes(clientSession, studyUid, individualUid); - } + if (StringUtils.isNotEmpty(parameters.getString(QueryParams.ID.key()))) { + // We need to update the individual id reference in all its samples + dbAdaptorFactory.getCatalogSampleDBAdaptor().updateIndividualIdFromSamples(clientSession, studyUid, + individualId, parameters.getString(QueryParams.ID.key())); - if (StringUtils.isNotEmpty(parameters.getString(QueryParams.ID.key()))) { - // We need to update the individual id reference in all its samples - 
dbAdaptorFactory.getCatalogSampleDBAdaptor().updateIndividualIdFromSamples(clientSession, studyUid, - individualId, parameters.getString(QueryParams.ID.key())); + // Update the family roles + familyDBAdaptor.updateIndividualIdFromFamilies(clientSession, studyUid, individualUid, individualId, + parameters.getString(QueryParams.ID.key())); + } - // Update the family roles - familyDBAdaptor.updateIndividualIdFromFamilies(clientSession, studyUid, individualUid, individualId, - parameters.getString(QueryParams.ID.key())); - } + if (parameters.containsKey(QueryParams.FATHER_UID.key()) || parameters.containsKey(QueryParams.MOTHER_UID.key())) { + // If the parents have changed, we need to check family roles + recalculateFamilyRolesForMember(clientSession, studyUid, individualUid); + } - if (parameters.containsKey(QueryParams.FATHER_UID.key()) || parameters.containsKey(QueryParams.MOTHER_UID.key())) { - // If the parents have changed, we need to check family roles - recalculateFamilyRolesForMember(clientSession, studyUid, individualUid); - } + logger.debug("Individual {} successfully updated", individualId); + } - logger.debug("Individual {} successfully updated", individualId); - } + return endWrite(tmpStartTime, 1, 1, events); + }; - return endWrite(tmpStartTime, 1, 1, events); - }, Collections.singletonList(QueryParams.SAMPLES_IDS.key()), this::iterator, - (DBIterator iterator) -> updateReferencesAfterIndividualVersionIncrement(clientSession, studyUid, iterator)); + if (incrementVersion) { + return versionedMongoDBAdaptor.update(clientSession, bson, fieldsToInclude, updateIndividualReferences, + Collections.singletonList(QueryParams.SAMPLES_IDS.key()), this::iterator, + (DBIterator iterator) -> updateReferencesAfterIndividualVersionIncrement(clientSession, studyUid, + iterator)); + } else { + return versionedMongoDBAdaptor.updateWithoutVersionIncrement(clientSession, bson, fieldsToInclude, updateIndividualReferences); + } } - OpenCGAResult 
transactionalUpdate(ClientSession clientSession, long studyUid, Bson query, UpdateDocument updateDocument) + OpenCGAResult transactionalUpdate(ClientSession clientSession, long studyUid, Bson query, UpdateDocument updateDocument, + boolean incrementVersion) throws CatalogParameterException, CatalogDBException, CatalogAuthorizationException { long tmpStartTime = startQuery(); List fieldsToInclude = Arrays.asList(QueryParams.ID.key(), QueryParams.UID.key(), QueryParams.VERSION.key(), QueryParams.STUDY_UID.key()); - return versionedMongoDBAdaptor.update(clientSession, query, fieldsToInclude, (entryList) -> { - List events = new ArrayList<>(); - Document update = updateDocument.toFinalUpdateDocument(); - if (!update.isEmpty()) { - logger.debug("Individual update: query : {}, update: {}", query.toBsonDocument(), update.toBsonDocument()); - DataResult result = individualCollection.update(clientSession, query, update, - new QueryOptions(MongoDBCollection.MULTI, true)); - List individualIds = entryList.stream().map(x -> x.getString(QueryParams.ID.key())) - .collect(Collectors.toList()); - if (result.getNumUpdated() == 0) { - for (String individualId : individualIds) { - events.add(new Event(Event.Type.WARNING, individualId, "Individual was already updated")); - } - } - - logger.debug("Individual(s) {} successfully updated", StringUtils.join(individualIds, ", ")); - return endWrite(tmpStartTime, result.getNumMatches(), result.getNumUpdated(), events); - } else { - return endWrite(tmpStartTime, entryList.size(), 0, events); + SnapshotVersionedMongoDBAdaptor.FunctionWithException updateIndividualReferences = (individualList) -> { + List events = new ArrayList<>(); + Document update = updateDocument.toFinalUpdateDocument(); + if (!update.isEmpty()) { + logger.debug("Individual update: query : {}, update: {}", query.toBsonDocument(), update.toBsonDocument()); + DataResult result = individualCollection.update(clientSession, query, update, + new 
QueryOptions(MongoDBCollection.MULTI, true)); + List individualIds = individualList.stream().map(x -> x.getString(QueryParams.ID.key())) + .collect(Collectors.toList()); + if (result.getNumUpdated() == 0) { + for (String individualId : individualIds) { + events.add(new Event(Event.Type.WARNING, individualId, "Individual was already updated")); } - }, Collections.singletonList(QueryParams.SAMPLES_IDS.key()), this::iterator, - (DBIterator iterator) -> updateReferencesAfterIndividualVersionIncrement(clientSession, studyUid, iterator)); + } + + logger.debug("Individual(s) {} successfully updated", StringUtils.join(individualIds, ", ")); + return endWrite(tmpStartTime, result.getNumMatches(), result.getNumUpdated(), events); + } else { + return endWrite(tmpStartTime, individualList.size(), 0, events); + } + }; + + if (incrementVersion) { + return versionedMongoDBAdaptor.update(clientSession, query, fieldsToInclude, updateIndividualReferences, + Collections.singletonList(QueryParams.SAMPLES_IDS.key()), this::iterator, + (DBIterator iterator) -> updateReferencesAfterIndividualVersionIncrement(clientSession, studyUid, + iterator)); + } else { + return versionedMongoDBAdaptor.updateWithoutVersionIncrement(clientSession, query, fieldsToInclude, updateIndividualReferences); + } } // If we know the study uid, we should be calling to the other transactionalUpdate method that receives the entire object instead of diff --git a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/mongodb/SampleMongoDBAdaptor.java b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/mongodb/SampleMongoDBAdaptor.java index d9058193e7..e4a550090c 100644 --- a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/mongodb/SampleMongoDBAdaptor.java +++ b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/mongodb/SampleMongoDBAdaptor.java @@ -62,6 +62,7 @@ import java.util.*; import java.util.concurrent.TimeUnit; import java.util.function.Consumer; +import 
java.util.function.Function; import java.util.function.UnaryOperator; import java.util.stream.Collectors; @@ -262,7 +263,7 @@ public OpenCGAResult update(long uid, ObjectMap parameters, List va try { return runTransaction(clientSession -> privateUpdate(clientSession, documentResult.first(), parameters, variableSetList, - queryOptions)); + queryOptions, true)); } catch (CatalogException e) { logger.error("Could not update sample {}: {}", sampleId, e.getMessage(), e); throw new CatalogDBException("Could not update sample " + sampleId + ": " + e.getMessage(), e.getCause()); @@ -297,7 +298,7 @@ public OpenCGAResult update(Query query, ObjectMap parameters, List String sampleId = sampleDocument.getString(QueryParams.ID.key()); try { result.append(runTransaction(clientSession -> privateUpdate(clientSession, sampleDocument, parameters, variableSetList, - queryOptions))); + queryOptions, true))); } catch (CatalogException e) { logger.error("Could not update sample {}: {}", sampleId, e.getMessage(), e); result.getEvents().add(new Event(Event.Type.ERROR, sampleId, e.getMessage())); @@ -309,7 +310,7 @@ public OpenCGAResult update(Query query, ObjectMap parameters, List } OpenCGAResult privateUpdate(ClientSession clientSession, Document sampleDocument, ObjectMap parameters, - List variableSetList, QueryOptions queryOptions) + List variableSetList, QueryOptions queryOptions, boolean incrementVersion) throws CatalogDBException, CatalogParameterException, CatalogAuthorizationException { long tmpStartTime = startQuery(); String sampleId = sampleDocument.getString(QueryParams.ID.key()); @@ -321,7 +322,7 @@ OpenCGAResult privateUpdate(ClientSession clientSession, Document sample .append(QueryParams.STUDY_UID.key(), studyUid) .append(QueryParams.UID.key(), sampleUid); Bson bsonQuery = parseQuery(tmpQuery); - return versionedMongoDBAdaptor.update(clientSession, bsonQuery, (entrylist) -> { + SnapshotVersionedMongoDBAdaptor.FunctionWithException updateSampleReferences = (sampleList) -> 
{ // Perform the update DataResult result = updateAnnotationSets(clientSession, studyUid, sampleUid, parameters, variableSetList, queryOptions, true); @@ -372,12 +373,21 @@ OpenCGAResult privateUpdate(ClientSession clientSession, Document sample } return endWrite(tmpStartTime, 1, 1, events); - }, this::iterator, (DBIterator iterator) -> updateReferencesAfterSampleVersionIncrement(clientSession, iterator)); + }; + + List fieldsToInclude = Arrays.asList(QueryParams.ID.key(), QueryParams.UID.key(), QueryParams.VERSION.key(), + QueryParams.STUDY_UID.key(), PRIVATE_INDIVIDUAL_UID); + if (incrementVersion) { + return versionedMongoDBAdaptor.update(clientSession, bsonQuery, fieldsToInclude, updateSampleReferences, this::iterator, + (DBIterator iterator) -> updateReferencesAfterSampleVersionIncrement(clientSession, iterator)); + } else { + return versionedMongoDBAdaptor.updateWithoutVersionIncrement(clientSession, bsonQuery, fieldsToInclude, updateSampleReferences); + } } @Override OpenCGAResult transactionalUpdate(ClientSession clientSession, Sample sample, ObjectMap parameters, - List variableSetList, QueryOptions queryOptions) + List variableSetList, QueryOptions queryOptions, boolean incrementVersion) throws CatalogParameterException, CatalogDBException, CatalogAuthorizationException { long tmpStartTime = startQuery(); long studyUid = sample.getStudyUid(); @@ -396,8 +406,8 @@ OpenCGAResult transactionalUpdate(ClientSession clientSession, Sample sa QueryParams.STUDY_UID.key(), PRIVATE_INDIVIDUAL_UID); Bson bsonQuery = parseQuery(tmpQuery); - return versionedMongoDBAdaptor.update(clientSession, bsonQuery, fieldsToInclude, (entrylist) -> { - String sampleId = entrylist.get(0).getString(QueryParams.ID.key()); + SnapshotVersionedMongoDBAdaptor.FunctionWithException updateSampleReferences = (sampleList) -> { + String sampleId = sampleList.get(0).getString(QueryParams.ID.key()); // Perform the update DataResult result = updateAnnotationSets(clientSession, studyUid, sampleUid, 
parameters, variableSetList, queryOptions, true); @@ -420,12 +430,12 @@ OpenCGAResult transactionalUpdate(ClientSession clientSession, Sample sa result = sampleCollection.update(clientSession, finalQuery, sampleUpdate, new QueryOptions(MongoDBCollection.MULTI, true)); if (updateParams.getSet().containsKey(PRIVATE_INDIVIDUAL_UID)) { - long individualUid = entrylist.get(0).getLong(PRIVATE_INDIVIDUAL_UID); + long individualUid = sampleList.get(0).getLong(PRIVATE_INDIVIDUAL_UID); long newIndividualUid = updateParams.getSet().getLong(PRIVATE_INDIVIDUAL_UID); // If the sample has been associated a different individual if (newIndividualUid != individualUid) { - int version = entrylist.get(0).getInteger(QueryParams.VERSION.key()); + int version = sampleList.get(0).getInteger(QueryParams.VERSION.key()); Sample tmpSample = new Sample() .setUid(sampleUid) .setVersion(version) @@ -455,17 +465,23 @@ OpenCGAResult transactionalUpdate(ClientSession clientSession, Sample sa } return endWrite(tmpStartTime, 1, 1, events); - }, this::iterator, (DBIterator iterator) -> updateReferencesAfterSampleVersionIncrement(clientSession, iterator)); + }; + + if (incrementVersion) { + return versionedMongoDBAdaptor.update(clientSession, bsonQuery, fieldsToInclude, updateSampleReferences, this::iterator, + (DBIterator iterator) -> updateReferencesAfterSampleVersionIncrement(clientSession, iterator)); + } else { + return versionedMongoDBAdaptor.updateWithoutVersionIncrement(clientSession, bsonQuery, fieldsToInclude, updateSampleReferences); + } } @Override - OpenCGAResult transactionalUpdate(ClientSession clientSession, long studyUid, Bson query, UpdateDocument updateDocument) + OpenCGAResult transactionalUpdate(ClientSession clientSession, long studyUid, Bson query, UpdateDocument updateDocument, + boolean incrementVersion) throws CatalogParameterException, CatalogDBException, CatalogAuthorizationException { long tmpStartTime = startQuery(); - List includeIds = Arrays.asList(QueryParams.ID.key(), 
QueryParams.UID.key(), QueryParams.VERSION.key(), - QueryParams.STUDY_UID.key(), PRIVATE_INDIVIDUAL_UID); - return versionedMongoDBAdaptor.update(clientSession, query, includeIds, (sampleList) -> { + Function, OpenCGAResult> updateSampleReferences = (sampleList) -> { List events = new ArrayList<>(); Document update = updateDocument.toFinalUpdateDocument(); if (!update.isEmpty()) { @@ -481,7 +497,16 @@ OpenCGAResult transactionalUpdate(ClientSession clientSession, long stud logger.debug("Samples {} successfully updated", StringUtils.join(sampleIds, ", ")); } return endWrite(tmpStartTime, sampleList.size(), sampleList.size(), events); - }, this::iterator, (DBIterator iterator) -> updateReferencesAfterSampleVersionIncrement(clientSession, iterator)); + }; + + List includeIds = Arrays.asList(QueryParams.ID.key(), QueryParams.UID.key(), QueryParams.VERSION.key(), + QueryParams.STUDY_UID.key(), PRIVATE_INDIVIDUAL_UID); + if (incrementVersion) { + return versionedMongoDBAdaptor.update(clientSession, query, includeIds, updateSampleReferences::apply, this::iterator, + (DBIterator iterator) -> updateReferencesAfterSampleVersionIncrement(clientSession, iterator)); + } else { + return versionedMongoDBAdaptor.updateWithoutVersionIncrement(clientSession, query, includeIds, updateSampleReferences::apply); + } } private void updateReferencesAfterSampleVersionIncrement(ClientSession clientSession, DBIterator iterator) @@ -528,7 +553,7 @@ void updateCohortReferences(ClientSession clientSession, long studyUid, List iterator(ClientSession session, Query query, QueryOptions options) throws CatalogDBException, CatalogParameterException, CatalogAuthorizationException; } + @FunctionalInterface + public interface FunctionWithException { + OpenCGAResult execute(List entryList) + throws CatalogDBException, CatalogParameterException, CatalogAuthorizationException; + } + protected void insert(ClientSession session, Document document) { // Versioning private parameters document.put(VERSION, 1); 
@@ -149,7 +155,7 @@ protected void insert(ClientSession session, Document document) { archiveCollection.insert(session, document, QueryOptions.empty()); } - protected OpenCGAResult update(ClientSession session, Bson sourceQuery, VersionedModelExecution> update, + protected OpenCGAResult update(ClientSession session, Bson sourceQuery, FunctionWithException update, PostVersionIncrementIterator postVersionIncrementIterator, ReferenceModelExecution postVersionIncrementExecution) throws CatalogDBException, CatalogParameterException, CatalogAuthorizationException { @@ -158,8 +164,7 @@ protected OpenCGAResult update(ClientSession session, Bson sourceQuery, V } protected OpenCGAResult update(ClientSession session, Bson sourceQuery, List fieldsToInclude, - VersionedModelExecution> update, - PostVersionIncrementIterator postVersionIncrementIterator, + FunctionWithException update, PostVersionIncrementIterator postVersionIncrementIterator, ReferenceModelExecution postVersionIncrementExecution) throws CatalogDBException, CatalogParameterException, CatalogAuthorizationException { return update(session, sourceQuery, fieldsToInclude, update, Collections.emptyList(), postVersionIncrementIterator, @@ -167,8 +172,7 @@ protected OpenCGAResult update(ClientSession session, Bson sourceQuery, L } protected OpenCGAResult update(ClientSession session, Bson sourceQuery, List fieldsToInclude, - VersionedModelExecution> update, - List postVersionIncrementAdditionalIncludeFields, + FunctionWithException update, List postVersionIncrementAdditionalIncludeFields, PostVersionIncrementIterator dbIterator, ReferenceModelExecution postVersionIncrementExecution) throws CatalogDBException, CatalogParameterException, CatalogAuthorizationException { @@ -301,6 +305,53 @@ protected OpenCGAResult updateWithoutVersionIncrement(Bson sourceQuery, N return executionResult; } + protected OpenCGAResult updateWithoutVersionIncrement(ClientSession clientSession, Bson sourceQuery, + List fieldsToInclude, 
FunctionWithException update) + throws CatalogDBException, CatalogParameterException, CatalogAuthorizationException { + // Obtain all entries that will be updated + Set includeFields = new HashSet<>(Arrays.asList(PRIVATE_UID, VERSION, RELEASE_FROM_VERSION, PRIVATE_TRANSACTION_ID)); + if (fieldsToInclude != null) { + includeFields.addAll(fieldsToInclude); + } + QueryOptions options = new QueryOptions(QueryOptions.INCLUDE, includeFields); + List entryList = new LinkedList<>(); + List allUids = new LinkedList<>(); + try (MongoDBIterator iterator = collection.iterator(clientSession, sourceQuery, null, null, options)) { + while (iterator.hasNext()) { + Document result = iterator.next(); + entryList.add(result); + + long uid = result.get(PRIVATE_UID, Number.class).longValue(); + allUids.add(uid); + } + } + + // Execute main update + OpenCGAResult executionResult = update.execute(entryList); + + // Fetch document containing update and copy into the archive collection + Bson bsonQuery = Filters.in(PRIVATE_UID, allUids); + options = new QueryOptions(MongoDBCollection.NO_CURSOR_TIMEOUT, true); + QueryOptions upsertOptions = new QueryOptions() + .append(MongoDBCollection.REPLACE, true) + .append(MongoDBCollection.UPSERT, true); + try (MongoDBIterator iterator = collection.iterator(clientSession, bsonQuery, null, null, options)) { + while (iterator.hasNext()) { + Document result = iterator.next(); + result.remove(PRIVATE_MONGO_ID); + + // Insert/replace in archive collection + Bson tmpBsonQuery = Filters.and( + Filters.eq(PRIVATE_UID, result.get(PRIVATE_UID)), + Filters.eq(VERSION, result.get(VERSION)) + ); + archiveCollection.update(clientSession, tmpBsonQuery, result, upsertOptions); + } + } + + return executionResult; + } + /** * Revert to a previous version. 
* From 4b8dad237f154ac2762e8ec8e1a0cebf5352cf8a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Tue, 8 Oct 2024 14:16:06 +0100 Subject: [PATCH 003/122] storage: Add variant-walker tool #TASK-6722 --- .../analysis/variant/VariantExportTool.java | 4 + .../analysis/variant/VariantWalkerTool.java | 94 +++++ .../manager/VariantStorageManager.java | 38 ++ .../VariantExportOperationManager.java | 21 +- .../app/cli/main/OpenCgaCompleter.java | 2 +- .../app/cli/main/OpencgaCliOptionsParser.java | 1 + .../AnalysisVariantCommandExecutor.java | 130 ++++++ .../AnalysisVariantCommandOptions.java | 304 ++++++++++++++ opencga-client/src/main/R/R/Variant-methods.R | 18 + .../client/rest/clients/VariantClient.java | 26 ++ opencga-client/src/main/javascript/Variant.js | 22 + .../pyopencga/rest_clients/variant_client.py | 31 ++ .../core/cellbase/CellBaseValidator.java | 27 +- .../core/config/ConfigurationOption.java | 4 + .../models/variant/VariantWalkerParams.java | 75 ++++ .../rest/analysis/AnalysisWebService.java | 5 - .../rest/analysis/VariantWebService.java | 22 + .../core/variant/VariantStorageEngine.java | 45 +- .../core/variant/VariantStorageOptions.java | 22 + .../core/variant/io/VariantWriterFactory.java | 8 + .../dummy/DummyVariantStorageEngine.java | 8 + .../hadoop/utils/AbstractHBaseDriver.java | 27 +- .../utils/ValueOnlyTextOutputFormat.java | 33 ++ .../variant/HadoopVariantStorageEngine.java | 19 + .../hadoop/variant/io/VariantDriver.java | 177 ++++++++ .../variant/io/VariantExporterDriver.java | 167 ++------ .../variant/mr/StreamVariantDriver.java | 140 +++++++ .../variant/mr/StreamVariantMapper.java | 394 ++++++++++++++++++ .../src/main/python/variant_walker.py | 157 +++++++ .../src/main/python/walker_example.py | 51 +++ .../VariantHadoopStoragePipelineTest.java | 49 +++ .../variant/io/HadoopVariantExporterTest.java | 2 +- .../src/test/resources/gaps/file1.genome.vcf | 1 + .../src/test/resources/gaps/file2.genome.vcf | 1 + 34 files 
changed, 1950 insertions(+), 175 deletions(-) create mode 100644 opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/VariantWalkerTool.java create mode 100644 opencga-core/src/main/java/org/opencb/opencga/core/models/variant/VariantWalkerParams.java create mode 100644 opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/utils/ValueOnlyTextOutputFormat.java create mode 100644 opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/io/VariantDriver.java create mode 100644 opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantDriver.java create mode 100644 opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantMapper.java create mode 100644 opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/python/variant_walker.py create mode 100644 opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/python/walker_example.py diff --git a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/VariantExportTool.java b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/VariantExportTool.java index ea6bb73998..b0a2005ac1 100644 --- a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/VariantExportTool.java +++ b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/VariantExportTool.java @@ -72,6 +72,9 @@ protected List getSteps() { protected void run() throws Exception { List uris = new ArrayList<>(2); step(ID, () -> { + // Use scratch directory to store intermediate files. 
/*
 * Copyright 2015-2020 OpenCB
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.opencb.opencga.analysis.variant;

// NOTE(review): this is Solr's StringUtils, not commons-lang3 -- confirm that is intended.
import org.apache.solr.common.StringUtils;
import org.opencb.commons.datastore.core.Query;
import org.opencb.commons.datastore.core.QueryOptions;
import org.opencb.opencga.analysis.tools.OpenCgaTool;
import org.opencb.opencga.catalog.io.IOManager;
import org.opencb.opencga.core.common.UriUtils;
import org.opencb.opencga.core.models.common.Enums;
import org.opencb.opencga.core.models.variant.VariantWalkerParams;
import org.opencb.opencga.core.tools.annotations.Tool;
import org.opencb.opencga.core.tools.annotations.ToolParams;
import org.opencb.opencga.storage.core.variant.adaptors.VariantQueryParam;
import org.opencb.opencga.storage.core.variant.io.VariantWriterFactory;

import java.net.URI;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

/**
 * Tool that filters variants from the variant storage and walks them to produce a file.
 *
 * Runs in two steps: the walk itself, writing into the scratch directory, and a final
 * "move-files" step that relocates every generated file into the output directory.
 */
@Tool(id = VariantWalkerTool.ID, description = VariantWalkerTool.DESCRIPTION,
        scope = Tool.Scope.PROJECT, resource = Enums.Resource.VARIANT)
public class VariantWalkerTool extends OpenCgaTool {
    public static final String ID = "variant-walk";
    public static final String DESCRIPTION = "Filter and walk variants from the variant storage to produce a file";

    @ToolParams
    protected VariantWalkerParams toolParams = new VariantWalkerParams();

    // Output format resolved once in check() and reused by run().
    private VariantWriterFactory.VariantOutputFormat format;

    /**
     * Defaults the file format to VCF when none was given and resolves the output format
     * from the requested file format and the output file name.
     *
     * @throws Exception if the parent check or the format resolution fails
     */
    @Override
    protected void check() throws Exception {
        super.check();

        if (StringUtils.isEmpty(toolParams.getFileFormat())) {
            toolParams.setFileFormat(VariantWriterFactory.VariantOutputFormat.VCF.toString());
        }

        // The first argument must be the file format. Passing the output file name twice
        // (as before) ignored the requested format and the VCF default set above.
        format = VariantWriterFactory.toOutputFormat(toolParams.getFileFormat(), toolParams.getOutputFileName());
    }

    /**
     * @return the ordered step names executed by {@link #run()}
     */
    @Override
    protected List<String> getSteps() {
        return Arrays.asList(ID, "move-files");
    }

    /**
     * Walks the selected variants into the scratch directory and then moves the generated
     * files into the output directory.
     *
     * @throws Exception if any of the steps fails
     */
    @Override
    protected void run() throws Exception {
        List<URI> uris = new ArrayList<>(2);
        step(ID, () -> {
            // Use scratch directory to store intermediate files. Move files to final directory at the end
            // The scratch directory is expected to be faster than the final directory
            // This also avoids moving files to final directory if the tool fails
            Path outDir = getScratchDir();
            String outputFile = StringUtils.isEmpty(toolParams.getOutputFileName())
                    ? outDir.toString()
                    : outDir.resolve(toolParams.getOutputFileName()).toString();
            Query query = toolParams.toQuery();
            // Variant query params travel in the query; strip them from the options copy.
            QueryOptions queryOptions = new QueryOptions(params);
            for (VariantQueryParam param : VariantQueryParam.values()) {
                queryOptions.remove(param.key());
            }
            uris.addAll(variantStorageManager.walkData(outputFile,
                    format, query, queryOptions, toolParams.getDockerImage(), toolParams.getCommandLine(), token));
        });
        step("move-files", () -> {
            // Nothing to relocate; also avoids an IndexOutOfBoundsException on uris.get(0).
            if (uris.isEmpty()) {
                logger.warn("No files were generated; nothing to move");
                return;
            }
            // Move files to final directory
            IOManager ioManager = catalogManager.getIoManagerFactory().get(uris.get(0));
            for (URI uri : uris) {
                String fileName = UriUtils.fileName(uri);
                logger.info("Moving file -- " + fileName);
                ioManager.move(uri, getOutDir().resolve(fileName).toUri());
            }
        });
    }
}
org.opencb.opencga.analysis.variant.metadata.CatalogVariantMetadataFactory; @@ -187,6 +188,32 @@ public List exportData(String outputFile, VariantOutputFormat outputFormat, }); } + /** + * Exports the result of the given query and the associated metadata. + * + * @param outputFile Optional output file. If null or empty, will print into the Standard output. Won't export any metadata. + * @param format Variant Output format. + * @param query Query with the variants to export + * @param queryOptions Query options + * @param dockerImage Docker image to use + * @param commandLine Command line to use + * @param token User's session id + * @throws CatalogException if there is any error with Catalog + * @throws StorageEngineException If there is any error exporting variants + * @return generated files + */ + public List walkData(String outputFile, VariantOutputFormat format, + Query query, QueryOptions queryOptions, String dockerImage, String commandLine, String token) + throws CatalogException, StorageEngineException { + String anyStudy = catalogUtils.getAnyStudy(query, token); + return secureAnalysis(VariantWalkerTool.ID, anyStudy, queryOptions, token, engine -> { + Query finalQuery = catalogUtils.parseQuery(query, queryOptions, engine.getCellBaseUtils(), token); + checkSamplesPermissions(finalQuery, queryOptions, token); + URI outputUri = new VariantExportOperationManager(this, engine).getOutputUri(outputFile, format, finalQuery, token); + return engine.walkData(outputUri, format, finalQuery, queryOptions, dockerImage, commandLine); + }); + } + // --------------------------// // Data Operation methods // // --------------------------// @@ -506,6 +533,8 @@ public boolean hasVariantSetup(String studyStr, String token) throws CatalogExce public ObjectMap configureProject(String projectStr, ObjectMap params, String token) throws CatalogException, StorageEngineException { return secureOperationByProject("configure", projectStr, params, token, engine -> { + 
validateNewConfiguration(engine, params); + DataStore dataStore = getDataStoreByProjectId(projectStr, token); dataStore.getOptions().putAll(params); @@ -517,6 +546,7 @@ public ObjectMap configureProject(String projectStr, ObjectMap params, String to public ObjectMap configureStudy(String studyStr, ObjectMap params, String token) throws CatalogException, StorageEngineException { return secureOperation("configure", studyStr, params, token, engine -> { + validateNewConfiguration(engine, params); Study study = catalogManager.getStudyManager() .get(studyStr, new QueryOptions(INCLUDE, StudyDBAdaptor.QueryParams.INTERNAL_CONFIGURATION_VARIANT_ENGINE_OPTIONS.key()), @@ -540,6 +570,14 @@ public ObjectMap configureStudy(String studyStr, ObjectMap params, String token) }); } + private void validateNewConfiguration(VariantStorageEngine engine, ObjectMap params) throws StorageEngineException { + for (VariantStorageOptions option : VariantStorageOptions.values()) { + if (option.isProtected() && params.get(option.key()) != null) { + throw new StorageEngineException("Unable to update protected option '" + option.key() + "'"); + } + } + } + /** * Modify SampleIndex configuration. Automatically submit a job to rebuild the sample index. 
* diff --git a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/manager/operations/VariantExportOperationManager.java b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/manager/operations/VariantExportOperationManager.java index 54ca3d1111..880d0232a8 100644 --- a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/manager/operations/VariantExportOperationManager.java +++ b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/manager/operations/VariantExportOperationManager.java @@ -30,6 +30,7 @@ import org.opencb.opencga.storage.core.variant.adaptors.VariantQueryParam; import org.opencb.opencga.storage.core.variant.io.VariantWriterFactory; +import java.io.IOException; import java.net.URI; import java.net.URISyntaxException; import java.nio.file.Paths; @@ -48,6 +49,17 @@ public VariantExportOperationManager(VariantStorageManager variantStorageManager public List export(String outputFileStr, VariantWriterFactory.VariantOutputFormat outputFormat, String variantsFile, Query query, QueryOptions queryOptions, String token) throws Exception { + URI outputFile = getOutputUri(outputFileStr, outputFormat, query, token); + + VariantMetadataFactory metadataExporter = + new CatalogVariantMetadataFactory(catalogManager, variantStorageEngine.getDBAdaptor(), token); + + URI variantsFileUri = StringUtils.isEmpty(variantsFile) ? 
null : UriUtils.createUri(variantsFile); + return variantStorageEngine.exportData(outputFile, outputFormat, variantsFileUri, query, queryOptions, metadataExporter); + } + + public URI getOutputUri(String outputFileStr, VariantWriterFactory.VariantOutputFormat format, Query query, String token) + throws CatalogException, IOException { URI outputFile; if (!VariantWriterFactory.isStandardOutput(outputFileStr)) { URI outdirUri; @@ -71,19 +83,14 @@ public List export(String outputFileStr, VariantWriterFactory.VariantOutput outputFileName = buildOutputFileName(query, token); } outputFile = outdirUri.resolve(outputFileName); - outputFile = VariantWriterFactory.checkOutput(outputFile, outputFormat); + outputFile = VariantWriterFactory.checkOutput(outputFile, format); } else { outputFile = outdirUri; } } else { outputFile = null; } - - VariantMetadataFactory metadataExporter = - new CatalogVariantMetadataFactory(catalogManager, variantStorageEngine.getDBAdaptor(), token); - - URI variantsFileUri = StringUtils.isEmpty(variantsFile) ? 
null : UriUtils.createUri(variantsFile); - return variantStorageEngine.exportData(outputFile, outputFormat, variantsFileUri, query, queryOptions, metadataExporter); + return outputFile; } private String buildOutputFileName(Query query, String token) throws CatalogException { diff --git a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/OpenCgaCompleter.java b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/OpenCgaCompleter.java index fa5b348228..6197b04939 100644 --- a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/OpenCgaCompleter.java +++ b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/OpenCgaCompleter.java @@ -29,7 +29,7 @@ public abstract class OpenCgaCompleter implements Completer { .map(Candidate::new) .collect(toList()); - private List variantList = asList( "aggregationstats","annotation-metadata","annotation-query","circos-run","cohort-stats-delete","cohort-stats-info","cohort-stats-run","exomiser-run","export-run","family-genotypes","family-qc-run","file-delete","gatk-run","genome-plot-run","gwas-run","hr-detect-run","index-run","individual-qc-run","inferred-sex-run","knockout-gene-query","knockout-individual-query","knockout-run","mendelian-error-run","metadata","mutational-signature-query","mutational-signature-run","plink-run","query","relatedness-run","rvtests-run","sample-aggregation-stats","sample-eligibility-run","sample-qc-run","sample-query","sample-run","sample-stats-query","sample-stats-run","stats-export-run","stats-run") + private List variantList = asList( 
"aggregationstats","annotation-metadata","annotation-query","circos-run","cohort-stats-delete","cohort-stats-info","cohort-stats-run","exomiser-run","export-run","family-genotypes","family-qc-run","file-delete","gatk-run","genome-plot-run","gwas-run","hr-detect-run","index-run","individual-qc-run","inferred-sex-run","knockout-gene-query","knockout-individual-query","knockout-run","mendelian-error-run","metadata","mutational-signature-query","mutational-signature-run","plink-run","query","relatedness-run","rvtests-run","sample-aggregation-stats","sample-eligibility-run","sample-qc-run","sample-query","sample-run","sample-stats-query","sample-stats-run","stats-export-run","stats-run","walker-run") .stream() .map(Candidate::new) .collect(toList()); diff --git a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/OpencgaCliOptionsParser.java b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/OpencgaCliOptionsParser.java index cfd4cee991..eb95cf2fd1 100644 --- a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/OpencgaCliOptionsParser.java +++ b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/OpencgaCliOptionsParser.java @@ -84,6 +84,7 @@ public OpencgaCliOptionsParser() { analysisVariantSubCommands.addCommand("sample-stats-run", analysisVariantCommandOptions.runSampleStatsCommandOptions); analysisVariantSubCommands.addCommand("stats-export-run", analysisVariantCommandOptions.runStatsExportCommandOptions); analysisVariantSubCommands.addCommand("stats-run", analysisVariantCommandOptions.runStatsCommandOptions); + analysisVariantSubCommands.addCommand("walker-run", analysisVariantCommandOptions.runWalkerCommandOptions); projectsCommandOptions = new ProjectsCommandOptions(commonCommandOptions, jCommander); jCommander.addCommand("projects", projectsCommandOptions); diff --git a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/AnalysisVariantCommandExecutor.java 
b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/AnalysisVariantCommandExecutor.java index 57edea737f..3e3d5cd752 100644 --- a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/AnalysisVariantCommandExecutor.java +++ b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/AnalysisVariantCommandExecutor.java @@ -52,6 +52,7 @@ import org.opencb.opencga.core.models.variant.SampleVariantStatsAnalysisParams; import org.opencb.opencga.core.models.variant.VariantExportParams; import org.opencb.opencga.core.models.variant.VariantStatsAnalysisParams; +import org.opencb.opencga.core.models.variant.VariantWalkerParams; import org.opencb.opencga.core.response.QueryType; import org.opencb.opencga.core.response.RestResponse; import org.opencb.oskar.analysis.variant.gwas.GwasConfiguration; @@ -207,6 +208,9 @@ public void execute() throws Exception { case "stats-run": queryResponse = runStats(); break; + case "walker-run": + queryResponse = runWalker(); + break; default: logger.error("Subcommand not valid"); break; @@ -1874,4 +1878,130 @@ private RestResponse runStats() throws Exception { } return openCGAClient.getVariantClient().runStats(variantStatsAnalysisParams, queryParams); } + + private RestResponse runWalker() throws Exception { + logger.debug("Executing runWalker in Analysis - Variant command line"); + + AnalysisVariantCommandOptions.RunWalkerCommandOptions commandOptions = analysisVariantCommandOptions.runWalkerCommandOptions; + + ObjectMap queryParams = new ObjectMap(); + queryParams.putIfNotEmpty("include", commandOptions.include); + queryParams.putIfNotEmpty("exclude", commandOptions.exclude); + queryParams.putIfNotEmpty("project", commandOptions.project); + queryParams.putIfNotEmpty("study", commandOptions.study); + queryParams.putIfNotEmpty("jobId", commandOptions.jobId); + queryParams.putIfNotEmpty("jobDescription", commandOptions.jobDescription); + queryParams.putIfNotEmpty("jobDependsOn", 
commandOptions.jobDependsOn); + queryParams.putIfNotEmpty("jobTags", commandOptions.jobTags); + queryParams.putIfNotEmpty("jobScheduledStartTime", commandOptions.jobScheduledStartTime); + queryParams.putIfNotEmpty("jobPriority", commandOptions.jobPriority); + queryParams.putIfNotNull("jobDryRun", commandOptions.jobDryRun); + if (queryParams.get("study") == null && OpencgaMain.isShellMode()) { + queryParams.putIfNotEmpty("study", sessionManager.getSession().getCurrentStudy()); + } + + + VariantWalkerParams variantWalkerParams = null; + if (commandOptions.jsonDataModel) { + RestResponse res = new RestResponse<>(); + res.setType(QueryType.VOID); + PrintUtils.println(getObjectAsJSON(categoryName,"/{apiVersion}/analysis/variant/walker/run")); + return res; + } else if (commandOptions.jsonFile != null) { + variantWalkerParams = JacksonUtils.getDefaultObjectMapper() + .readValue(new java.io.File(commandOptions.jsonFile), VariantWalkerParams.class); + } else { + ObjectMap beanParams = new ObjectMap(); + putNestedIfNotEmpty(beanParams, "id", commandOptions.id, true); + putNestedIfNotEmpty(beanParams, "region", commandOptions.region, true); + putNestedIfNotEmpty(beanParams, "gene", commandOptions.gene, true); + putNestedIfNotEmpty(beanParams, "type", commandOptions.type, true); + putNestedIfNotEmpty(beanParams, "panel", commandOptions.panel, true); + putNestedIfNotEmpty(beanParams, "panelModeOfInheritance", commandOptions.panelModeOfInheritance, true); + putNestedIfNotEmpty(beanParams, "panelConfidence", commandOptions.panelConfidence, true); + putNestedIfNotEmpty(beanParams, "panelRoleInCancer", commandOptions.panelRoleInCancer, true); + putNestedIfNotNull(beanParams, "panelIntersection", commandOptions.panelIntersection, true); + putNestedIfNotEmpty(beanParams, "panelFeatureType", commandOptions.panelFeatureType, true); + putNestedIfNotEmpty(beanParams, "cohortStatsRef", commandOptions.cohortStatsRef, true); + putNestedIfNotEmpty(beanParams, "cohortStatsAlt", 
commandOptions.cohortStatsAlt, true); + putNestedIfNotEmpty(beanParams, "cohortStatsMaf", commandOptions.cohortStatsMaf, true); + putNestedIfNotEmpty(beanParams, "ct", commandOptions.ct, true); + putNestedIfNotEmpty(beanParams, "xref", commandOptions.xref, true); + putNestedIfNotEmpty(beanParams, "biotype", commandOptions.biotype, true); + putNestedIfNotEmpty(beanParams, "proteinSubstitution", commandOptions.proteinSubstitution, true); + putNestedIfNotEmpty(beanParams, "conservation", commandOptions.conservation, true); + putNestedIfNotEmpty(beanParams, "populationFrequencyMaf", commandOptions.populationFrequencyMaf, true); + putNestedIfNotEmpty(beanParams, "populationFrequencyAlt", commandOptions.populationFrequencyAlt, true); + putNestedIfNotEmpty(beanParams, "populationFrequencyRef", commandOptions.populationFrequencyRef, true); + putNestedIfNotEmpty(beanParams, "transcriptFlag", commandOptions.transcriptFlag, true); + putNestedIfNotEmpty(beanParams, "functionalScore", commandOptions.functionalScore, true); + putNestedIfNotEmpty(beanParams, "clinical", commandOptions.clinical, true); + putNestedIfNotEmpty(beanParams, "clinicalSignificance", commandOptions.clinicalSignificance, true); + putNestedIfNotNull(beanParams, "clinicalConfirmedStatus", commandOptions.clinicalConfirmedStatus, true); + putNestedIfNotEmpty(beanParams, "project", commandOptions.bodyProject, true); + putNestedIfNotEmpty(beanParams, "study", commandOptions.bodyStudy, true); + putNestedIfNotEmpty(beanParams, "savedFilter", commandOptions.savedFilter, true); + putNestedIfNotEmpty(beanParams, "chromosome", commandOptions.chromosome, true); + putNestedIfNotEmpty(beanParams, "reference", commandOptions.reference, true); + putNestedIfNotEmpty(beanParams, "alternate", commandOptions.alternate, true); + putNestedIfNotEmpty(beanParams, "release", commandOptions.release, true); + putNestedIfNotEmpty(beanParams, "includeStudy", commandOptions.includeStudy, true); + putNestedIfNotEmpty(beanParams, 
"includeSample", commandOptions.includeSample, true); + putNestedIfNotEmpty(beanParams, "includeFile", commandOptions.includeFile, true); + putNestedIfNotEmpty(beanParams, "includeSampleData", commandOptions.includeSampleData, true); + putNestedIfNotNull(beanParams, "includeSampleId", commandOptions.includeSampleId, true); + putNestedIfNotNull(beanParams, "includeGenotype", commandOptions.includeGenotype, true); + putNestedIfNotEmpty(beanParams, "file", commandOptions.file, true); + putNestedIfNotEmpty(beanParams, "qual", commandOptions.qual, true); + putNestedIfNotEmpty(beanParams, "filter", commandOptions.filter, true); + putNestedIfNotEmpty(beanParams, "fileData", commandOptions.fileData, true); + putNestedIfNotEmpty(beanParams, "genotype", commandOptions.genotype, true); + putNestedIfNotEmpty(beanParams, "sample", commandOptions.sample, true); + putNestedIfNotNull(beanParams, "sampleLimit", commandOptions.sampleLimit, true); + putNestedIfNotNull(beanParams, "sampleSkip", commandOptions.sampleSkip, true); + putNestedIfNotEmpty(beanParams, "sampleData", commandOptions.sampleData, true); + putNestedIfNotEmpty(beanParams, "sampleAnnotation", commandOptions.sampleAnnotation, true); + putNestedIfNotEmpty(beanParams, "family", commandOptions.family, true); + putNestedIfNotEmpty(beanParams, "familyMembers", commandOptions.familyMembers, true); + putNestedIfNotEmpty(beanParams, "familyDisorder", commandOptions.familyDisorder, true); + putNestedIfNotEmpty(beanParams, "familyProband", commandOptions.familyProband, true); + putNestedIfNotEmpty(beanParams, "familySegregation", commandOptions.familySegregation, true); + putNestedIfNotEmpty(beanParams, "cohort", commandOptions.cohort, true); + putNestedIfNotEmpty(beanParams, "cohortStatsPass", commandOptions.cohortStatsPass, true); + putNestedIfNotEmpty(beanParams, "cohortStatsMgf", commandOptions.cohortStatsMgf, true); + putNestedIfNotEmpty(beanParams, "missingAlleles", commandOptions.missingAlleles, true); + 
putNestedIfNotEmpty(beanParams, "missingGenotypes", commandOptions.missingGenotypes, true); + putNestedIfNotNull(beanParams, "annotationExists", commandOptions.annotationExists, true); + putNestedIfNotEmpty(beanParams, "score", commandOptions.score, true); + putNestedIfNotEmpty(beanParams, "polyphen", commandOptions.polyphen, true); + putNestedIfNotEmpty(beanParams, "sift", commandOptions.sift, true); + putNestedIfNotEmpty(beanParams, "geneRoleInCancer", commandOptions.geneRoleInCancer, true); + putNestedIfNotEmpty(beanParams, "geneTraitId", commandOptions.geneTraitId, true); + putNestedIfNotEmpty(beanParams, "geneTraitName", commandOptions.geneTraitName, true); + putNestedIfNotEmpty(beanParams, "trait", commandOptions.trait, true); + putNestedIfNotEmpty(beanParams, "cosmic", commandOptions.cosmic, true); + putNestedIfNotEmpty(beanParams, "clinvar", commandOptions.clinvar, true); + putNestedIfNotEmpty(beanParams, "hpo", commandOptions.hpo, true); + putNestedIfNotEmpty(beanParams, "go", commandOptions.go, true); + putNestedIfNotEmpty(beanParams, "expression", commandOptions.expression, true); + putNestedIfNotEmpty(beanParams, "proteinKeyword", commandOptions.proteinKeyword, true); + putNestedIfNotEmpty(beanParams, "drug", commandOptions.drug, true); + putNestedIfNotEmpty(beanParams, "customAnnotation", commandOptions.customAnnotation, true); + putNestedIfNotEmpty(beanParams, "unknownGenotype", commandOptions.unknownGenotype, true); + putNestedIfNotNull(beanParams, "sampleMetadata", commandOptions.sampleMetadata, true); + putNestedIfNotNull(beanParams, "sort", commandOptions.sort, true); + putNestedIfNotEmpty(beanParams, "outdir", commandOptions.outdir, true); + putNestedIfNotEmpty(beanParams, "outputFileName", commandOptions.outputFileName, true); + putNestedIfNotEmpty(beanParams, "fileFormat", commandOptions.fileFormat, true); + putNestedIfNotEmpty(beanParams, "dockerImage", commandOptions.dockerImage, true); + putNestedIfNotEmpty(beanParams, "commandLine", 
commandOptions.commandLine, true); + putNestedIfNotEmpty(beanParams, "include", commandOptions.bodyInclude, true); + putNestedIfNotEmpty(beanParams, "exclude", commandOptions.bodyExclude, true); + + variantWalkerParams = JacksonUtils.getDefaultObjectMapper().copy() + .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, true) + .readValue(beanParams.toJson(), VariantWalkerParams.class); + } + return openCGAClient.getVariantClient().runWalker(variantWalkerParams, queryParams); + } } \ No newline at end of file diff --git a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/options/AnalysisVariantCommandOptions.java b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/options/AnalysisVariantCommandOptions.java index 44a0ad64dd..998a7dc510 100644 --- a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/options/AnalysisVariantCommandOptions.java +++ b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/options/AnalysisVariantCommandOptions.java @@ -72,6 +72,7 @@ public class AnalysisVariantCommandOptions { public RunSampleStatsCommandOptions runSampleStatsCommandOptions; public RunStatsExportCommandOptions runStatsExportCommandOptions; public RunStatsCommandOptions runStatsCommandOptions; + public RunWalkerCommandOptions runWalkerCommandOptions; public AnalysisVariantCommandOptions(CommonCommandOptions commonCommandOptions, JCommander jCommander) { @@ -117,6 +118,7 @@ public AnalysisVariantCommandOptions(CommonCommandOptions commonCommandOptions, this.runSampleStatsCommandOptions = new RunSampleStatsCommandOptions(); this.runStatsExportCommandOptions = new RunStatsExportCommandOptions(); this.runStatsCommandOptions = new RunStatsCommandOptions(); + this.runWalkerCommandOptions = new RunWalkerCommandOptions(); } @@ -2832,4 +2834,306 @@ public class RunStatsCommandOptions { } + @Parameters(commandNames = {"walker-run"}, commandDescription ="Filter and walk variants from the variant storage to produce a file") + public class 
RunWalkerCommandOptions { + + @ParametersDelegate + public CommonCommandOptions commonOptions = commonCommandOptions; + + @Parameter(names = {"--json-file"}, description = "File with the body data in JSON format. Note, that using this parameter will ignore all the other parameters.", required = false, arity = 1) + public String jsonFile; + + @Parameter(names = {"--json-data-model"}, description = "Show example of file structure for body data.", help = true, arity = 0) + public Boolean jsonDataModel = false; + + @Parameter(names = {"--include", "-I"}, description = "Fields included in the response, whole JSON path must be provided", required = false, arity = 1) + public String include; + + @Parameter(names = {"--exclude", "-E"}, description = "Fields excluded in the response, whole JSON path must be provided", required = false, arity = 1) + public String exclude; + + @Parameter(names = {"--project", "-p"}, description = "Project [organization@]project where project can be either the ID or the alias", required = false, arity = 1) + public String project; + + @Parameter(names = {"--study", "-s"}, description = "Study [[organization@]project:]study where study and project can be either the ID or UUID", required = false, arity = 1) + public String study; + + @Parameter(names = {"--job-id"}, description = "Job ID. It must be a unique string within the study. 
An ID will be autogenerated automatically if not provided.", required = false, arity = 1) + public String jobId; + + @Parameter(names = {"--job-description"}, description = "Job description", required = false, arity = 1) + public String jobDescription; + + @Parameter(names = {"--job-depends-on"}, description = "Comma separated list of existing job IDs the job will depend on.", required = false, arity = 1) + public String jobDependsOn; + + @Parameter(names = {"--job-tags"}, description = "Job tags", required = false, arity = 1) + public String jobTags; + + @Parameter(names = {"--job-scheduled-start-time"}, description = "Time when the job is scheduled to start.", required = false, arity = 1) + public String jobScheduledStartTime; + + @Parameter(names = {"--job-priority"}, description = "Priority of the job", required = false, arity = 1) + public String jobPriority; + + @Parameter(names = {"--job-dry-run"}, description = "Flag indicating that the job will be executed in dry-run mode. In this mode, OpenCGA will validate that all parameters and prerequisites are correctly set for successful execution, but the job will not actually run.", required = false, arity = 1) + public Boolean jobDryRun; + + @Parameter(names = {"--id"}, description = "The body web service id parameter", required = false, arity = 1) + public String id; + + @Parameter(names = {"--region"}, description = "The body web service region parameter", required = false, arity = 1) + public String region; + + @Parameter(names = {"--gene"}, description = "The body web service gene parameter", required = false, arity = 1) + public String gene; + + @Parameter(names = {"--type"}, description = "The body web service type parameter", required = false, arity = 1) + public String type; + + @Parameter(names = {"--panel"}, description = "The body web service panel parameter", required = false, arity = 1) + public String panel; + + @Parameter(names = {"--panel-mode-of-inheritance"}, description = "The body web service 
panelModeOfInheritance parameter", required = false, arity = 1) + public String panelModeOfInheritance; + + @Parameter(names = {"--panel-confidence"}, description = "The body web service panelConfidence parameter", required = false, arity = 1) + public String panelConfidence; + + @Parameter(names = {"--panel-role-in-cancer"}, description = "The body web service panelRoleInCancer parameter", required = false, arity = 1) + public String panelRoleInCancer; + + @Parameter(names = {"--panel-intersection"}, description = "The body web service panelIntersection parameter", required = false, help = true, arity = 0) + public boolean panelIntersection = false; + + @Parameter(names = {"--panel-feature-type"}, description = "The body web service panelFeatureType parameter", required = false, arity = 1) + public String panelFeatureType; + + @Parameter(names = {"--cohort-stats-ref"}, description = "The body web service cohortStatsRef parameter", required = false, arity = 1) + public String cohortStatsRef; + + @Parameter(names = {"--cohort-stats-alt"}, description = "The body web service cohortStatsAlt parameter", required = false, arity = 1) + public String cohortStatsAlt; + + @Parameter(names = {"--cohort-stats-maf"}, description = "The body web service cohortStatsMaf parameter", required = false, arity = 1) + public String cohortStatsMaf; + + @Parameter(names = {"--ct"}, description = "The body web service ct parameter", required = false, arity = 1) + public String ct; + + @Parameter(names = {"--xref"}, description = "The body web service xref parameter", required = false, arity = 1) + public String xref; + + @Parameter(names = {"--biotype"}, description = "The body web service biotype parameter", required = false, arity = 1) + public String biotype; + + @Parameter(names = {"--protein-substitution"}, description = "The body web service proteinSubstitution parameter", required = false, arity = 1) + public String proteinSubstitution; + + @Parameter(names = {"--conservation"}, 
description = "The body web service conservation parameter", required = false, arity = 1) + public String conservation; + + @Parameter(names = {"--population-frequency-maf"}, description = "The body web service populationFrequencyMaf parameter", required = false, arity = 1) + public String populationFrequencyMaf; + + @Parameter(names = {"--population-frequency-alt"}, description = "The body web service populationFrequencyAlt parameter", required = false, arity = 1) + public String populationFrequencyAlt; + + @Parameter(names = {"--population-frequency-ref"}, description = "The body web service populationFrequencyRef parameter", required = false, arity = 1) + public String populationFrequencyRef; + + @Parameter(names = {"--transcript-flag"}, description = "The body web service transcriptFlag parameter", required = false, arity = 1) + public String transcriptFlag; + + @Parameter(names = {"--functional-score"}, description = "The body web service functionalScore parameter", required = false, arity = 1) + public String functionalScore; + + @Parameter(names = {"--clinical"}, description = "The body web service clinical parameter", required = false, arity = 1) + public String clinical; + + @Parameter(names = {"--clinical-significance"}, description = "The body web service clinicalSignificance parameter", required = false, arity = 1) + public String clinicalSignificance; + + @Parameter(names = {"--clinical-confirmed-status"}, description = "The body web service clinicalConfirmedStatus parameter", required = false, help = true, arity = 0) + public boolean clinicalConfirmedStatus = false; + + @Parameter(names = {"--body_project"}, description = "The body web service project parameter", required = false, arity = 1) + public String bodyProject; + + @Parameter(names = {"--body_study"}, description = "The body web service study parameter", required = false, arity = 1) + public String bodyStudy; + + @Parameter(names = {"--saved-filter"}, description = "The body web service 
savedFilter parameter", required = false, arity = 1) + public String savedFilter; + + @Parameter(names = {"--chromosome"}, description = "The body web service chromosome parameter", required = false, arity = 1) + public String chromosome; + + @Parameter(names = {"--reference"}, description = "The body web service reference parameter", required = false, arity = 1) + public String reference; + + @Parameter(names = {"--alternate"}, description = "The body web service alternate parameter", required = false, arity = 1) + public String alternate; + + @Parameter(names = {"--release"}, description = "The body web service release parameter", required = false, arity = 1) + public String release; + + @Parameter(names = {"--include-study"}, description = "The body web service includeStudy parameter", required = false, arity = 1) + public String includeStudy; + + @Parameter(names = {"--include-sample"}, description = "The body web service includeSample parameter", required = false, arity = 1) + public String includeSample; + + @Parameter(names = {"--include-file"}, description = "The body web service includeFile parameter", required = false, arity = 1) + public String includeFile; + + @Parameter(names = {"--include-sample-data"}, description = "The body web service includeSampleData parameter", required = false, arity = 1) + public String includeSampleData; + + @Parameter(names = {"--include-sample-id"}, description = "The body web service includeSampleId parameter", required = false, help = true, arity = 0) + public boolean includeSampleId = false; + + @Parameter(names = {"--include-genotype"}, description = "The body web service includeGenotype parameter", required = false, help = true, arity = 0) + public boolean includeGenotype = false; + + @Parameter(names = {"--file"}, description = "The body web service file parameter", required = false, arity = 1) + public String file; + + @Parameter(names = {"--qual"}, description = "The body web service qual parameter", required = 
false, arity = 1) + public String qual; + + @Parameter(names = {"--filter"}, description = "The body web service filter parameter", required = false, arity = 1) + public String filter; + + @Parameter(names = {"--file-data"}, description = "The body web service fileData parameter", required = false, arity = 1) + public String fileData; + + @Parameter(names = {"--genotype"}, description = "The body web service genotype parameter", required = false, arity = 1) + public String genotype; + + @Parameter(names = {"--sample"}, description = "The body web service sample parameter", required = false, arity = 1) + public String sample; + + @Parameter(names = {"--sample-limit"}, description = "The body web service sampleLimit parameter", required = false, arity = 1) + public Integer sampleLimit; + + @Parameter(names = {"--sample-skip"}, description = "The body web service sampleSkip parameter", required = false, arity = 1) + public Integer sampleSkip; + + @Parameter(names = {"--sample-data"}, description = "The body web service sampleData parameter", required = false, arity = 1) + public String sampleData; + + @Parameter(names = {"--sample-annotation"}, description = "The body web service sampleAnnotation parameter", required = false, arity = 1) + public String sampleAnnotation; + + @Parameter(names = {"--family"}, description = "The body web service family parameter", required = false, arity = 1) + public String family; + + @Parameter(names = {"--family-members"}, description = "The body web service familyMembers parameter", required = false, arity = 1) + public String familyMembers; + + @Parameter(names = {"--family-disorder"}, description = "The body web service familyDisorder parameter", required = false, arity = 1) + public String familyDisorder; + + @Parameter(names = {"--family-proband"}, description = "The body web service familyProband parameter", required = false, arity = 1) + public String familyProband; + + @Parameter(names = {"--family-segregation"}, description = 
"The body web service familySegregation parameter", required = false, arity = 1) + public String familySegregation; + + @Parameter(names = {"--cohort"}, description = "The body web service cohort parameter", required = false, arity = 1) + public String cohort; + + @Parameter(names = {"--cohort-stats-pass"}, description = "The body web service cohortStatsPass parameter", required = false, arity = 1) + public String cohortStatsPass; + + @Parameter(names = {"--cohort-stats-mgf"}, description = "The body web service cohortStatsMgf parameter", required = false, arity = 1) + public String cohortStatsMgf; + + @Parameter(names = {"--missing-alleles"}, description = "The body web service missingAlleles parameter", required = false, arity = 1) + public String missingAlleles; + + @Parameter(names = {"--missing-genotypes"}, description = "The body web service missingGenotypes parameter", required = false, arity = 1) + public String missingGenotypes; + + @Parameter(names = {"--annotation-exists"}, description = "The body web service annotationExists parameter", required = false, arity = 1) + public Boolean annotationExists; + + @Parameter(names = {"--score"}, description = "The body web service score parameter", required = false, arity = 1) + public String score; + + @Parameter(names = {"--polyphen"}, description = "The body web service polyphen parameter", required = false, arity = 1) + public String polyphen; + + @Parameter(names = {"--sift"}, description = "The body web service sift parameter", required = false, arity = 1) + public String sift; + + @Parameter(names = {"--gene-role-in-cancer"}, description = "The body web service geneRoleInCancer parameter", required = false, arity = 1) + public String geneRoleInCancer; + + @Parameter(names = {"--gene-trait-id"}, description = "The body web service geneTraitId parameter", required = false, arity = 1) + public String geneTraitId; + + @Parameter(names = {"--gene-trait-name"}, description = "The body web service geneTraitName 
parameter", required = false, arity = 1) + public String geneTraitName; + + @Parameter(names = {"--trait"}, description = "The body web service trait parameter", required = false, arity = 1) + public String trait; + + @Parameter(names = {"--cosmic"}, description = "The body web service cosmic parameter", required = false, arity = 1) + public String cosmic; + + @Parameter(names = {"--clinvar"}, description = "The body web service clinvar parameter", required = false, arity = 1) + public String clinvar; + + @Parameter(names = {"--hpo"}, description = "The body web service hpo parameter", required = false, arity = 1) + public String hpo; + + @Parameter(names = {"--go"}, description = "The body web service go parameter", required = false, arity = 1) + public String go; + + @Parameter(names = {"--expression"}, description = "The body web service expression parameter", required = false, arity = 1) + public String expression; + + @Parameter(names = {"--protein-keyword"}, description = "The body web service proteinKeyword parameter", required = false, arity = 1) + public String proteinKeyword; + + @Parameter(names = {"--drug"}, description = "The body web service drug parameter", required = false, arity = 1) + public String drug; + + @Parameter(names = {"--custom-annotation"}, description = "The body web service customAnnotation parameter", required = false, arity = 1) + public String customAnnotation; + + @Parameter(names = {"--unknown-genotype"}, description = "The body web service unknownGenotype parameter", required = false, arity = 1) + public String unknownGenotype; + + @Parameter(names = {"--sample-metadata"}, description = "The body web service sampleMetadata parameter", required = false, help = true, arity = 0) + public boolean sampleMetadata = false; + + @Parameter(names = {"--sort"}, description = "The body web service sort parameter", required = false, help = true, arity = 0) + public boolean sort = false; + + @Parameter(names = {"--outdir"}, description = "The 
body web service outdir parameter", required = false, arity = 1) + public String outdir; + + @Parameter(names = {"--output-file-name"}, description = "The body web service outputFileName parameter", required = false, arity = 1) + public String outputFileName; + + @Parameter(names = {"--file-format"}, description = "The body web service fileFormat parameter", required = false, arity = 1) + public String fileFormat; + + @Parameter(names = {"--docker-image"}, description = "The body web service dockerImage parameter", required = false, arity = 1) + public String dockerImage; + + @Parameter(names = {"--command-line"}, description = "The body web service commandLine parameter", required = false, arity = 1) + public String commandLine; + + @Parameter(names = {"--body_include"}, description = "The body web service include parameter", required = false, arity = 1) + public String bodyInclude; + + @Parameter(names = {"--body_exclude"}, description = "The body web service exclude parameter", required = false, arity = 1) + public String bodyExclude; + + } + } \ No newline at end of file diff --git a/opencga-client/src/main/R/R/Variant-methods.R b/opencga-client/src/main/R/R/Variant-methods.R index 5413a7604b..b6979ac0c5 100644 --- a/opencga-client/src/main/R/R/Variant-methods.R +++ b/opencga-client/src/main/R/R/Variant-methods.R @@ -58,6 +58,7 @@ #' | runSampleStats | /{apiVersion}/analysis/variant/sample/stats/run | study, jobId, jobDescription, jobDependsOn, jobTags, jobScheduledStartTime, jobPriority, jobDryRun, body[*] | #' | runStatsExport | /{apiVersion}/analysis/variant/stats/export/run | project, study, jobId, jobDescription, jobDependsOn, jobTags, jobScheduledStartTime, jobPriority, jobDryRun, body[*] | #' | runStats | /{apiVersion}/analysis/variant/stats/run | study, jobId, jobDescription, jobDependsOn, jobTags, jobScheduledStartTime, jobPriority, jobDryRun, body[*] | +#' | runWalker | /{apiVersion}/analysis/variant/walker/run | include, exclude, project, study, 
jobId, jobDescription, jobDependsOn, jobTags, jobScheduledStartTime, jobPriority, jobDryRun, body[*] | #' #' @md #' @seealso \url{http://docs.opencb.org/display/opencga/Using+OpenCGA} and the RESTful API documentation @@ -718,5 +719,22 @@ setMethod("variantClient", "OpencgaR", function(OpencgaR, endpointName, params=N #' @param data Variant stats params. runStats=fetchOpenCGA(object=OpencgaR, category="analysis", categoryId=NULL, subcategory="variant/stats", subcategoryId=NULL, action="run", params=params, httpMethod="POST", as.queryParam=NULL, ...), + + #' @section Endpoint /{apiVersion}/analysis/variant/walker/run: + #' Filter and walk variants from the variant storage to produce a file. + #' @param include Fields included in the response, whole JSON path must be provided. + #' @param exclude Fields excluded in the response, whole JSON path must be provided. + #' @param project Project [organization@]project where project can be either the ID or the alias. + #' @param study Study [[organization@]project:]study where study and project can be either the ID or UUID. + #' @param jobId Job ID. It must be a unique string within the study. An ID will be autogenerated automatically if not provided. + #' @param jobDescription Job description. + #' @param jobDependsOn Comma separated list of existing job IDs the job will depend on. + #' @param jobTags Job tags. + #' @param jobScheduledStartTime Time when the job is scheduled to start. + #' @param jobPriority Priority of the job. + #' @param jobDryRun Flag indicating that the job will be executed in dry-run mode. In this mode, OpenCGA will validate that all parameters and prerequisites are correctly set for successful execution, but the job will not actually run. + #' @param data Variant walker params. 
+ runWalker=fetchOpenCGA(object=OpencgaR, category="analysis", categoryId=NULL, subcategory="variant/walker", + subcategoryId=NULL, action="run", params=params, httpMethod="POST", as.queryParam=NULL, ...), ) }) \ No newline at end of file diff --git a/opencga-client/src/main/java/org/opencb/opencga/client/rest/clients/VariantClient.java b/opencga-client/src/main/java/org/opencb/opencga/client/rest/clients/VariantClient.java index 6a68ae8ea8..04b6aa2da4 100644 --- a/opencga-client/src/main/java/org/opencb/opencga/client/rest/clients/VariantClient.java +++ b/opencga-client/src/main/java/org/opencb/opencga/client/rest/clients/VariantClient.java @@ -55,6 +55,7 @@ import org.opencb.opencga.core.models.variant.SampleVariantStatsAnalysisParams; import org.opencb.opencga.core.models.variant.VariantExportParams; import org.opencb.opencga.core.models.variant.VariantStatsAnalysisParams; +import org.opencb.opencga.core.models.variant.VariantWalkerParams; import org.opencb.opencga.core.response.RestResponse; @@ -1101,4 +1102,29 @@ public RestResponse runStats(VariantStatsAnalysisParams data, ObjectMap par params.put("body", data); return execute("analysis", null, "variant/stats", null, "run", params, POST, Job.class); } + + /** + * Filter and walk variants from the variant storage to produce a file. + * @param data Variant walker params. + * @param params Map containing any of the following optional parameters. + * include: Fields included in the response, whole JSON path must be provided. + * exclude: Fields excluded in the response, whole JSON path must be provided. + * project: Project [organization@]project where project can be either the ID or the alias. + * study: Study [[organization@]project:]study where study and project can be either the ID or UUID. + * jobId: Job ID. It must be a unique string within the study. An ID will be autogenerated automatically if not provided. + * jobDescription: Job description. 
+ * jobDependsOn: Comma separated list of existing job IDs the job will depend on. + * jobTags: Job tags. + * jobScheduledStartTime: Time when the job is scheduled to start. + * jobPriority: Priority of the job. + * jobDryRun: Flag indicating that the job will be executed in dry-run mode. In this mode, OpenCGA will validate that all + * parameters and prerequisites are correctly set for successful execution, but the job will not actually run. + * @return a RestResponse object. + * @throws ClientException ClientException if there is any server error. + */ + public RestResponse runWalker(VariantWalkerParams data, ObjectMap params) throws ClientException { + params = params != null ? params : new ObjectMap(); + params.put("body", data); + return execute("analysis", null, "variant/walker", null, "run", params, POST, Job.class); + } } diff --git a/opencga-client/src/main/javascript/Variant.js b/opencga-client/src/main/javascript/Variant.js index 7d8b01966d..910b01cc8c 100644 --- a/opencga-client/src/main/javascript/Variant.js +++ b/opencga-client/src/main/javascript/Variant.js @@ -959,4 +959,26 @@ export default class Variant extends OpenCGAParentClass { return this._post("analysis", null, "variant/stats", null, "run", data, params); } + /** Filter and walk variants from the variant storage to produce a file + * @param {Object} data - Variant walker params. + * @param {Object} [params] - The Object containing the following optional parameters: + * @param {String} [params.include] - Fields included in the response, whole JSON path must be provided. + * @param {String} [params.exclude] - Fields excluded in the response, whole JSON path must be provided. + * @param {String} [params.project] - Project [organization@]project where project can be either the ID or the alias. + * @param {String} [params.study] - Study [[organization@]project:]study where study and project can be either the ID or UUID. + * @param {String} [params.jobId] - Job ID. 
It must be a unique string within the study. An ID will be autogenerated automatically if not + * provided. + * @param {String} [params.jobDescription] - Job description. + * @param {String} [params.jobDependsOn] - Comma separated list of existing job IDs the job will depend on. + * @param {String} [params.jobTags] - Job tags. + * @param {String} [params.jobScheduledStartTime] - Time when the job is scheduled to start. + * @param {String} [params.jobPriority] - Priority of the job. + * @param {Boolean} [params.jobDryRun] - Flag indicating that the job will be executed in dry-run mode. In this mode, OpenCGA will + * validate that all parameters and prerequisites are correctly set for successful execution, but the job will not actually run. + * @returns {Promise} Promise object in the form of RestResponse instance. + */ + runWalker(data, params) { + return this._post("analysis", null, "variant/walker", null, "run", data, params); + } + } \ No newline at end of file diff --git a/opencga-client/src/main/python/pyopencga/rest_clients/variant_client.py b/opencga-client/src/main/python/pyopencga/rest_clients/variant_client.py index 3993f48ba2..166e14137a 100644 --- a/opencga-client/src/main/python/pyopencga/rest_clients/variant_client.py +++ b/opencga-client/src/main/python/pyopencga/rest_clients/variant_client.py @@ -1312,3 +1312,34 @@ def run_stats(self, data=None, **options): return self._post(category='analysis', resource='run', subcategory='variant/stats', data=data, **options) + def run_walker(self, data=None, **options): + """ + Filter and walk variants from the variant storage to produce a file. + PATH: /{apiVersion}/analysis/variant/walker/run + + :param dict data: Variant walker params. (REQUIRED) + :param str include: Fields included in the response, whole JSON path + must be provided. + :param str exclude: Fields excluded in the response, whole JSON path + must be provided. 
+ :param str project: Project [organization@]project where project can + be either the ID or the alias. + :param str study: Study [[organization@]project:]study where study and + project can be either the ID or UUID. + :param str job_id: Job ID. It must be a unique string within the + study. An ID will be autogenerated automatically if not provided. + :param str job_description: Job description. + :param str job_depends_on: Comma separated list of existing job IDs + the job will depend on. + :param str job_tags: Job tags. + :param str job_scheduled_start_time: Time when the job is scheduled to + start. + :param str job_priority: Priority of the job. + :param bool job_dry_run: Flag indicating that the job will be executed + in dry-run mode. In this mode, OpenCGA will validate that all + parameters and prerequisites are correctly set for successful + execution, but the job will not actually run. + """ + + return self._post(category='analysis', resource='run', subcategory='variant/walker', data=data, **options) + diff --git a/opencga-core/src/main/java/org/opencb/opencga/core/cellbase/CellBaseValidator.java b/opencga-core/src/main/java/org/opencb/opencga/core/cellbase/CellBaseValidator.java index e6b7f7353c..88c06062bc 100644 --- a/opencga-core/src/main/java/org/opencb/opencga/core/cellbase/CellBaseValidator.java +++ b/opencga-core/src/main/java/org/opencb/opencga/core/cellbase/CellBaseValidator.java @@ -12,13 +12,11 @@ import org.opencb.commons.datastore.core.ObjectMap; import org.opencb.commons.datastore.core.QueryOptions; import org.opencb.commons.utils.VersionUtils; - import org.opencb.opencga.core.config.storage.CellBaseConfiguration; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.IOException; - import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; @@ -148,13 +146,13 @@ public void validate() throws IOException { private CellBaseConfiguration validate(boolean autoComplete) throws IOException { 
CellBaseConfiguration cellBaseConfiguration = getCellBaseConfiguration(); String inputVersion = getVersion(); - CellBaseDataResponse species; + SpeciesProperties species; try { species = retryMetaSpecies(); } catch (RuntimeException e) { throw new IllegalArgumentException("Unable to access cellbase url '" + getURL() + "', version '" + inputVersion + "'", e); } - if (species == null || species.firstResult() == null) { + if (species == null) { if (autoComplete && !cellBaseConfiguration.getVersion().startsWith("v")) { // Version might be missing the starting "v" cellBaseConfiguration.setVersion("v" + cellBaseConfiguration.getVersion()); @@ -162,10 +160,10 @@ private CellBaseConfiguration validate(boolean autoComplete) throws IOException species = retryMetaSpecies(); } } - if (species == null || species.firstResult() == null) { + if (species == null) { throw new IllegalArgumentException("Unable to access cellbase url '" + getURL() + "', version '" + inputVersion + "'"); } - validateSpeciesAssembly(species.firstResult()); + validateSpeciesAssembly(species); String serverVersion = getVersionFromServer(); if (!supportsDataRelease(serverVersion)) { @@ -324,14 +322,18 @@ public String getVersionFromServer() throws IOException { } private ObjectMap retryMetaAbout() throws IOException { - return retry(3, () -> cellBaseClient.getMetaClient().about().firstResult()); + return retry("meta/about", () -> cellBaseClient.getMetaClient().about().firstResult()); } - private CellBaseDataResponse retryMetaSpecies() throws IOException { - return retry(3, () -> cellBaseClient.getMetaClient().species()); + private SpeciesProperties retryMetaSpecies() throws IOException { + return retry("meta/species", () -> cellBaseClient.getMetaClient().species().firstResult()); } - private T retry(int retries, Callable function) throws IOException { + private T retry(String name, Callable function) throws IOException { + return retry(name, function, 3); + } + + private T retry(String name, Callable 
function, int retries) throws IOException { if (retries <= 0) { return null; } @@ -345,8 +347,8 @@ private T retry(int retries, Callable function) throws IOException { if (result == null) { try { // Retry - logger.warn("Unable to get reach cellbase " + toString() + ". Retrying..."); - result = retry(retries - 1, function); + logger.warn("Unable to get '{}' from cellbase " + toString() + ". Retrying...", name); + result = retry(name, function, retries - 1); } catch (Exception e1) { if (e == null) { e = e1; @@ -359,7 +361,6 @@ private T retry(int retries, Callable function) throws IOException { throw new IOException("Error reading from cellbase " + toString(), e); } } - } return result; } diff --git a/opencga-core/src/main/java/org/opencb/opencga/core/config/ConfigurationOption.java b/opencga-core/src/main/java/org/opencb/opencga/core/config/ConfigurationOption.java index 20024e9152..8101aa8c2e 100644 --- a/opencga-core/src/main/java/org/opencb/opencga/core/config/ConfigurationOption.java +++ b/opencga-core/src/main/java/org/opencb/opencga/core/config/ConfigurationOption.java @@ -6,6 +6,10 @@ public interface ConfigurationOption { T defaultValue(); + default boolean isProtected() { + return false; + } + // default boolean isFinal() { // return false; // } diff --git a/opencga-core/src/main/java/org/opencb/opencga/core/models/variant/VariantWalkerParams.java b/opencga-core/src/main/java/org/opencb/opencga/core/models/variant/VariantWalkerParams.java new file mode 100644 index 0000000000..ef541690fc --- /dev/null +++ b/opencga-core/src/main/java/org/opencb/opencga/core/models/variant/VariantWalkerParams.java @@ -0,0 +1,75 @@ +package org.opencb.opencga.core.models.variant; + +public class VariantWalkerParams extends VariantQueryParams { + public static final String DESCRIPTION = "Variant walker params"; + private String outdir; + private String outputFileName; + private String fileFormat; + private String dockerImage; + private String commandLine; + private String 
include; + private String exclude; + + public String getOutdir() { + return outdir; + } + + public VariantWalkerParams setOutdir(String outdir) { + this.outdir = outdir; + return this; + } + + public String getOutputFileName() { + return outputFileName; + } + + public VariantWalkerParams setOutputFileName(String outputFileName) { + this.outputFileName = outputFileName; + return this; + } + + public String getFileFormat() { + return fileFormat; + } + + public VariantWalkerParams setFileFormat(String fileFormat) { + this.fileFormat = fileFormat; + return this; + } + + public String getDockerImage() { + return dockerImage; + } + + public VariantWalkerParams setDockerImage(String dockerImage) { + this.dockerImage = dockerImage; + return this; + } + + public String getCommandLine() { + return commandLine; + } + + public VariantWalkerParams setCommandLine(String commandLine) { + this.commandLine = commandLine; + return this; + } + + public String getInclude() { + return include; + } + + public VariantWalkerParams setInclude(String include) { + this.include = include; + return this; + } + + public String getExclude() { + return exclude; + } + + public VariantWalkerParams setExclude(String exclude) { + this.exclude = exclude; + return this; + } +} diff --git a/opencga-server/src/main/java/org/opencb/opencga/server/rest/analysis/AnalysisWebService.java b/opencga-server/src/main/java/org/opencb/opencga/server/rest/analysis/AnalysisWebService.java index 7fa1ebe6dd..dc19bbe85b 100644 --- a/opencga-server/src/main/java/org/opencb/opencga/server/rest/analysis/AnalysisWebService.java +++ b/opencga-server/src/main/java/org/opencb/opencga/server/rest/analysis/AnalysisWebService.java @@ -16,7 +16,6 @@ package org.opencb.opencga.server.rest.analysis; -import org.opencb.opencga.catalog.managers.JobManager; import org.opencb.opencga.core.exceptions.VersionException; import org.opencb.opencga.server.rest.OpenCGAWSServer; @@ -31,8 +30,6 @@ */ public class AnalysisWebService extends 
OpenCGAWSServer { - protected JobManager jobManager; - public AnalysisWebService(@Context UriInfo uriInfo, @Context HttpServletRequest httpServletRequest, @Context HttpHeaders httpHeaders) throws IOException, VersionException { this(uriInfo.getPathParameters().getFirst("apiVersion"), uriInfo, httpServletRequest, httpHeaders); @@ -41,8 +38,6 @@ public AnalysisWebService(@Context UriInfo uriInfo, @Context HttpServletRequest public AnalysisWebService(String apiVersion, @Context UriInfo uriInfo, @Context HttpServletRequest httpServletRequest, @Context HttpHeaders httpHeaders) throws IOException, VersionException { super(apiVersion, uriInfo, httpServletRequest, httpHeaders); - - this.jobManager = catalogManager.getJobManager(); } } diff --git a/opencga-server/src/main/java/org/opencb/opencga/server/rest/analysis/VariantWebService.java b/opencga-server/src/main/java/org/opencb/opencga/server/rest/analysis/VariantWebService.java index 70748fffb0..5c824ac21c 100644 --- a/opencga-server/src/main/java/org/opencb/opencga/server/rest/analysis/VariantWebService.java +++ b/opencga-server/src/main/java/org/opencb/opencga/server/rest/analysis/VariantWebService.java @@ -31,6 +31,7 @@ import org.opencb.opencga.analysis.individual.qc.IndividualQcAnalysis; import org.opencb.opencga.analysis.sample.qc.SampleQcAnalysis; import org.opencb.opencga.analysis.variant.VariantExportTool; +import org.opencb.opencga.analysis.variant.VariantWalkerTool; import org.opencb.opencga.analysis.variant.circos.CircosAnalysis; import org.opencb.opencga.analysis.variant.circos.CircosLocalAnalysisExecutor; import org.opencb.opencga.analysis.variant.genomePlot.GenomePlotAnalysis; @@ -411,6 +412,27 @@ public Response export( return submitJob(VariantExportTool.ID, project, study, params, jobName, jobDescription, dependsOn, jobTags, scheduledStartTime, jobPriority, dryRun); } + @POST + @Path("/walker/run") + @ApiOperation(value = VariantWalkerTool.DESCRIPTION, response = Job.class) + @ApiImplicitParams({ + 
@ApiImplicitParam(name = QueryOptions.INCLUDE, value = ParamConstants.INCLUDE_DESCRIPTION, example = "name,attributes", dataType = "string", paramType = "query"), + @ApiImplicitParam(name = QueryOptions.EXCLUDE, value = ParamConstants.EXCLUDE_DESCRIPTION, example = "id,status", dataType = "string", paramType = "query"), + }) + public Response walker( + @ApiParam(value = ParamConstants.PROJECT_DESCRIPTION) @QueryParam(ParamConstants.PROJECT_PARAM) String project, + @ApiParam(value = ParamConstants.STUDY_DESCRIPTION) @QueryParam(ParamConstants.STUDY_PARAM) String study, + @ApiParam(value = ParamConstants.JOB_ID_CREATION_DESCRIPTION) @QueryParam(ParamConstants.JOB_ID) String jobName, + @ApiParam(value = ParamConstants.JOB_DESCRIPTION_DESCRIPTION) @QueryParam(ParamConstants.JOB_DESCRIPTION) String jobDescription, + @ApiParam(value = ParamConstants.JOB_DEPENDS_ON_DESCRIPTION) @QueryParam(JOB_DEPENDS_ON) String dependsOn, + @ApiParam(value = ParamConstants.JOB_TAGS_DESCRIPTION) @QueryParam(ParamConstants.JOB_TAGS) String jobTags, + @ApiParam(value = ParamConstants.JOB_SCHEDULED_START_TIME_DESCRIPTION) @QueryParam(ParamConstants.JOB_SCHEDULED_START_TIME) String scheduledStartTime, + @ApiParam(value = ParamConstants.JOB_PRIORITY_DESCRIPTION) @QueryParam(ParamConstants.SUBMIT_JOB_PRIORITY_PARAM) String jobPriority, + @ApiParam(value = ParamConstants.JOB_DRY_RUN_DESCRIPTION) @QueryParam(ParamConstants.JOB_DRY_RUN) Boolean dryRun, + @ApiParam(value = VariantWalkerParams.DESCRIPTION, required = true) VariantWalkerParams params) { + return submitJob(VariantWalkerTool.ID, project, study, params, jobName, jobDescription, dependsOn, jobTags, scheduledStartTime, jobPriority, dryRun); + } + @GET @Path("/annotation/query") @ApiOperation(value = "Query variant annotations from any saved versions", response = VariantAnnotation.class) diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStorageEngine.java 
b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStorageEngine.java index 77327d9d76..bf46887740 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStorageEngine.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStorageEngine.java @@ -34,7 +34,6 @@ import org.opencb.opencga.core.models.operations.variant.VariantAggregateFamilyParams; import org.opencb.opencga.core.models.operations.variant.VariantAggregateParams; import org.opencb.opencga.core.models.variant.VariantSetupParams; -import org.opencb.opencga.storage.core.variant.query.VariantQueryResult; import org.opencb.opencga.storage.core.StorageEngine; import org.opencb.opencga.storage.core.StoragePipelineResult; import org.opencb.opencga.storage.core.exceptions.StorageEngineException; @@ -57,9 +56,11 @@ import org.opencb.opencga.storage.core.variant.io.VariantExporter; import org.opencb.opencga.storage.core.variant.io.VariantImporter; import org.opencb.opencga.storage.core.variant.io.VariantReaderUtils; +import org.opencb.opencga.storage.core.variant.io.VariantWriterFactory; import org.opencb.opencga.storage.core.variant.io.VariantWriterFactory.VariantOutputFormat; import org.opencb.opencga.storage.core.variant.query.ParsedVariantQuery; import org.opencb.opencga.storage.core.variant.query.VariantQueryParser; +import org.opencb.opencga.storage.core.variant.query.VariantQueryResult; import org.opencb.opencga.storage.core.variant.query.VariantQueryUtils; import org.opencb.opencga.storage.core.variant.query.executors.*; import org.opencb.opencga.storage.core.variant.score.VariantScoreFormatDescriptor; @@ -284,6 +285,48 @@ public List exportData(URI outputFile, VariantOutputFormat outputFormat, UR return exporter.export(outputFile, outputFormat, variantsFile, parsedVariantQuery); } + public List walkData(URI outputFile, 
VariantWriterFactory.VariantOutputFormat format, Query query, QueryOptions queryOptions, + String dockerImage, String commandLine) + throws IOException, StorageEngineException { + if (format == VariantWriterFactory.VariantOutputFormat.VCF || format == VariantWriterFactory.VariantOutputFormat.VCF_GZ) { + if (!isValidParam(query, VariantQueryParam.UNKNOWN_GENOTYPE)) { + query.put(VariantQueryParam.UNKNOWN_GENOTYPE.key(), "./."); + } + } + commandLine = commandLine.replace("'", "'\"'\"'"); + + String memory = getOptions().getString(WALKER_DOCKER_MEMORY.key(), WALKER_DOCKER_MEMORY.defaultValue()); + String cpu = getOptions().getString(WALKER_DOCKER_CPU.key(), WALKER_DOCKER_CPU.defaultValue()); + String user = getOptions().getString(WALKER_DOCKER_USER.key(), WALKER_DOCKER_USER.defaultValue()); + String envs = getOptions().getString(WALKER_DOCKER_ENV.key(), WALKER_DOCKER_ENV.defaultValue()); + String volume = getOptions().getString(WALKER_DOCKER_MOUNT.key(), WALKER_DOCKER_MOUNT.defaultValue()); + String opts = getOptions().getString(WALKER_DOCKER_OPTS.key(), WALKER_DOCKER_OPTS.defaultValue()); + + String dockerCommandLine = "docker run --rm -i " + + "--memory " + memory + " " + + "--cpus " + cpu + " " + + "--user " + user + " "; + + if (StringUtils.isNotEmpty(volume)) { + dockerCommandLine += "-v " + volume + ":/data "; + } + + if (StringUtils.isNotEmpty(envs)) { + for (String s : envs.split(",")) { + dockerCommandLine += "--env " + s + " "; + } + } + dockerCommandLine = dockerCommandLine + + opts + + dockerImage + " bash -ce '" + commandLine + "'"; + return walkData(outputFile, format, query, queryOptions, dockerCommandLine); + } + + + public abstract List walkData(URI outputFile, VariantOutputFormat format, Query query, QueryOptions queryOptions, + String commandLine) + throws StorageEngineException; + /** * Creates a new {@link VariantExporter} for the current backend. * The default implementation iterates locally through the database. 
diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStorageOptions.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStorageOptions.java index 73abcbae61..f7736bd5b1 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStorageOptions.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStorageOptions.java @@ -100,6 +100,13 @@ public enum VariantStorageOptions implements ConfigurationOption { QUERY_SAMPLE_LIMIT_DEFAULT("query.sample.limit.default", 100), QUERY_SAMPLE_LIMIT_MAX("query.sample.limit.max", 1000), + WALKER_DOCKER_MEMORY("walker.docker.memory", "512m", true), + WALKER_DOCKER_CPU("walker.docker.cpu", "1", true), + WALKER_DOCKER_USER("walker.docker.user", "root", true), + WALKER_DOCKER_ENV("walker.docker.env", "", true), + WALKER_DOCKER_MOUNT("walker.docker.mount", "", true), + WALKER_DOCKER_OPTS("walker.docker.opts", "", true), + // Search intersect options INTERSECT_ACTIVE("search.intersect.active", true), // Allow intersect queries with the SearchEngine (Solr) INTERSECT_ALWAYS("search.intersect.always", false), // Force intersect queries @@ -133,15 +140,24 @@ public enum VariantStorageOptions implements ConfigurationOption { private final String key; private final Object value; + private final boolean isProtected; VariantStorageOptions(String key) { this.key = key; this.value = null; + this.isProtected = false; } VariantStorageOptions(String key, Object value) { this.key = key; this.value = value; + this.isProtected = false; + } + + VariantStorageOptions(String key, Object value, boolean isProtected) { + this.key = key; + this.value = value; + this.isProtected = isProtected; } public String key() { @@ -153,4 +169,10 @@ public T defaultValue() { return (T) value; } + @Override + public boolean isProtected() { + return isProtected; 
+ } + + } diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/io/VariantWriterFactory.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/io/VariantWriterFactory.java index 509207e5be..fa002facbd 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/io/VariantWriterFactory.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/io/VariantWriterFactory.java @@ -122,6 +122,14 @@ public boolean isSnappy() { return extension.endsWith(".snappy"); } + public VariantOutputFormat inPlan() { + if (!isPlain()) { + return VariantOutputFormat.valueOf(name().replace("_GZ", "").replace("_SNAPPY", "")); + } else { + return this; + } + } + public VariantOutputFormat withGzip() { try { if (isGzip()) { diff --git a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/dummy/DummyVariantStorageEngine.java b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/dummy/DummyVariantStorageEngine.java index 55903d221f..e10370dcaa 100644 --- a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/dummy/DummyVariantStorageEngine.java +++ b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/dummy/DummyVariantStorageEngine.java @@ -19,6 +19,8 @@ import org.opencb.biodata.models.variant.metadata.VariantMetadata; import org.opencb.commons.datastore.core.DataResult; import org.opencb.commons.datastore.core.ObjectMap; +import org.opencb.commons.datastore.core.Query; +import org.opencb.commons.datastore.core.QueryOptions; import org.opencb.opencga.core.config.DatabaseCredentials; import org.opencb.opencga.core.config.storage.StorageConfiguration; import org.opencb.opencga.core.config.storage.StorageEngineConfiguration; @@ -32,6 +34,7 @@ import 
org.opencb.opencga.storage.core.variant.VariantStorageEngine; import org.opencb.opencga.storage.core.variant.adaptors.VariantDBAdaptor; import org.opencb.opencga.storage.core.variant.io.VariantImporter; +import org.opencb.opencga.storage.core.variant.io.VariantWriterFactory; import org.opencb.opencga.storage.core.variant.score.VariantScoreFormatDescriptor; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -139,6 +142,11 @@ public void importData(URI input, VariantMetadata metadata, List walkData(URI outputFile, VariantWriterFactory.VariantOutputFormat format, Query query, QueryOptions queryOptions, String commandLine) throws StorageEngineException { + throw new UnsupportedOperationException("Unable to walk data in " + getStorageEngineId()); + } + @Override public void removeFiles(String study, List files, URI outdir) throws StorageEngineException { TaskMetadata task = preRemove(study, files, Collections.emptyList()); diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/utils/AbstractHBaseDriver.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/utils/AbstractHBaseDriver.java index 49fdbf0223..4e9fe7057a 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/utils/AbstractHBaseDriver.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/utils/AbstractHBaseDriver.java @@ -29,10 +29,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.BufferedReader; -import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; +import java.io.*; import java.nio.charset.Charset; import java.nio.file.Files; import java.nio.file.Paths; @@ -41,6 +38,8 @@ import java.util.List; import java.util.Map; import java.util.function.Supplier; 
+import java.util.zip.GZIPInputStream; +import java.util.zip.GZIPOutputStream; import static org.opencb.opencga.core.common.IOUtils.humanReadableByteCount; import static org.opencb.opencga.storage.hadoop.variant.HadoopVariantStorageOptions.MR_EXECUTOR_SSH_PASSWORD; @@ -469,16 +468,28 @@ protected List concatMrOutputToLocal(Path mrOutdir, Path localOutput, bool LOGGER.info(" Source : " + mrOutdir.toUri()); LOGGER.info(" Target : " + localOutput.toUri()); LOGGER.info(" ---- "); - try (FSDataOutputStream os = localOutput.getFileSystem(getConf()).create(localOutput)) { + try (FSDataOutputStream fsOs = localOutput.getFileSystem(getConf()).create(localOutput)) { + boolean isGzip = paths.get(0).getName().endsWith(".gz"); + OutputStream os; + if (isGzip) { + os = new GZIPOutputStream(fsOs); + } else { + os = fsOs; + } for (int i = 0; i < paths.size(); i++) { Path path = paths.get(i); LOGGER.info("Concat file : '{}' {} ", path.toUri(), humanReadableByteCount(fileSystem.getFileStatus(path).getLen(), false)); try (FSDataInputStream fsIs = fileSystem.open(path)) { - BufferedReader br; - br = new BufferedReader(new InputStreamReader(fsIs)); InputStream is; + if (isGzip) { + is = new GZIPInputStream(fsIs); + } else { + is = fsIs; + } + // Remove extra headers from all files but the first if (removeExtraHeaders && i != 0) { + BufferedReader br = new BufferedReader(new InputStreamReader(is)); String line; do { br.mark(10 * 1024 * 1024); //10MB @@ -486,8 +497,6 @@ protected List concatMrOutputToLocal(Path mrOutdir, Path localOutput, bool } while (line != null && line.startsWith("#")); br.reset(); is = new ReaderInputStream(br, Charset.defaultCharset()); - } else { - is = fsIs; } IOUtils.copyBytes(is, os, getConf(), false); diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/utils/ValueOnlyTextOutputFormat.java 
b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/utils/ValueOnlyTextOutputFormat.java new file mode 100644 index 0000000000..9d1759cf73 --- /dev/null +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/utils/ValueOnlyTextOutputFormat.java @@ -0,0 +1,33 @@ +package org.opencb.opencga.storage.hadoop.utils; + +import org.apache.hadoop.mapreduce.RecordWriter; +import org.apache.hadoop.mapreduce.TaskAttemptContext; +import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; + +import java.io.IOException; + +public class ValueOnlyTextOutputFormat extends TextOutputFormat { + + @Override + public RecordWriter getRecordWriter(TaskAttemptContext job) throws IOException, InterruptedException { + return new ValueOnlyRecordWriter(super.getRecordWriter(job)); + } + + private class ValueOnlyRecordWriter extends RecordWriter { + private final RecordWriter recordWriter; + + ValueOnlyRecordWriter(RecordWriter recordWriter) { + this.recordWriter = recordWriter; + } + + @Override + public void write(K key, V value) throws IOException, InterruptedException { + recordWriter.write(null, value); + } + + @Override + public void close(TaskAttemptContext context) throws IOException, InterruptedException { + recordWriter.close(context); + } + } +} diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantStorageEngine.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantStorageEngine.java index 84f00b042e..fdee34d313 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantStorageEngine.java +++ 
b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantStorageEngine.java @@ -59,6 +59,7 @@ import org.opencb.opencga.storage.core.variant.annotation.VariantAnnotationManager; import org.opencb.opencga.storage.core.variant.annotation.annotators.VariantAnnotator; import org.opencb.opencga.storage.core.variant.io.VariantExporter; +import org.opencb.opencga.storage.core.variant.io.VariantWriterFactory; import org.opencb.opencga.storage.core.variant.query.ParsedVariantQuery; import org.opencb.opencga.storage.core.variant.query.VariantQueryParser; import org.opencb.opencga.storage.core.variant.query.executors.*; @@ -94,6 +95,7 @@ import org.opencb.opencga.storage.hadoop.variant.index.sample.SampleIndexDBAdaptor; import org.opencb.opencga.storage.hadoop.variant.index.sample.SampleIndexDeleteHBaseColumnTask; import org.opencb.opencga.storage.hadoop.variant.io.HadoopVariantExporter; +import org.opencb.opencga.storage.hadoop.variant.mr.StreamVariantDriver; import org.opencb.opencga.storage.hadoop.variant.prune.VariantPruneManager; import org.opencb.opencga.storage.hadoop.variant.score.HadoopVariantScoreLoader; import org.opencb.opencga.storage.hadoop.variant.score.HadoopVariantScoreRemover; @@ -314,6 +316,23 @@ protected VariantExporter newVariantExporter(VariantMetadataFactory metadataFact return new HadoopVariantExporter(this, metadataFactory, getMRExecutor(), ioConnectorProvider); } + @Override + public List walkData(URI outputFile, VariantWriterFactory.VariantOutputFormat format, + Query query, QueryOptions queryOptions, String commandLine) throws StorageEngineException { + ParsedVariantQuery variantQuery = parseQuery(query, queryOptions); + int studyId = variantQuery.getStudyQuery().getDefaultStudy().getId(); + getMRExecutor().run(StreamVariantDriver.class, StreamVariantDriver.buildArgs( + null, + getVariantTableName(), studyId, null, + new 
ObjectMap().appendAll(variantQuery.getQuery()).appendAll(variantQuery.getInputOptions()) + .append(StreamVariantDriver.MAX_BYTES_PER_MAP_PARAM, 1024 * 10) + .append(StreamVariantDriver.COMMAND_LINE_BASE64_PARAM, Base64.getEncoder().encodeToString(commandLine.getBytes())) + .append(StreamVariantDriver.INPUT_FORMAT_PARAM, format.toString()) + .append(StreamVariantDriver.OUTPUT_PARAM, outputFile) + ), ""); + return null; + } + @Override public void deleteStats(String study, Collection cohorts, ObjectMap params) throws StorageEngineException { ObjectMap options = getMergedOptions(params); diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/io/VariantDriver.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/io/VariantDriver.java new file mode 100644 index 0000000000..755275263a --- /dev/null +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/io/VariantDriver.java @@ -0,0 +1,177 @@ +package org.opencb.opencga.storage.hadoop.variant.io; + +import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.client.Scan; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.mapreduce.Reducer; +import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; +import org.opencb.commons.datastore.core.Query; +import org.opencb.commons.datastore.core.QueryOptions; +import org.opencb.opencga.storage.core.exceptions.StorageEngineException; +import org.opencb.opencga.storage.core.variant.adaptors.VariantQueryParam; +import org.opencb.opencga.storage.core.variant.query.ParsedVariantQuery; +import org.opencb.opencga.storage.core.variant.query.VariantQueryParser; +import org.opencb.opencga.storage.hadoop.variant.AbstractVariantsTableDriver; +import 
org.opencb.opencga.storage.hadoop.variant.HadoopVariantQueryParser; +import org.opencb.opencga.storage.hadoop.variant.adaptors.VariantHBaseQueryParser; +import org.opencb.opencga.storage.hadoop.variant.adaptors.phoenix.VariantSqlQueryParser; +import org.opencb.opencga.storage.hadoop.variant.index.sample.SampleIndexDBAdaptor; +import org.opencb.opencga.storage.hadoop.variant.index.sample.SampleIndexQueryParser; +import org.opencb.opencga.storage.hadoop.variant.mr.VariantMapReduceUtil; +import org.opencb.opencga.storage.hadoop.variant.mr.VariantMapper; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.List; +import java.util.Map; + +import static org.opencb.opencga.storage.hadoop.variant.mr.VariantMapReduceUtil.getQueryFromConfig; +import static org.opencb.opencga.storage.hadoop.variant.mr.VariantMapReduceUtil.getQueryOptionsFromConfig; + +/** + * Created on 14/06/18. + * + * export HADOOP_USER_CLASSPATH_FIRST=true + * hbase_conf=$(hbase classpath | tr ":" "\n" | grep "/conf" | tr "\n" ":") + * export HADOOP_CLASSPATH=${hbase_conf}:$PWD/libs/avro-1.7.7.jar:$PWD/libs/jackson-databind-2.6.6.jar:$PWD/libs/jackson-core-2.6.6.jar + * export HADOOP_CLASSPATH=${HADOOP_CLASSPATH}:$PWD/libs/jackson-annotations-2.6.6.jar + * yarn jar opencga-storage-hadoop-core-1.4.0-jar-with-dependencies.jar \ + * org.opencb.opencga.storage.hadoop.variant.io.VariantExporterDriver \ + * opencga_variants study myStudy --of avro --output my.variants.avro --region 22 + * + * @author Jacobo Coll <jacobo167@gmail.com> + */ +public abstract class VariantDriver extends AbstractVariantsTableDriver { + + public static final String OUTPUT_PARAM = "output"; + public static final String CONCAT_OUTPUT_PARAM = "concat-output"; + private Path outdir; + private Path localOutput; + private Query query = new Query(); + private QueryOptions options = new QueryOptions(); + private static Logger logger = LoggerFactory.getLogger(VariantDriver.class); + 
protected boolean useReduceStep; + + @Override + protected void parseAndValidateParameters() throws IOException { + setStudyId(-1); + super.parseAndValidateParameters(); + String outdirStr = getParam(OUTPUT_PARAM); + if (StringUtils.isEmpty(outdirStr)) { + throw new IllegalArgumentException("Missing argument " + OUTPUT_PARAM); + } + + useReduceStep = Boolean.valueOf(getParam(CONCAT_OUTPUT_PARAM)); + outdir = new Path(outdirStr); + if (isLocal(outdir)) { + localOutput = getLocalOutput(outdir); + outdir = getTempOutdir("opencga_export", localOutput.getName()); + outdir.getFileSystem(getConf()).deleteOnExit(outdir); + } + if (localOutput != null) { + useReduceStep = true; + logger.info(" * Outdir file: " + localOutput.toUri()); + logger.info(" * Temporary outdir file: " + outdir.toUri()); + } else { + logger.info(" * Outdir file: " + outdir.toUri()); + } + + getQueryFromConfig(query, getConf()); + getQueryOptionsFromConfig(options, getConf()); + + logger.info(" * Query:"); + for (Map.Entry entry : query.entrySet()) { + logger.info(" * " + entry.getKey() + " : " + entry.getValue()); + } + } + + @Override + protected abstract Class getMapperClass(); + + protected abstract Class getReducerClass(); + + protected abstract Class getOutputFormatClass(); + + protected abstract void setupJob(Job job) throws IOException; + + @Override + protected final Job setupJob(Job job, String archiveTable, String variantTable) throws IOException { + setupJob(job); + Class mapperClass = getMapperClass(); + Class reducerClass = getReducerClass(); + if (mapperClass == null) { + throw new IllegalArgumentException("Mapper class not provided!"); + } + if (useReduceStep) { + if (reducerClass == null) { + throw new IllegalArgumentException("Reducer class not provided!"); + } + } + Class outputFormatClass = getOutputFormatClass(); + if (outputFormatClass == null) { + throw new IllegalArgumentException("Output format class not provided!"); + } + job.setOutputFormatClass(outputFormatClass); + + if 
(useReduceStep) { + logger.info("Use one Reduce task to produce a single file"); + job.setReducerClass(reducerClass); + job.setNumReduceTasks(1); + } else { + VariantMapReduceUtil.setNoneReduce(job); + } + + VariantQueryParser variantQueryParser = new HadoopVariantQueryParser(null, getMetadataManager()); + ParsedVariantQuery variantQuery = variantQueryParser.parseQuery(query, options); + Query query = variantQuery.getQuery(); + if (VariantHBaseQueryParser.isSupportedQuery(query)) { + logger.info("Init MapReduce job reading from HBase"); + boolean useSampleIndex = !getConf().getBoolean("skipSampleIndex", false) && SampleIndexQueryParser.validSampleIndexQuery(query); + if (useSampleIndex) { + // Remove extra fields from the query + new SampleIndexDBAdaptor(getHBaseManager(), getTableNameGenerator(), getMetadataManager()).parseSampleIndexQuery(query); + + logger.info("Use sample index to read from HBase"); + } + + VariantHBaseQueryParser parser = new VariantHBaseQueryParser(getMetadataManager()); + List scans = parser.parseQueryMultiRegion(variantQuery, options); + VariantMapReduceUtil.configureMapReduceScans(scans, getConf()); + + VariantMapReduceUtil.initVariantMapperJobFromHBase(job, variantTable, scans, mapperClass, useSampleIndex); + } else { + logger.info("Init MapReduce job reading from Phoenix"); + String sql = new VariantSqlQueryParser(variantTable, getMetadataManager(), getHelper().getConf()) + .parse(variantQuery, options); + + VariantMapReduceUtil.initVariantMapperJobFromPhoenix(job, variantTable, sql, mapperClass); + } + + setNoneTimestamp(job); + + FileOutputFormat.setOutputPath(job, outdir); // set Path + + VariantMapReduceUtil.configureVariantConverter(job.getConfiguration(), false, true, true, + query.getString(VariantQueryParam.UNKNOWN_GENOTYPE.key(), "./.")); + + + return job; + } + + + @Override + protected void postExecution(boolean succeed) throws IOException, StorageEngineException { + super.postExecution(succeed); + if (succeed) { + if 
(localOutput != null) { + concatMrOutputToLocal(outdir, localOutput); + } + } + if (localOutput != null) { + deleteTemporaryFile(outdir); + } + } + +} diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/io/VariantExporterDriver.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/io/VariantExporterDriver.java index aa342c10a9..c44e686e4d 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/io/VariantExporterDriver.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/io/VariantExporterDriver.java @@ -5,9 +5,6 @@ import org.apache.avro.mapred.AvroValue; import org.apache.avro.mapreduce.AvroJob; import org.apache.avro.mapreduce.AvroKeyOutputFormat; -import org.apache.commons.lang3.StringUtils; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hbase.client.Scan; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.io.compress.CompressionCodec; import org.apache.hadoop.io.compress.DeflateCodec; @@ -25,100 +22,50 @@ import org.opencb.biodata.models.variant.Variant; import org.opencb.biodata.models.variant.avro.GeneCancerAssociation; import org.opencb.biodata.models.variant.avro.VariantAvro; -import org.opencb.commons.datastore.core.Query; -import org.opencb.commons.datastore.core.QueryOptions; -import org.opencb.opencga.storage.core.exceptions.StorageEngineException; -import org.opencb.opencga.storage.core.variant.adaptors.VariantQueryParam; -import org.opencb.opencga.storage.core.variant.io.VariantWriterFactory.VariantOutputFormat; -import org.opencb.opencga.storage.core.variant.query.ParsedVariantQuery; -import org.opencb.opencga.storage.core.variant.query.VariantQueryParser; +import 
org.opencb.opencga.storage.core.variant.io.VariantWriterFactory; import org.opencb.opencga.storage.hadoop.variant.AbstractVariantsTableDriver; -import org.opencb.opencga.storage.hadoop.variant.HadoopVariantQueryParser; -import org.opencb.opencga.storage.hadoop.variant.adaptors.VariantHBaseQueryParser; -import org.opencb.opencga.storage.hadoop.variant.adaptors.phoenix.VariantSqlQueryParser; -import org.opencb.opencga.storage.hadoop.variant.index.sample.SampleIndexDBAdaptor; -import org.opencb.opencga.storage.hadoop.variant.index.sample.SampleIndexQueryParser; import org.opencb.opencga.storage.hadoop.variant.mr.VariantFileOutputFormat; -import org.opencb.opencga.storage.hadoop.variant.mr.VariantMapReduceUtil; import org.opencb.opencga.storage.hadoop.variant.mr.VariantMapper; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import java.io.IOException; import java.lang.invoke.MethodHandles; import java.util.List; -import java.util.Map; import java.util.Objects; import java.util.logging.Handler; import java.util.logging.Level; -import static org.opencb.opencga.storage.hadoop.variant.mr.VariantMapReduceUtil.getQueryFromConfig; -import static org.opencb.opencga.storage.hadoop.variant.mr.VariantMapReduceUtil.getQueryOptionsFromConfig; - -/** - * Created on 14/06/18. 
- * - * export HADOOP_USER_CLASSPATH_FIRST=true - * hbase_conf=$(hbase classpath | tr ":" "\n" | grep "/conf" | tr "\n" ":") - * export HADOOP_CLASSPATH=${hbase_conf}:$PWD/libs/avro-1.7.7.jar:$PWD/libs/jackson-databind-2.6.6.jar:$PWD/libs/jackson-core-2.6.6.jar - * export HADOOP_CLASSPATH=${HADOOP_CLASSPATH}:$PWD/libs/jackson-annotations-2.6.6.jar - * yarn jar opencga-storage-hadoop-core-1.4.0-jar-with-dependencies.jar \ - * org.opencb.opencga.storage.hadoop.variant.io.VariantExporterDriver \ - * opencga_variants study myStudy --of avro --output my.variants.avro --region 22 - * - * @author Jacobo Coll <jacobo167@gmail.com> - */ -public class VariantExporterDriver extends AbstractVariantsTableDriver { +public class VariantExporterDriver extends VariantDriver { public static final String OUTPUT_FORMAT_PARAM = "of"; - public static final String OUTPUT_PARAM = "output"; - public static final String CONCAT_OUTPUT_PARAM = "concat-output"; - private VariantOutputFormat outputFormat; - private Path outdir; - private Path localOutput; - private Query query = new Query(); - private QueryOptions options = new QueryOptions(); - private static Logger logger = LoggerFactory.getLogger(VariantExporterDriver.class); - private boolean useReduceStep; + private VariantWriterFactory.VariantOutputFormat outputFormat; + private Class mapperClass; + private Class reducerClass; + private Class outputFormatClass; @Override protected void parseAndValidateParameters() throws IOException { - setStudyId(-1); super.parseAndValidateParameters(); - outputFormat = VariantOutputFormat.valueOf(getParam(OUTPUT_FORMAT_PARAM, "avro").toUpperCase()); - String outdirStr = getParam(OUTPUT_PARAM); - if (StringUtils.isEmpty(outdirStr)) { - throw new IllegalArgumentException("Missing argument " + OUTPUT_PARAM); - } - useReduceStep = Boolean.valueOf(getParam(CONCAT_OUTPUT_PARAM)); - outdir = new Path(outdirStr); - if (isLocal(outdir)) { - localOutput = getLocalOutput(outdir); - outdir = 
getTempOutdir("opencga_export", localOutput.getName()); - outdir.getFileSystem(getConf()).deleteOnExit(outdir); - } - if (localOutput != null) { - useReduceStep = true; - logger.info(" * Outdir file: " + localOutput.toUri()); - logger.info(" * Temporary outdir file: " + outdir.toUri()); - } else { - logger.info(" * Outdir file: " + outdir.toUri()); - } + outputFormat = VariantWriterFactory.VariantOutputFormat.valueOf(getParam(OUTPUT_FORMAT_PARAM, "avro").toUpperCase()); + } - getQueryFromConfig(query, getConf()); - getQueryOptionsFromConfig(options, getConf()); + @Override + protected Class getMapperClass() { + return mapperClass; + } - logger.info(" * Query:"); - for (Map.Entry entry : query.entrySet()) { - logger.info(" * " + entry.getKey() + " : " + entry.getValue()); - } + @Override + protected Class getReducerClass() { + return reducerClass; } @Override - protected Job setupJob(Job job, String archiveTable, String variantTable) throws IOException { - Class mapperClass; - Class reducerClass; + protected Class getOutputFormatClass() { + return outputFormatClass; + } + + @Override + protected void setupJob(Job job) throws IOException { job.getConfiguration().setBoolean(JobContext.MAP_OUTPUT_COMPRESS, true); job.getConfiguration().setClass(JobContext.MAP_OUTPUT_COMPRESS_CODEC, DeflateCodec.class, CompressionCodec.class); switch (outputFormat) { @@ -127,7 +74,7 @@ protected Job setupJob(Job job, String archiveTable, String variantTable) throws job.getConfiguration().set(AvroJob.CONF_OUTPUT_CODEC, DataFileConstants.DEFLATE_CODEC); // do not break case AVRO: - job.setOutputFormatClass(AvroKeyOutputFormat.class); + outputFormatClass = AvroKeyOutputFormat.class; if (useReduceStep) { job.setMapOutputKeyClass(NullWritable.class); AvroJob.setMapOutputValueSchema(job, VariantAvro.getClassSchema()); @@ -148,7 +95,7 @@ protected Job setupJob(Job job, String archiveTable, String variantTable) throws ParquetOutputFormat.setCompression(job, CompressionCodecName.GZIP); // do not 
break case PARQUET: - job.setOutputFormatClass(AvroParquetOutputFormat.class); + outputFormatClass = AvroParquetOutputFormat.class; AvroParquetOutputFormat.setSchema(job, VariantAvro.getClassSchema()); if (useReduceStep) { job.setMapOutputKeyClass(NullWritable.class); @@ -176,69 +123,13 @@ protected Job setupJob(Job job, String archiveTable, String variantTable) throws } else if (outputFormat.isSnappy()) { FileOutputFormat.setOutputCompressorClass(job, SnappyCodec.class); // compression } - job.setOutputFormatClass(VariantFileOutputFormat.class); + outputFormatClass = VariantFileOutputFormat.class; + job.getConfiguration().set(VariantFileOutputFormat.VARIANT_OUTPUT_FORMAT, outputFormat.name()); job.setOutputKeyClass(Variant.class); break; } - - if (useReduceStep) { - logger.info("Use one Reduce task to produce a single file"); - job.setReducerClass(reducerClass); - job.setNumReduceTasks(1); - } else { - VariantMapReduceUtil.setNoneReduce(job); - } - - VariantQueryParser variantQueryParser = new HadoopVariantQueryParser(null, getMetadataManager()); - ParsedVariantQuery variantQuery = variantQueryParser.parseQuery(query, options); - Query query = variantQuery.getQuery(); - if (VariantHBaseQueryParser.isSupportedQuery(query)) { - logger.info("Init MapReduce job reading from HBase"); - boolean useSampleIndex = !getConf().getBoolean("skipSampleIndex", false) && SampleIndexQueryParser.validSampleIndexQuery(query); - if (useSampleIndex) { - // Remove extra fields from the query - new SampleIndexDBAdaptor(getHBaseManager(), getTableNameGenerator(), getMetadataManager()).parseSampleIndexQuery(query); - - logger.info("Use sample index to read from HBase"); - } - - VariantHBaseQueryParser parser = new VariantHBaseQueryParser(getMetadataManager()); - List scans = parser.parseQueryMultiRegion(variantQuery, options); - VariantMapReduceUtil.configureMapReduceScans(scans, getConf()); - - VariantMapReduceUtil.initVariantMapperJobFromHBase(job, variantTable, scans, mapperClass, 
useSampleIndex); - } else { - logger.info("Init MapReduce job reading from Phoenix"); - String sql = new VariantSqlQueryParser(variantTable, getMetadataManager(), getHelper().getConf()) - .parse(variantQuery, options); - - VariantMapReduceUtil.initVariantMapperJobFromPhoenix(job, variantTable, sql, mapperClass); - } - - setNoneTimestamp(job); - - FileOutputFormat.setOutputPath(job, outdir); // set Path - - VariantMapReduceUtil.configureVariantConverter(job.getConfiguration(), false, true, true, - query.getString(VariantQueryParam.UNKNOWN_GENOTYPE.key(), "./.")); - - job.getConfiguration().set(VariantFileOutputFormat.VARIANT_OUTPUT_FORMAT, outputFormat.name()); - - return job; } - @Override - protected void postExecution(boolean succeed) throws IOException, StorageEngineException { - super.postExecution(succeed); - if (succeed) { - if (localOutput != null) { - concatMrOutputToLocal(outdir, localOutput); - } - } - if (localOutput != null) { - deleteTemporaryFile(outdir); - } - } @Override protected String getJobOperationName() { @@ -247,9 +138,9 @@ protected String getJobOperationName() { /** * Mapper to convert to Variant. - * The output of this mapper should be connected directly to the {@link VariantOutputFormat} + * The output of this mapper should be connected directly to the {@link VariantWriterFactory.VariantOutputFormat} * This mapper can not work with a reduce step. - * @see VariantOutputFormat + * @see VariantWriterFactory.VariantOutputFormat */ public static class VariantExporterDirectMapper extends VariantMapper { @Override @@ -308,9 +199,9 @@ protected void map(Object key, Variant value, Context context) throws IOExceptio /** * Reducer to join all VariantAvro and generate Variants. 
- * The output of this reducer should be connected to the {@link VariantOutputFormat} + * The output of this reducer should be connected to the {@link VariantWriterFactory.VariantOutputFormat} * @see AvroVariantExporterMapper - * @see VariantOutputFormat + * @see VariantWriterFactory.VariantOutputFormat */ public static class VariantExporterReducer extends Reducer, Variant, NullWritable> { @Override diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantDriver.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantDriver.java new file mode 100644 index 0000000000..bb31552ad6 --- /dev/null +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantDriver.java @@ -0,0 +1,140 @@ +package org.opencb.opencga.storage.hadoop.variant.mr; + +import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.hbase.io.ImmutableBytesWritable; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.compress.CompressionCodec; +import org.apache.hadoop.io.compress.DeflateCodec; +import org.apache.hadoop.io.compress.GzipCodec; +import org.apache.hadoop.mapred.JobContext; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.mapreduce.Reducer; +import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; +import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; +import org.apache.hadoop.util.Tool; +import org.opencb.opencga.storage.core.variant.io.VariantWriterFactory; +import org.opencb.opencga.storage.hadoop.utils.ValueOnlyTextOutputFormat; +import org.opencb.opencga.storage.hadoop.variant.io.VariantDriver; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.lang.invoke.MethodHandles; +import java.util.Map; + +public class 
StreamVariantDriver extends VariantDriver { + + public static final String INPUT_FORMAT_PARAM = "inputFormat"; + public static final String COMMAND_LINE_PARAM = "commandLine"; + public static final String COMMAND_LINE_BASE64_PARAM = "commandLineBase64"; + public static final String MAX_BYTES_PER_MAP_PARAM = "maxBytesPerMap"; + + private VariantWriterFactory.VariantOutputFormat format; + private int maxBytesPerMap; + private static Logger logger = LoggerFactory.getLogger(StreamVariantDriver.class); + private String commandLine; + + private Class mapperClass; + private Class reducerClass; + private Class outputFormatClass; + + @Override + protected Map getParams() { + Map params = super.getParams(); + params.put(INPUT_FORMAT_PARAM, ""); + params.put(COMMAND_LINE_PARAM, ""); + params.put(COMMAND_LINE_BASE64_PARAM, ""); + + return params; + } + + @Override + protected void parseAndValidateParameters() throws IOException { + super.parseAndValidateParameters(); + + String inputFormat = getParam(INPUT_FORMAT_PARAM); + if (inputFormat == null) { + throw new IllegalArgumentException("Missing input format!"); + } + format = VariantWriterFactory.toOutputFormat(inputFormat, ""); + if (format == null) { + throw new IllegalArgumentException("Unknown input format " + inputFormat); + } + maxBytesPerMap = Integer.parseInt(getParam(MAX_BYTES_PER_MAP_PARAM, String.valueOf(1024 * 1024 * 1024))); + + commandLine = getParam(COMMAND_LINE_PARAM); + String commandLineBase64 = getParam(COMMAND_LINE_BASE64_PARAM); + if (commandLine == null && commandLineBase64 == null) { + throw new IllegalArgumentException("Missing command line!"); + } + if (commandLine != null && commandLineBase64 != null) { + throw new IllegalArgumentException("Only one of '" + COMMAND_LINE_PARAM + "' or '" + COMMAND_LINE_BASE64_PARAM + "'" + + " is allowed!"); + } + + if (commandLineBase64 != null) { + commandLine = new String(java.util.Base64.getDecoder().decode(commandLineBase64)); + } + + String outdirStr = 
getParam(OUTPUT_PARAM); + if (StringUtils.isEmpty(outdirStr)) { + throw new IllegalArgumentException("Missing argument " + OUTPUT_PARAM); + } + } + + @Override + protected Class getMapperClass() { + return mapperClass; + } + + @Override + protected Class getReducerClass() { + return reducerClass; + } + + @Override + protected Class getOutputFormatClass() { + return outputFormatClass; + } + + @Override + protected void setupJob(Job job) throws IOException { + + job.getConfiguration().setBoolean(JobContext.MAP_OUTPUT_COMPRESS, true); + job.getConfiguration().setClass(JobContext.MAP_OUTPUT_COMPRESS_CODEC, DeflateCodec.class, CompressionCodec.class); + + Class keyClass = ImmutableBytesWritable.class; +// Class keyClass = NullWritable.class; +// Class keyClass = Text.class; + Class valueClass = Text.class; + + mapperClass = StreamVariantMapper.class; + job.setMapOutputKeyClass(keyClass); + job.setMapOutputValueClass(valueClass); + + StreamVariantMapper.setCommandLine(job, commandLine); + StreamVariantMapper.setVariantFormat(job, format); + StreamVariantMapper.setMaxInputBytesPerProcess(job, maxBytesPerMap); + + reducerClass = Reducer.class; + + outputFormatClass = ValueOnlyTextOutputFormat.class; + job.setOutputFormatClass(ValueOnlyTextOutputFormat.class); + TextOutputFormat.setCompressOutput(job, true); + TextOutputFormat.setOutputCompressorClass(job, GzipCodec.class); +// TextOutputFormat.setOutputCompressorClass(job, DeflateCodec.class); + job.setOutputKeyClass(keyClass); + job.setOutputValueClass(valueClass); + } + + @Override + protected String getJobOperationName() { + return "stream-variants"; + } + + @SuppressWarnings("unchecked") + public static void main(String[] args) { + main(args, (Class) MethodHandles.lookup().lookupClass()); + } + +} diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantMapper.java 
b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantMapper.java new file mode 100644 index 0000000000..163d6bd964 --- /dev/null +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantMapper.java @@ -0,0 +1,394 @@ +package org.opencb.opencga.storage.hadoop.variant.mr; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.io.ImmutableBytesWritable; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.mapreduce.Mapper; +import org.apache.hadoop.util.LineReader; +import org.opencb.biodata.models.variant.Variant; +import org.opencb.commons.datastore.core.Query; +import org.opencb.commons.datastore.core.QueryOptions; +import org.opencb.commons.io.DataWriter; +import org.opencb.opencga.storage.core.metadata.VariantStorageMetadataManager; +import org.opencb.opencga.storage.core.variant.io.VariantWriterFactory; +import org.opencb.opencga.storage.hadoop.variant.metadata.HBaseVariantStorageMetadataDBAdaptorFactory; + +import java.io.*; +import java.util.Base64; +import java.util.HashMap; +import java.util.Map; +import java.util.concurrent.atomic.AtomicReference; + +import static org.opencb.opencga.storage.hadoop.variant.mr.VariantsTableMapReduceHelper.COUNTER_GROUP_NAME; + +public class StreamVariantMapper extends VariantMapper { + private static final Log LOG = LogFactory.getLog(StreamVariantMapper.class); + + private static final int BUFFER_SIZE = 128 * 1024; + public static final String MAX_INPUT_BYTES_PER_PROCESS = "stream.maxInputBytesPerProcess"; + public static final String VARIANT_FORMAT = "stream.variant.format"; + public static final String STREAMPROCESSOR = "stream.map.streamprocessor"; 
+ + private final boolean verboseStdout = false; + private static final long REPORTER_OUT_DELAY = 10 * 1000L; + private static final long REPORTER_ERR_DELAY = 10 * 1000L; + + // Configured at SETUP + private String commandLine; + private int maxInputBytesPerProcess; + private VariantWriterFactory.VariantOutputFormat format; + private Map envs; + private VariantStorageMetadataManager metadataManager; + private VariantWriterFactory writerFactory; + private Query query; + private QueryOptions options; + // Keep an auto-incremental number for each produced record. This is used as the key for the output record, + // and will ensure a sorted output. + private int outputKeyNum; + + // Configured for every new process + private Process process; + private DataOutputStream stdin; + private DataInputStream stdout; + private DataInputStream stderr; + private MRErrorThread stderrThread; + private MROutputThread stdoutThread; + private DataWriter variantDataWriter; + private int processedBytes = 0; + private long numRecordsRead = 0; + private long numRecordsWritten = 0; + protected final AtomicReference throwable = new AtomicReference<>(); + + private volatile boolean processProvidedStatus_ = false; + + public static void setCommandLine(Job job, String commandLine) { + String commandLineBase64 = Base64.getEncoder().encodeToString(commandLine.getBytes()); + job.getConfiguration().set(STREAMPROCESSOR, commandLineBase64); + } + + public static void setVariantFormat(Job job, VariantWriterFactory.VariantOutputFormat format) { + job.getConfiguration().set(VARIANT_FORMAT, format.toString()); + } + + public static void setMaxInputBytesPerProcess(Job job, int maxInputBytesPerProcess) { + job.getConfiguration().setInt(MAX_INPUT_BYTES_PER_PROCESS, maxInputBytesPerProcess); + } + + @Override + protected void setup(Context context) throws IOException, InterruptedException { + super.setup(context); + Configuration conf = context.getConfiguration(); + commandLine = new 
String(Base64.getDecoder().decode(conf.get(STREAMPROCESSOR))); + maxInputBytesPerProcess = conf.getInt(MAX_INPUT_BYTES_PER_PROCESS, 1024 * 1024 * 1024); + format = VariantWriterFactory.toOutputFormat(conf.get(VARIANT_FORMAT), ""); + if (!format.isPlain()) { + format = format.inPlan(); + } + + + envs = new HashMap<>(); + addEnvironment(envs, conf.get("stream.addenvironment")); + // add TMPDIR environment variable with the value of java.io.tmpdir + envs.put("TMPDIR", System.getProperty("java.io.tmpdir")); + + VariantTableHelper helper = new VariantTableHelper(conf); + metadataManager = new VariantStorageMetadataManager(new HBaseVariantStorageMetadataDBAdaptorFactory(helper)); + writerFactory = new VariantWriterFactory(metadataManager); + query = VariantMapReduceUtil.getQueryFromConfig(conf); + options = VariantMapReduceUtil.getQueryOptionsFromConfig(conf); + outputKeyNum = context.getCurrentKey().hashCode(); + } + + @Override + public void run(Context context) throws IOException, InterruptedException { + if (context.nextKeyValue()) { + try { + setup(context); + startProcess(context); + // Do-while instead of "while", as we've already called context.nextKeyValue() once + do { + if (processedBytes > maxInputBytesPerProcess) { + LOG.info("Processed bytes = " + processedBytes + " > " + maxInputBytesPerProcess + ". 
Restarting process."); + context.getCounter(COUNTER_GROUP_NAME, "RESTARTED_PROCESS").increment(1); + closeProcess(context); + startProcess(context); + } + map(context.getCurrentKey(), context.getCurrentValue(), context); + } while (!hasExceptions() && context.nextKeyValue()); + } catch (Throwable th) { + setException(th); + } + try { + // Always call cleanup, even if there was an exception + cleanup(context); + } catch (Throwable th) { + setException(th); + } + } else { + context.getCounter(COUNTER_GROUP_NAME, "EMPTY_INPUT_SPLIT").increment(1); + } + throwExceptionIfAny(); + } + + private boolean hasExceptions() { + return throwable.get() != null; + } + + private void setException(Throwable th) { + if (!throwable.compareAndSet(null, th)) { + synchronized (throwable) { + // addSuppressed is not thread safe + throwable.get().addSuppressed(th); + } + } + LOG.warn("{}", th); + } + + private void throwExceptionIfAny() throws IOException { + if (hasExceptions()) { + Throwable cause = throwable.get(); + throwable.set(null); + throw new IOException("MROutput/MRErrThread failed:", cause); + } + } + + @Override + protected void cleanup(Mapper.Context context) throws IOException, InterruptedException { + closeProcess(context); + super.cleanup(context); + } + + @Override + protected void map(Object key, Variant value, Context context) throws IOException, InterruptedException { + numRecordsRead++; + variantDataWriter.write(value); + stdin.flush(); + processedBytes = stdin.size(); + } + + private void closeProcess(Context context) throws IOException, InterruptedException { + + try { + if (variantDataWriter != null) { + variantDataWriter.post(); + variantDataWriter.close(); + } + + // Close stdin to the process. This will cause the process to finish. 
+ if (stdin != null) { + stdin.close(); + stdin = null; + } + + if (process != null) { + // Wait for the process to finish + int exitVal = process.waitFor(); + + if (exitVal != 0) { + LOG.error("Process exited with code " + exitVal); + throw new IOException("Process exited with code " + exitVal); + } + process = null; + } + } catch (Throwable th) { + setException(th); + } + + try { + if (stdout != null) { + stdoutThread.join(); + stdout.close(); + stdout = null; + } + } catch (Throwable th) { + setException(th); + } + try { + if (stderr != null) { + stderrThread.join(); + stderr.close(); + stderr = null; + } + } catch (Throwable th) { + setException(th); + } +// drainStdout(context); + } + + private void startProcess(Context context) throws IOException { + LOG.info("bash -ce '" + commandLine + "'"); + context.getCounter(COUNTER_GROUP_NAME, "START_PROCESS").increment(1); + + // Start the process + ProcessBuilder builder = new ProcessBuilder("bash", "-ce", commandLine); + builder.environment().putAll(envs); + process = builder.start(); + + stdin = new DataOutputStream(new BufferedOutputStream( + process.getOutputStream(), + BUFFER_SIZE)); + stdout = new DataInputStream(new BufferedInputStream( + process.getInputStream(), + BUFFER_SIZE)); + + stderr = new DataInputStream(new BufferedInputStream(process.getErrorStream())); + + stderrThread = new MRErrorThread(context); + stdoutThread = new MROutputThread(context); + stderrThread.start(); + stdoutThread.start(); + + variantDataWriter = writerFactory.newDataWriter(format, stdin, new Query(query), new QueryOptions(options)); + + processedBytes = 0; + numRecordsRead = 0; + numRecordsWritten = 0; + throwable.set(null); + + variantDataWriter.open(); + variantDataWriter.pre(); + stdin.flush(); + + } + + void addEnvironment(Map env, String nameVals) { + // encoding "a=b c=d" from StreamJob + if (nameVals == null) { + return; + } + String[] nv = nameVals.split(" "); + for (int i = 0; i < nv.length; i++) { + String[] pair = 
nv[i].split("=", 2); + if (pair.length != 2) { + LOG.info("Skip env entry:" + nv[i]); + } else { + env.put(pair[0], pair[1]); + } + } + } + + + private class MROutputThread extends Thread { + + private final Mapper.Context context; + private long lastStdoutReport = 0; + + MROutputThread(Context context) { + this.context = context; + setDaemon(true); + } + + public void run() { + Text line = new Text(); + LineReader stdoutLineReader = new LineReader(stdout); + try { + while (stdoutLineReader.readLine(line) > 0) { + context.write(new ImmutableBytesWritable(Bytes.toBytes(outputKeyNum++)), line); +// context.write(null, line); + if (verboseStdout) { + LOG.info("[STDOUT] - " + line); + } + numRecordsWritten++; + long now = System.currentTimeMillis(); + if (now - lastStdoutReport > REPORTER_OUT_DELAY) { + lastStdoutReport = now; + String hline = "Records R/W=" + numRecordsRead + "/" + numRecordsWritten; + if (!processProvidedStatus_) { + context.setStatus(hline); + } else { + context.progress(); + } + LOG.info(hline); + } + } + } catch (Throwable th) { + setException(th); + } + } + } + + private class MRErrorThread extends Thread { + + private final Configuration conf; + private final Mapper.Context context; + private long lastStderrReport = 0; + private final String reporterPrefix; + private final String counterPrefix; + private final String statusPrefix; + + MRErrorThread(Context context) { + this.context = context; + this.conf = context.getConfiguration(); + this.reporterPrefix = conf.get("stream.stderr.reporter.prefix", "reporter:"); + this.counterPrefix = reporterPrefix + "counter:"; + this.statusPrefix = reporterPrefix + "status:"; + setDaemon(true); + } + + public void run() { + Text line = new Text(); + LineReader stderrLineReader = new LineReader(stderr); + try { + while (stderrLineReader.readLine(line) > 0) { + String lineStr = line.toString(); + if (matchesReporter(lineStr)) { + if (matchesCounter(lineStr)) { + incrCounter(lineStr); + } else if 
(matchesStatus(lineStr)) { + processProvidedStatus_ = true; + setStatus(lineStr); + } else { + LOG.warn("Cannot parse reporter line: " + lineStr); + } + } else { + LOG.info("[STDERR] - " + lineStr); +// System.err.println(lineStr); + } + long now = System.currentTimeMillis(); + if (now - lastStderrReport > REPORTER_ERR_DELAY) { + lastStderrReport = now; + context.progress(); + } + line.clear(); + } + } catch (Throwable th) { + setException(th); + } + } + + private boolean matchesReporter(String line) { + return line.startsWith(reporterPrefix); + } + + private boolean matchesCounter(String line) { + return line.startsWith(counterPrefix); + } + + private boolean matchesStatus(String line) { + return line.startsWith(statusPrefix); + } + + private void incrCounter(String line) { + String trimmedLine = line.substring(counterPrefix.length()).trim(); + String[] columns = trimmedLine.split(","); + if (columns.length == 2) { + try { + context.getCounter(COUNTER_GROUP_NAME, columns[0]).increment(Long.parseLong(columns[1])); + } catch (NumberFormatException e) { + LOG.warn("Cannot parse counter increment '" + columns[1] + "' from line: " + line); + } + } else { + LOG.warn("Cannot parse counter line: " + line); + } + } + + private void setStatus(String line) { + context.setStatus(line.substring(statusPrefix.length()).trim()); + } + } + +} diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/python/variant_walker.py b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/python/variant_walker.py new file mode 100644 index 0000000000..4d56e92c45 --- /dev/null +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/python/variant_walker.py @@ -0,0 +1,157 @@ +import sys +import importlib +import os +from abc import ABC, abstractmethod + +class VariantWalker(ABC): + @abstractmethod + def setup(self, *arg): + """ + This function is responsible for setting up any necessary configurations + before 
processing the entries. + *args: Configuration arguments. + """ + pass + + @abstractmethod + def header(self, header): + """ + This function will process the header as a list of strings. + header (list): A list of strings representing the header. + """ + pass + + @abstractmethod + def map(self, line): + """ + This function processes each entry. + + Args: + line (str): A line read from stdin. + """ + pass + + @abstractmethod + def cleanup(self): + """ + This function is responsible for any cleanup tasks after all entries have been processed. + """ + pass + + def count(self, key, increment): + """ + Increment a counter with a given value. + + Args: + key (str): Counter name + increment (int): Counter increment + """ + if not all(char.isalnum() or char in ['_', '-'] for char in key): + raise ValueError("Invalid key. Key can only contain alphanumeric characters, underscores, and hyphens.") + + print(f"reporter:counter:{key},{increment}", file=sys.stderr) + + def write(self, value): + """ + Write a value to stdout. + + Args: + value (str): The value to write. + """ + print(value) + + def jsonHeaderToVcfHeader(self, jsonHeader): + """ + Convert a JSON header to a VCF header. + + Args: + jsonHeader (dict): The JSON header to convert. + """ + # TODO: Implement this method + return "" + + + def getTmpdir(self): + """ + Get the output directory. + + Returns: + str: The output directory. + """ + return os.environ.get("TMPDIR", "/tmp") + + + + +def main(module_name, class_name, *args): + """ + This is the main function that sets up the environment, reads lines from stdin, + processes them using the map function, and performs cleanup tasks. + + Args: + module_name (str): The name of the module where the VariantWalker subclass is defined. + class_name (str): The name of the VariantWalker subclass to use. + *args: Additional arguments to pass to the setup method of the VariantWalker subclass. 
+ """ + ## If the modulename is a fileName, use the source file loader to load the module + if module_name.endswith(".py"): + ## If the modulename is a relative path, we need to make it an absolute path prepending the current working dir + if not module_name.startswith("/"): + module_name = f"{os.getcwd()}/{module_name}" + + loader = importlib.machinery.SourceFileLoader( 'walker_module', module_name ) + spec = importlib.util.spec_from_loader( 'walker_module', loader ) + module = importlib.util.module_from_spec( spec ) + loader.exec_module( module ) + else: + module = importlib.import_module(module_name) + + WalkerClass = getattr(module, class_name) + walker = WalkerClass() + + try: + walker.setup(*args) + except Exception as e: + print(f"An error occurred during setup: {e}", file=sys.stderr) + raise + + num_entries = 0 + size_entries = 0 + + header_read = False + header = [] + for line in sys.stdin: + num_entries = num_entries + 1 + size_entries = size_entries + len(line) + # Now 'line' does not have trailing '\n' or '\r' + line = line.rstrip() + + ## The line will be a header line if it starts with '#' or if it's the first line + if not header_read: + if line.startswith("#") or num_entries == 1: + header.append(line) + continue + else: + header_read = True + walker.header(header) + + try: + walker.map(line) + except Exception as e: + print(f"An error occurred while processing the line: {e}", file=sys.stderr) + raise + + walker.count("num_entries", num_entries) + walker.count("size_entries", size_entries) + try: + walker.cleanup() + except Exception as e: + print(f"An error occurred during cleanup: {e}", file=sys.stderr) + raise + return 0 + +if __name__ == "__main__": + if len(sys.argv) < 3: + print("Usage: python variant_walker.py [args...]", file=sys.stderr) + sys.exit(1) + sys.exit(main(sys.argv[1], sys.argv[2], *sys.argv[3:])) \ No newline at end of file diff --git 
a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/python/walker_example.py b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/python/walker_example.py new file mode 100644 index 0000000000..2c5c92fd6a --- /dev/null +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/python/walker_example.py @@ -0,0 +1,51 @@ +import argparse +from variant_walker import VariantWalker + +class Echo(VariantWalker): + def setup(self, *arg): + pass + + def header(self, header): + self.write(header) + + def map(self, line): + self.write(line) + pass + + def cleanup(self): + pass + +class Cut(VariantWalker): + def setup(self, *args): + parser = argparse.ArgumentParser() + parser.add_argument('--length', default=10, help='The length to trim each line to.') + args = parser.parse_args(args) + self.length = int(args.length) + + def header(self, header): + # Print last line from header + self.write(header[-1]) + pass + + def map(self, line): + self.write(line[:self.length]) + + def cleanup(self): + pass + +class Simplify(VariantWalker): + def setup(self, *args): + pass + + def header(self, header): + # Print last line from header + self.write(header[-1]) + + def map(self, line): + # Split line by tab + fields = line.split('\t') + # Write fields 0, 1, 3, 4 joined by ':' + self.write(':'.join([fields[0], fields[1], fields[3], fields[4]])) + + def cleanup(self): + pass diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/VariantHadoopStoragePipelineTest.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/VariantHadoopStoragePipelineTest.java index 773f017a67..33b67fb5b1 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/VariantHadoopStoragePipelineTest.java +++ 
b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/VariantHadoopStoragePipelineTest.java @@ -40,6 +40,7 @@ import org.opencb.opencga.storage.core.variant.VariantStorageOptions; import org.opencb.opencga.storage.core.variant.adaptors.VariantQueryParam; import org.opencb.opencga.storage.core.variant.adaptors.iterators.VariantDBIterator; +import org.opencb.opencga.storage.core.variant.io.VariantWriterFactory; import org.opencb.opencga.storage.hadoop.utils.HBaseManager; import org.opencb.opencga.storage.hadoop.variant.adaptors.VariantHadoopDBAdaptor; import org.opencb.opencga.storage.hadoop.variant.adaptors.phoenix.VariantPhoenixKeyFactory; @@ -284,4 +285,52 @@ public void printVariants() throws Exception { VariantHbaseTestUtils.printVariants(studyMetadata, dbAdaptor, outDir); } + + @Test + public void exportCommand() throws Exception { + URI outdir = newOutputUri(); + List cmdList = Arrays.asList( + "export NUM_VARIANTS=0 ;", + "function setup() {", + " echo \"#SETUP\" ;", + " echo '## Something in single quotes' ; ", + "} ;", + "function map() {", +// " echo \"[$NUM_VARIANTS] $1\" 1>&2 ;", + " echo \"[$NUM_VARIANTS] \" 1>&2 ;", + " echo \"$1\" | jq .id ;", + " NUM_VARIANTS=$((NUM_VARIANTS+1)) ;", + "};", + "function cleanup() {", + " echo \"CLEANUP\" ;", + " echo \"NumVariants = $NUM_VARIANTS\" ;", + "};", + "setup;", + "while read -r i ; do ", + " map \"$i\" ; ", + "done; ", + "cleanup;"); + + // TODO: Add docker prune + + // String cmd = "bash -c '" + String.join("\n", cmdList) + "'"; + String cmd = String.join("\n", cmdList); + String cmdBash = "bash -ce '" + cmd.replace("'", "'\"'\"'") + "'"; + String cmdDocker = "docker run --rm -i opencb/opencga-base bash -ce '" + cmd.replace("'", "'\"'\"'") + "'"; + String cmdPython1 = "python variant_walker.py walker_example Cut --length 30"; +// String cmdPython2 = "python 
/home/jacobo/appl/opencga/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/python/* opencga-storage-hadoop-walker-example MyWalker --length 30"; + + +// variantStorageEngine.walkData(outdir.resolve("variant3.txt.gz"), VariantWriterFactory.VariantOutputFormat.JSON, new Query(), new QueryOptions(), cmdDocker); +// variantStorageEngine.walkData(outdir.resolve("variant2.txt.gz"), VariantWriterFactory.VariantOutputFormat.JSON, new Query(), new QueryOptions(), cmdBash); +// variantStorageEngine.walkData(outdir.resolve("variant1.txt.gz"), VariantWriterFactory.VariantOutputFormat.JSON, new Query(), new QueryOptions(), cmd); +// variantStorageEngine.walkData(outdir.resolve("variant5.txt.gz"), VariantWriterFactory.VariantOutputFormat.JSON, new Query(), new QueryOptions(), cmdPython1); +// variantStorageEngine.walkData(outdir.resolve("variant8.txt.gz"), VariantWriterFactory.VariantOutputFormat.JSON, new Query(), new QueryOptions(), cmdPython2); +// variantStorageEngine.walkData(outdir.resolve("variant6.txt.gz"), VariantWriterFactory.VariantOutputFormat.VCF, new Query(), new QueryOptions(), cmdPython); +// variantStorageEngine.walkData(outdir.resolve("variant4.txt.gz"), VariantWriterFactory.VariantOutputFormat.JSON, new Query(), new QueryOptions(), "opencb/opencga-base", cmd); +// variantStorageEngine.walkData(outdir.resolve("variant4.txt.gz"), VariantWriterFactory.VariantOutputFormat.JSON, new Query(), new QueryOptions(), "opencb/opencga-base", cmdPython1); + variantStorageEngine.walkData(outdir.resolve("variant4.txt.gz"), VariantWriterFactory.VariantOutputFormat.JSON, new Query(), new QueryOptions(), "my-python-app:latest", cmdPython1); + + } + } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/io/HadoopVariantExporterTest.java 
b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/io/HadoopVariantExporterTest.java index f7f8951947..32f0151d67 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/io/HadoopVariantExporterTest.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/io/HadoopVariantExporterTest.java @@ -176,7 +176,7 @@ public void exportTped() throws Exception { public void exportJson() throws Exception { String fileName = "variants.json"; URI uri = getOutputUri(fileName); - variantStorageEngine.exportData(uri, VariantWriterFactory.VariantOutputFormat.JSON, null, new Query(STUDY.key(), study1), new QueryOptions()); + variantStorageEngine.exportData(uri, VariantWriterFactory.VariantOutputFormat.JSON, null, new VariantQuery().study(study1).includeSampleAll(), new QueryOptions()); copyToLocal(fileName, uri); } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/resources/gaps/file1.genome.vcf b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/resources/gaps/file1.genome.vcf index ad5044c0fb..9457d3446f 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/resources/gaps/file1.genome.vcf +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/resources/gaps/file1.genome.vcf @@ -2,6 +2,7 @@ ##FORMAT= ##FORMAT= ##INFO= +##contig= #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT s1 1 1 . N . . . END=10003 GT:DP .:. 1 10004 . C . . . 
END=10010 GT:DP 0/0:3 diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/resources/gaps/file2.genome.vcf b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/resources/gaps/file2.genome.vcf index 9796f163be..f240e02b85 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/resources/gaps/file2.genome.vcf +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/resources/gaps/file2.genome.vcf @@ -7,6 +7,7 @@ ##MULTI_OVERLAP=1:10013:T:C and 1:10014:A:T with 1:10011:ATTT:A ##INSERTION_GAP=1:10031:T:TAAA does not overlap with any from here ##PARTIAL_REFERENCE_BLOCK=1:10044-10053 does not have DP field. +##contig= #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT s2 1 1 . N . . . END=10003 GT:DP .:. 1 10004 . C . . . END=10012 GT:DP 0/0:3 From 9ea00ebc21068fed1f919e1e975161e65b43c25b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Thu, 10 Oct 2024 10:48:31 +0100 Subject: [PATCH 004/122] storage: Add STDERR to exception thrown. Fix max_bytes_per_map. 
#TASK-6722 --- .../analysis/variant/VariantWalkerTool.java | 22 +++-- .../manager/VariantStorageManager.java | 26 ++++-- .../manager/VariantStorageManagerTest.java | 16 ++++ .../core/variant/VariantStorageEngine.java | 19 +++- .../core/variant/VariantStorageOptions.java | 2 +- .../core/variant/io/VariantWriterFactory.java | 2 +- .../main/resources/storage-configuration.yml | 10 +++ .../dummy/DummyVariantStorageEngine.java | 2 +- .../variant/HadoopVariantStorageEngine.java | 39 +++++++-- .../variant/HadoopVariantStorageOptions.java | 17 ++++ .../variant/mr/StreamVariantDriver.java | 18 ++++ .../variant/mr/StreamVariantMapper.java | 87 +++++++++++++------ 12 files changed, 205 insertions(+), 55 deletions(-) diff --git a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/VariantWalkerTool.java b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/VariantWalkerTool.java index 3e826de405..a3eddd7eef 100644 --- a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/VariantWalkerTool.java +++ b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/VariantWalkerTool.java @@ -26,7 +26,6 @@ import org.opencb.opencga.core.models.variant.VariantWalkerParams; import org.opencb.opencga.core.tools.annotations.Tool; import org.opencb.opencga.core.tools.annotations.ToolParams; -import org.opencb.opencga.storage.core.variant.adaptors.VariantQueryParam; import org.opencb.opencga.storage.core.variant.io.VariantWriterFactory; import java.net.URI; @@ -55,6 +54,15 @@ protected void check() throws Exception { } format = VariantWriterFactory.toOutputFormat(toolParams.getOutputFileName(), toolParams.getOutputFileName()); + if (!format.isPlain()) { + format = format.inPlain(); + } + + if (StringUtils.isEmpty(toolParams.getOutputFileName())) { + toolParams.setOutputFileName("output." 
+ format.toString().toLowerCase() + ".gz"); + } else if (!toolParams.getOutputFileName().endsWith(".gz")) { + toolParams.setOutputFileName(toolParams.getOutputFileName() + ".gz"); + } } @Override @@ -70,15 +78,11 @@ protected void run() throws Exception { // The scratch directory is expected to be faster than the final directory // This also avoids moving files to final directory if the tool fails Path outDir = getScratchDir(); - String outputFile = StringUtils.isEmpty(toolParams.getOutputFileName()) - ? outDir.toString() - : outDir.resolve(toolParams.getOutputFileName()).toString(); + String outputFile = outDir.resolve(toolParams.getOutputFileName()).toString(); Query query = toolParams.toQuery(); - QueryOptions queryOptions = new QueryOptions(params); - for (VariantQueryParam param : VariantQueryParam.values()) { - queryOptions.remove(param.key()); - } - uris.addAll(variantStorageManager.walkData(outputFile, + QueryOptions queryOptions = new QueryOptions().append(QueryOptions.INCLUDE, toolParams.getInclude()) + .append(QueryOptions.EXCLUDE, toolParams.getExclude()); + uris.add(variantStorageManager.walkData(outputFile, format, query, queryOptions, toolParams.getDockerImage(), toolParams.getCommandLine(), token)); }); step("move-files", () -> { diff --git a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/manager/VariantStorageManager.java b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/manager/VariantStorageManager.java index d1e276fbf3..f292e6d6a3 100644 --- a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/manager/VariantStorageManager.java +++ b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/manager/VariantStorageManager.java @@ -88,6 +88,7 @@ import org.opencb.opencga.storage.core.variant.VariantStorageOptions; import org.opencb.opencga.storage.core.variant.adaptors.*; import org.opencb.opencga.storage.core.variant.adaptors.iterators.VariantDBIterator; +import 
org.opencb.opencga.storage.core.variant.io.VariantWriterFactory; import org.opencb.opencga.storage.core.variant.io.VariantWriterFactory.VariantOutputFormat; import org.opencb.opencga.storage.core.variant.query.ParsedQuery; import org.opencb.opencga.storage.core.variant.query.VariantQueryResult; @@ -98,6 +99,7 @@ import java.io.IOException; import java.net.URI; +import java.net.URISyntaxException; import java.nio.file.Paths; import java.util.*; import java.util.concurrent.TimeUnit; @@ -202,14 +204,19 @@ public List exportData(String outputFile, VariantOutputFormat outputFormat, * @throws StorageEngineException If there is any error exporting variants * @return generated files */ - public List walkData(String outputFile, VariantOutputFormat format, + public URI walkData(String outputFile, VariantOutputFormat format, Query query, QueryOptions queryOptions, String dockerImage, String commandLine, String token) throws CatalogException, StorageEngineException { String anyStudy = catalogUtils.getAnyStudy(query, token); return secureAnalysis(VariantWalkerTool.ID, anyStudy, queryOptions, token, engine -> { Query finalQuery = catalogUtils.parseQuery(query, queryOptions, engine.getCellBaseUtils(), token); checkSamplesPermissions(finalQuery, queryOptions, token); - URI outputUri = new VariantExportOperationManager(this, engine).getOutputUri(outputFile, format, finalQuery, token); + URI outputUri; + try { + outputUri = UriUtils.createUri(outputFile); + } catch (URISyntaxException e) { + throw new IllegalArgumentException(e); + } return engine.walkData(outputUri, format, finalQuery, queryOptions, dockerImage, commandLine); }); } @@ -533,7 +540,7 @@ public boolean hasVariantSetup(String studyStr, String token) throws CatalogExce public ObjectMap configureProject(String projectStr, ObjectMap params, String token) throws CatalogException, StorageEngineException { return secureOperationByProject("configure", projectStr, params, token, engine -> { - validateNewConfiguration(engine, 
params); + validateNewConfiguration(engine, params, token); DataStore dataStore = getDataStoreByProjectId(projectStr, token); @@ -546,7 +553,7 @@ public ObjectMap configureProject(String projectStr, ObjectMap params, String to public ObjectMap configureStudy(String studyStr, ObjectMap params, String token) throws CatalogException, StorageEngineException { return secureOperation("configure", studyStr, params, token, engine -> { - validateNewConfiguration(engine, params); + validateNewConfiguration(engine, params, token); Study study = catalogManager.getStudyManager() .get(studyStr, new QueryOptions(INCLUDE, StudyDBAdaptor.QueryParams.INTERNAL_CONFIGURATION_VARIANT_ENGINE_OPTIONS.key()), @@ -570,12 +577,13 @@ public ObjectMap configureStudy(String studyStr, ObjectMap params, String token) }); } - private void validateNewConfiguration(VariantStorageEngine engine, ObjectMap params) throws StorageEngineException { - for (VariantStorageOptions option : VariantStorageOptions.values()) { - if (option.isProtected() && params.get(option.key()) != null) { - throw new StorageEngineException("Unable to update protected option '" + option.key() + "'"); - } + private void validateNewConfiguration(VariantStorageEngine engine, ObjectMap params, String token) + throws StorageEngineException, CatalogException { + if (catalogManager.getAuthorizationManager().isOpencgaAdministrator(catalogManager.getUserManager().validateToken(token))) { + logger.info("Skip configuration validation. 
User is an admin."); + return; } + engine.validateNewConfiguration(params); } /** diff --git a/opencga-analysis/src/test/java/org/opencb/opencga/analysis/variant/manager/VariantStorageManagerTest.java b/opencga-analysis/src/test/java/org/opencb/opencga/analysis/variant/manager/VariantStorageManagerTest.java index 4aeedde871..6f371c7fa8 100644 --- a/opencga-analysis/src/test/java/org/opencb/opencga/analysis/variant/manager/VariantStorageManagerTest.java +++ b/opencga-analysis/src/test/java/org/opencb/opencga/analysis/variant/manager/VariantStorageManagerTest.java @@ -35,6 +35,7 @@ import org.opencb.opencga.core.testclassification.duration.MediumTests; import org.opencb.opencga.storage.core.exceptions.StorageEngineException; import org.opencb.opencga.storage.core.variant.VariantStorageEngine; +import org.opencb.opencga.storage.core.variant.VariantStorageOptions; import java.util.Collections; import java.util.HashSet; @@ -101,6 +102,21 @@ public void testConfigure() throws CatalogException, StorageEngineException { assertNotNull(vse2.getOptions().get("KeyFromTheSecondStudy")); } + @Test + public void testConfigureProtectedValues() throws Exception { + VariantStorageOptions key = VariantStorageOptions.WALKER_DOCKER_MEMORY; + assertTrue(key.isProtected()); + ObjectMap conf = new ObjectMap(key.key(), "30g"); + + String fqn = catalogManager.getProjectManager().get(projectId, null, sessionId).first().getFqn(); + + variantManager.configureProject(fqn, new ObjectMap(conf), opencga.getAdminToken()); + + thrown.expect(StorageEngineException.class); + thrown.expectMessage("Unable to update protected option '" + key.key() + "'"); + variantManager.configureProject(projectId, new ObjectMap(conf), sessionId); + } + @Test public void testConfigureSampleIndex() throws Exception { SampleIndexConfiguration conf = getRandomConf(); diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStorageEngine.java 
b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStorageEngine.java index bf46887740..b10b2c7305 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStorageEngine.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStorageEngine.java @@ -285,7 +285,7 @@ public List exportData(URI outputFile, VariantOutputFormat outputFormat, UR return exporter.export(outputFile, outputFormat, variantsFile, parsedVariantQuery); } - public List walkData(URI outputFile, VariantWriterFactory.VariantOutputFormat format, Query query, QueryOptions queryOptions, + public URI walkData(URI outputFile, VariantWriterFactory.VariantOutputFormat format, Query query, QueryOptions queryOptions, String dockerImage, String commandLine) throws IOException, StorageEngineException { if (format == VariantWriterFactory.VariantOutputFormat.VCF || format == VariantWriterFactory.VariantOutputFormat.VCF_GZ) { @@ -304,8 +304,11 @@ public List walkData(URI outputFile, VariantWriterFactory.VariantOutputForm String dockerCommandLine = "docker run --rm -i " + "--memory " + memory + " " - + "--cpus " + cpu + " " - + "--user " + user + " "; + + "--cpus " + cpu + " "; + + if (StringUtils.isNotEmpty(user)) { + dockerCommandLine += "--user " + user + " "; + } if (StringUtils.isNotEmpty(volume)) { dockerCommandLine += "-v " + volume + ":/data "; @@ -323,7 +326,7 @@ public List walkData(URI outputFile, VariantWriterFactory.VariantOutputForm } - public abstract List walkData(URI outputFile, VariantOutputFormat format, Query query, QueryOptions queryOptions, + public abstract URI walkData(URI outputFile, VariantOutputFormat format, Query query, QueryOptions queryOptions, String commandLine) throws StorageEngineException; @@ -1202,6 +1205,14 @@ public abstract void loadVariantScore(URI scoreFile, String study, String scoreN @Override public abstract void 
testConnection() throws StorageEngineException; + public void validateNewConfiguration(ObjectMap params) throws StorageEngineException { + for (VariantStorageOptions option : VariantStorageOptions.values()) { + if (option.isProtected() && params.get(option.key()) != null) { + throw new StorageEngineException("Unable to update protected option '" + option.key() + "'"); + } + } + } + public void reloadCellbaseConfiguration() { cellBaseUtils = null; } diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStorageOptions.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStorageOptions.java index f7736bd5b1..b00bd525bd 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStorageOptions.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStorageOptions.java @@ -102,7 +102,7 @@ public enum VariantStorageOptions implements ConfigurationOption { WALKER_DOCKER_MEMORY("walker.docker.memory", "512m", true), WALKER_DOCKER_CPU("walker.docker.cpu", "1", true), - WALKER_DOCKER_USER("walker.docker.user", "root", true), + WALKER_DOCKER_USER("walker.docker.user", "", true), WALKER_DOCKER_ENV("walker.docker.env", "", true), WALKER_DOCKER_MOUNT("walker.docker.mount", "", true), WALKER_DOCKER_OPTS("walker.docker.opts", "", true), diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/io/VariantWriterFactory.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/io/VariantWriterFactory.java index fa002facbd..61c2e6552d 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/io/VariantWriterFactory.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/io/VariantWriterFactory.java @@ 
-122,7 +122,7 @@ public boolean isSnappy() { return extension.endsWith(".snappy"); } - public VariantOutputFormat inPlan() { + public VariantOutputFormat inPlain() { if (!isPlain()) { return VariantOutputFormat.valueOf(name().replace("_GZ", "").replace("_SNAPPY", "")); } else { diff --git a/opencga-storage/opencga-storage-core/src/main/resources/storage-configuration.yml b/opencga-storage/opencga-storage-core/src/main/resources/storage-configuration.yml index b9970d18ea..21c2dd12f7 100644 --- a/opencga-storage/opencga-storage-core/src/main/resources/storage-configuration.yml +++ b/opencga-storage/opencga-storage-core/src/main/resources/storage-configuration.yml @@ -128,6 +128,13 @@ variant: search.intersect.always: false # Force intersect queries search.intersect.params.threshold: 3 # Minimum number of QueryParams in the query to intersect + walker.docker.memory: "512m" # Memory limit for the docker executor + walker.docker.cpu: "1" # CPU limit for the docker executor + walker.docker.user: "" # User to run the docker executor + walker.docker.env: "" # Environment variables to be passed to the docker executor. e.g. key=value,key2=value2 + walker.docker.mount: "" # Volumes to be mounted in the docker executor + walker.docker.opts: "" # Additional docker options + ## The following section defines all available storage engine plugins installed engines: ## Hadoop Storage Engine @@ -177,6 +184,9 @@ variant: # See opencb/opencga#352 for more info. 
storage.hadoop.mr.scanner.timeout: 300000 + # DOCKER_HOST environment variable to be used by the docker executor inside the MapReduce job + storage.hadoop.mr.stream.docker.host: "" + mapreduce.map.memory.mb: 2048 DeleteHBaseColumnDriver: storage.hadoop.write.mappers.limit.factor: 4 diff --git a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/dummy/DummyVariantStorageEngine.java b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/dummy/DummyVariantStorageEngine.java index e10370dcaa..65a0169ef8 100644 --- a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/dummy/DummyVariantStorageEngine.java +++ b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/dummy/DummyVariantStorageEngine.java @@ -143,7 +143,7 @@ public void importData(URI input, VariantMetadata metadata, List walkData(URI outputFile, VariantWriterFactory.VariantOutputFormat format, Query query, QueryOptions queryOptions, String commandLine) throws StorageEngineException { + public URI walkData(URI outputFile, VariantWriterFactory.VariantOutputFormat format, Query query, QueryOptions queryOptions, String commandLine) throws StorageEngineException { throw new UnsupportedOperationException("Unable to walk data in " + getStorageEngineId()); } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantStorageEngine.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantStorageEngine.java index fdee34d313..061bc95642 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantStorageEngine.java +++ 
b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantStorageEngine.java @@ -317,20 +317,38 @@ protected VariantExporter newVariantExporter(VariantMetadataFactory metadataFact } @Override - public List walkData(URI outputFile, VariantWriterFactory.VariantOutputFormat format, + public URI walkData(URI outputFile, VariantWriterFactory.VariantOutputFormat format, Query query, QueryOptions queryOptions, String commandLine) throws StorageEngineException { ParsedVariantQuery variantQuery = parseQuery(query, queryOptions); int studyId = variantQuery.getStudyQuery().getDefaultStudy().getId(); + ObjectMap params = new ObjectMap(getOptions()).appendAll(variantQuery.getQuery()).appendAll(variantQuery.getInputOptions()); + params.remove(StreamVariantDriver.COMMAND_LINE_PARAM); + + String memory = getOptions().getString(WALKER_DOCKER_MEMORY.key(), WALKER_DOCKER_MEMORY.defaultValue()); + int memoryBytes; + if (memory.endsWith("M") || memory.endsWith("m")) { + memoryBytes = Integer.parseInt(memory.substring(0, memory.length() - 1)) * 1024 * 1024; + } else if (memory.endsWith("G") || memory.endsWith("g")) { + memoryBytes = Integer.parseInt(memory.substring(0, memory.length() - 1)) * 1024 * 1024 * 1024; + } else { + memoryBytes = Integer.parseInt(memory); + } + + String dockerHost = getOptions().getString(MR_STREAM_DOCKER_HOST.key(), MR_STREAM_DOCKER_HOST.defaultValue()); + if (StringUtils.isNotEmpty(dockerHost)) { + params.put(StreamVariantDriver.ENVIRONMENT_VARIABLES, "DOCKER_HOST=" + dockerHost); + } + getMRExecutor().run(StreamVariantDriver.class, StreamVariantDriver.buildArgs( null, getVariantTableName(), studyId, null, - new ObjectMap().appendAll(variantQuery.getQuery()).appendAll(variantQuery.getInputOptions()) - .append(StreamVariantDriver.MAX_BYTES_PER_MAP_PARAM, 1024 * 10) + params + .append(StreamVariantDriver.MAX_BYTES_PER_MAP_PARAM, memoryBytes / 2) 
.append(StreamVariantDriver.COMMAND_LINE_BASE64_PARAM, Base64.getEncoder().encodeToString(commandLine.getBytes())) .append(StreamVariantDriver.INPUT_FORMAT_PARAM, format.toString()) .append(StreamVariantDriver.OUTPUT_PARAM, outputFile) - ), ""); - return null; + ), "Walk data"); + return outputFile; } @Override @@ -1335,4 +1353,15 @@ public void testConnection() throws StorageEngineException { } } + @Override + public void validateNewConfiguration(ObjectMap params) throws StorageEngineException { + super.validateNewConfiguration(params); + + for (HadoopVariantStorageOptions option : HadoopVariantStorageOptions.values()) { + if (option.isProtected() && params.get(option.key()) != null) { + throw new StorageEngineException("Unable to update protected option '" + option.key() + "'"); + } + } + } + } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantStorageOptions.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantStorageOptions.java index 817605be87..363b07e9fb 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantStorageOptions.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantStorageOptions.java @@ -60,6 +60,8 @@ public enum HadoopVariantStorageOptions implements ConfigurationOption { MR_EXECUTOR_SSH_HADOOP_SCP_BIN("storage.hadoop.mr.executor.ssh.hadoop-scp.bin", "misc/scripts/hadoop-scp.sh"), MR_EXECUTOR_SSH_HADOOP_TERMINATION_GRACE_PERIOD_SECONDS("storage.hadoop.mr.executor.ssh.terminationGracePeriodSeconds", 120), + MR_STREAM_DOCKER_HOST("storage.hadoop.mr.stream.docker.host", "", true), + ///////////////////////// // Variant table configuration ///////////////////////// @@ -134,6 +136,7 
@@ public enum HadoopVariantStorageOptions implements ConfigurationOption { private final String key; private final Object value; + private final boolean isProtected; HadoopVariantStorageOptions(String key) { this(key, null); @@ -142,6 +145,13 @@ public enum HadoopVariantStorageOptions implements ConfigurationOption { HadoopVariantStorageOptions(String key, Object value) { this.key = key; this.value = value; + this.isProtected = false; + } + + HadoopVariantStorageOptions(String key, Object value, boolean isProtected) { + this.key = key; + this.value = value; + this.isProtected = isProtected; } @Override @@ -157,4 +167,11 @@ public String key() { public T defaultValue() { return (T) value; } + + @Override + public boolean isProtected() { + return isProtected; + } + + } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantDriver.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantDriver.java index bb31552ad6..5a248e190e 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantDriver.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantDriver.java @@ -20,6 +20,7 @@ import java.io.IOException; import java.lang.invoke.MethodHandles; +import java.util.HashMap; import java.util.Map; public class StreamVariantDriver extends VariantDriver { @@ -28,11 +29,13 @@ public class StreamVariantDriver extends VariantDriver { public static final String COMMAND_LINE_PARAM = "commandLine"; public static final String COMMAND_LINE_BASE64_PARAM = "commandLineBase64"; public static final String MAX_BYTES_PER_MAP_PARAM = "maxBytesPerMap"; + public static final String ENVIRONMENT_VARIABLES = "envVars"; private 
VariantWriterFactory.VariantOutputFormat format; private int maxBytesPerMap; private static Logger logger = LoggerFactory.getLogger(StreamVariantDriver.class); private String commandLine; + private Map envVars; private Class mapperClass; private Class reducerClass; @@ -76,6 +79,20 @@ protected void parseAndValidateParameters() throws IOException { commandLine = new String(java.util.Base64.getDecoder().decode(commandLineBase64)); } + envVars = new HashMap<>(); + String envVarsStr = getParam(ENVIRONMENT_VARIABLES); + if (StringUtils.isNotEmpty(envVarsStr)) { + String[] split = envVarsStr.split(","); + for (String s : split) { + String[] split1 = s.split("="); + if (split1.length != 2) { + throw new IllegalArgumentException("Invalid environment variable '" + s + "'"); + } + envVars.put(split1[0], split1[1]); + } + } + + String outdirStr = getParam(OUTPUT_PARAM); if (StringUtils.isEmpty(outdirStr)) { throw new IllegalArgumentException("Missing argument " + OUTPUT_PARAM); @@ -115,6 +132,7 @@ protected void setupJob(Job job) throws IOException { StreamVariantMapper.setCommandLine(job, commandLine); StreamVariantMapper.setVariantFormat(job, format); StreamVariantMapper.setMaxInputBytesPerProcess(job, maxBytesPerMap); + StreamVariantMapper.setEnvironment(job, envVars); reducerClass = Reducer.class; diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantMapper.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantMapper.java index 163d6bd964..df5425e8d4 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantMapper.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantMapper.java @@ -18,10 +18,7 @@ 
import org.opencb.opencga.storage.hadoop.variant.metadata.HBaseVariantStorageMetadataDBAdaptorFactory; import java.io.*; -import java.util.Base64; -import java.util.HashMap; -import java.util.Map; -import java.util.concurrent.atomic.AtomicReference; +import java.util.*; import static org.opencb.opencga.storage.hadoop.variant.mr.VariantsTableMapReduceHelper.COUNTER_GROUP_NAME; @@ -30,8 +27,9 @@ public class StreamVariantMapper extends VariantMapper throwable = new AtomicReference<>(); + protected final List throwables = Collections.synchronizedList(new ArrayList<>()); private volatile boolean processProvidedStatus_ = false; public static void setCommandLine(Job job, String commandLine) { String commandLineBase64 = Base64.getEncoder().encodeToString(commandLine.getBytes()); - job.getConfiguration().set(STREAMPROCESSOR, commandLineBase64); + job.getConfiguration().set(COMMANDLINE_BASE64, commandLineBase64); } public static void setVariantFormat(Job job, VariantWriterFactory.VariantOutputFormat format) { @@ -82,16 +80,15 @@ public static void setMaxInputBytesPerProcess(Job job, int maxInputBytesPerProce protected void setup(Context context) throws IOException, InterruptedException { super.setup(context); Configuration conf = context.getConfiguration(); - commandLine = new String(Base64.getDecoder().decode(conf.get(STREAMPROCESSOR))); + commandLine = new String(Base64.getDecoder().decode(conf.get(COMMANDLINE_BASE64))); maxInputBytesPerProcess = conf.getInt(MAX_INPUT_BYTES_PER_PROCESS, 1024 * 1024 * 1024); format = VariantWriterFactory.toOutputFormat(conf.get(VARIANT_FORMAT), ""); if (!format.isPlain()) { - format = format.inPlan(); + format = format.inPlain(); } - envs = new HashMap<>(); - addEnvironment(envs, conf.get("stream.addenvironment")); + addEnvironment(envs, conf); // add TMPDIR environment variable with the value of java.io.tmpdir envs.put("TMPDIR", System.getProperty("java.io.tmpdir")); @@ -135,24 +132,33 @@ public void run(Context context) throws 
IOException, InterruptedException { } private boolean hasExceptions() { - return throwable.get() != null; + return !throwables.isEmpty(); } private void setException(Throwable th) { - if (!throwable.compareAndSet(null, th)) { - synchronized (throwable) { - // addSuppressed is not thread safe - throwable.get().addSuppressed(th); - } - } + throwables.add(th); LOG.warn("{}", th); } private void throwExceptionIfAny() throws IOException { if (hasExceptions()) { - Throwable cause = throwable.get(); - throwable.set(null); - throw new IOException("MROutput/MRErrThread failed:", cause); + String message = "StreamVariantMapper failed:"; + if (stderrThread != null) { + String stderr = String.join("\n", stderrThread.stderrBuffer); + message += "\nSTDERR: " + stderr; + } + if (throwables.size() == 1) { + Throwable cause = throwables.get(0); + throwables.clear(); + throw new IOException(message, cause); + } else { + IOException exception = new IOException(message); + for (int i = 1; i < throwables.size(); i++) { + exception.addSuppressed(throwables.get(i)); + } + throwables.clear(); + throw exception; + } } } @@ -247,7 +253,6 @@ private void startProcess(Context context) throws IOException { processedBytes = 0; numRecordsRead = 0; numRecordsWritten = 0; - throwable.set(null); variantDataWriter.open(); variantDataWriter.pre(); @@ -255,7 +260,30 @@ private void startProcess(Context context) throws IOException { } - void addEnvironment(Map env, String nameVals) { + public static void setEnvironment(Job job, Map env) { + if (env == null || env.isEmpty()) { + return; + } + StringBuilder sb = new StringBuilder(); + for (Map.Entry entry : env.entrySet()) { + if (entry.getKey().contains(" ") || entry.getValue().contains(" ")) { + throw new IllegalArgumentException("Environment variables cannot contain spaces: " + + "'" + entry.getKey() + "' = '" + entry.getValue() + "'"); + } + if (entry.getKey().contains("=") || entry.getValue().contains("=")) { + throw new 
IllegalArgumentException("Environment variables cannot contain '=': " + + "'" + entry.getKey() + "' = '" + entry.getValue() + "'"); + } + if (sb.length() > 0) { + sb.append(" "); + } + sb.append(entry.getKey()).append("=").append(entry.getValue()); + } + job.getConfiguration().set(ADDENVIRONMENT_PARAM, sb.toString()); + } + + public static void addEnvironment(Map env, Configuration conf) { + String nameVals = conf.get(ADDENVIRONMENT_PARAM); // encoding "a=b c=d" from StreamJob if (nameVals == null) { return; @@ -264,7 +292,7 @@ void addEnvironment(Map env, String nameVals) { for (int i = 0; i < nv.length; i++) { String[] pair = nv[i].split("=", 2); if (pair.length != 2) { - LOG.info("Skip env entry:" + nv[i]); + throw new IllegalArgumentException("Invalid name=value: " + nv[i]); } else { env.put(pair[0], pair[1]); } @@ -319,6 +347,9 @@ private class MRErrorThread extends Thread { private final String reporterPrefix; private final String counterPrefix; private final String statusPrefix; + private final LinkedList stderrBuffer = new LinkedList<>(); + private int stderrBufferSize = 0; + private static final int STDERR_BUFFER_CAPACITY = 10 * 1024; MRErrorThread(Context context) { this.context = context; @@ -345,6 +376,12 @@ public void run() { LOG.warn("Cannot parse reporter line: " + lineStr); } } else { + // Store STDERR in a circular buffer (just the last 10KB), and include it in case of exception + stderrBuffer.add(lineStr); + stderrBufferSize += lineStr.length(); + while (stderrBufferSize > STDERR_BUFFER_CAPACITY && stderrBuffer.size() > 3) { + stderrBufferSize -= stderrBuffer.remove().length(); + } LOG.info("[STDERR] - " + lineStr); // System.err.println(lineStr); } From 7558a26ee5c424efb5876623b9554c3e11721a8f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Thu, 10 Oct 2024 12:47:47 +0100 Subject: [PATCH 005/122] storage: Add satus details when throwing exceptions. 
#TASK-6722 --- .../variant/mr/StreamVariantMapper.java | 52 +++++++++++++++---- 1 file changed, 43 insertions(+), 9 deletions(-) diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantMapper.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantMapper.java index df5425e8d4..f2c6e4c1d9 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantMapper.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantMapper.java @@ -117,13 +117,24 @@ public void run(Context context) throws IOException, InterruptedException { map(context.getCurrentKey(), context.getCurrentValue(), context); } while (!hasExceptions() && context.nextKeyValue()); } catch (Throwable th) { - setException(th); + Object currentKey = context.getCurrentKey(); + if (currentKey != null) { + String keyStr; + if (currentKey instanceof ImmutableBytesWritable) { + keyStr = Bytes.toStringBinary(((ImmutableBytesWritable) currentKey).get()); + } else { + keyStr = currentKey.toString(); + } + addException("Exception in mapper for key: " + keyStr, th); + } else { + addException(th); + } } try { // Always call cleanup, even if there was an exception cleanup(context); } catch (Throwable th) { - setException(th); + addException(th); } } else { context.getCounter(COUNTER_GROUP_NAME, "EMPTY_INPUT_SPLIT").increment(1); @@ -135,9 +146,30 @@ private boolean hasExceptions() { return !throwables.isEmpty(); } - private void setException(Throwable th) { + private void addException(String message, Throwable th) { + addException(new Exception(message, th)); + } + + private void addException(Throwable th) { throwables.add(th); LOG.warn("{}", th); + if (th 
instanceof OutOfMemoryError) { + try { + // Print the current memory status in multiple lines + Runtime runtime = Runtime.getRuntime(); + LOG.warn("Catch OutOfMemoryError!"); + LOG.warn("Free memory: " + runtime.freeMemory()); + LOG.warn("Total memory: " + runtime.totalMemory()); + LOG.warn("Max memory: " + runtime.maxMemory()); + th.addSuppressed(new Exception( + "Free memory: " + runtime.freeMemory() + ", " + + "Total memory: " + runtime.totalMemory() + ", " + + "Max memory: " + runtime.maxMemory())); + } catch (Throwable t) { + // Ignore any exception while printing the memory status + LOG.warn("Error printing memory status", t); + } + } } private void throwExceptionIfAny() throws IOException { @@ -201,7 +233,7 @@ private void closeProcess(Context context) throws IOException, InterruptedExcept process = null; } } catch (Throwable th) { - setException(th); + addException(th); } try { @@ -211,7 +243,7 @@ private void closeProcess(Context context) throws IOException, InterruptedExcept stdout = null; } } catch (Throwable th) { - setException(th); + addException(th); } try { if (stderr != null) { @@ -220,7 +252,7 @@ private void closeProcess(Context context) throws IOException, InterruptedExcept stderr = null; } } catch (Throwable th) { - setException(th); + addException(th); } // drainStdout(context); } @@ -231,6 +263,9 @@ private void startProcess(Context context) throws IOException { // Start the process ProcessBuilder builder = new ProcessBuilder("bash", "-ce", commandLine); +// System.getenv().forEach((k, v) -> LOG.info("SYSTEM ENV: " + k + "=" + v)); +// builder.environment().forEach((k, v) -> LOG.info("ProcessBuilder ENV: " + k + "=" + v)); +// envs.forEach((k, v) -> LOG.info("Config ENV: " + k + "=" + v)); builder.environment().putAll(envs); process = builder.start(); @@ -334,7 +369,7 @@ public void run() { } } } catch (Throwable th) { - setException(th); + addException(th); } } } @@ -383,7 +418,6 @@ public void run() { stderrBufferSize -= 
stderrBuffer.remove().length(); } LOG.info("[STDERR] - " + lineStr); -// System.err.println(lineStr); } long now = System.currentTimeMillis(); if (now - lastStderrReport > REPORTER_ERR_DELAY) { @@ -393,7 +427,7 @@ public void run() { line.clear(); } } catch (Throwable th) { - setException(th); + addException(th); } } From bc7c6ae8b65db767e57f7771cc6f3f276af25d40 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Fri, 11 Oct 2024 08:57:08 +0100 Subject: [PATCH 006/122] storage: Fix walker output file name #TASK-6722 --- .../opencb/opencga/analysis/variant/VariantWalkerTool.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/VariantWalkerTool.java b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/VariantWalkerTool.java index a3eddd7eef..56e008e0da 100644 --- a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/VariantWalkerTool.java +++ b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/VariantWalkerTool.java @@ -53,13 +53,13 @@ protected void check() throws Exception { toolParams.setFileFormat(VariantWriterFactory.VariantOutputFormat.VCF.toString()); } - format = VariantWriterFactory.toOutputFormat(toolParams.getOutputFileName(), toolParams.getOutputFileName()); + format = VariantWriterFactory.toOutputFormat(toolParams.getFileFormat(), toolParams.getOutputFileName()); if (!format.isPlain()) { format = format.inPlain(); } if (StringUtils.isEmpty(toolParams.getOutputFileName())) { - toolParams.setOutputFileName("output." 
+ format.toString().toLowerCase() + ".gz"); + toolParams.setOutputFileName("output.txt.gz"); } else if (!toolParams.getOutputFileName().endsWith(".gz")) { toolParams.setOutputFileName(toolParams.getOutputFileName() + ".gz"); } From ab4dff5b076cb29e684024f593c5084f5ed7cebc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Tue, 15 Oct 2024 16:14:02 +0100 Subject: [PATCH 007/122] storage: Properly configure task java heap #TASK-6722 --- .../opencb/opencga/core/common/IOUtils.java | 15 ++-- .../core/variant/VariantStorageOptions.java | 2 +- .../main/resources/storage-configuration.yml | 18 ++-- .../hadoop/utils/AbstractHBaseDriver.java | 11 ++- .../variant/HadoopVariantStorageEngine.java | 14 +-- .../variant/HadoopVariantStorageOptions.java | 11 +++ .../variant/mr/StreamVariantMapper.java | 31 +++++-- .../variant/mr/VariantMapReduceUtil.java | 90 +++++++++++++++++++ .../src/main/python/variant_walker.py | 37 ++++---- 9 files changed, 174 insertions(+), 55 deletions(-) diff --git a/opencga-core/src/main/java/org/opencb/opencga/core/common/IOUtils.java b/opencga-core/src/main/java/org/opencb/opencga/core/common/IOUtils.java index e37374e76e..eb0cdeaf29 100644 --- a/opencga-core/src/main/java/org/opencb/opencga/core/common/IOUtils.java +++ b/opencga-core/src/main/java/org/opencb/opencga/core/common/IOUtils.java @@ -389,15 +389,16 @@ public static long fromHumanReadableToByte(String value, boolean assumeBinary) { if (value.endsWith("B")) { value = value.substring(0, value.length() - 1); } - boolean si; - if (value.endsWith("i")) { - si = false; - value = value.substring(0, value.length() - 1); - } else { - si = true; - } + final boolean si; if (assumeBinary) { si = false; + } else { + if (value.endsWith("i")) { + si = false; + value = value.substring(0, value.length() - 1); + } else { + si = true; + } } int unit = si ? 
1000 : 1024; int exp = "KMGTPE".indexOf(value.toUpperCase().charAt(value.length() - 1)) + 1; diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStorageOptions.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStorageOptions.java index b00bd525bd..c8d8cf63ce 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStorageOptions.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStorageOptions.java @@ -100,7 +100,7 @@ public enum VariantStorageOptions implements ConfigurationOption { QUERY_SAMPLE_LIMIT_DEFAULT("query.sample.limit.default", 100), QUERY_SAMPLE_LIMIT_MAX("query.sample.limit.max", 1000), - WALKER_DOCKER_MEMORY("walker.docker.memory", "512m", true), + WALKER_DOCKER_MEMORY("walker.docker.memory", "1024m", true), WALKER_DOCKER_CPU("walker.docker.cpu", "1", true), WALKER_DOCKER_USER("walker.docker.user", "", true), WALKER_DOCKER_ENV("walker.docker.env", "", true), diff --git a/opencga-storage/opencga-storage-core/src/main/resources/storage-configuration.yml b/opencga-storage/opencga-storage-core/src/main/resources/storage-configuration.yml index 21c2dd12f7..f422770d9b 100644 --- a/opencga-storage/opencga-storage-core/src/main/resources/storage-configuration.yml +++ b/opencga-storage/opencga-storage-core/src/main/resources/storage-configuration.yml @@ -128,12 +128,12 @@ variant: search.intersect.always: false # Force intersect queries search.intersect.params.threshold: 3 # Minimum number of QueryParams in the query to intersect - walker.docker.memory: "512m" # Memory limit for the docker executor - walker.docker.cpu: "1" # CPU limit for the docker executor - walker.docker.user: "" # User to run the docker executor - walker.docker.env: "" # Environment variables to be passed to the docker executor. e.g. 
key=value,key2=value2 - walker.docker.mount: "" # Volumes to be mounted in the docker executor - walker.docker.opts: "" # Additional docker options + walker.docker.memory: "1024m" # Memory limit for the docker executor +# walker.docker.cpu: "1" # CPU limit for the docker executor +# walker.docker.user: "" # User to run the docker executor +# walker.docker.env: "" # Environment variables to be passed to the docker executor. e.g. key=value,key2=value2 +# walker.docker.mount: "" # Volumes to be mounted in the docker executor +# walker.docker.opts: "" # Additional docker options ## The following section defines all available storage engine plugins installed engines: @@ -191,16 +191,16 @@ variant: DeleteHBaseColumnDriver: storage.hadoop.write.mappers.limit.factor: 4 DiscoverPendingVariantsDriver: - mapreduce.map.memory.mb: 750 + mapreduce.map.memory.mb: 2048 VariantStatsDriver: mapreduce.map.memory.mb: 2048 + StreamVariantDriver: + mapreduce.map.memory.mb: 3072 SampleIndexDriver: mapreduce.map.memory.mb: 4096 max-columns-per-scan: 8000 SampleIndexAnnotationLoaderDriver: mapreduce.map.memory.mb: 4096 - VariantMigration200Driver: - mapreduce.map.memory.mb: 1024 ## PENDING diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/utils/AbstractHBaseDriver.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/utils/AbstractHBaseDriver.java index 4e9fe7057a..4a15198f10 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/utils/AbstractHBaseDriver.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/utils/AbstractHBaseDriver.java @@ -11,6 +11,7 @@ import org.apache.hadoop.hbase.mapreduce.TableInputFormat; import org.apache.hadoop.hbase.mapreduce.TableOutputFormat; import 
org.apache.hadoop.io.IOUtils; +import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapreduce.*; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter; @@ -80,6 +81,7 @@ private Job newJob() throws IOException { addJobConf(job, MRJobConfig.JOB_RUNNING_MAP_LIMIT); addJobConf(job, MRJobConfig.JOB_RUNNING_REDUCE_LIMIT); addJobConf(job, MRJobConfig.TASK_TIMEOUT); + VariantMapReduceUtil.configureTaskJavaHeap(((JobConf) job.getConfiguration()), getClass()); return job; } @@ -171,10 +173,15 @@ public final int run(String[] args) throws Exception { } else { LOGGER.info(" * Mapper : " + job.getMapperClass().getName()); } - LOGGER.info(" - memory (MB) : " + job.getConfiguration().getInt(MRJobConfig.MAP_MEMORY_MB, -1)); + JobConf jobConf = (JobConf) job.getConfiguration(); + LOGGER.info(" - memory required (MB) : " + jobConf.getMemoryRequired(TaskType.MAP)); + LOGGER.info(" - java-heap (MB) : " + JobConf.parseMaximumHeapSizeMB(jobConf.getTaskJavaOpts(TaskType.MAP))); + LOGGER.info(" - java-opts : " + jobConf.getTaskJavaOpts(TaskType.MAP)); if (job.getNumReduceTasks() > 0) { LOGGER.info(" * Reducer : " + job.getNumReduceTasks() + "x " + job.getReducerClass().getName()); - LOGGER.info(" - memory (MB) : " + job.getConfiguration().getInt(MRJobConfig.REDUCE_MEMORY_MB, -1)); + LOGGER.info(" - memory required (MB) : " + jobConf.getMemoryRequired(TaskType.REDUCE)); + LOGGER.info(" - java-heap (MB) : " + JobConf.parseMaximumHeapSizeMB(jobConf.getTaskJavaOpts(TaskType.REDUCE))); + LOGGER.info(" - java-opts : " + jobConf.getTaskJavaOpts(TaskType.REDUCE)); } else { LOGGER.info(" * Reducer : (no reducer)"); } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantStorageEngine.java 
b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantStorageEngine.java index 061bc95642..8598407233 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantStorageEngine.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantStorageEngine.java @@ -324,15 +324,8 @@ public URI walkData(URI outputFile, VariantWriterFactory.VariantOutputFormat for ObjectMap params = new ObjectMap(getOptions()).appendAll(variantQuery.getQuery()).appendAll(variantQuery.getInputOptions()); params.remove(StreamVariantDriver.COMMAND_LINE_PARAM); - String memory = getOptions().getString(WALKER_DOCKER_MEMORY.key(), WALKER_DOCKER_MEMORY.defaultValue()); - int memoryBytes; - if (memory.endsWith("M") || memory.endsWith("m")) { - memoryBytes = Integer.parseInt(memory.substring(0, memory.length() - 1)) * 1024 * 1024; - } else if (memory.endsWith("G") || memory.endsWith("g")) { - memoryBytes = Integer.parseInt(memory.substring(0, memory.length() - 1)) * 1024 * 1024 * 1024; - } else { - memoryBytes = Integer.parseInt(memory); - } + String dockerMemory = getOptions().getString(WALKER_DOCKER_MEMORY.key(), WALKER_DOCKER_MEMORY.defaultValue()); + long dockerMemoryBytes = IOUtils.fromHumanReadableToByte(dockerMemory, true); String dockerHost = getOptions().getString(MR_STREAM_DOCKER_HOST.key(), MR_STREAM_DOCKER_HOST.defaultValue()); if (StringUtils.isNotEmpty(dockerHost)) { @@ -343,7 +336,8 @@ public URI walkData(URI outputFile, VariantWriterFactory.VariantOutputFormat for null, getVariantTableName(), studyId, null, params - .append(StreamVariantDriver.MAX_BYTES_PER_MAP_PARAM, memoryBytes / 2) + .append(MR_HEAP_MAP_OTHER_MB.key(), dockerMemoryBytes / 1024 / 1204) + .append(StreamVariantDriver.MAX_BYTES_PER_MAP_PARAM, dockerMemoryBytes / 
2) .append(StreamVariantDriver.COMMAND_LINE_BASE64_PARAM, Base64.getEncoder().encodeToString(commandLine.getBytes())) .append(StreamVariantDriver.INPUT_FORMAT_PARAM, format.toString()) .append(StreamVariantDriver.OUTPUT_PARAM, outputFile) diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantStorageOptions.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantStorageOptions.java index 363b07e9fb..268caaf925 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantStorageOptions.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantStorageOptions.java @@ -61,6 +61,17 @@ public enum HadoopVariantStorageOptions implements ConfigurationOption { MR_EXECUTOR_SSH_HADOOP_TERMINATION_GRACE_PERIOD_SECONDS("storage.hadoop.mr.executor.ssh.terminationGracePeriodSeconds", 120), MR_STREAM_DOCKER_HOST("storage.hadoop.mr.stream.docker.host", "", true), + MR_HEAP_MIN_MB("storage.hadoop.mr.heap.min-mb", 512), // Min heap size for the JVM + MR_HEAP_MAX_MB("storage.hadoop.mr.heap.max-mb", 2048), // Max heap size for the JVM + MR_HEAP_MAP_OTHER_MB("storage.hadoop.mr.heap.map.other-mb", 0), // Other reserved memory. Not used by the JVM heap. + MR_HEAP_REDUCE_OTHER_MB("storage.hadoop.mr.heap.reduce.other-mb", 0), // Other reserved memory. Not used by the JVM heap. + MR_HEAP_MEMORY_MB_RATIO("storage.hadoop.mr.heap.memory-mb.ratio", 0.6), // Ratio of the memory to use for the JVM heap. + // Heap size for the map and reduce tasks. 
+ // If not set, it will be calculated as: + // (REQUIRED_MEMORY - MR_HEAP_OTHER_MB) * MR_HEAP_MEMORY_MB_RATIO + // then caped between MR_HEAP_MIN_MB and MR_HEAP_MAX_MB + MR_HEAP_MAP_MB("storage.hadoop.mr.heap.map.mb"), + MR_HEAP_REDUCE_MB("storage.hadoop.mr.heap.reduce.mb"), ///////////////////////// // Variant table configuration diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantMapper.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantMapper.java index f2c6e4c1d9..1523348429 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantMapper.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantMapper.java @@ -147,7 +147,14 @@ private boolean hasExceptions() { } private void addException(String message, Throwable th) { - addException(new Exception(message, th)); + th.addSuppressed(new AnnotationException(message)); + addException(th); + } + + public static class AnnotationException extends RuntimeException { + public AnnotationException(String message) { + super(message); + } } private void addException(Throwable th) { @@ -156,15 +163,21 @@ private void addException(Throwable th) { if (th instanceof OutOfMemoryError) { try { // Print the current memory status in multiple lines - Runtime runtime = Runtime.getRuntime(); + Runtime rt = Runtime.getRuntime(); LOG.warn("Catch OutOfMemoryError!"); - LOG.warn("Free memory: " + runtime.freeMemory()); - LOG.warn("Total memory: " + runtime.totalMemory()); - LOG.warn("Max memory: " + runtime.maxMemory()); - th.addSuppressed(new Exception( - "Free memory: " + runtime.freeMemory() + ", " - + "Total memory: " + runtime.totalMemory() + ", " - + "Max 
memory: " + runtime.maxMemory())); + LOG.warn("Free memory: " + rt.freeMemory()); + LOG.warn("Total memory: " + rt.totalMemory()); + LOG.warn("Max memory: " + rt.maxMemory()); + + double mb = 1024 * 1024; + th.addSuppressed(new AnnotationException(String.format("Memory usage. MaxMemory: %.2f MiB" + + " TotalMemory: %.2f MiB" + + " FreeMemory: %.2f MiB" + + " UsedMemory: %.2f MiB", + rt.maxMemory() / mb, + rt.totalMemory() / mb, + rt.freeMemory() / mb, + (rt.totalMemory() - rt.freeMemory()) / mb))); } catch (Throwable t) { // Ignore any exception while printing the memory status LOG.warn("Error printing memory status", t); diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/VariantMapReduceUtil.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/VariantMapReduceUtil.java index 367196742d..0c3ab30a69 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/VariantMapReduceUtil.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/VariantMapReduceUtil.java @@ -15,10 +15,12 @@ import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil; import org.apache.hadoop.hbase.mapreduce.TableMapper; import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.JobContext; import org.apache.hadoop.mapreduce.InputFormat; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.Mapper; +import org.apache.hadoop.mapreduce.TaskType; import org.apache.phoenix.mapreduce.util.PhoenixMapReduceUtil; import org.opencb.commons.datastore.core.ObjectMap; import org.opencb.commons.datastore.core.Query; @@ -51,6 +53,7 @@ import java.util.Collection; import java.util.Collections; import 
java.util.List; +import java.util.regex.Pattern; /** * Created on 27/10/17. @@ -60,6 +63,8 @@ public class VariantMapReduceUtil { private static final Logger LOGGER = LoggerFactory.getLogger(VariantMapReduceUtil.class); + private static final Pattern JAVA_OPTS_XMX_PATTERN = + Pattern.compile(".*(?:^|\\s)-Xmx(\\d+)([gGmMkK]?)(?:$|\\s).*"); public static void initTableMapperJob(Job job, String inTable, String outTable, Scan scan, Class mapperClass) @@ -609,6 +614,26 @@ public static String getParam(Configuration conf, String key, String defaultValu return getParam(conf, key, defaultValue, null); } + private static String getParam(JobConf conf, TaskType taskType, Class clazz, + HadoopVariantStorageOptions mapKey, HadoopVariantStorageOptions reduceKey) { + final String value; + switch (taskType) { + case MAP: + value = getParam(conf, mapKey, clazz); + break; + case REDUCE: + value = getParam(conf, reduceKey, clazz); + break; + default: + throw new IllegalArgumentException("Unexpected task type " + taskType); + } + return value; + } + + public static String getParam(Configuration conf, ConfigurationOption key, Class aClass) { + return getParam(conf, key.key(), key.defaultValue() == null ? null : key.defaultValue().toString(), aClass); + } + /** * Reads a param that might come in different forms. 
It will take the the first value in this order: * - "--{key}" @@ -641,4 +666,69 @@ public static String getParam(Configuration conf, String key, String defaultValu } return value; } + + public static void configureTaskJavaHeap(JobConf conf, Class clazz) { + configureTaskJavaHeap(conf, TaskType.MAP, clazz); + configureTaskJavaHeap(conf, TaskType.REDUCE, clazz); + } + + public static void configureTaskJavaHeap(JobConf conf, TaskType taskType, Class clazz) { + int memoryRequired = conf.getMemoryRequired(taskType); + String heapStr = getParam(conf, taskType, clazz, + HadoopVariantStorageOptions.MR_HEAP_MAP_MB, + HadoopVariantStorageOptions.MR_HEAP_REDUCE_MB); + + int heap; + if (heapStr != null) { + heap = Integer.parseInt(heapStr); + } else { + int minHeap = Integer.parseInt(getParam(conf, HadoopVariantStorageOptions.MR_HEAP_MIN_MB, clazz)); + int maxHeap = Integer.parseInt(getParam(conf, HadoopVariantStorageOptions.MR_HEAP_MAX_MB, clazz)); + double ratio = Double.parseDouble(getParam(conf, HadoopVariantStorageOptions.MR_HEAP_MEMORY_MB_RATIO, clazz)); + int other = Integer.parseInt(getParam(conf, taskType, clazz, + HadoopVariantStorageOptions.MR_HEAP_MAP_OTHER_MB, + HadoopVariantStorageOptions.MR_HEAP_REDUCE_OTHER_MB)); + + heap = (int) Math.round((memoryRequired - other) * ratio); + heap = Math.max(minHeap, heap); + heap = Math.min(maxHeap, heap); + } + setTaskJavaHeap(conf, taskType, heap); + } + + public static void setTaskJavaHeap(Configuration conf, TaskType taskType, int javaHeapMB) { + String javaOpts = getTaskJavaOpts(conf, taskType); + String xmx = " -Xmx" + javaHeapMB + "m"; + if (javaOpts == null) { + javaOpts = xmx; + } else if (javaOpts.contains("-Xmx")) { + javaOpts = JAVA_OPTS_XMX_PATTERN.matcher(javaOpts).replaceFirst(xmx); + } else { + javaOpts += xmx; + } + switch (taskType) { + case MAP: + conf.set(JobConf.MAPRED_MAP_TASK_JAVA_OPTS, javaOpts); + break; + case REDUCE: + conf.set(JobConf.MAPRED_REDUCE_TASK_JAVA_OPTS, javaOpts); + break; + default: + 
throw new IllegalArgumentException("Unexpected task type " + taskType); + } + } + + public static String getTaskJavaOpts(Configuration conf, TaskType taskType) { + switch (taskType) { + case MAP: + return conf.get(JobConf.MAPRED_MAP_TASK_JAVA_OPTS, + conf.get(JobConf.MAPRED_TASK_JAVA_OPTS, JobConf.DEFAULT_MAPRED_TASK_JAVA_OPTS)); + case REDUCE: + return conf.get(JobConf.MAPRED_REDUCE_TASK_JAVA_OPTS, + conf.get(JobConf.MAPRED_TASK_JAVA_OPTS, JobConf.DEFAULT_MAPRED_TASK_JAVA_OPTS)); + default: + throw new IllegalArgumentException("Unexpected task type " + taskType); + } + } + } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/python/variant_walker.py b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/python/variant_walker.py index 4d56e92c45..fa3ea798e5 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/python/variant_walker.py +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/python/variant_walker.py @@ -60,15 +60,15 @@ def write(self, value): """ print(value) - def jsonHeaderToVcfHeader(self, jsonHeader): - """ - Convert a JSON header to a VCF header. - - Args: - jsonHeader (dict): The JSON header to convert. - """ - # TODO: Implement this method - return "" + # def jsonHeaderToVcfHeader(self, jsonHeader): + # """ + # Convert a JSON header to a VCF header. + # + # Args: + # jsonHeader (dict): The JSON header to convert. 
+ # """ + # # TODO: Implement this method + # return "" def getTmpdir(self): @@ -130,16 +130,19 @@ def main(module_name, class_name, *args): if not header_read: if line.startswith("#") or num_entries == 1: header.append(line) - continue else: header_read = True - walker.header(header) - - try: - walker.map(line) - except Exception as e: - print(f"An error occurred while processing the line: {e}", file=sys.stderr) - raise + try: + walker.header(header) + except Exception as e: + print(f"An error occurred while processing the header: {e}", file=sys.stderr) + raise + else: + try: + walker.map(line) + except Exception as e: + print(f"An error occurred while processing a line: {e}", file=sys.stderr) + raise walker.count("num_entries", num_entries) walker.count("size_entries", size_entries) From 45366895b3cf98f8cb5fe0c1c6f26af062e25618 Mon Sep 17 00:00:00 2001 From: JuanfeSanahuja Date: Tue, 15 Oct 2024 19:33:20 +0200 Subject: [PATCH 008/122] Prepare next release 3.4.0-SNAPSHOT --- opencga-analysis/pom.xml | 2 +- opencga-app/pom.xml | 2 +- opencga-catalog/pom.xml | 2 +- opencga-client/pom.xml | 2 +- opencga-clinical/pom.xml | 2 +- opencga-core/pom.xml | 2 +- opencga-master/pom.xml | 2 +- opencga-server/pom.xml | 2 +- opencga-storage/opencga-storage-app/pom.xml | 2 +- opencga-storage/opencga-storage-benchmark/pom.xml | 2 +- opencga-storage/opencga-storage-core/pom.xml | 2 +- .../opencga-storage-hadoop-compat-api/pom.xml | 2 +- .../opencga-storage-hadoop-compat-hbase2.0/pom.xml | 2 +- .../opencga-storage-hadoop-compat-hbase2.2/pom.xml | 2 +- .../opencga-storage-hadoop-compat-hbase2.4/pom.xml | 2 +- .../opencga-storage-hadoop-compat/pom.xml | 2 +- .../opencga-storage-hadoop-core/pom.xml | 2 +- .../opencga-storage-hadoop-lib-emr6.1/pom.xml | 2 +- .../opencga-storage-hadoop-lib-emr6.13/pom.xml | 2 +- .../opencga-storage-hadoop-lib-hdi5.1/pom.xml | 2 +- .../opencga-storage-hadoop-lib-hdp3.1/pom.xml | 2 +- .../opencga-storage-hadoop-lib/pom.xml | 2 +- 
opencga-storage/opencga-storage-hadoop/pom.xml | 2 +- opencga-storage/opencga-storage-server/pom.xml | 2 +- opencga-storage/pom.xml | 2 +- opencga-test/pom.xml | 2 +- pom.xml | 14 +++++++------- 27 files changed, 33 insertions(+), 33 deletions(-) diff --git a/opencga-analysis/pom.xml b/opencga-analysis/pom.xml index 5765c91a15..24019f12dd 100644 --- a/opencga-analysis/pom.xml +++ b/opencga-analysis/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 3.3.0 + 3.4.0-SNAPSHOT ../pom.xml diff --git a/opencga-app/pom.xml b/opencga-app/pom.xml index ec79ddaca0..a86630c5ab 100644 --- a/opencga-app/pom.xml +++ b/opencga-app/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 3.3.0 + 3.4.0-SNAPSHOT ../pom.xml diff --git a/opencga-catalog/pom.xml b/opencga-catalog/pom.xml index 03b546cdb8..31df518865 100644 --- a/opencga-catalog/pom.xml +++ b/opencga-catalog/pom.xml @@ -23,7 +23,7 @@ org.opencb.opencga opencga - 3.3.0 + 3.4.0-SNAPSHOT ../pom.xml diff --git a/opencga-client/pom.xml b/opencga-client/pom.xml index dcccc54b1a..68d77922ca 100644 --- a/opencga-client/pom.xml +++ b/opencga-client/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 3.3.0 + 3.4.0-SNAPSHOT ../pom.xml diff --git a/opencga-clinical/pom.xml b/opencga-clinical/pom.xml index c3d75a68a6..a43011299c 100644 --- a/opencga-clinical/pom.xml +++ b/opencga-clinical/pom.xml @@ -5,7 +5,7 @@ org.opencb.opencga opencga - 3.3.0 + 3.4.0-SNAPSHOT ../pom.xml 4.0.0 diff --git a/opencga-core/pom.xml b/opencga-core/pom.xml index 1fc9a48b2d..6707b15204 100644 --- a/opencga-core/pom.xml +++ b/opencga-core/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 3.3.0 + 3.4.0-SNAPSHOT ../pom.xml diff --git a/opencga-master/pom.xml b/opencga-master/pom.xml index 32da2f3d43..67cd743de8 100644 --- a/opencga-master/pom.xml +++ b/opencga-master/pom.xml @@ -22,7 +22,7 @@ opencga org.opencb.opencga - 3.3.0 + 3.4.0-SNAPSHOT ../pom.xml diff --git a/opencga-server/pom.xml b/opencga-server/pom.xml index 80da61aaca..1fcff2032a 100644 
--- a/opencga-server/pom.xml +++ b/opencga-server/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 3.3.0 + 3.4.0-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-app/pom.xml b/opencga-storage/opencga-storage-app/pom.xml index be598f2bd8..9d3555928b 100644 --- a/opencga-storage/opencga-storage-app/pom.xml +++ b/opencga-storage/opencga-storage-app/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage - 3.3.0 + 3.4.0-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-benchmark/pom.xml b/opencga-storage/opencga-storage-benchmark/pom.xml index 99ea457792..c9bd1a8856 100644 --- a/opencga-storage/opencga-storage-benchmark/pom.xml +++ b/opencga-storage/opencga-storage-benchmark/pom.xml @@ -22,7 +22,7 @@ opencga-storage org.opencb.opencga - 3.3.0 + 3.4.0-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-core/pom.xml b/opencga-storage/opencga-storage-core/pom.xml index b36f18d36c..78e4374579 100644 --- a/opencga-storage/opencga-storage-core/pom.xml +++ b/opencga-storage/opencga-storage-core/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage - 3.3.0 + 3.4.0-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-api/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-api/pom.xml index e3e7c4f60e..f099046300 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-api/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-api/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage-hadoop-compat - 3.3.0 + 3.4.0-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.0/pom.xml 
b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.0/pom.xml index eda95e319c..dd8857dbcb 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.0/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.0/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage-hadoop-compat - 3.3.0 + 3.4.0-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.2/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.2/pom.xml index d0e32ac366..3aab4cb38e 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.2/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.2/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage-hadoop-compat - 3.3.0 + 3.4.0-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.4/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.4/pom.xml index 4db9da3947..a86f33deb4 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.4/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.4/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage-hadoop-compat - 3.3.0 + 3.4.0-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/pom.xml index 46d081a0a4..e975ebbc12 100644 --- 
a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/pom.xml @@ -23,7 +23,7 @@ org.opencb.opencga opencga-storage-hadoop - 3.3.0 + 3.4.0-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/pom.xml index 55d47ae74c..179a35e066 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/pom.xml @@ -23,7 +23,7 @@ org.opencb.opencga opencga-storage-hadoop - 3.3.0 + 3.4.0-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-emr6.1/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-emr6.1/pom.xml index e64f6f601e..1c5876905e 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-emr6.1/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-emr6.1/pom.xml @@ -7,7 +7,7 @@ org.opencb.opencga opencga-storage-hadoop-lib - 3.3.0 + 3.4.0-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-emr6.13/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-emr6.13/pom.xml index 6b983e6f59..991c499dc8 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-emr6.13/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-emr6.13/pom.xml @@ -7,7 +7,7 @@ org.opencb.opencga opencga-storage-hadoop-lib - 3.3.0 + 3.4.0-SNAPSHOT ../pom.xml diff --git 
a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-hdi5.1/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-hdi5.1/pom.xml index f2f695c9b2..a960e1a068 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-hdi5.1/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-hdi5.1/pom.xml @@ -7,7 +7,7 @@ org.opencb.opencga opencga-storage-hadoop-lib - 3.3.0 + 3.4.0-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-hdp3.1/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-hdp3.1/pom.xml index ae04cc37e6..ae6332feb3 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-hdp3.1/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-hdp3.1/pom.xml @@ -7,7 +7,7 @@ org.opencb.opencga opencga-storage-hadoop-lib - 3.3.0 + 3.4.0-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/pom.xml index f835d82e61..db8d5b9aec 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/pom.xml @@ -23,7 +23,7 @@ org.opencb.opencga opencga-storage-hadoop - 3.3.0 + 3.4.0-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/pom.xml b/opencga-storage/opencga-storage-hadoop/pom.xml index 34eea924a8..80807b54c5 100644 --- a/opencga-storage/opencga-storage-hadoop/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/pom.xml @@ -23,7 +23,7 @@ org.opencb.opencga opencga-storage - 3.3.0 + 3.4.0-SNAPSHOT ../pom.xml diff --git 
a/opencga-storage/opencga-storage-server/pom.xml b/opencga-storage/opencga-storage-server/pom.xml index 852866f592..3dd2a91dd4 100644 --- a/opencga-storage/opencga-storage-server/pom.xml +++ b/opencga-storage/opencga-storage-server/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage - 3.3.0 + 3.4.0-SNAPSHOT ../pom.xml diff --git a/opencga-storage/pom.xml b/opencga-storage/pom.xml index a39da9b303..3ab26cac21 100644 --- a/opencga-storage/pom.xml +++ b/opencga-storage/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 3.3.0 + 3.4.0-SNAPSHOT ../pom.xml diff --git a/opencga-test/pom.xml b/opencga-test/pom.xml index b17e27b3f2..8b5c1157ef 100644 --- a/opencga-test/pom.xml +++ b/opencga-test/pom.xml @@ -24,7 +24,7 @@ org.opencb.opencga opencga - 3.3.0 + 3.4.0-SNAPSHOT ../pom.xml diff --git a/pom.xml b/pom.xml index 86fa33f69e..ed3a473e15 100644 --- a/pom.xml +++ b/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 3.3.0 + 3.4.0-SNAPSHOT pom OpenCGA @@ -43,12 +43,12 @@ - 3.3.0 - 3.3.0 - 6.3.0 - 3.3.0 - 5.3.0 - 3.3.0 + 3.4.0_dev + 3.4.0_dev + 6.4.0-SNAPSHOT + 3.4.0-SNAPSHOT + 5.4.0-SNAPSHOT + 3.4.0-SNAPSHOT 0.2.0 From 7af802034b302c68bf215e552d730d65a1685371 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Wed, 16 Oct 2024 15:20:12 +0100 Subject: [PATCH 009/122] storage: Run docker image prune on cleanup. 
#TASK-6722 --- .../variant/mr/StreamVariantMapper.java | 51 ++++++++++++++++++- .../VariantHadoopStoragePipelineTest.java | 2 +- 2 files changed, 50 insertions(+), 3 deletions(-) diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantMapper.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantMapper.java index 1523348429..e02ae93c48 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantMapper.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantMapper.java @@ -1,5 +1,7 @@ package org.opencb.opencga.storage.hadoop.variant.mr; +import com.fasterxml.jackson.databind.MapperFeature; +import com.fasterxml.jackson.databind.ObjectMapper; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; @@ -10,9 +12,13 @@ import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.util.LineReader; import org.opencb.biodata.models.variant.Variant; +import org.opencb.biodata.models.variant.metadata.VariantMetadata; import org.opencb.commons.datastore.core.Query; import org.opencb.commons.datastore.core.QueryOptions; +import org.opencb.commons.exec.Command; import org.opencb.commons.io.DataWriter; +import org.opencb.opencga.storage.core.exceptions.StorageEngineException; +import org.opencb.opencga.storage.core.metadata.VariantMetadataFactory; import org.opencb.opencga.storage.core.metadata.VariantStorageMetadataManager; import org.opencb.opencga.storage.core.variant.io.VariantWriterFactory; import org.opencb.opencga.storage.hadoop.variant.metadata.HBaseVariantStorageMetadataDBAdaptorFactory; @@ -62,6 +68,7 @@ public class 
StreamVariantMapper extends VariantMapper throwables = Collections.synchronizedList(new ArrayList<>()); private volatile boolean processProvidedStatus_ = false; + private VariantMetadata metadata; public static void setCommandLine(Job job, String commandLine) { String commandLineBase64 = Base64.getEncoder().encodeToString(commandLine.getBytes()); @@ -125,7 +132,16 @@ public void run(Context context) throws IOException, InterruptedException { } else { keyStr = currentKey.toString(); } - addException("Exception in mapper for key: " + keyStr, th); + String message = "Exception in mapper for key: '" + keyStr + "'"; + try { + Variant currentValue = context.getCurrentValue(); + if (currentValue != null) { + message += " value: '" + currentValue + "'"; + } + } catch (Throwable t) { + th.addSuppressed(t); + } + addException(message, th); } else { addException(th); } @@ -210,9 +226,30 @@ private void throwExceptionIfAny() throws IOException { @Override protected void cleanup(Mapper.Context context) throws IOException, InterruptedException { closeProcess(context); + dockerPruneImages(); super.cleanup(context); } + private void dockerPruneImages() { + try { + LOG.info("Pruning docker images"); + int maxImages = 5; + Command command = new Command(new String[]{"bash", "-c", "[ $(docker image ls --format json | wc -l) -gt " + maxImages + " ] " + + "&& echo 'Run docker image prune' && docker image prune -f -a " + + "|| echo 'Skipping docker image prune. Less than " + maxImages + " images.'"}, Collections.emptyMap()); + command.run(); + int ecode = command.getExitValue(); + + // Throw exception if the process failed + if (ecode != 0) { + throw new IOException("Error executing 'docker image prune -f -a'. 
Exit code: " + ecode); + } + LOG.info("Docker images pruned"); + } catch (IOException e) { + addException(e); + } + } + @Override protected void map(Object key, Variant value, Context context) throws IOException, InterruptedException { numRecordsRead++; @@ -270,7 +307,7 @@ private void closeProcess(Context context) throws IOException, InterruptedExcept // drainStdout(context); } - private void startProcess(Context context) throws IOException { + private void startProcess(Context context) throws IOException, StorageEngineException { LOG.info("bash -ce '" + commandLine + "'"); context.getCounter(COUNTER_GROUP_NAME, "START_PROCESS").increment(1); @@ -298,6 +335,16 @@ private void startProcess(Context context) throws IOException { variantDataWriter = writerFactory.newDataWriter(format, stdin, new Query(query), new QueryOptions(options)); + + if (format.inPlain() == VariantWriterFactory.VariantOutputFormat.JSON) { + if (metadata == null) { + VariantMetadataFactory metadataFactory = new VariantMetadataFactory(metadataManager); + metadata = metadataFactory.makeVariantMetadata(query, options); + } + ObjectMapper objectMapper = new ObjectMapper().configure(MapperFeature.REQUIRE_SETTERS_FOR_GETTERS, true); + objectMapper.writeValue((DataOutput) stdin, metadata); + } + processedBytes = 0; numRecordsRead = 0; numRecordsWritten = 0; diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/VariantHadoopStoragePipelineTest.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/VariantHadoopStoragePipelineTest.java index 33b67fb5b1..ac01326b1c 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/VariantHadoopStoragePipelineTest.java +++ 
b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/VariantHadoopStoragePipelineTest.java @@ -329,7 +329,7 @@ public void exportCommand() throws Exception { // variantStorageEngine.walkData(outdir.resolve("variant6.txt.gz"), VariantWriterFactory.VariantOutputFormat.VCF, new Query(), new QueryOptions(), cmdPython); // variantStorageEngine.walkData(outdir.resolve("variant4.txt.gz"), VariantWriterFactory.VariantOutputFormat.JSON, new Query(), new QueryOptions(), "opencb/opencga-base", cmd); // variantStorageEngine.walkData(outdir.resolve("variant4.txt.gz"), VariantWriterFactory.VariantOutputFormat.JSON, new Query(), new QueryOptions(), "opencb/opencga-base", cmdPython1); - variantStorageEngine.walkData(outdir.resolve("variant4.txt.gz"), VariantWriterFactory.VariantOutputFormat.JSON, new Query(), new QueryOptions(), "my-python-app:latest", cmdPython1); + variantStorageEngine.walkData(outdir.resolve("variant4.txt.gz"), VariantWriterFactory.VariantOutputFormat.JSON, new Query(), new QueryOptions(), "jcoll/my-python-app:latest", cmdPython1); } From bebdccb928e01248699d46272c0cada5d28cc246 Mon Sep 17 00:00:00 2001 From: pfurio Date: Thu, 17 Oct 2024 14:25:35 +0200 Subject: [PATCH 010/122] catalog: check vsets can be created by study admins, #TASK-7004 --- .../auth/authorization/CatalogAuthorizationManager.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/auth/authorization/CatalogAuthorizationManager.java b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/auth/authorization/CatalogAuthorizationManager.java index 7022be4a0e..d4ae0d690c 100644 --- a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/auth/authorization/CatalogAuthorizationManager.java +++ b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/auth/authorization/CatalogAuthorizationManager.java @@ -239,8 +239,8 @@ public void 
checkCanAssignOrSeePermissions(String organizationId, long studyId, @Override public void checkCanCreateUpdateDeleteVariableSets(String organizationId, long studyId, String userId) throws CatalogException { - if (!isAtLeastOrganizationOwnerOrAdmin(organizationId, userId)) { - throw CatalogAuthorizationException.notOrganizationOwnerOrAdmin("create, update or delete variable sets."); + if (!isAtLeastStudyAdministrator(organizationId, studyId, userId)) { + throw CatalogAuthorizationException.notStudyAdmin("create, update or delete variable sets."); } } From 27f590664c4d05e5361f4d99aa632e2e46e4820b Mon Sep 17 00:00:00 2001 From: pfurio Date: Mon, 21 Oct 2024 09:43:13 +0200 Subject: [PATCH 011/122] app: allow changing the organization id during main migration,#TASK-7118 --- .../executors/MigrationCommandExecutor.java | 2 +- .../options/MigrationCommandOptions.java | 4 + .../v3/v3_0_0/OrganizationMigration.java | 74 +++++++++++++++++-- 3 files changed, 71 insertions(+), 9 deletions(-) diff --git a/opencga-app/src/main/java/org/opencb/opencga/app/cli/admin/executors/MigrationCommandExecutor.java b/opencga-app/src/main/java/org/opencb/opencga/app/cli/admin/executors/MigrationCommandExecutor.java index 3f5f7cee80..1a52874866 100644 --- a/opencga-app/src/main/java/org/opencb/opencga/app/cli/admin/executors/MigrationCommandExecutor.java +++ b/opencga-app/src/main/java/org/opencb/opencga/app/cli/admin/executors/MigrationCommandExecutor.java @@ -70,7 +70,7 @@ private void runMigrationToV3() throws Exception { setCatalogDatabaseCredentials(options, options.commonOptions); OrganizationMigration organizationMigration = new OrganizationMigration(configuration, options.commonOptions.adminPassword, - options.user); + options.user, options.organizationId); organizationMigration.execute(); } diff --git a/opencga-app/src/main/java/org/opencb/opencga/app/cli/admin/options/MigrationCommandOptions.java 
b/opencga-app/src/main/java/org/opencb/opencga/app/cli/admin/options/MigrationCommandOptions.java index a306f222c3..7d42b1e77c 100644 --- a/opencga-app/src/main/java/org/opencb/opencga/app/cli/admin/options/MigrationCommandOptions.java +++ b/opencga-app/src/main/java/org/opencb/opencga/app/cli/admin/options/MigrationCommandOptions.java @@ -46,6 +46,10 @@ public class OrganizationMigrationCommandOptions extends AdminCliOptionsParser.C @Parameter(names = {"--user"}, description = "User whose data is going to be migrated. If more than one user of type FULL contains" + " projects and studies, only the one provided will keep the data and will be fully migrated.") public String user; + + @Parameter(names = {"--organization-id"}, description = "Optional parameter to specify how the new organization will be named." + + " By default, if not provided, the organization id will match the user id that is currently owning the data.") + public String organizationId; } @Parameters(commandNames = {"summary"}, commandDescription = "Obtain migrations status summary") diff --git a/opencga-app/src/main/java/org/opencb/opencga/app/migrations/v3/v3_0_0/OrganizationMigration.java b/opencga-app/src/main/java/org/opencb/opencga/app/migrations/v3/v3_0_0/OrganizationMigration.java index d1f749bafe..46e7878275 100644 --- a/opencga-app/src/main/java/org/opencb/opencga/app/migrations/v3/v3_0_0/OrganizationMigration.java +++ b/opencga-app/src/main/java/org/opencb/opencga/app/migrations/v3/v3_0_0/OrganizationMigration.java @@ -1,10 +1,7 @@ package org.opencb.opencga.app.migrations.v3.v3_0_0; import com.mongodb.client.*; -import com.mongodb.client.model.Filters; -import com.mongodb.client.model.InsertOneModel; -import com.mongodb.client.model.Projections; -import com.mongodb.client.model.Updates; +import com.mongodb.client.model.*; import com.mongodb.client.result.DeleteResult; import org.apache.commons.collections4.CollectionUtils; import org.apache.commons.lang3.StringUtils; @@ -17,12 +14,15 @@ 
import org.opencb.opencga.catalog.db.mongodb.MongoDBAdaptorFactory; import org.opencb.opencga.catalog.db.mongodb.OrganizationMongoDBAdaptorFactory; import org.opencb.opencga.catalog.exceptions.CatalogAuthorizationException; +import org.opencb.opencga.catalog.exceptions.CatalogDBException; import org.opencb.opencga.catalog.exceptions.CatalogException; import org.opencb.opencga.catalog.exceptions.CatalogIOException; import org.opencb.opencga.catalog.io.CatalogIOManager; import org.opencb.opencga.catalog.managers.CatalogManager; import org.opencb.opencga.catalog.migration.Migration; import org.opencb.opencga.catalog.migration.MigrationTool; +import org.opencb.opencga.catalog.utils.FqnUtils; +import org.opencb.opencga.catalog.utils.ParamUtils; import org.opencb.opencga.core.api.ParamConstants; import org.opencb.opencga.core.config.AuthenticationOrigin; import org.opencb.opencga.core.config.Configuration; @@ -62,10 +62,12 @@ private enum MigrationStatus { ERROR } - public OrganizationMigration(Configuration configuration, String adminPassword, String userId) throws CatalogException { + public OrganizationMigration(Configuration configuration, String adminPassword, String userId, String organizationId) + throws CatalogException { this.configuration = configuration; this.adminPassword = adminPassword; this.userId = userId; + this.organizationId = organizationId; this.status = checkAndInit(); } @@ -193,7 +195,10 @@ private MigrationStatus checkAndInit() throws CatalogException { this.userIdsToDiscardData = new HashSet<>(); } - this.organizationId = this.userId; + if (StringUtils.isEmpty(this.organizationId)) { + this.organizationId = this.userId; + } + ParamUtils.checkIdentifier(this.organizationId, "Organization id"); this.catalogManager = new CatalogManager(configuration); return MigrationStatus.PENDING_MIGRATION; } @@ -360,6 +365,10 @@ protected void run() throws Exception { CatalogIOManager ioManager = new CatalogIOManager(configuration); + Map organizationOwnerMap = 
new HashMap<>(); + organizationOwnerMap.put(ParamConstants.ADMIN_ORGANIZATION, ParamConstants.OPENCGA_USER_ID); + organizationOwnerMap.put(this.organizationId, this.userId); + // Loop over all organizations to perform additional data model changes for (String organizationId : mongoDBAdaptorFactory.getOrganizationIds()) { ioManager.createOrganization(organizationId); @@ -400,13 +409,14 @@ protected void run() throws Exception { } // Add owner as admin of every study and remove _ownerId field + String ownerId = organizationOwnerMap.get(organizationId); for (String collection : Arrays.asList(OrganizationMongoDBAdaptorFactory.STUDY_COLLECTION, OrganizationMongoDBAdaptorFactory.DELETED_STUDY_COLLECTION)) { MongoCollection mongoCollection = database.getCollection(collection); mongoCollection.updateMany( Filters.eq(StudyDBAdaptor.QueryParams.GROUP_ID.key(), ParamConstants.ADMINS_GROUP), Updates.combine( Updates.unset("_ownerId"), - Updates.push("groups.$.userIds", organizationId) + Updates.push("groups.$.userIds", ownerId) )); } @@ -434,16 +444,64 @@ protected void run() throws Exception { // Set organization counter, owner and authOrigins orgCol.updateOne(Filters.eq("id", organizationId), Updates.combine( Updates.set("_idCounter", counter), - Updates.set("owner", organizationId), + Updates.set("owner", ownerId), Updates.set("configuration.authenticationOrigins", authOrigins) )); } + // If the user didn't want to use the userId as the new organization id, we then need to change all the fqn's + if (!this.organizationId.equals(this.userId)) { + changeFqns(); + } + // Skip current migration for both organizations catalogManager.getMigrationManager().skipPendingMigrations(ParamConstants.ADMIN_ORGANIZATION, opencgaToken); catalogManager.getMigrationManager().skipPendingMigrations(organizationId, opencgaToken); } + private void changeFqns() throws CatalogDBException { + this.dbAdaptorFactory = this.mongoDBAdaptorFactory; + + // Change project fqn's + for (String projectCol : 
Arrays.asList(OrganizationMongoDBAdaptorFactory.PROJECT_COLLECTION, + OrganizationMongoDBAdaptorFactory.DELETED_PROJECT_COLLECTION)) { + migrateCollection(projectCol, new Document(), Projections.include("_id", "id"), (document, bulk) -> { + String projectId = document.getString("id"); + String projectFqn = FqnUtils.buildFqn(this.organizationId, projectId); + bulk.add(new UpdateOneModel<>( + Filters.eq("_id", document.get("_id")), + new Document("$set", new Document("fqn", projectFqn))) + ); + }); + } + + MongoDatabase database = mongoDBAdaptorFactory.getMongoDataStore(organizationId).getDb(); + MongoCollection jobCollection = database.getCollection(OrganizationMongoDBAdaptorFactory.JOB_COLLECTION); + MongoCollection jobDeletedCollection = database.getCollection(OrganizationMongoDBAdaptorFactory.DELETED_JOB_COLLECTION); + + // Change study fqn's + for (String studyCol : Arrays.asList(OrganizationMongoDBAdaptorFactory.STUDY_COLLECTION, + OrganizationMongoDBAdaptorFactory.DELETED_STUDY_COLLECTION)) { + migrateCollection(studyCol, new Document(), Projections.include("_id", "uid", "fqn"), (document, bulk) -> { + long studyUid = document.get("uid", Number.class).longValue(); + + String oldStudyFqn = document.getString("fqn"); + FqnUtils.FQN oldFqnInstance = FqnUtils.parse(oldStudyFqn); + String newFqn = FqnUtils.buildFqn(this.organizationId, oldFqnInstance.getProject(), oldFqnInstance.getStudy()); + bulk.add(new UpdateOneModel<>( + Filters.eq("_id", document.get("_id")), + new Document("$set", new Document("fqn", newFqn))) + ); + + // Change fqn in all jobs that were pointing to this study + Bson jobQuery = Filters.eq("studyUid", studyUid); + Bson update = Updates.set("study.id", newFqn); + jobCollection.updateMany(jobQuery, update); + jobDeletedCollection.updateMany(jobQuery, update); + }); + } + } + Set> getAvailableMigrations() { Reflections reflections = new Reflections(new ConfigurationBuilder() .setScanners( From 1f25e7adfd6f5c4a6d8baf8b71e2c7f536380cbc Mon Sep 17 
00:00:00 2001 From: JuanfeSanahuja Date: Mon, 21 Oct 2024 15:37:25 +0200 Subject: [PATCH 012/122] Adapt pom to the new SDLC #TASK-7118 --- opencga-analysis/pom.xml | 2 +- opencga-app/pom.xml | 2 +- opencga-catalog/pom.xml | 2 +- opencga-client/pom.xml | 2 +- opencga-clinical/pom.xml | 2 +- opencga-core/pom.xml | 2 +- opencga-master/pom.xml | 2 +- opencga-server/pom.xml | 2 +- opencga-storage/opencga-storage-app/pom.xml | 2 +- opencga-storage/opencga-storage-benchmark/pom.xml | 2 +- opencga-storage/opencga-storage-core/pom.xml | 2 +- .../opencga-storage-hadoop-compat-api/pom.xml | 2 +- .../opencga-storage-hadoop-compat-hbase2.0/pom.xml | 2 +- .../opencga-storage-hadoop-compat-hbase2.2/pom.xml | 2 +- .../opencga-storage-hadoop-compat-hbase2.4/pom.xml | 2 +- .../opencga-storage-hadoop-compat/pom.xml | 2 +- .../opencga-storage-hadoop-core/pom.xml | 2 +- .../opencga-storage-hadoop-lib-emr6.1/pom.xml | 2 +- .../opencga-storage-hadoop-lib-emr6.13/pom.xml | 2 +- .../opencga-storage-hadoop-lib-hdi5.1/pom.xml | 2 +- .../opencga-storage-hadoop-lib-hdp3.1/pom.xml | 2 +- .../opencga-storage-hadoop/opencga-storage-hadoop-lib/pom.xml | 2 +- opencga-storage/opencga-storage-hadoop/pom.xml | 2 +- opencga-storage/opencga-storage-server/pom.xml | 2 +- opencga-storage/pom.xml | 2 +- opencga-test/pom.xml | 2 +- pom.xml | 4 ++-- 27 files changed, 28 insertions(+), 28 deletions(-) diff --git a/opencga-analysis/pom.xml b/opencga-analysis/pom.xml index eb4825ee88..314766dc1d 100644 --- a/opencga-analysis/pom.xml +++ b/opencga-analysis/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 3.0.0.2-SNAPSHOT + 3.0.1-SNAPSHOT ../pom.xml diff --git a/opencga-app/pom.xml b/opencga-app/pom.xml index 92d7e27fc3..eaa503dd09 100644 --- a/opencga-app/pom.xml +++ b/opencga-app/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 3.0.0.2-SNAPSHOT + 3.0.1-SNAPSHOT ../pom.xml diff --git a/opencga-catalog/pom.xml b/opencga-catalog/pom.xml index dba2d4ff1a..f6f5768166 100644 --- a/opencga-catalog/pom.xml 
+++ b/opencga-catalog/pom.xml @@ -23,7 +23,7 @@ org.opencb.opencga opencga - 3.0.0.2-SNAPSHOT + 3.0.1-SNAPSHOT ../pom.xml diff --git a/opencga-client/pom.xml b/opencga-client/pom.xml index 8a221b55fe..909598e048 100644 --- a/opencga-client/pom.xml +++ b/opencga-client/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 3.0.0.2-SNAPSHOT + 3.0.1-SNAPSHOT ../pom.xml diff --git a/opencga-clinical/pom.xml b/opencga-clinical/pom.xml index f2ea05a986..c173e0eb19 100644 --- a/opencga-clinical/pom.xml +++ b/opencga-clinical/pom.xml @@ -5,7 +5,7 @@ org.opencb.opencga opencga - 3.0.0.2-SNAPSHOT + 3.0.1-SNAPSHOT ../pom.xml 4.0.0 diff --git a/opencga-core/pom.xml b/opencga-core/pom.xml index acaffb174f..43f0f3f65b 100644 --- a/opencga-core/pom.xml +++ b/opencga-core/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 3.0.0.2-SNAPSHOT + 3.0.1-SNAPSHOT ../pom.xml diff --git a/opencga-master/pom.xml b/opencga-master/pom.xml index d0d14e9506..bc7eeae241 100644 --- a/opencga-master/pom.xml +++ b/opencga-master/pom.xml @@ -22,7 +22,7 @@ opencga org.opencb.opencga - 3.0.0.2-SNAPSHOT + 3.0.1-SNAPSHOT ../pom.xml diff --git a/opencga-server/pom.xml b/opencga-server/pom.xml index f93f5ffd04..1b1e16acbc 100644 --- a/opencga-server/pom.xml +++ b/opencga-server/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 3.0.0.2-SNAPSHOT + 3.0.1-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-app/pom.xml b/opencga-storage/opencga-storage-app/pom.xml index fd9869835d..8b4822ad6e 100644 --- a/opencga-storage/opencga-storage-app/pom.xml +++ b/opencga-storage/opencga-storage-app/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage - 3.0.0.2-SNAPSHOT + 3.0.1-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-benchmark/pom.xml b/opencga-storage/opencga-storage-benchmark/pom.xml index 640a4a655e..0c0bf1c727 100644 --- a/opencga-storage/opencga-storage-benchmark/pom.xml +++ b/opencga-storage/opencga-storage-benchmark/pom.xml @@ -22,7 +22,7 @@ opencga-storage 
org.opencb.opencga - 3.0.0.2-SNAPSHOT + 3.0.1-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-core/pom.xml b/opencga-storage/opencga-storage-core/pom.xml index 74320f628a..20cd38689a 100644 --- a/opencga-storage/opencga-storage-core/pom.xml +++ b/opencga-storage/opencga-storage-core/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage - 3.0.0.2-SNAPSHOT + 3.0.1-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-api/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-api/pom.xml index e2f710f5e7..3d0375fea0 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-api/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-api/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage-hadoop-compat - 3.0.0.2-SNAPSHOT + 3.0.1-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.0/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.0/pom.xml index 614ad7332c..6837051499 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.0/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.0/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage-hadoop-compat - 3.0.0.2-SNAPSHOT + 3.0.1-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.2/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.2/pom.xml index f768ed47aa..b5852968a7 100644 --- 
a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.2/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.2/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage-hadoop-compat - 3.0.0.2-SNAPSHOT + 3.0.1-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.4/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.4/pom.xml index 75f6adc95e..5e097253ee 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.4/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.4/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage-hadoop-compat - 3.0.0.2-SNAPSHOT + 3.0.1-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/pom.xml index 46993e19f5..88200e3425 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/pom.xml @@ -23,7 +23,7 @@ org.opencb.opencga opencga-storage-hadoop - 3.0.0.2-SNAPSHOT + 3.0.1-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/pom.xml index bf1c0dc3a0..2d94efc52f 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/pom.xml @@ -23,7 +23,7 @@ org.opencb.opencga opencga-storage-hadoop - 3.0.0.2-SNAPSHOT + 3.0.1-SNAPSHOT ../pom.xml diff --git 
a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-emr6.1/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-emr6.1/pom.xml index 87b41711fa..0b50aadf33 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-emr6.1/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-emr6.1/pom.xml @@ -7,7 +7,7 @@ org.opencb.opencga opencga-storage-hadoop-lib - 3.0.0.2-SNAPSHOT + 3.0.1-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-emr6.13/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-emr6.13/pom.xml index 2bb2e0fea4..25941e5afe 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-emr6.13/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-emr6.13/pom.xml @@ -7,7 +7,7 @@ org.opencb.opencga opencga-storage-hadoop-lib - 3.0.0.2-SNAPSHOT + 3.0.1-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-hdi5.1/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-hdi5.1/pom.xml index bb429c443f..91438cbe32 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-hdi5.1/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-hdi5.1/pom.xml @@ -7,7 +7,7 @@ org.opencb.opencga opencga-storage-hadoop-lib - 3.0.0.2-SNAPSHOT + 3.0.1-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-hdp3.1/pom.xml 
b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-hdp3.1/pom.xml index e029d3b5bf..0026c0c4ba 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-hdp3.1/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-hdp3.1/pom.xml @@ -7,7 +7,7 @@ org.opencb.opencga opencga-storage-hadoop-lib - 3.0.0.2-SNAPSHOT + 3.0.1-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/pom.xml index b2648803b3..d824df6b12 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/pom.xml @@ -23,7 +23,7 @@ org.opencb.opencga opencga-storage-hadoop - 3.0.0.2-SNAPSHOT + 3.0.1-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/pom.xml b/opencga-storage/opencga-storage-hadoop/pom.xml index b4258b03d3..470344d436 100644 --- a/opencga-storage/opencga-storage-hadoop/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/pom.xml @@ -23,7 +23,7 @@ org.opencb.opencga opencga-storage - 3.0.0.2-SNAPSHOT + 3.0.1-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-server/pom.xml b/opencga-storage/opencga-storage-server/pom.xml index 5a605a0565..215850fd9b 100644 --- a/opencga-storage/opencga-storage-server/pom.xml +++ b/opencga-storage/opencga-storage-server/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage - 3.0.0.2-SNAPSHOT + 3.0.1-SNAPSHOT ../pom.xml diff --git a/opencga-storage/pom.xml b/opencga-storage/pom.xml index 33ffd81655..b2e665c34f 100644 --- a/opencga-storage/pom.xml +++ b/opencga-storage/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 3.0.0.2-SNAPSHOT + 3.0.1-SNAPSHOT ../pom.xml diff --git a/opencga-test/pom.xml b/opencga-test/pom.xml index 711d494413..0ef882d8ac 100644 --- 
a/opencga-test/pom.xml +++ b/opencga-test/pom.xml @@ -24,7 +24,7 @@ org.opencb.opencga opencga - 3.0.0.2-SNAPSHOT + 3.0.1-SNAPSHOT ../pom.xml diff --git a/pom.xml b/pom.xml index 0c2ea552a9..cc351a473d 100644 --- a/pom.xml +++ b/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 3.0.0.2-SNAPSHOT + 3.0.1-SNAPSHOT pom OpenCGA @@ -44,7 +44,7 @@ - 3.0.0.1 + 3.0.1 6.0.0 3.0.0 5.0.0 From acac916e782ea8557c902bb6b22bc9d29c293d83 Mon Sep 17 00:00:00 2001 From: pfurio Date: Tue, 22 Oct 2024 16:23:36 +0200 Subject: [PATCH 013/122] app: store old fqn in attributes before migration, #TASK-7118 --- .../v3/v3_0_0/OrganizationMigration.java | 28 ++++++++++++++++--- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/opencga-app/src/main/java/org/opencb/opencga/app/migrations/v3/v3_0_0/OrganizationMigration.java b/opencga-app/src/main/java/org/opencb/opencga/app/migrations/v3/v3_0_0/OrganizationMigration.java index 46e7878275..b42b85c18b 100644 --- a/opencga-app/src/main/java/org/opencb/opencga/app/migrations/v3/v3_0_0/OrganizationMigration.java +++ b/opencga-app/src/main/java/org/opencb/opencga/app/migrations/v3/v3_0_0/OrganizationMigration.java @@ -24,6 +24,7 @@ import org.opencb.opencga.catalog.utils.FqnUtils; import org.opencb.opencga.catalog.utils.ParamUtils; import org.opencb.opencga.core.api.ParamConstants; +import org.opencb.opencga.core.common.TimeUtils; import org.opencb.opencga.core.config.AuthenticationOrigin; import org.opencb.opencga.core.config.Configuration; import org.opencb.opencga.core.models.migration.MigrationRun; @@ -461,16 +462,23 @@ protected void run() throws Exception { private void changeFqns() throws CatalogDBException { this.dbAdaptorFactory = this.mongoDBAdaptorFactory; + String date = TimeUtils.getTime(); // Change project fqn's for (String projectCol : Arrays.asList(OrganizationMongoDBAdaptorFactory.PROJECT_COLLECTION, OrganizationMongoDBAdaptorFactory.DELETED_PROJECT_COLLECTION)) { - migrateCollection(projectCol, new Document(), 
Projections.include("_id", "id"), (document, bulk) -> { + migrateCollection(projectCol, new Document(), Projections.include("_id", "id", "fqn"), (document, bulk) -> { + String currentFqn = document.getString("fqn"); String projectId = document.getString("id"); String projectFqn = FqnUtils.buildFqn(this.organizationId, projectId); bulk.add(new UpdateOneModel<>( Filters.eq("_id", document.get("_id")), - new Document("$set", new Document("fqn", projectFqn))) + new Document("$set", new Document() + .append("fqn", projectFqn) + .append("attributes.OPENCGA.3_0_0", new Document() + .append("date", date) + .append("oldFqn", currentFqn) + ))) ); }); } @@ -490,12 +498,24 @@ private void changeFqns() throws CatalogDBException { String newFqn = FqnUtils.buildFqn(this.organizationId, oldFqnInstance.getProject(), oldFqnInstance.getStudy()); bulk.add(new UpdateOneModel<>( Filters.eq("_id", document.get("_id")), - new Document("$set", new Document("fqn", newFqn))) + new Document("$set", new Document() + .append("fqn", newFqn) + .append("attributes.OPENCGA.3_0_0", new Document() + .append("date", date) + .append("oldFqn", oldStudyFqn) + ) + )) ); // Change fqn in all jobs that were pointing to this study Bson jobQuery = Filters.eq("studyUid", studyUid); - Bson update = Updates.set("study.id", newFqn); + Bson update = new Document("$set", new Document() + .append("study.id", newFqn) + .append("attributes.OPENCGA.3_0_0", new Document() + .append("date", date) + .append("oldStudyFqn", oldStudyFqn) + ) + ); jobCollection.updateMany(jobQuery, update); jobDeletedCollection.updateMany(jobQuery, update); }); From d2c80732611df83ee8f62b32fe1d505fab8e9213 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Wed, 23 Oct 2024 10:45:15 +0100 Subject: [PATCH 014/122] app: Ensure project.internal.datastores.variant is defined when changing organizationId #TASK-7118 --- .../manager/VariantStorageManager.java | 3 +- .../v3/v3_0_0/OrganizationMigration.java | 66 
++++++++++++++++--- .../catalog/migration/MigrationTool.java | 12 +++- 3 files changed, 68 insertions(+), 13 deletions(-) diff --git a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/manager/VariantStorageManager.java b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/manager/VariantStorageManager.java index 847a2bb1ac..b8322bbcb8 100644 --- a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/manager/VariantStorageManager.java +++ b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/manager/VariantStorageManager.java @@ -1726,8 +1726,7 @@ public static DataStore defaultDataStore(CatalogManager catalogManager, Project return defaultDataStore(catalogManager.getConfiguration().getDatabasePrefix(), project.getFqn()); } - public static DataStore defaultDataStore(String databasePrefix, String projectFqnStr) - throws CatalogException { + public static DataStore defaultDataStore(String databasePrefix, String projectFqnStr) { CatalogFqn projectFqn = CatalogFqn.extractFqnFromProjectFqn(projectFqnStr); String dbName = buildDatabaseName(databasePrefix, projectFqn.getOrganizationId(), projectFqn.getProjectId()); diff --git a/opencga-app/src/main/java/org/opencb/opencga/app/migrations/v3/v3_0_0/OrganizationMigration.java b/opencga-app/src/main/java/org/opencb/opencga/app/migrations/v3/v3_0_0/OrganizationMigration.java index b42b85c18b..25215778dd 100644 --- a/opencga-app/src/main/java/org/opencb/opencga/app/migrations/v3/v3_0_0/OrganizationMigration.java +++ b/opencga-app/src/main/java/org/opencb/opencga/app/migrations/v3/v3_0_0/OrganizationMigration.java @@ -9,6 +9,7 @@ import org.bson.conversions.Bson; import org.opencb.commons.datastore.mongodb.MongoDataStore; import org.opencb.commons.utils.CryptoUtils; +import org.opencb.opencga.analysis.variant.manager.VariantStorageManager; import org.opencb.opencga.catalog.auth.authentication.CatalogAuthenticationManager; import 
org.opencb.opencga.catalog.db.api.StudyDBAdaptor; import org.opencb.opencga.catalog.db.mongodb.MongoDBAdaptorFactory; @@ -20,6 +21,7 @@ import org.opencb.opencga.catalog.io.CatalogIOManager; import org.opencb.opencga.catalog.managers.CatalogManager; import org.opencb.opencga.catalog.migration.Migration; +import org.opencb.opencga.catalog.migration.MigrationException; import org.opencb.opencga.catalog.migration.MigrationTool; import org.opencb.opencga.catalog.utils.FqnUtils; import org.opencb.opencga.catalog.utils.ParamUtils; @@ -29,12 +31,17 @@ import org.opencb.opencga.core.config.Configuration; import org.opencb.opencga.core.models.migration.MigrationRun; import org.opencb.opencga.core.models.organizations.OrganizationCreateParams; +import org.opencb.opencga.core.models.project.DataStore; +import org.opencb.opencga.storage.core.StorageEngineFactory; +import org.opencb.opencga.storage.core.exceptions.StorageEngineException; +import org.opencb.opencga.storage.core.variant.VariantStorageEngine; import org.reflections.Reflections; import org.reflections.scanners.SubTypesScanner; import org.reflections.scanners.TypeAnnotationsScanner; import org.reflections.util.ClasspathHelper; import org.reflections.util.ConfigurationBuilder; +import java.io.IOException; import java.lang.reflect.Modifier; import java.net.URL; import java.nio.file.Files; @@ -452,6 +459,8 @@ protected void run() throws Exception { // If the user didn't want to use the userId as the new organization id, we then need to change all the fqn's if (!this.organizationId.equals(this.userId)) { + logger.info("New organization id '{}' is different from original userId '{}'. 
Changing FQN's from projects and studies" + , this.organizationId, this.userId); changeFqns(); } @@ -460,26 +469,62 @@ protected void run() throws Exception { catalogManager.getMigrationManager().skipPendingMigrations(organizationId, opencgaToken); } - private void changeFqns() throws CatalogDBException { + private void changeFqns() throws CatalogDBException, MigrationException { this.dbAdaptorFactory = this.mongoDBAdaptorFactory; String date = TimeUtils.getTime(); + StorageEngineFactory storageEngineFactory = StorageEngineFactory.get(readStorageConfiguration()); + // Change project fqn's for (String projectCol : Arrays.asList(OrganizationMongoDBAdaptorFactory.PROJECT_COLLECTION, OrganizationMongoDBAdaptorFactory.DELETED_PROJECT_COLLECTION)) { - migrateCollection(projectCol, new Document(), Projections.include("_id", "id", "fqn"), (document, bulk) -> { - String currentFqn = document.getString("fqn"); + migrateCollection(projectCol, new Document(), Projections.include("_id", "id", "fqn", "internal.datastores.variant"), (document, bulk) -> { + String oldFqn = document.getString("fqn"); String projectId = document.getString("id"); - String projectFqn = FqnUtils.buildFqn(this.organizationId, projectId); + String newFqn = FqnUtils.buildFqn(this.organizationId, projectId); + logger.info("Changing project fqn from '{}' to '{}'", oldFqn, newFqn); + + Document set = new Document() + .append("fqn", newFqn) + .append("attributes.OPENCGA.3_0_0", new Document() + .append("date", date) + .append("oldFqn", oldFqn) + ); + + Document internal = document.get("internal", Document.class); + if (internal != null) { + Document datastores = internal.get("datastores", Document.class); + if (datastores != null) { + Document variant = datastores.get("variant", Document.class); + if (variant == null) { + DataStore dataStore = VariantStorageManager.defaultDataStore(configuration.getDatabasePrefix(), oldFqn); + logger.info("Undefined variant \"internal.datastores.variant\" at project '{}'.", 
oldFqn); + + // Update only if the project exists in the variant storage + try (VariantStorageEngine variantStorageEngine = storageEngineFactory + .getVariantStorageEngine(dataStore.getStorageEngine(), dataStore.getDbName())) { + if (variantStorageEngine.getMetadataManager().exists()) { + logger.info("Project exists in the variant storage. Setting variant data store: {}", dataStore); + set.append("internal.datastores.variant", new Document() + .append("storageEngine", dataStore.getStorageEngine()) + .append("dbName", dataStore.getDbName()) + .append("options", new Document())); + } else { + logger.info("Project does not exist in the variant storage. Skipping"); + } + } catch (StorageEngineException | IOException e) { + throw new RuntimeException(e); + } + + } + } + } + bulk.add(new UpdateOneModel<>( Filters.eq("_id", document.get("_id")), - new Document("$set", new Document() - .append("fqn", projectFqn) - .append("attributes.OPENCGA.3_0_0", new Document() - .append("date", date) - .append("oldFqn", currentFqn) - ))) + new Document("$set", set)) ); + logger.info("-------"); }); } @@ -496,6 +541,7 @@ private void changeFqns() throws CatalogDBException { String oldStudyFqn = document.getString("fqn"); FqnUtils.FQN oldFqnInstance = FqnUtils.parse(oldStudyFqn); String newFqn = FqnUtils.buildFqn(this.organizationId, oldFqnInstance.getProject(), oldFqnInstance.getStudy()); + logger.info("Changing study fqn from '{}' to '{}'", oldStudyFqn, newFqn); bulk.add(new UpdateOneModel<>( Filters.eq("_id", document.get("_id")), new Document("$set", new Document() diff --git a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/migration/MigrationTool.java b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/migration/MigrationTool.java index f46e54277d..dd162c0635 100644 --- a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/migration/MigrationTool.java +++ b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/migration/MigrationTool.java @@ -151,7 +151,17 
@@ protected final void migrateCollection(MongoCollection inputCollection .cursor()) { while (it.hasNext()) { Document document = it.next(); - migrateFunc.accept(document, list); + try { + migrateFunc.accept(document, list); + } catch (Exception e) { + try { + logger.error("Error migrating document: {}", document.toJson()); + } catch (Exception e1) { + e.addSuppressed(e1); + logger.error("Error migrating document: {}", e.getMessage()); + } + throw e; + } if (list.size() >= batchSize) { count += list.size(); From bfe19252a8356dc0a220c0f2152ce5c3aa098e9e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Thu, 24 Oct 2024 16:07:17 +0100 Subject: [PATCH 015/122] app: Fix NPE. Add appHome to OrganizationMigration. #TASK-7118 --- .../admin/executors/MigrationCommandExecutor.java | 2 +- .../migrations/v3/v3_0_0/OrganizationMigration.java | 12 +++++++++--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/opencga-app/src/main/java/org/opencb/opencga/app/cli/admin/executors/MigrationCommandExecutor.java b/opencga-app/src/main/java/org/opencb/opencga/app/cli/admin/executors/MigrationCommandExecutor.java index 1a52874866..5fdada19a9 100644 --- a/opencga-app/src/main/java/org/opencb/opencga/app/cli/admin/executors/MigrationCommandExecutor.java +++ b/opencga-app/src/main/java/org/opencb/opencga/app/cli/admin/executors/MigrationCommandExecutor.java @@ -70,7 +70,7 @@ private void runMigrationToV3() throws Exception { setCatalogDatabaseCredentials(options, options.commonOptions); OrganizationMigration organizationMigration = new OrganizationMigration(configuration, options.commonOptions.adminPassword, - options.user, options.organizationId); + options.user, options.organizationId, Paths.get(appHome)); organizationMigration.execute(); } diff --git a/opencga-app/src/main/java/org/opencb/opencga/app/migrations/v3/v3_0_0/OrganizationMigration.java b/opencga-app/src/main/java/org/opencb/opencga/app/migrations/v3/v3_0_0/OrganizationMigration.java 
index 25215778dd..26780bc506 100644 --- a/opencga-app/src/main/java/org/opencb/opencga/app/migrations/v3/v3_0_0/OrganizationMigration.java +++ b/opencga-app/src/main/java/org/opencb/opencga/app/migrations/v3/v3_0_0/OrganizationMigration.java @@ -9,6 +9,7 @@ import org.bson.conversions.Bson; import org.opencb.commons.datastore.mongodb.MongoDataStore; import org.opencb.commons.utils.CryptoUtils; +import org.opencb.commons.utils.FileUtils; import org.opencb.opencga.analysis.variant.manager.VariantStorageManager; import org.opencb.opencga.catalog.auth.authentication.CatalogAuthenticationManager; import org.opencb.opencga.catalog.db.api.StudyDBAdaptor; @@ -45,6 +46,7 @@ import java.lang.reflect.Modifier; import java.net.URL; import java.nio.file.Files; +import java.nio.file.Path; import java.nio.file.Paths; import java.security.NoSuchAlgorithmException; import java.util.*; @@ -70,21 +72,25 @@ private enum MigrationStatus { ERROR } - public OrganizationMigration(Configuration configuration, String adminPassword, String userId, String organizationId) - throws CatalogException { + public OrganizationMigration(Configuration configuration, String adminPassword, String userId, String organizationId, Path appHome) + throws CatalogException, IOException { this.configuration = configuration; this.adminPassword = adminPassword; this.userId = userId; this.organizationId = organizationId; + this.appHome = appHome; this.status = checkAndInit(); } - private MigrationStatus checkAndInit() throws CatalogException { + private MigrationStatus checkAndInit() throws CatalogException, IOException { this.oldDatabase = configuration.getDatabasePrefix() + "_catalog"; this.mongoDBAdaptorFactory = new MongoDBAdaptorFactory(configuration); this.oldDatastore = mongoDBAdaptorFactory.getMongoManager().get(oldDatabase, mongoDBAdaptorFactory.getMongoDbConfiguration()); + FileUtils.checkDirectory(appHome); + readStorageConfiguration(); + MongoCollection userCol = 
oldDatastore.getDb().getCollection(OrganizationMongoDBAdaptorFactory.USER_COLLECTION); FindIterable iterable = userCol.find(Filters.eq("id", ParamConstants.OPENCGA_USER_ID)); try (MongoCursor cursor = iterable.cursor()) { From c5375ea45b33154e630180ff9ec38625e93971e2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Thu, 24 Oct 2024 18:58:59 +0100 Subject: [PATCH 016/122] storage: Ensure walker output is sorted. #TASK-6722 --- .../storage/hadoop/utils/HBaseManager.java | 4 +-- .../hadoop/variant/io/VariantDriver.java | 6 ++-- .../variant/mr/StreamVariantDriver.java | 10 +++--- .../variant/mr/StreamVariantMapper.java | 24 +++++++++++--- .../variant/mr/StreamVariantReducer.java | 33 +++++++++++++++++++ 5 files changed, 65 insertions(+), 12 deletions(-) create mode 100644 opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantReducer.java diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/utils/HBaseManager.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/utils/HBaseManager.java index 1f6cd77efd..2074a30f89 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/utils/HBaseManager.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/utils/HBaseManager.java @@ -410,8 +410,8 @@ public boolean splitAndMove(Admin admin, TableName tableName, byte[] expectedSpl LOGGER.info("Splitting table '{}' at '{}'", tableName, Bytes.toStringBinary(expectedSplit)); admin.split(tableName, expectedSplit); regionInfo = getRegionInfo(admin, tableName, expectedSplit); - int getRegionInfoAttempts = 10; - while (regionInfo == null) { + int getRegionInfoAttempts = 20; + while (regionInfo == null 
|| regionInfo.isOffline()) { try { Thread.sleep(200); } catch (InterruptedException e) { diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/io/VariantDriver.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/io/VariantDriver.java index 755275263a..10968dd211 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/io/VariantDriver.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/io/VariantDriver.java @@ -4,6 +4,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.client.Scan; import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.mapreduce.OutputFormat; import org.apache.hadoop.mapreduce.Reducer; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import org.opencb.commons.datastore.core.Query; @@ -92,7 +93,7 @@ protected void parseAndValidateParameters() throws IOException { protected abstract Class getReducerClass(); - protected abstract Class getOutputFormatClass(); + protected abstract Class getOutputFormatClass(); protected abstract void setupJob(Job job) throws IOException; @@ -109,7 +110,7 @@ protected final Job setupJob(Job job, String archiveTable, String variantTable) throw new IllegalArgumentException("Reducer class not provided!"); } } - Class outputFormatClass = getOutputFormatClass(); + Class outputFormatClass = getOutputFormatClass(); if (outputFormatClass == null) { throw new IllegalArgumentException("Output format class not provided!"); } @@ -118,6 +119,7 @@ protected final Job setupJob(Job job, String archiveTable, String variantTable) if (useReduceStep) { logger.info("Use one Reduce task to produce a single file"); job.setReducerClass(reducerClass); + // TODO: Configure multiple 
reducers and partitioner job.setNumReduceTasks(1); } else { VariantMapReduceUtil.setNoneReduce(job); diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantDriver.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantDriver.java index 5a248e190e..d1c2e73ad4 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantDriver.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantDriver.java @@ -8,8 +8,10 @@ import org.apache.hadoop.io.compress.GzipCodec; import org.apache.hadoop.mapred.JobContext; import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.mapreduce.OutputFormat; import org.apache.hadoop.mapreduce.Reducer; -import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; +import org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat; +import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs; import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; import org.apache.hadoop.util.Tool; import org.opencb.opencga.storage.core.variant.io.VariantWriterFactory; @@ -39,7 +41,7 @@ public class StreamVariantDriver extends VariantDriver { private Class mapperClass; private Class reducerClass; - private Class outputFormatClass; + private Class outputFormatClass; @Override protected Map getParams() { @@ -110,7 +112,7 @@ protected Class getReducerClass() { } @Override - protected Class getOutputFormatClass() { + protected Class getOutputFormatClass() { return outputFormatClass; } @@ -134,7 +136,7 @@ protected void setupJob(Job job) throws IOException { StreamVariantMapper.setMaxInputBytesPerProcess(job, maxBytesPerMap); StreamVariantMapper.setEnvironment(job, envVars); - 
reducerClass = Reducer.class; + reducerClass = StreamVariantReducer.class; outputFormatClass = ValueOnlyTextOutputFormat.class; job.setOutputFormatClass(ValueOnlyTextOutputFormat.class); diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantMapper.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantMapper.java index e02ae93c48..09845a8a61 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantMapper.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantMapper.java @@ -53,6 +53,7 @@ public class StreamVariantMapper extends VariantMapper 01 + // 3 -> 03 + // 22 -> 22 + // If the first character is a digit, and the second is not, add a 0 at the beginning + // MT -> MT + // 1_KI270712v1_random -> 01_KI270712v1_random + if (Character.isDigit(chromosome.charAt(0)) && (chromosome.length() == 1 || !Character.isDigit(chromosome.charAt(1)))) { + chromosome = "0" + chromosome; + } + + outputKeyPrefix = String.format("%s|%010d|", chromosome, variant.getStart()); + outputKeyNum = 0; } @Override public void run(Context context) throws IOException, InterruptedException { if (context.nextKeyValue()) { + Variant currentValue = null; try { setup(context); startProcess(context); @@ -121,7 +137,8 @@ public void run(Context context) throws IOException, InterruptedException { closeProcess(context); startProcess(context); } - map(context.getCurrentKey(), context.getCurrentValue(), context); + currentValue = context.getCurrentValue(); + map(context.getCurrentKey(), currentValue, context); } while (!hasExceptions() && context.nextKeyValue()); } catch (Throwable th) { Object currentKey = context.getCurrentKey(); 
@@ -134,7 +151,6 @@ public void run(Context context) throws IOException, InterruptedException { } String message = "Exception in mapper for key: '" + keyStr + "'"; try { - Variant currentValue = context.getCurrentValue(); if (currentValue != null) { message += " value: '" + currentValue + "'"; } @@ -410,7 +426,7 @@ public void run() { LineReader stdoutLineReader = new LineReader(stdout); try { while (stdoutLineReader.readLine(line) > 0) { - context.write(new ImmutableBytesWritable(Bytes.toBytes(outputKeyNum++)), line); + context.write(new ImmutableBytesWritable(Bytes.toBytes(outputKeyPrefix + (outputKeyNum++))), line); // context.write(null, line); if (verboseStdout) { LOG.info("[STDOUT] - " + line); diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantReducer.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantReducer.java new file mode 100644 index 0000000000..dac1b5dae4 --- /dev/null +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantReducer.java @@ -0,0 +1,33 @@ +package org.opencb.opencga.storage.hadoop.variant.mr; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hbase.io.ImmutableBytesWritable; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapreduce.Reducer; + +import java.io.IOException; + +public class StreamVariantReducer extends Reducer { + private static final Log LOG = LogFactory.getLog(StreamVariantReducer.class); + + @Override + protected void setup(Reducer.Context context) throws IOException, InterruptedException { + super.setup(context); + } + + @Override + protected void reduce(ImmutableBytesWritable key, Iterable values, Reducer.Context context) throws IOException, InterruptedException 
{ + + for (Text value : values) { + context.write(key, value); + context.getCounter(VariantsTableMapReduceHelper.COUNTER_GROUP_NAME, "stdout_records").increment(1); + } + + } + + @Override + protected void cleanup(Reducer.Context context) throws IOException, InterruptedException { + super.cleanup(context); + } +} From 663c03ae91df3276816e253ad39dd8d5af6bda20 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Fri, 25 Oct 2024 10:09:19 +0100 Subject: [PATCH 017/122] storage: Extract walker STDERR file from MR execution. #TASK-6722 --- .../hadoop/utils/AbstractHBaseDriver.java | 28 +++++----- .../VariantTableAggregationDriver.java | 2 +- .../variant/executors/SshMRExecutor.java | 56 +++++++++++-------- .../hadoop/variant/io/VariantDriver.java | 20 ++++--- .../variant/mr/StreamVariantDriver.java | 16 +++++- .../variant/mr/StreamVariantMapper.java | 52 +++++++++++++++-- .../variant/mr/StreamVariantReducer.java | 26 +++++++-- .../variant/mr/StreamVariantMapperTest.java | 41 ++++++++++++++ 8 files changed, 183 insertions(+), 58 deletions(-) create mode 100644 opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantMapperTest.java diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/utils/AbstractHBaseDriver.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/utils/AbstractHBaseDriver.java index 4a15198f10..3773141eef 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/utils/AbstractHBaseDriver.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/utils/AbstractHBaseDriver.java @@ -435,7 +435,7 @@ public Path getOutdir() { * @return List of copied files from HDFS */ protected 
List concatMrOutputToLocal(Path mrOutdir, Path localOutput) throws IOException { - return concatMrOutputToLocal(mrOutdir, localOutput, true); + return concatMrOutputToLocal(mrOutdir, localOutput, true, null); } /** @@ -444,10 +444,12 @@ protected List concatMrOutputToLocal(Path mrOutdir, Path localOutput) thro * @param mrOutdir MapReduce output directory * @param localOutput Local file * @param removeExtraHeaders Remove header lines starting with "#" from all files but the first + * @param partFilePrefix Filter partial files with specific prefix. Otherwise, concat them all. * @throws IOException on IOException * @return List of copied files from HDFS */ - protected List concatMrOutputToLocal(Path mrOutdir, Path localOutput, boolean removeExtraHeaders) throws IOException { + protected List concatMrOutputToLocal(Path mrOutdir, Path localOutput, boolean removeExtraHeaders, String partFilePrefix) + throws IOException { // TODO: Allow copy output to any IOConnector FileSystem fileSystem = mrOutdir.getFileSystem(getConf()); RemoteIterator it = fileSystem.listFiles(mrOutdir, false); @@ -461,10 +463,12 @@ protected List concatMrOutputToLocal(Path mrOutdir, Path localOutput, bool && !path.getName().equals(ParquetFileWriter.PARQUET_METADATA_FILE) && !path.getName().equals(ParquetFileWriter.PARQUET_COMMON_METADATA_FILE) && status.getLen() > 0) { - paths.add(path); + if (partFilePrefix == null || path.getName().startsWith(partFilePrefix)) { + paths.add(path); + } } } - if (paths.size() == 0) { + if (paths.isEmpty()) { LOGGER.warn("The MapReduce job didn't produce any output. 
This may not be expected."); } else if (paths.size() == 1) { LOGGER.info("Copy to local file " + paths.get(0).toUri() + " to " + localOutput.toUri()); @@ -475,17 +479,15 @@ protected List concatMrOutputToLocal(Path mrOutdir, Path localOutput, bool LOGGER.info(" Source : " + mrOutdir.toUri()); LOGGER.info(" Target : " + localOutput.toUri()); LOGGER.info(" ---- "); - try (FSDataOutputStream fsOs = localOutput.getFileSystem(getConf()).create(localOutput)) { - boolean isGzip = paths.get(0).getName().endsWith(".gz"); - OutputStream os; - if (isGzip) { - os = new GZIPOutputStream(fsOs); - } else { - os = fsOs; - } + boolean isGzip = paths.get(0).getName().endsWith(".gz"); + try (FSDataOutputStream fsOs = localOutput.getFileSystem(getConf()).create(localOutput); + OutputStream gzOs = isGzip ? new GZIPOutputStream(fsOs) : null) { + OutputStream os = gzOs == null ? fsOs : gzOs; for (int i = 0; i < paths.size(); i++) { Path path = paths.get(i); - LOGGER.info("Concat file : '{}' {} ", path.toUri(), + LOGGER.info("Concat {}file : '{}' {} ", + isGzip ? 
"gzip " : "", + path.toUri(), humanReadableByteCount(fileSystem.getFileStatus(path).getLen(), false)); try (FSDataInputStream fsIs = fileSystem.open(path)) { InputStream is; diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/VariantTableAggregationDriver.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/VariantTableAggregationDriver.java index 374b5526e8..0d471c8387 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/VariantTableAggregationDriver.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/VariantTableAggregationDriver.java @@ -144,7 +144,7 @@ protected void postExecution(boolean succeed) throws IOException, StorageEngineE super.postExecution(succeed); if (succeed) { if (localOutput != null) { - concatMrOutputToLocal(outdir, localOutput, isOutputWithHeaders()); + concatMrOutputToLocal(outdir, localOutput, isOutputWithHeaders(), null); } } if (localOutput != null) { diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/executors/SshMRExecutor.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/executors/SshMRExecutor.java index b205511f83..612f3183a9 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/executors/SshMRExecutor.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/executors/SshMRExecutor.java @@ -106,6 +106,11 @@ public Result run(String executable, String[] args) throws StorageEngineExceptio ObjectMap 
result = readResult(new String(outputStream.toByteArray(), Charset.defaultCharset())); if (exitValue == 0) { copyOutputFiles(args, env); + for (String key : result.keySet()) { + if (key.startsWith("EXTRA_OUTPUT_")) { + copyOutputFiles(result.getString(key), env); + } + } } return new Result(exitValue, result); } @@ -125,33 +130,38 @@ private Path copyOutputFiles(String[] args, List env) throws StorageEngi List argsList = Arrays.asList(args); int outputIdx = argsList.indexOf("output"); if (outputIdx > 0 && argsList.size() > outputIdx + 1) { - String targetOutput = UriUtils.createUriSafe(argsList.get(outputIdx + 1)).getPath(); - if (StringUtils.isNotEmpty(targetOutput)) { - String remoteOpencgaHome = getOptions().getString(MR_EXECUTOR_SSH_REMOTE_OPENCGA_HOME.key()); - String srcOutput; - if (StringUtils.isNoneEmpty(remoteOpencgaHome, getOpencgaHome())) { - srcOutput = targetOutput.replaceAll(getOpencgaHome(), remoteOpencgaHome); - } else { - srcOutput = targetOutput; - } + return copyOutputFiles(argsList.get(outputIdx + 1), env); + } + // Nothing to copy + return null; + } - String hadoopScpBin = getOptions() - .getString(MR_EXECUTOR_SSH_HADOOP_SCP_BIN.key(), MR_EXECUTOR_SSH_HADOOP_SCP_BIN.defaultValue()); - String commandLine = getBinPath(hadoopScpBin) + " " + srcOutput + " " + targetOutput; + private Path copyOutputFiles(String output, List env) throws StorageEngineException { + String targetOutput = UriUtils.createUriSafe(output).getPath(); + if (StringUtils.isNotEmpty(targetOutput)) { + String remoteOpencgaHome = getOptions().getString(MR_EXECUTOR_SSH_REMOTE_OPENCGA_HOME.key()); + String srcOutput; + if (StringUtils.isNoneEmpty(remoteOpencgaHome, getOpencgaHome())) { + srcOutput = targetOutput.replaceAll(getOpencgaHome(), remoteOpencgaHome); + } else { + srcOutput = targetOutput; + } - Command command = new Command(commandLine, env); - command.run(); - int exitValue = command.getExitValue(); - if (exitValue != 0) { - String sshHost = 
getOptions().getString(MR_EXECUTOR_SSH_HOST.key()); - String sshUser = getOptions().getString(MR_EXECUTOR_SSH_USER.key()); - throw new StorageEngineException("There was an issue copying files from " - + sshUser + "@" + sshHost + ":" + srcOutput + " to " + targetOutput); - } - return Paths.get(targetOutput); + String hadoopScpBin = getOptions() + .getString(MR_EXECUTOR_SSH_HADOOP_SCP_BIN.key(), MR_EXECUTOR_SSH_HADOOP_SCP_BIN.defaultValue()); + String commandLine = getBinPath(hadoopScpBin) + " " + srcOutput + " " + targetOutput; + + Command command = new Command(commandLine, env); + command.run(); + int exitValue = command.getExitValue(); + if (exitValue != 0) { + String sshHost = getOptions().getString(MR_EXECUTOR_SSH_HOST.key()); + String sshUser = getOptions().getString(MR_EXECUTOR_SSH_USER.key()); + throw new StorageEngineException("There was an issue copying files from " + + sshUser + "@" + sshHost + ":" + srcOutput + " to " + targetOutput); } + return Paths.get(targetOutput); } - // Nothing to copy return null; } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/io/VariantDriver.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/io/VariantDriver.java index 10968dd211..40d178384a 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/io/VariantDriver.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/io/VariantDriver.java @@ -48,10 +48,10 @@ public abstract class VariantDriver extends AbstractVariantsTableDriver { public static final String OUTPUT_PARAM = "output"; public static final String CONCAT_OUTPUT_PARAM = "concat-output"; - private Path outdir; - private Path localOutput; - private Query query = new Query(); - private QueryOptions 
options = new QueryOptions(); + protected Path outdir; + protected Path localOutput; + private final Query query = new Query(); + private final QueryOptions options = new QueryOptions(); private static Logger logger = LoggerFactory.getLogger(VariantDriver.class); protected boolean useReduceStep; @@ -166,14 +166,16 @@ protected final Job setupJob(Job job, String archiveTable, String variantTable) @Override protected void postExecution(boolean succeed) throws IOException, StorageEngineException { super.postExecution(succeed); - if (succeed) { - if (localOutput != null) { - concatMrOutputToLocal(outdir, localOutput); - } - } if (localOutput != null) { + if (succeed) { + copyMrOutputToLocal(); + } deleteTemporaryFile(outdir); } } + protected void copyMrOutputToLocal() throws IOException { + concatMrOutputToLocal(outdir, localOutput, true, null); + } + } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantDriver.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantDriver.java index d1c2e73ad4..9314c1e981 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantDriver.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantDriver.java @@ -1,6 +1,7 @@ package org.opencb.opencga.storage.hadoop.variant.mr; import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.io.ImmutableBytesWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.compress.CompressionCodec; @@ -138,7 +139,11 @@ protected void setupJob(Job job) throws IOException { reducerClass = StreamVariantReducer.class; - outputFormatClass = ValueOnlyTextOutputFormat.class; 
+ MultipleOutputs.addNamedOutput(job, "stdout", ValueOnlyTextOutputFormat.class, keyClass, valueClass); + MultipleOutputs.addNamedOutput(job, "stderr", ValueOnlyTextOutputFormat.class, keyClass, valueClass); + LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class); + outputFormatClass = LazyOutputFormat.class; + job.setOutputFormatClass(ValueOnlyTextOutputFormat.class); TextOutputFormat.setCompressOutput(job, true); TextOutputFormat.setOutputCompressorClass(job, GzipCodec.class); @@ -152,6 +157,15 @@ protected String getJobOperationName() { return "stream-variants"; } + + @Override + protected void copyMrOutputToLocal() throws IOException { + concatMrOutputToLocal(outdir, localOutput, true, "stdout"); + Path stderrOutput = localOutput.suffix(".stderr.txt.gz"); + concatMrOutputToLocal(outdir, stderrOutput, true, "stderr"); + printKeyValue("EXTRA_OUTPUT_STDERR", stderrOutput); + } + @SuppressWarnings("unchecked") public static void main(String[] args) { main(args, (Class) MethodHandles.lookup().lookupClass()); diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantMapper.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantMapper.java index 09845a8a61..58f3a87188 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantMapper.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantMapper.java @@ -2,6 +2,7 @@ import com.fasterxml.jackson.databind.MapperFeature; import com.fasterxml.jackson.databind.ObjectMapper; +import org.apache.commons.lang3.time.StopWatch; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import 
org.apache.hadoop.conf.Configuration; @@ -17,6 +18,7 @@ import org.opencb.commons.datastore.core.QueryOptions; import org.opencb.commons.exec.Command; import org.opencb.commons.io.DataWriter; +import org.opencb.opencga.core.common.TimeUtils; import org.opencb.opencga.storage.core.exceptions.StorageEngineException; import org.opencb.opencga.storage.core.metadata.VariantMetadataFactory; import org.opencb.opencga.storage.core.metadata.VariantStorageMetadataManager; @@ -52,9 +54,13 @@ public class StreamVariantMapper extends VariantMapper 01 // 3 -> 03 @@ -118,8 +130,7 @@ protected void setup(Context context) throws IOException, InterruptedException { chromosome = "0" + chromosome; } - outputKeyPrefix = String.format("%s|%010d|", chromosome, variant.getStart()); - outputKeyNum = 0; + return String.format("%s|%010d|", chromosome, start); } @Override @@ -334,6 +345,7 @@ private void startProcess(Context context) throws IOException, StorageEngineExce // envs.forEach((k, v) -> LOG.info("Config ENV: " + k + "=" + v)); builder.environment().putAll(envs); process = builder.start(); + processCount++; stdin = new DataOutputStream(new BufferedOutputStream( process.getOutputStream(), @@ -415,6 +427,7 @@ private class MROutputThread extends Thread { private final Mapper.Context context; private long lastStdoutReport = 0; + private int numRecords = 0; MROutputThread(Context context) { this.context = context; @@ -426,8 +439,7 @@ public void run() { LineReader stdoutLineReader = new LineReader(stdout); try { while (stdoutLineReader.readLine(line) > 0) { - context.write(new ImmutableBytesWritable(Bytes.toBytes(outputKeyPrefix + (outputKeyNum++))), line); -// context.write(null, line); + write(line); if (verboseStdout) { LOG.info("[STDOUT] - " + line); } @@ -448,6 +460,12 @@ public void run() { addException(th); } } + + private void write(Text line) throws IOException, InterruptedException { + numRecords++; + context.write(new ImmutableBytesWritable( + 
Bytes.toBytes(StreamVariantReducer.STDOUT_KEY + outputKeyPrefix + (stdoutKeyNum++))), line); + } } private class MRErrorThread extends Thread { @@ -475,6 +493,13 @@ public void run() { Text line = new Text(); LineReader stderrLineReader = new LineReader(stderr); try { + StopWatch stopWatch = StopWatch.createStarted(); + write("---------- " + context.getTaskAttemptID().toString() + " -----------"); + write("Start time : " + TimeUtils.getTimeMillis()); + write("Batch start : " + firstKey + " -> " + outputKeyPrefix); + write("sub-process #" + processCount); + write("--- START STDERR ---"); + int numRecords = 0; while (stderrLineReader.readLine(line) > 0) { String lineStr = line.toString(); if (matchesReporter(lineStr)) { @@ -493,6 +518,8 @@ public void run() { while (stderrBufferSize > STDERR_BUFFER_CAPACITY && stderrBuffer.size() > 3) { stderrBufferSize -= stderrBuffer.remove().length(); } + write(line); + numRecords++; LOG.info("[STDERR] - " + lineStr); } long now = System.currentTimeMillis(); @@ -502,11 +529,24 @@ public void run() { } line.clear(); } + write("--- END STDERR ---"); + write("Execution time : " + TimeUtils.durationToString(stopWatch)); + write("STDOUT lines : " + stdoutThread.numRecords); + write("STDERR lines : " + numRecords); } catch (Throwable th) { addException(th); } } + private void write(String line) throws IOException, InterruptedException { + write(new Text(line)); + } + + private void write(Text line) throws IOException, InterruptedException { + context.write(new ImmutableBytesWritable( + Bytes.toBytes(StreamVariantReducer.STDERR_KEY + outputKeyPrefix + (stderrKeyNum++))), line); + } + private boolean matchesReporter(String line) { return line.startsWith(reporterPrefix); } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantReducer.java 
b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantReducer.java index dac1b5dae4..cfe798a5b8 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantReducer.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantReducer.java @@ -3,31 +3,47 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hbase.io.ImmutableBytesWritable; +import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Reducer; +import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs; import java.io.IOException; public class StreamVariantReducer extends Reducer { + + public static final String STDOUT_KEY = "O:"; + public static final byte[] STDOUT_KEY_BYTES = Bytes.toBytes(STDOUT_KEY); + public static final String STDERR_KEY = "E:"; + public static final byte[] STDERR_KEY_BYTES = Bytes.toBytes(STDERR_KEY); + private static final Log LOG = LogFactory.getLog(StreamVariantReducer.class); + private MultipleOutputs mos; @Override protected void setup(Reducer.Context context) throws IOException, InterruptedException { super.setup(context); + mos = new MultipleOutputs<>(context); } @Override - protected void reduce(ImmutableBytesWritable key, Iterable values, Reducer.Context context) throws IOException, InterruptedException { - + protected void reduce(ImmutableBytesWritable key, Iterable values, Reducer.Context context) + throws IOException, InterruptedException { for (Text value : values) { - context.write(key, value); - context.getCounter(VariantsTableMapReduceHelper.COUNTER_GROUP_NAME, "stdout_records").increment(1); + if (Bytes.equals(key.get(), key.getOffset(), STDOUT_KEY_BYTES.length, STDOUT_KEY_BYTES, 0, 
STDOUT_KEY_BYTES.length)) { + mos.write("stdout", key, value); + context.getCounter(VariantsTableMapReduceHelper.COUNTER_GROUP_NAME, "stdout_records").increment(1); + } else { + mos.write("stderr", key, value); + context.getCounter(VariantsTableMapReduceHelper.COUNTER_GROUP_NAME, "stderr_records").increment(1); + } + context.getCounter(VariantsTableMapReduceHelper.COUNTER_GROUP_NAME, "records").increment(1); } - } @Override protected void cleanup(Reducer.Context context) throws IOException, InterruptedException { super.cleanup(context); + mos.close(); } } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantMapperTest.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantMapperTest.java new file mode 100644 index 0000000000..690a16df5f --- /dev/null +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantMapperTest.java @@ -0,0 +1,41 @@ +package org.opencb.opencga.storage.hadoop.variant.mr; + +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.opencb.opencga.core.testclassification.duration.ShortTests; + +import static org.junit.Assert.*; + + +@Category(ShortTests.class) +public class StreamVariantMapperTest { + @Test + public void buildOutputKeyPrefixSingleDigitChromosome() { + String result = StreamVariantMapper.buildOutputKeyPrefix("1", 100); + assertEquals("01|0000000100|", result); + } + + @Test + public void buildOutputKeyPrefixDoubleDigitChromosome() { + String result = StreamVariantMapper.buildOutputKeyPrefix("22", 100); + assertEquals("22|0000000100|", result); + } + + @Test + public void buildOutputKeyPrefixRandomChromosome() { + String result = StreamVariantMapper.buildOutputKeyPrefix("1_KI270712v1_random", 100); + 
assertEquals("01_KI270712v1_random|0000000100|", result); + } + + @Test + public void buildOutputKeyPrefixMTChromosome() { + String result = StreamVariantMapper.buildOutputKeyPrefix("MT", 100); + assertEquals("MT|0000000100|", result); + } + + @Test + public void buildOutputKeyPrefixXChromosome() { + String result = StreamVariantMapper.buildOutputKeyPrefix("X", 100); + assertEquals("X|0000000100|", result); + } +} \ No newline at end of file From 154befa4baec3f7bba3773eb7ec7b6c1b5f7f438 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Fri, 25 Oct 2024 10:21:57 +0100 Subject: [PATCH 018/122] storage: Do not write multiple headers. #TASK-6722 --- .../variant/mr/StreamVariantReducer.java | 43 ++++++++++++++++--- 1 file changed, 38 insertions(+), 5 deletions(-) diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantReducer.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantReducer.java index cfe798a5b8..3a52bfbfbc 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantReducer.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantReducer.java @@ -18,20 +18,37 @@ public class StreamVariantReducer extends Reducer mos; + private boolean headerWritten = false; @Override - protected void setup(Reducer.Context context) throws IOException, InterruptedException { + protected void setup(Reducer.Context context) + throws IOException, InterruptedException { super.setup(context); mos = new MultipleOutputs<>(context); } @Override - protected void reduce(ImmutableBytesWritable key, Iterable values, Reducer.Context context) + protected void reduce(ImmutableBytesWritable key, 
Iterable values, + Reducer.Context context) throws IOException, InterruptedException { for (Text value : values) { - if (Bytes.equals(key.get(), key.getOffset(), STDOUT_KEY_BYTES.length, STDOUT_KEY_BYTES, 0, STDOUT_KEY_BYTES.length)) { - mos.write("stdout", key, value); + if (hasPrefix(key, STDOUT_KEY_BYTES)) { + if (hasPrefix(value, HEADER_PREFIX_BYTES)) { + if (headerWritten) { + // skip header + context.getCounter(VariantsTableMapReduceHelper.COUNTER_GROUP_NAME, "header_records_skip").increment(1); + } else { + mos.write("stdout", key, value); + context.getCounter(VariantsTableMapReduceHelper.COUNTER_GROUP_NAME, "header_records").increment(1); + } + } else { + // No more header, assume all header is written + headerWritten = true; + mos.write("stdout", key, value); + context.getCounter(VariantsTableMapReduceHelper.COUNTER_GROUP_NAME, "body_records").increment(1); + } context.getCounter(VariantsTableMapReduceHelper.COUNTER_GROUP_NAME, "stdout_records").increment(1); } else { mos.write("stderr", key, value); @@ -41,8 +58,24 @@ protected void reduce(ImmutableBytesWritable key, Iterable values, Reducer } } + private static boolean hasPrefix(ImmutableBytesWritable key, byte[] prefix) { + return hasPrefix(key.get(), key.getOffset(), key.getLength(), prefix); + } + + private static boolean hasPrefix(Text text, byte[] prefix) { + return hasPrefix(text.getBytes(), 0, text.getLength(), prefix); + } + + private static boolean hasPrefix(byte[] key, int offset, int length, byte[] prefix) { + if (length < prefix.length) { + return false; + } + return Bytes.equals(key, offset, prefix.length, prefix, 0, prefix.length); + } + @Override - protected void cleanup(Reducer.Context context) throws IOException, InterruptedException { + protected void cleanup(Reducer.Context context) + throws IOException, InterruptedException { super.cleanup(context); mos.close(); } From 85aac6d78d03ef2e4036f53d1fddb6b48185bdaa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= 
Date: Fri, 25 Oct 2024 14:15:28 +0100 Subject: [PATCH 019/122] storage: Fix NoSuchMethodError creating StopWatch. #TASK-6722 --- .../storage/hadoop/variant/mr/StreamVariantMapper.java | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantMapper.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantMapper.java index 58f3a87188..626a017512 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantMapper.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantMapper.java @@ -2,7 +2,6 @@ import com.fasterxml.jackson.databind.MapperFeature; import com.fasterxml.jackson.databind.ObjectMapper; -import org.apache.commons.lang3.time.StopWatch; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; @@ -12,6 +11,7 @@ import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.util.LineReader; +import org.apache.hadoop.util.StopWatch; import org.opencb.biodata.models.variant.Variant; import org.opencb.biodata.models.variant.metadata.VariantMetadata; import org.opencb.commons.datastore.core.Query; @@ -27,6 +27,7 @@ import java.io.*; import java.util.*; +import java.util.concurrent.TimeUnit; import static org.opencb.opencga.storage.hadoop.variant.mr.VariantsTableMapReduceHelper.COUNTER_GROUP_NAME; @@ -493,7 +494,8 @@ public void run() { Text line = new Text(); LineReader stderrLineReader = new LineReader(stderr); try { - StopWatch stopWatch = StopWatch.createStarted(); + StopWatch stopWatch = new StopWatch(); + 
stopWatch.start(); write("---------- " + context.getTaskAttemptID().toString() + " -----------"); write("Start time : " + TimeUtils.getTimeMillis()); write("Batch start : " + firstKey + " -> " + outputKeyPrefix); @@ -530,7 +532,7 @@ public void run() { line.clear(); } write("--- END STDERR ---"); - write("Execution time : " + TimeUtils.durationToString(stopWatch)); + write("Execution time : " + TimeUtils.durationToString(stopWatch.now(TimeUnit.MILLISECONDS))); write("STDOUT lines : " + stdoutThread.numRecords); write("STDERR lines : " + numRecords); } catch (Throwable th) { From 697b08bea39a36575658c2c04640eb5eee8b1a80 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Fri, 25 Oct 2024 15:36:23 +0100 Subject: [PATCH 020/122] storage: Ensure stderr file is moved from scratch dir. #TASK-6722 --- .../opencb/opencga/analysis/variant/VariantWalkerTool.java | 2 +- .../analysis/variant/manager/VariantStorageManager.java | 2 +- .../opencga/storage/core/variant/VariantStorageEngine.java | 4 ++-- .../storage/core/variant/dummy/DummyVariantStorageEngine.java | 2 +- .../storage/hadoop/variant/HadoopVariantStorageEngine.java | 4 ++-- .../storage/hadoop/variant/mr/StreamVariantDriver.java | 3 ++- 6 files changed, 9 insertions(+), 8 deletions(-) diff --git a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/VariantWalkerTool.java b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/VariantWalkerTool.java index 56e008e0da..68ad63d354 100644 --- a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/VariantWalkerTool.java +++ b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/VariantWalkerTool.java @@ -82,7 +82,7 @@ protected void run() throws Exception { Query query = toolParams.toQuery(); QueryOptions queryOptions = new QueryOptions().append(QueryOptions.INCLUDE, toolParams.getInclude()) .append(QueryOptions.EXCLUDE, toolParams.getExclude()); - 
uris.add(variantStorageManager.walkData(outputFile, + uris.addAll(variantStorageManager.walkData(outputFile, format, query, queryOptions, toolParams.getDockerImage(), toolParams.getCommandLine(), token)); }); step("move-files", () -> { diff --git a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/manager/VariantStorageManager.java b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/manager/VariantStorageManager.java index f292e6d6a3..a5d02ab020 100644 --- a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/manager/VariantStorageManager.java +++ b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/manager/VariantStorageManager.java @@ -204,7 +204,7 @@ public List exportData(String outputFile, VariantOutputFormat outputFormat, * @throws StorageEngineException If there is any error exporting variants * @return generated files */ - public URI walkData(String outputFile, VariantOutputFormat format, + public List walkData(String outputFile, VariantOutputFormat format, Query query, QueryOptions queryOptions, String dockerImage, String commandLine, String token) throws CatalogException, StorageEngineException { String anyStudy = catalogUtils.getAnyStudy(query, token); diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStorageEngine.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStorageEngine.java index b10b2c7305..81ddc4c0e3 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStorageEngine.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStorageEngine.java @@ -285,7 +285,7 @@ public List exportData(URI outputFile, VariantOutputFormat outputFormat, UR return exporter.export(outputFile, outputFormat, variantsFile, parsedVariantQuery); } - public URI walkData(URI outputFile, 
VariantWriterFactory.VariantOutputFormat format, Query query, QueryOptions queryOptions, + public List walkData(URI outputFile, VariantWriterFactory.VariantOutputFormat format, Query query, QueryOptions queryOptions, String dockerImage, String commandLine) throws IOException, StorageEngineException { if (format == VariantWriterFactory.VariantOutputFormat.VCF || format == VariantWriterFactory.VariantOutputFormat.VCF_GZ) { @@ -326,7 +326,7 @@ public URI walkData(URI outputFile, VariantWriterFactory.VariantOutputFormat for } - public abstract URI walkData(URI outputFile, VariantOutputFormat format, Query query, QueryOptions queryOptions, + public abstract List walkData(URI outputFile, VariantOutputFormat format, Query query, QueryOptions queryOptions, String commandLine) throws StorageEngineException; diff --git a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/dummy/DummyVariantStorageEngine.java b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/dummy/DummyVariantStorageEngine.java index 65a0169ef8..e10370dcaa 100644 --- a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/dummy/DummyVariantStorageEngine.java +++ b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/dummy/DummyVariantStorageEngine.java @@ -143,7 +143,7 @@ public void importData(URI input, VariantMetadata metadata, List walkData(URI outputFile, VariantWriterFactory.VariantOutputFormat format, Query query, QueryOptions queryOptions, String commandLine) throws StorageEngineException { throw new UnsupportedOperationException("Unable to walk data in " + getStorageEngineId()); } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantStorageEngine.java 
b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantStorageEngine.java index 8598407233..023dbbaeec 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantStorageEngine.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantStorageEngine.java @@ -317,7 +317,7 @@ protected VariantExporter newVariantExporter(VariantMetadataFactory metadataFact } @Override - public URI walkData(URI outputFile, VariantWriterFactory.VariantOutputFormat format, + public List walkData(URI outputFile, VariantWriterFactory.VariantOutputFormat format, Query query, QueryOptions queryOptions, String commandLine) throws StorageEngineException { ParsedVariantQuery variantQuery = parseQuery(query, queryOptions); int studyId = variantQuery.getStudyQuery().getDefaultStudy().getId(); @@ -342,7 +342,7 @@ public URI walkData(URI outputFile, VariantWriterFactory.VariantOutputFormat for .append(StreamVariantDriver.INPUT_FORMAT_PARAM, format.toString()) .append(StreamVariantDriver.OUTPUT_PARAM, outputFile) ), "Walk data"); - return outputFile; + return Arrays.asList(outputFile, UriUtils.createUriSafe(outputFile.toString() + StreamVariantDriver.STDERR_TXT_GZ)); } @Override diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantDriver.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantDriver.java index 9314c1e981..cdab0e9e3c 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantDriver.java +++ 
b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantDriver.java @@ -33,6 +33,7 @@ public class StreamVariantDriver extends VariantDriver { public static final String COMMAND_LINE_BASE64_PARAM = "commandLineBase64"; public static final String MAX_BYTES_PER_MAP_PARAM = "maxBytesPerMap"; public static final String ENVIRONMENT_VARIABLES = "envVars"; + public static final String STDERR_TXT_GZ = ".stderr.txt.gz"; private VariantWriterFactory.VariantOutputFormat format; private int maxBytesPerMap; @@ -161,7 +162,7 @@ protected String getJobOperationName() { @Override protected void copyMrOutputToLocal() throws IOException { concatMrOutputToLocal(outdir, localOutput, true, "stdout"); - Path stderrOutput = localOutput.suffix(".stderr.txt.gz"); + Path stderrOutput = localOutput.suffix(STDERR_TXT_GZ); concatMrOutputToLocal(outdir, stderrOutput, true, "stderr"); printKeyValue("EXTRA_OUTPUT_STDERR", stderrOutput); } From 85671b9143458d5a6986bd9ce5eb2a6103450fa0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Fri, 25 Oct 2024 16:15:59 +0100 Subject: [PATCH 021/122] app: Rename StudyMetadata.name to match the new fqn. 
#TASK-7118 --- .../v3/v3_0_0/OrganizationMigration.java | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/opencga-app/src/main/java/org/opencb/opencga/app/migrations/v3/v3_0_0/OrganizationMigration.java b/opencga-app/src/main/java/org/opencb/opencga/app/migrations/v3/v3_0_0/OrganizationMigration.java index 26780bc506..3d3d98d9ca 100644 --- a/opencga-app/src/main/java/org/opencb/opencga/app/migrations/v3/v3_0_0/OrganizationMigration.java +++ b/opencga-app/src/main/java/org/opencb/opencga/app/migrations/v3/v3_0_0/OrganizationMigration.java @@ -52,6 +52,8 @@ import java.util.*; import java.util.stream.Collectors; +import static org.opencb.opencga.core.config.storage.StorageConfiguration.Mode.READ_ONLY; + @Migration(id = "add_organizations", description = "Add new Organization layer #TASK-4389", version = "3.0.0", language = Migration.MigrationLanguage.JAVA, domain = Migration.MigrationDomain.CATALOG, date = 20231212) public class OrganizationMigration extends MigrationTool { @@ -65,6 +67,7 @@ public class OrganizationMigration extends MigrationTool { private Set userIdsToDiscardData; private MigrationStatus status; + private boolean changeOrganizationId; private enum MigrationStatus { MIGRATED, @@ -212,6 +215,11 @@ private MigrationStatus checkAndInit() throws CatalogException, IOException { if (StringUtils.isEmpty(this.organizationId)) { this.organizationId = this.userId; } + changeOrganizationId = !this.organizationId.equals(this.userId); + if (changeOrganizationId && readStorageConfiguration().getMode() == READ_ONLY) { + throw new CatalogException("Cannot change organization id when storage is in read-only mode"); + } + ParamUtils.checkIdentifier(this.organizationId, "Organization id"); this.catalogManager = new CatalogManager(configuration); return MigrationStatus.PENDING_MIGRATION; @@ -464,7 +472,7 @@ protected void run() throws Exception { } // If the user didn't want to use the userId as the new organization id, we then need to 
change all the fqn's - if (!this.organizationId.equals(this.userId)) { + if (changeOrganizationId) { logger.info("New organization id '{}' is different from original userId '{}'. Changing FQN's from projects and studies" , this.organizationId, this.userId); changeFqns(); @@ -515,6 +523,13 @@ private void changeFqns() throws CatalogDBException, MigrationException { .append("storageEngine", dataStore.getStorageEngine()) .append("dbName", dataStore.getDbName()) .append("options", new Document())); + variantStorageEngine.getMetadataManager().updateStudyMetadata(oldFqn, studyMetadata -> { + studyMetadata.setName(newFqn); + studyMetadata.getAttributes().put("OPENCGA.3_0_0", new Document() + .append("date", date) + .append("oldFqn", oldFqn) + ); + }); } else { logger.info("Project does not exist in the variant storage. Skipping"); } From 356567e96e9f13b087c2857cf3a763718cf9694e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Fri, 25 Oct 2024 16:23:20 +0100 Subject: [PATCH 022/122] storage: Fix stderr sorting. 
#TASK-6722 --- .../storage/hadoop/variant/mr/StreamVariantMapper.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantMapper.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantMapper.java index 626a017512..f8bba49fbc 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantMapper.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantMapper.java @@ -465,7 +465,7 @@ public void run() { private void write(Text line) throws IOException, InterruptedException { numRecords++; context.write(new ImmutableBytesWritable( - Bytes.toBytes(StreamVariantReducer.STDOUT_KEY + outputKeyPrefix + (stdoutKeyNum++))), line); + Bytes.toBytes(String.format("%s%s%08d", StreamVariantReducer.STDOUT_KEY, outputKeyPrefix, stdoutKeyNum++))), line); } } @@ -546,7 +546,7 @@ private void write(String line) throws IOException, InterruptedException { private void write(Text line) throws IOException, InterruptedException { context.write(new ImmutableBytesWritable( - Bytes.toBytes(StreamVariantReducer.STDERR_KEY + outputKeyPrefix + (stderrKeyNum++))), line); + Bytes.toBytes(String.format("%s%s%08d", StreamVariantReducer.STDERR_KEY, outputKeyPrefix, stderrKeyNum++))), line); } private boolean matchesReporter(String line) { From 6253da302c4137e8d4e0b491b009b9a090879cc0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Fri, 25 Oct 2024 17:47:14 +0100 Subject: [PATCH 023/122] storage: Write `\n` after the json header #TASK-6722 --- .../opencga/storage/hadoop/variant/mr/StreamVariantMapper.java | 1 + 1 file changed, 1 insertion(+) diff 
--git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantMapper.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantMapper.java index f8bba49fbc..0e45c182c0 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantMapper.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantMapper.java @@ -372,6 +372,7 @@ private void startProcess(Context context) throws IOException, StorageEngineExce } ObjectMapper objectMapper = new ObjectMapper().configure(MapperFeature.REQUIRE_SETTERS_FOR_GETTERS, true); objectMapper.writeValue((DataOutput) stdin, metadata); + stdin.write('\n'); } processedBytes = 0; From f73fbc16b4c12e68cb3f2b94ec6e7de2ddc12714 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Mon, 28 Oct 2024 14:11:43 +0000 Subject: [PATCH 024/122] app: Fix rename storage studyFqn #TASK-7118 --- .../v3/v3_0_0/OrganizationMigration.java | 33 +++++++++++-------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/opencga-app/src/main/java/org/opencb/opencga/app/migrations/v3/v3_0_0/OrganizationMigration.java b/opencga-app/src/main/java/org/opencb/opencga/app/migrations/v3/v3_0_0/OrganizationMigration.java index 3d3d98d9ca..58e50f750b 100644 --- a/opencga-app/src/main/java/org/opencb/opencga/app/migrations/v3/v3_0_0/OrganizationMigration.java +++ b/opencga-app/src/main/java/org/opencb/opencga/app/migrations/v3/v3_0_0/OrganizationMigration.java @@ -493,16 +493,16 @@ private void changeFqns() throws CatalogDBException, MigrationException { for (String projectCol : Arrays.asList(OrganizationMongoDBAdaptorFactory.PROJECT_COLLECTION, 
OrganizationMongoDBAdaptorFactory.DELETED_PROJECT_COLLECTION)) { migrateCollection(projectCol, new Document(), Projections.include("_id", "id", "fqn", "internal.datastores.variant"), (document, bulk) -> { - String oldFqn = document.getString("fqn"); String projectId = document.getString("id"); - String newFqn = FqnUtils.buildFqn(this.organizationId, projectId); - logger.info("Changing project fqn from '{}' to '{}'", oldFqn, newFqn); + String oldProjectFqn = document.getString("fqn"); + String newProjectFqn = FqnUtils.buildFqn(this.organizationId, projectId); + logger.info("Changing project fqn from '{}' to '{}'", oldProjectFqn, newProjectFqn); Document set = new Document() - .append("fqn", newFqn) + .append("fqn", newProjectFqn) .append("attributes.OPENCGA.3_0_0", new Document() .append("date", date) - .append("oldFqn", oldFqn) + .append("oldFqn", oldProjectFqn) ); Document internal = document.get("internal", Document.class); @@ -511,8 +511,8 @@ private void changeFqns() throws CatalogDBException, MigrationException { if (datastores != null) { Document variant = datastores.get("variant", Document.class); if (variant == null) { - DataStore dataStore = VariantStorageManager.defaultDataStore(configuration.getDatabasePrefix(), oldFqn); - logger.info("Undefined variant \"internal.datastores.variant\" at project '{}'.", oldFqn); + DataStore dataStore = VariantStorageManager.defaultDataStore(configuration.getDatabasePrefix(), oldProjectFqn); + logger.info("Undefined variant \"internal.datastores.variant\" at project '{}'.", oldProjectFqn); // Update only if the project exists in the variant storage try (VariantStorageEngine variantStorageEngine = storageEngineFactory @@ -523,13 +523,18 @@ private void changeFqns() throws CatalogDBException, MigrationException { .append("storageEngine", dataStore.getStorageEngine()) .append("dbName", dataStore.getDbName()) .append("options", new Document())); - variantStorageEngine.getMetadataManager().updateStudyMetadata(oldFqn, 
studyMetadata -> { - studyMetadata.setName(newFqn); - studyMetadata.getAttributes().put("OPENCGA.3_0_0", new Document() - .append("date", date) - .append("oldFqn", oldFqn) - ); - }); + + for (String oldStudyFqn : variantStorageEngine.getMetadataManager().getStudies().keySet()) { + String newStudyFqn = FqnUtils.buildFqn(this.organizationId, projectId, FqnUtils.parse(oldStudyFqn).getStudy()); + logger.info("Changing study fqn from '{}' to '{}'", oldStudyFqn, newStudyFqn); + variantStorageEngine.getMetadataManager().updateStudyMetadata(oldStudyFqn, studyMetadata -> { + studyMetadata.setName(newStudyFqn); + studyMetadata.getAttributes().put("OPENCGA.3_0_0", new Document() + .append("date", date) + .append("oldFqn", oldStudyFqn) + ); + }); + } } else { logger.info("Project does not exist in the variant storage. Skipping"); } From d796b3d082a3d9f33490660db5a1487e219aab8e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Mon, 28 Oct 2024 14:12:59 +0000 Subject: [PATCH 025/122] app: Ensure OrganizationMigration is marked as "manual" to avoid automatic execution. 
#TASK-7118 --- .../opencga/app/migrations/v3/v3_0_0/OrganizationMigration.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/opencga-app/src/main/java/org/opencb/opencga/app/migrations/v3/v3_0_0/OrganizationMigration.java b/opencga-app/src/main/java/org/opencb/opencga/app/migrations/v3/v3_0_0/OrganizationMigration.java index 58e50f750b..0ff69f25a0 100644 --- a/opencga-app/src/main/java/org/opencb/opencga/app/migrations/v3/v3_0_0/OrganizationMigration.java +++ b/opencga-app/src/main/java/org/opencb/opencga/app/migrations/v3/v3_0_0/OrganizationMigration.java @@ -55,7 +55,7 @@ import static org.opencb.opencga.core.config.storage.StorageConfiguration.Mode.READ_ONLY; @Migration(id = "add_organizations", description = "Add new Organization layer #TASK-4389", version = "3.0.0", - language = Migration.MigrationLanguage.JAVA, domain = Migration.MigrationDomain.CATALOG, date = 20231212) + language = Migration.MigrationLanguage.JAVA, domain = Migration.MigrationDomain.CATALOG, date = 20231212, manual = true) public class OrganizationMigration extends MigrationTool { private final Configuration configuration; private final String adminPassword; From 5789628871a47ebadc502a48e5137aa18d8d283e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Tue, 29 Oct 2024 16:02:35 +0000 Subject: [PATCH 026/122] storage: Do not interrupt header with empty records. 
#TASK-6722 --- .../hadoop/variant/mr/StreamVariantReducer.java | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantReducer.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantReducer.java index 3a52bfbfbc..81e31be888 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantReducer.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantReducer.java @@ -1,5 +1,6 @@ package org.opencb.opencga.storage.hadoop.variant.mr; +import org.apache.commons.lang.StringUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hbase.io.ImmutableBytesWritable; @@ -44,8 +45,13 @@ protected void reduce(ImmutableBytesWritable key, Iterable values, context.getCounter(VariantsTableMapReduceHelper.COUNTER_GROUP_NAME, "header_records").increment(1); } } else { - // No more header, assume all header is written - headerWritten = true; + if (value.getLength() < 3 && StringUtils.isBlank(value.toString())) { + context.getCounter(VariantsTableMapReduceHelper.COUNTER_GROUP_NAME, "stdout_records_empty").increment(1); + // Do not interrupt header with empty records + } else { + // No more header, assume all header is written + headerWritten = true; + } mos.write("stdout", key, value); context.getCounter(VariantsTableMapReduceHelper.COUNTER_GROUP_NAME, "body_records").increment(1); } From 4ff0655de9f412a9823a8ddb1219411fdeeb844a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Tue, 29 Oct 2024 17:26:10 +0000 Subject: [PATCH 027/122] storage: Add a custom Partitioner to 
ensure sorted data with multiple reducers #TASK-6722 --- .../storage/hadoop/variant/GenomeHelper.java | 42 +++++++++--- .../hadoop/variant/io/VariantDriver.java | 11 ++-- .../variant/mr/StreamVariantDriver.java | 21 ++++++ .../variant/mr/StreamVariantMapper.java | 1 + .../variant/mr/StreamVariantPartitioner.java | 64 +++++++++++++++++++ .../variant/mr/StreamVariantReducer.java | 1 + .../mr/StreamVariantPartitionerTest.java | 57 +++++++++++++++++ 7 files changed, 185 insertions(+), 12 deletions(-) create mode 100644 opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantPartitioner.java create mode 100644 opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantPartitionerTest.java diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/GenomeHelper.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/GenomeHelper.java index 680f312a40..0276898a61 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/GenomeHelper.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/GenomeHelper.java @@ -97,12 +97,25 @@ public int getChunkSize() { } /** - * TODO: Query CellBase to get the chromosomes and sizes! 
* @param numberOfSplits Number of splits * @param keyGenerator Function to generate the rowKeys given a chromosome and a start * @return List of splits */ public static List generateBootPreSplitsHuman(int numberOfSplits, BiFunction keyGenerator) { + return generateBootPreSplitsHuman(numberOfSplits, keyGenerator, Bytes::compareTo, true); + } + + /** + * TODO: Query CellBase to get the chromosomes and sizes! + * @param numberOfSplits Number of splits + * @param keyGenerator Function to generate the rowKeys given a chromosome and a start + * @param compareTo Comparator to sort the splits + * @param includeEndSplit Include the last split + * @param Type of the split + * @return List of splits + */ + public static List generateBootPreSplitsHuman(int numberOfSplits, BiFunction keyGenerator, + Comparator compareTo, boolean includeEndSplit) { String[] chr = new String[]{"1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "X", "Y", }; long[] posarr = new long[]{249250621, 243199373, 198022430, 191154276, 180915260, 171115067, 159138663, @@ -112,20 +125,20 @@ public static List generateBootPreSplitsHuman(int numberOfSplits, BiFunc for (int i = 0; i < chr.length; i++) { regions.put(chr[i], posarr[i]); } - return generateBootPreSplits(numberOfSplits, keyGenerator, regions); + return generateBootPreSplits(numberOfSplits, keyGenerator, regions, compareTo, includeEndSplit); } - static List generateBootPreSplits(int numberOfSplits, BiFunction keyGenerator, - Map regionsMap) { + static List generateBootPreSplits(int numberOfSplits, BiFunction keyGenerator, + Map regionsMap, Comparator comparator, boolean includeEndSplit) { // Create a sorted map for the regions that sorts as will sort HBase given the row_key generator // In archive table, chr1 goes after chr19, and in Variants table, chr1 is always the first SortedMap sortedRegions = new TreeMap<>((s1, s2) -> - Bytes.compareTo(keyGenerator.apply(s1, 0), 
keyGenerator.apply(s2, 0))); + comparator.compare(keyGenerator.apply(s1, 0), keyGenerator.apply(s2, 0))); sortedRegions.putAll(regionsMap); - long total = sortedRegions.values().stream().reduce((a, b) -> a + b).orElse(0L); + long total = regionsMap.values().stream().mapToLong(Long::longValue).sum(); long chunkSize = total / numberOfSplits; - List splitList = new ArrayList<>(); + List splitList = new ArrayList<>(); long splitPos = chunkSize; while (splitPos < total) { long tmpPos = 0; @@ -139,10 +152,23 @@ static List generateBootPreSplits(int numberOfSplits, BiFunction admin.getClusterStatus().getServersSize()); + // Set the number of reduce tasks to 2x the number of hosts + reduceTasks = serversSize * 2; + logger.info("Set reduce tasks to " + reduceTasks + " (derived from 'number_of_servers * 2')"); + } + job.setReducerClass(getReducerClass()); + job.setPartitionerClass(StreamVariantPartitioner.class); + job.setNumReduceTasks(reduceTasks); + // TODO: Use a grouping comparator to group by chromosome and position, ignoring the rest of the key? 
+// job.setGroupingComparatorClass(StreamVariantGroupingComparator.class); +// job.setSortComparatorClass(); + } + @Override protected String getJobOperationName() { return "stream-variants"; diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantMapper.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantMapper.java index 0e45c182c0..cfcf360452 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantMapper.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantMapper.java @@ -143,6 +143,7 @@ public void run(Context context) throws IOException, InterruptedException { startProcess(context); // Do-while instead of "while", as we've already called context.nextKeyValue() once do { + // FIXME: If the chromosome is different, we should start a new process and get a new outputKeyPrefix if (processedBytes > maxInputBytesPerProcess) { LOG.info("Processed bytes = " + processedBytes + " > " + maxInputBytesPerProcess + ". 
Restarting process."); context.getCounter(COUNTER_GROUP_NAME, "RESTARTED_PROCESS").increment(1); diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantPartitioner.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantPartitioner.java new file mode 100644 index 0000000000..658ff0329f --- /dev/null +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantPartitioner.java @@ -0,0 +1,64 @@ +package org.opencb.opencga.storage.hadoop.variant.mr; + +import org.apache.hadoop.conf.Configurable; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.io.ImmutableBytesWritable; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.mapreduce.Partitioner; +import org.opencb.opencga.storage.hadoop.variant.GenomeHelper; + +import javax.xml.soap.Text; +import java.io.IOException; +import java.util.List; +import java.util.TreeMap; + +public class StreamVariantPartitioner extends Partitioner implements Configurable { + + private TreeMap regionSplitsMap = new TreeMap<>(); + private Configuration conf; + + @Override + public void setConf(Configuration conf) { + this.conf = conf; + try { + Job job = Job.getInstance(conf); + int numReduceTasks = job.getNumReduceTasks(); + setup(numReduceTasks); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + public TreeMap setup(int numPartitions) { + List splits = GenomeHelper.generateBootPreSplitsHuman( + numPartitions, StreamVariantMapper::buildOutputKeyPrefix, String::compareTo, false); + regionSplitsMap.put(StreamVariantMapper.buildOutputKeyPrefix("0", 0), 0); + for (int i = 0; i < splits.size(); i++) { + regionSplitsMap.put(splits.get(i), regionSplitsMap.size()); + } + 
return regionSplitsMap; + } + + @Override + public Configuration getConf() { + return conf; + } + + @Override + public int getPartition(ImmutableBytesWritable key, Text text, int numPartitions) { + int start = key.getOffset() + StreamVariantReducer.STDOUT_KEY_BYTES.length; + byte[] bytes = key.get(); + // Find last '|' + int idx = 0; + for (int i = key.getLength() + key.getOffset() - 1; i >= 0; i--) { + if (bytes[i] == '|') { + idx = i; + break; + } + } + String chrPos = Bytes.toString(bytes, start, idx - start); + return regionSplitsMap.floorEntry(chrPos).getValue(); + } + +} diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantReducer.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantReducer.java index 81e31be888..a6684c2d07 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantReducer.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantReducer.java @@ -45,6 +45,7 @@ protected void reduce(ImmutableBytesWritable key, Iterable values, context.getCounter(VariantsTableMapReduceHelper.COUNTER_GROUP_NAME, "header_records").increment(1); } } else { + // length < 3 to include lines with a small combination of \n \r \t and spaces. 
if (value.getLength() < 3 && StringUtils.isBlank(value.toString())) { context.getCounter(VariantsTableMapReduceHelper.COUNTER_GROUP_NAME, "stdout_records_empty").increment(1); // Do not interrupt header with empty records diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantPartitionerTest.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantPartitionerTest.java new file mode 100644 index 0000000000..bbce3cd5cf --- /dev/null +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantPartitionerTest.java @@ -0,0 +1,57 @@ +package org.opencb.opencga.storage.hadoop.variant.mr; + +import org.apache.hadoop.hbase.io.ImmutableBytesWritable; +import org.apache.hadoop.hbase.util.Bytes; +import org.junit.Before; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.opencb.opencga.core.testclassification.duration.ShortTests; + +import static org.junit.Assert.assertEquals; + +@Category(ShortTests.class) +public class StreamVariantPartitionerTest { + + public static final int NUM_PARTITIONS = 10; + private StreamVariantPartitioner partitioner; + + @Before + public void setUp() { + partitioner = new StreamVariantPartitioner(); + partitioner.setup(NUM_PARTITIONS); + } + + @Test + public void partitionerTest() { + assertEquals(0, partitioner.getPartition(new ImmutableBytesWritable(Bytes.toBytes("o:00|0000000001|")), null, NUM_PARTITIONS)); + assertEquals(0, partitioner.getPartition(new ImmutableBytesWritable(Bytes.toBytes("o:01|0000000000|")), null, NUM_PARTITIONS)); + assertEquals(0, partitioner.getPartition(new ImmutableBytesWritable(Bytes.toBytes("o:02|0000000000|")), null, NUM_PARTITIONS)); + assertEquals(1, partitioner.getPartition(new 
ImmutableBytesWritable(Bytes.toBytes("o:03|0000000000|")), null, NUM_PARTITIONS)); + assertEquals(2, partitioner.getPartition(new ImmutableBytesWritable(Bytes.toBytes("o:04|0000000000|")), null, NUM_PARTITIONS)); + assertEquals(2, partitioner.getPartition(new ImmutableBytesWritable(Bytes.toBytes("o:05|0000000000|")), null, NUM_PARTITIONS)); + assertEquals(3, partitioner.getPartition(new ImmutableBytesWritable(Bytes.toBytes("o:06|0000000000|")), null, NUM_PARTITIONS)); + assertEquals(3, partitioner.getPartition(new ImmutableBytesWritable(Bytes.toBytes("o:07|0000000000|")), null, NUM_PARTITIONS)); + assertEquals(4, partitioner.getPartition(new ImmutableBytesWritable(Bytes.toBytes("o:08|0000000000|")), null, NUM_PARTITIONS)); + assertEquals(4, partitioner.getPartition(new ImmutableBytesWritable(Bytes.toBytes("o:09|0000000000|")), null, NUM_PARTITIONS)); + assertEquals(5, partitioner.getPartition(new ImmutableBytesWritable(Bytes.toBytes("o:10|0000000000|")), null, NUM_PARTITIONS)); + assertEquals(5, partitioner.getPartition(new ImmutableBytesWritable(Bytes.toBytes("o:11|0000000000|")), null, NUM_PARTITIONS)); + assertEquals(6, partitioner.getPartition(new ImmutableBytesWritable(Bytes.toBytes("o:12|0000000000|")), null, NUM_PARTITIONS)); + assertEquals(6, partitioner.getPartition(new ImmutableBytesWritable(Bytes.toBytes("o:13|0000000000|")), null, NUM_PARTITIONS)); + assertEquals(7, partitioner.getPartition(new ImmutableBytesWritable(Bytes.toBytes("o:14|0000000000|")), null, NUM_PARTITIONS)); + assertEquals(7, partitioner.getPartition(new ImmutableBytesWritable(Bytes.toBytes("o:15|0000000000|")), null, NUM_PARTITIONS)); + assertEquals(7, partitioner.getPartition(new ImmutableBytesWritable(Bytes.toBytes("o:16|0000000000|")), null, NUM_PARTITIONS)); + assertEquals(8, partitioner.getPartition(new ImmutableBytesWritable(Bytes.toBytes("o:17|0000000000|")), null, NUM_PARTITIONS)); + assertEquals(8, partitioner.getPartition(new 
ImmutableBytesWritable(Bytes.toBytes("o:17_random_contig|0000000000|")), null, NUM_PARTITIONS)); + assertEquals(8, partitioner.getPartition(new ImmutableBytesWritable(Bytes.toBytes("o:18|0000000000|")), null, NUM_PARTITIONS)); + assertEquals(8, partitioner.getPartition(new ImmutableBytesWritable(Bytes.toBytes("o:19|0000000000|")), null, NUM_PARTITIONS)); + assertEquals(8, partitioner.getPartition(new ImmutableBytesWritable(Bytes.toBytes("o:20|0000000000|")), null, NUM_PARTITIONS)); + assertEquals(8, partitioner.getPartition(new ImmutableBytesWritable(Bytes.toBytes("o:21|0000000000|")), null, NUM_PARTITIONS)); + assertEquals(9, partitioner.getPartition(new ImmutableBytesWritable(Bytes.toBytes("o:22|0000000000|")), null, NUM_PARTITIONS)); + assertEquals(9, partitioner.getPartition(new ImmutableBytesWritable(Bytes.toBytes("o:X|0000000000|")), null, NUM_PARTITIONS)); + assertEquals(9, partitioner.getPartition(new ImmutableBytesWritable(Bytes.toBytes("o:Y|0000000000|")), null, NUM_PARTITIONS)); + assertEquals(9, partitioner.getPartition(new ImmutableBytesWritable(Bytes.toBytes("o:MT|0000000000|")), null, NUM_PARTITIONS)); + assertEquals(9, partitioner.getPartition(new ImmutableBytesWritable(Bytes.toBytes("o:Z|0000000000|")), null, NUM_PARTITIONS)); + assertEquals(9, partitioner.getPartition(new ImmutableBytesWritable(Bytes.toBytes("o:Z_random_contig|0000000000|")), null, NUM_PARTITIONS)); + } + +} \ No newline at end of file From 82682667d9de2a516c7d1341446c9b8e670449c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Tue, 29 Oct 2024 17:50:53 +0000 Subject: [PATCH 028/122] storage: Fix partitioner. 
#TASK-6722 --- .../storage/hadoop/variant/mr/StreamVariantDriver.java | 5 +++-- .../storage/hadoop/variant/mr/StreamVariantPartitioner.java | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantDriver.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantDriver.java index dede7b6789..ccf2007408 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantDriver.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantDriver.java @@ -155,11 +155,12 @@ protected void setupJob(Job job) throws IOException { @Override protected void setupReducer(Job job, String variantTableName) throws IOException { - String numReducersStr = getParam(JobContext.NUM_REDUCES); + String numReducersKey = getClass().getSimpleName() + "." 
+ JobContext.NUM_REDUCES; + String numReducersStr = getParam(numReducersKey); int reduceTasks; if (StringUtils.isNotEmpty(numReducersStr)) { reduceTasks = Integer.parseInt(numReducersStr); - logger.info("Set reduce tasks to " + reduceTasks + " (derived from input parameter '" + JobContext.NUM_REDUCES + "')"); + logger.info("Set reduce tasks to " + reduceTasks + " (derived from input parameter '" + numReducersKey + "')"); } else { int serversSize = getHBaseManager().act(variantTableName, (table, admin) -> admin.getClusterStatus().getServersSize()); // Set the number of reduce tasks to 2x the number of hosts diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantPartitioner.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantPartitioner.java index 658ff0329f..d2e1f0056c 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantPartitioner.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantPartitioner.java @@ -4,11 +4,11 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.io.ImmutableBytesWritable; import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.Partitioner; import org.opencb.opencga.storage.hadoop.variant.GenomeHelper; -import javax.xml.soap.Text; import java.io.IOException; import java.util.List; import java.util.TreeMap; From 4147d0157ef4a30f7af953be237b3b345c8cbb6f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Tue, 29 Oct 2024 17:52:15 +0000 Subject: [PATCH 029/122] storage: Restart process when changing chromosome to ensure 
correct sorting. #TASK-6722 --- .../variant/mr/StreamVariantMapper.java | 58 +++++++++++++------ 1 file changed, 39 insertions(+), 19 deletions(-) diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantMapper.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantMapper.java index cfcf360452..6899398099 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantMapper.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantMapper.java @@ -53,16 +53,13 @@ public class StreamVariantMapper extends VariantMapper variantDataWriter; + protected final List throwables = Collections.synchronizedList(new ArrayList<>()); private int processedBytes = 0; private long numRecordsRead = 0; private long numRecordsWritten = 0; - protected final List throwables = Collections.synchronizedList(new ArrayList<>()); + // auto-incremental number for each produced record. + // This is used with the outputKeyPrefix to ensure a sorted output. 
+ private int stdoutKeyNum; + private int stderrKeyNum; + private String currentChromosome; + private int currentPosition; + private String outputKeyPrefix; private volatile boolean processProvidedStatus_ = false; private VariantMetadata metadata; @@ -112,11 +116,6 @@ protected void setup(Context context) throws IOException, InterruptedException { writerFactory = new VariantWriterFactory(metadataManager); query = VariantMapReduceUtil.getQueryFromConfig(conf); options = VariantMapReduceUtil.getQueryOptionsFromConfig(conf); - Variant variant = context.getCurrentValue(); - firstKey = variant.getChromosome() + ":" + variant.getStart(); - outputKeyPrefix = buildOutputKeyPrefix(variant.getChromosome(), variant.getStart()); - stdoutKeyNum = 0; - stderrKeyNum = 0; } public static String buildOutputKeyPrefix(String chromosome, Integer start) { @@ -143,14 +142,17 @@ public void run(Context context) throws IOException, InterruptedException { startProcess(context); // Do-while instead of "while", as we've already called context.nextKeyValue() once do { - // FIXME: If the chromosome is different, we should start a new process and get a new outputKeyPrefix + currentValue = context.getCurrentValue(); + // Restart the process if the input bytes exceed the limit + // or if the chromosome changes if (processedBytes > maxInputBytesPerProcess) { LOG.info("Processed bytes = " + processedBytes + " > " + maxInputBytesPerProcess + ". Restarting process."); - context.getCounter(COUNTER_GROUP_NAME, "RESTARTED_PROCESS").increment(1); - closeProcess(context); - startProcess(context); + restartProcess(context, "BYTES_LIMIT"); + } else if (!currentChromosome.equals(currentValue.getChromosome())) { + LOG.info("Chromosome changed from " + currentChromosome + " to " + currentValue.getChromosome() + + ". 
Restarting process."); + restartProcess(context, "CHR_CHANGE"); } - currentValue = context.getCurrentValue(); map(context.getCurrentKey(), currentValue, context); } while (!hasExceptions() && context.nextKeyValue()); } catch (Throwable th) { @@ -187,6 +189,13 @@ public void run(Context context) throws IOException, InterruptedException { throwExceptionIfAny(); } + private void restartProcess(Mapper.Context context, String reason) + throws IOException, InterruptedException, StorageEngineException { + context.getCounter(COUNTER_GROUP_NAME, "RESTARTED_PROCESS_" + reason).increment(1); + closeProcess(context); + startProcess(context); + } + private boolean hasExceptions() { return !throwables.isEmpty(); } @@ -336,10 +345,20 @@ private void closeProcess(Context context) throws IOException, InterruptedExcept // drainStdout(context); } - private void startProcess(Context context) throws IOException, StorageEngineException { + private void startProcess(Context context) throws IOException, StorageEngineException, InterruptedException { LOG.info("bash -ce '" + commandLine + "'"); context.getCounter(COUNTER_GROUP_NAME, "START_PROCESS").increment(1); + Variant variant = context.getCurrentValue(); + currentChromosome = variant.getChromosome(); + currentPosition = variant.getStart(); + if (firstVariant == null) { + firstVariant = variant.getChromosome() + ":" + variant.getStart(); + } + outputKeyPrefix = buildOutputKeyPrefix(variant.getChromosome(), variant.getStart()); + stdoutKeyNum = 0; + stderrKeyNum = 0; + // Start the process ProcessBuilder builder = new ProcessBuilder("bash", "-ce", commandLine); // System.getenv().forEach((k, v) -> LOG.info("SYSTEM ENV: " + k + "=" + v)); @@ -500,7 +519,8 @@ public void run() { stopWatch.start(); write("---------- " + context.getTaskAttemptID().toString() + " -----------"); write("Start time : " + TimeUtils.getTimeMillis()); - write("Batch start : " + firstKey + " -> " + outputKeyPrefix); + write("Input split : " + firstVariant); + 
write("Batch start : " + currentChromosome + ":" + currentPosition + " -> " + outputKeyPrefix); write("sub-process #" + processCount); write("--- START STDERR ---"); int numRecords = 0; From 7fd439a7733872b38c66b5f4d19e2c2fac64d0d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Tue, 29 Oct 2024 22:05:26 +0000 Subject: [PATCH 030/122] storage: Fix GenomeHellper generateBootPreSplits. #TASK-6722 --- .../opencb/opencga/storage/hadoop/variant/GenomeHelper.java | 5 +++-- .../storage/hadoop/variant/mr/StreamVariantMapper.java | 1 + .../opencga/storage/hadoop/variant/GenomeHelperTest.java | 5 +++++ 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/GenomeHelper.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/GenomeHelper.java index 0276898a61..9305908857 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/GenomeHelper.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/GenomeHelper.java @@ -159,9 +159,10 @@ static List generateBootPreSplits(int numberOfSplits, BiFunction " + maxInputBytesPerProcess + ". Restarting process."); restartProcess(context, "BYTES_LIMIT"); } else if (!currentChromosome.equals(currentValue.getChromosome())) { + // TODO: Should we change only when the chromosome change would produce a partition change? LOG.info("Chromosome changed from " + currentChromosome + " to " + currentValue.getChromosome() + ". 
Restarting process."); restartProcess(context, "CHR_CHANGE"); diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/GenomeHelperTest.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/GenomeHelperTest.java index 4438c66890..ab359f1b91 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/GenomeHelperTest.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/GenomeHelperTest.java @@ -71,6 +71,11 @@ public void testGenerateSplitArchive() throws Exception { assertOrder(GenomeHelper.generateBootPreSplitsHuman(30, (chr, pos) -> keyFactory.generateBlockIdAsBytes(1, chr, pos)), 30); } + @Test + public void testGenerateSplitArchiveMultiple() throws Exception { + assertOrder(GenomeHelper.generateBootPreSplitsHuman(2, (chr, pos) -> keyFactory.generateBlockIdAsBytes(1, chr, pos)), 2); + } + @Test public void testGenerateSplitVariant() throws Exception { int expectedSize = 10; From e6128b0f5223c3ce2c3517e32e3ccb9722b1ea0a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Wed, 30 Oct 2024 15:12:34 +0000 Subject: [PATCH 031/122] storage: Do not interrupt header with empty lines while concat. 
#TASK-6722 --- .../storage/hadoop/utils/AbstractHBaseDriver.java | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/utils/AbstractHBaseDriver.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/utils/AbstractHBaseDriver.java index 3773141eef..e86a8dd3fb 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/utils/AbstractHBaseDriver.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/utils/AbstractHBaseDriver.java @@ -471,9 +471,11 @@ protected List concatMrOutputToLocal(Path mrOutdir, Path localOutput, bool if (paths.isEmpty()) { LOGGER.warn("The MapReduce job didn't produce any output. This may not be expected."); } else if (paths.size() == 1) { - LOGGER.info("Copy to local file " + paths.get(0).toUri() + " to " + localOutput.toUri()); + LOGGER.info("Copy to local file"); + LOGGER.info(" Source : {} ({})", + paths.get(0).toUri(), humanReadableByteCount(fileSystem.getFileStatus(paths.get(0)).getLen(), false)); + LOGGER.info(" Target : {}", localOutput.toUri()); fileSystem.copyToLocalFile(false, paths.get(0), localOutput); - LOGGER.info("File size : " + humanReadableByteCount(Files.size(Paths.get(localOutput.toUri())), false)); } else { LOGGER.info("Concat and copy to local " + paths.size()); LOGGER.info(" Source : " + mrOutdir.toUri()); @@ -485,8 +487,8 @@ protected List concatMrOutputToLocal(Path mrOutdir, Path localOutput, bool OutputStream os = gzOs == null ? fsOs : gzOs; for (int i = 0; i < paths.size(); i++) { Path path = paths.get(i); - LOGGER.info("Concat {}file : '{}' {} ", - isGzip ? "gzip " : "", + LOGGER.info("Concat {} : '{}' ({}) ", + isGzip ? 
"gzip file" : "file", path.toUri(), humanReadableByteCount(fileSystem.getFileStatus(path).getLen(), false)); try (FSDataInputStream fsIs = fileSystem.open(path)) { @@ -503,7 +505,8 @@ protected List concatMrOutputToLocal(Path mrOutdir, Path localOutput, bool do { br.mark(10 * 1024 * 1024); //10MB line = br.readLine(); - } while (line != null && line.startsWith("#")); + // Skip blank lines and + } while (line != null && (StringUtils.isBlank(line) || line.startsWith("#"))); br.reset(); is = new ReaderInputStream(br, Charset.defaultCharset()); } From f45c22306e333867d6753d082b1ffc970ecdff59 Mon Sep 17 00:00:00 2001 From: JuanfeSanahuja Date: Wed, 30 Oct 2024 19:59:49 +0100 Subject: [PATCH 032/122] Prepare release 3.0.1 --- opencga-analysis/pom.xml | 2 +- opencga-app/pom.xml | 2 +- opencga-catalog/pom.xml | 2 +- opencga-client/pom.xml | 2 +- opencga-clinical/pom.xml | 2 +- opencga-core/pom.xml | 2 +- opencga-master/pom.xml | 2 +- opencga-server/pom.xml | 2 +- opencga-storage/opencga-storage-app/pom.xml | 2 +- opencga-storage/opencga-storage-benchmark/pom.xml | 2 +- opencga-storage/opencga-storage-core/pom.xml | 2 +- .../opencga-storage-hadoop-compat-api/pom.xml | 2 +- .../opencga-storage-hadoop-compat-hbase2.0/pom.xml | 2 +- .../opencga-storage-hadoop-compat-hbase2.2/pom.xml | 2 +- .../opencga-storage-hadoop-compat-hbase2.4/pom.xml | 2 +- .../opencga-storage-hadoop-compat/pom.xml | 2 +- .../opencga-storage-hadoop/opencga-storage-hadoop-core/pom.xml | 2 +- .../opencga-storage-hadoop-lib-emr6.1/pom.xml | 2 +- .../opencga-storage-hadoop-lib-emr6.13/pom.xml | 2 +- .../opencga-storage-hadoop-lib-hdi5.1/pom.xml | 2 +- .../opencga-storage-hadoop-lib-hdp3.1/pom.xml | 2 +- .../opencga-storage-hadoop/opencga-storage-hadoop-lib/pom.xml | 2 +- opencga-storage/opencga-storage-hadoop/pom.xml | 2 +- opencga-storage/opencga-storage-server/pom.xml | 2 +- opencga-storage/pom.xml | 2 +- opencga-test/pom.xml | 2 +- pom.xml | 2 +- 27 files changed, 27 insertions(+), 27 deletions(-) diff 
--git a/opencga-analysis/pom.xml b/opencga-analysis/pom.xml index 314766dc1d..43b1160fe2 100644 --- a/opencga-analysis/pom.xml +++ b/opencga-analysis/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 3.0.1-SNAPSHOT + 3.0.1 ../pom.xml diff --git a/opencga-app/pom.xml b/opencga-app/pom.xml index eaa503dd09..c4dfcb85e4 100644 --- a/opencga-app/pom.xml +++ b/opencga-app/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 3.0.1-SNAPSHOT + 3.0.1 ../pom.xml diff --git a/opencga-catalog/pom.xml b/opencga-catalog/pom.xml index f6f5768166..fadfdd92a1 100644 --- a/opencga-catalog/pom.xml +++ b/opencga-catalog/pom.xml @@ -23,7 +23,7 @@ org.opencb.opencga opencga - 3.0.1-SNAPSHOT + 3.0.1 ../pom.xml diff --git a/opencga-client/pom.xml b/opencga-client/pom.xml index 909598e048..0ca39ca098 100644 --- a/opencga-client/pom.xml +++ b/opencga-client/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 3.0.1-SNAPSHOT + 3.0.1 ../pom.xml diff --git a/opencga-clinical/pom.xml b/opencga-clinical/pom.xml index c173e0eb19..8963d152c2 100644 --- a/opencga-clinical/pom.xml +++ b/opencga-clinical/pom.xml @@ -5,7 +5,7 @@ org.opencb.opencga opencga - 3.0.1-SNAPSHOT + 3.0.1 ../pom.xml 4.0.0 diff --git a/opencga-core/pom.xml b/opencga-core/pom.xml index 43f0f3f65b..80cc54dcb2 100644 --- a/opencga-core/pom.xml +++ b/opencga-core/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 3.0.1-SNAPSHOT + 3.0.1 ../pom.xml diff --git a/opencga-master/pom.xml b/opencga-master/pom.xml index bc7eeae241..fce3eef301 100644 --- a/opencga-master/pom.xml +++ b/opencga-master/pom.xml @@ -22,7 +22,7 @@ opencga org.opencb.opencga - 3.0.1-SNAPSHOT + 3.0.1 ../pom.xml diff --git a/opencga-server/pom.xml b/opencga-server/pom.xml index 1b1e16acbc..c35e769d46 100644 --- a/opencga-server/pom.xml +++ b/opencga-server/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 3.0.1-SNAPSHOT + 3.0.1 ../pom.xml diff --git a/opencga-storage/opencga-storage-app/pom.xml b/opencga-storage/opencga-storage-app/pom.xml index 
8b4822ad6e..7f5ba7e7ca 100644 --- a/opencga-storage/opencga-storage-app/pom.xml +++ b/opencga-storage/opencga-storage-app/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage - 3.0.1-SNAPSHOT + 3.0.1 ../pom.xml diff --git a/opencga-storage/opencga-storage-benchmark/pom.xml b/opencga-storage/opencga-storage-benchmark/pom.xml index 0c0bf1c727..9de3546a3e 100644 --- a/opencga-storage/opencga-storage-benchmark/pom.xml +++ b/opencga-storage/opencga-storage-benchmark/pom.xml @@ -22,7 +22,7 @@ opencga-storage org.opencb.opencga - 3.0.1-SNAPSHOT + 3.0.1 ../pom.xml diff --git a/opencga-storage/opencga-storage-core/pom.xml b/opencga-storage/opencga-storage-core/pom.xml index 20cd38689a..8182135dd4 100644 --- a/opencga-storage/opencga-storage-core/pom.xml +++ b/opencga-storage/opencga-storage-core/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage - 3.0.1-SNAPSHOT + 3.0.1 ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-api/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-api/pom.xml index 3d0375fea0..31dd086e2b 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-api/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-api/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage-hadoop-compat - 3.0.1-SNAPSHOT + 3.0.1 ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.0/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.0/pom.xml index 6837051499..e95b3ba1c0 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.0/pom.xml +++ 
b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.0/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage-hadoop-compat - 3.0.1-SNAPSHOT + 3.0.1 ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.2/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.2/pom.xml index b5852968a7..66034e11b8 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.2/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.2/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage-hadoop-compat - 3.0.1-SNAPSHOT + 3.0.1 ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.4/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.4/pom.xml index 5e097253ee..27d4d67362 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.4/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.4/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage-hadoop-compat - 3.0.1-SNAPSHOT + 3.0.1 ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/pom.xml index 88200e3425..5ef22584c7 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/pom.xml @@ -23,7 +23,7 @@ org.opencb.opencga opencga-storage-hadoop - 3.0.1-SNAPSHOT + 3.0.1 ../pom.xml diff --git 
a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/pom.xml index 2d94efc52f..cedc3c90e9 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/pom.xml @@ -23,7 +23,7 @@ org.opencb.opencga opencga-storage-hadoop - 3.0.1-SNAPSHOT + 3.0.1 ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-emr6.1/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-emr6.1/pom.xml index 0b50aadf33..d75d126306 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-emr6.1/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-emr6.1/pom.xml @@ -7,7 +7,7 @@ org.opencb.opencga opencga-storage-hadoop-lib - 3.0.1-SNAPSHOT + 3.0.1 ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-emr6.13/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-emr6.13/pom.xml index 25941e5afe..5faf374ce1 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-emr6.13/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-emr6.13/pom.xml @@ -7,7 +7,7 @@ org.opencb.opencga opencga-storage-hadoop-lib - 3.0.1-SNAPSHOT + 3.0.1 ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-hdi5.1/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-hdi5.1/pom.xml index 91438cbe32..e2dc27a8fd 100644 --- 
a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-hdi5.1/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-hdi5.1/pom.xml @@ -7,7 +7,7 @@ org.opencb.opencga opencga-storage-hadoop-lib - 3.0.1-SNAPSHOT + 3.0.1 ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-hdp3.1/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-hdp3.1/pom.xml index 0026c0c4ba..87b718d629 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-hdp3.1/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-hdp3.1/pom.xml @@ -7,7 +7,7 @@ org.opencb.opencga opencga-storage-hadoop-lib - 3.0.1-SNAPSHOT + 3.0.1 ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/pom.xml index d824df6b12..e2561199b0 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/pom.xml @@ -23,7 +23,7 @@ org.opencb.opencga opencga-storage-hadoop - 3.0.1-SNAPSHOT + 3.0.1 ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/pom.xml b/opencga-storage/opencga-storage-hadoop/pom.xml index 470344d436..1c008fe8fa 100644 --- a/opencga-storage/opencga-storage-hadoop/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/pom.xml @@ -23,7 +23,7 @@ org.opencb.opencga opencga-storage - 3.0.1-SNAPSHOT + 3.0.1 ../pom.xml diff --git a/opencga-storage/opencga-storage-server/pom.xml b/opencga-storage/opencga-storage-server/pom.xml index 215850fd9b..0c614d54af 100644 --- a/opencga-storage/opencga-storage-server/pom.xml +++ b/opencga-storage/opencga-storage-server/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga 
opencga-storage - 3.0.1-SNAPSHOT + 3.0.1 ../pom.xml diff --git a/opencga-storage/pom.xml b/opencga-storage/pom.xml index b2e665c34f..7a177fdf3e 100644 --- a/opencga-storage/pom.xml +++ b/opencga-storage/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 3.0.1-SNAPSHOT + 3.0.1 ../pom.xml diff --git a/opencga-test/pom.xml b/opencga-test/pom.xml index 0ef882d8ac..37d0589acb 100644 --- a/opencga-test/pom.xml +++ b/opencga-test/pom.xml @@ -24,7 +24,7 @@ org.opencb.opencga opencga - 3.0.1-SNAPSHOT + 3.0.1 ../pom.xml diff --git a/pom.xml b/pom.xml index cc351a473d..a9c566886a 100644 --- a/pom.xml +++ b/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 3.0.1-SNAPSHOT + 3.0.1 pom OpenCGA From e2c126162d16203a05fcd1a48b8e56125ae71860 Mon Sep 17 00:00:00 2001 From: JuanfeSanahuja Date: Wed, 30 Oct 2024 19:59:54 +0100 Subject: [PATCH 033/122] Prepare new development version 3.0.2-SNAPSHOT --- opencga-analysis/pom.xml | 2 +- opencga-app/pom.xml | 2 +- opencga-catalog/pom.xml | 2 +- opencga-client/pom.xml | 2 +- opencga-clinical/pom.xml | 2 +- opencga-core/pom.xml | 2 +- opencga-master/pom.xml | 2 +- opencga-server/pom.xml | 2 +- opencga-storage/opencga-storage-app/pom.xml | 2 +- opencga-storage/opencga-storage-benchmark/pom.xml | 2 +- opencga-storage/opencga-storage-core/pom.xml | 2 +- .../opencga-storage-hadoop-compat-api/pom.xml | 2 +- .../opencga-storage-hadoop-compat-hbase2.0/pom.xml | 2 +- .../opencga-storage-hadoop-compat-hbase2.2/pom.xml | 2 +- .../opencga-storage-hadoop-compat-hbase2.4/pom.xml | 2 +- .../opencga-storage-hadoop-compat/pom.xml | 2 +- .../opencga-storage-hadoop/opencga-storage-hadoop-core/pom.xml | 2 +- .../opencga-storage-hadoop-lib-emr6.1/pom.xml | 2 +- .../opencga-storage-hadoop-lib-emr6.13/pom.xml | 2 +- .../opencga-storage-hadoop-lib-hdi5.1/pom.xml | 2 +- .../opencga-storage-hadoop-lib-hdp3.1/pom.xml | 2 +- .../opencga-storage-hadoop/opencga-storage-hadoop-lib/pom.xml | 2 +- opencga-storage/opencga-storage-hadoop/pom.xml | 2 +- 
opencga-storage/opencga-storage-server/pom.xml | 2 +- opencga-storage/pom.xml | 2 +- opencga-test/pom.xml | 2 +- pom.xml | 2 +- 27 files changed, 27 insertions(+), 27 deletions(-) diff --git a/opencga-analysis/pom.xml b/opencga-analysis/pom.xml index 43b1160fe2..b87b735227 100644 --- a/opencga-analysis/pom.xml +++ b/opencga-analysis/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 3.0.1 + 3.0.2-SNAPSHOT ../pom.xml diff --git a/opencga-app/pom.xml b/opencga-app/pom.xml index c4dfcb85e4..9888d9eff1 100644 --- a/opencga-app/pom.xml +++ b/opencga-app/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 3.0.1 + 3.0.2-SNAPSHOT ../pom.xml diff --git a/opencga-catalog/pom.xml b/opencga-catalog/pom.xml index fadfdd92a1..9b90d1abe8 100644 --- a/opencga-catalog/pom.xml +++ b/opencga-catalog/pom.xml @@ -23,7 +23,7 @@ org.opencb.opencga opencga - 3.0.1 + 3.0.2-SNAPSHOT ../pom.xml diff --git a/opencga-client/pom.xml b/opencga-client/pom.xml index 0ca39ca098..e79ddeca00 100644 --- a/opencga-client/pom.xml +++ b/opencga-client/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 3.0.1 + 3.0.2-SNAPSHOT ../pom.xml diff --git a/opencga-clinical/pom.xml b/opencga-clinical/pom.xml index 8963d152c2..d14ae03e2d 100644 --- a/opencga-clinical/pom.xml +++ b/opencga-clinical/pom.xml @@ -5,7 +5,7 @@ org.opencb.opencga opencga - 3.0.1 + 3.0.2-SNAPSHOT ../pom.xml 4.0.0 diff --git a/opencga-core/pom.xml b/opencga-core/pom.xml index 80cc54dcb2..7fdcef9359 100644 --- a/opencga-core/pom.xml +++ b/opencga-core/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 3.0.1 + 3.0.2-SNAPSHOT ../pom.xml diff --git a/opencga-master/pom.xml b/opencga-master/pom.xml index fce3eef301..4b28c21116 100644 --- a/opencga-master/pom.xml +++ b/opencga-master/pom.xml @@ -22,7 +22,7 @@ opencga org.opencb.opencga - 3.0.1 + 3.0.2-SNAPSHOT ../pom.xml diff --git a/opencga-server/pom.xml b/opencga-server/pom.xml index c35e769d46..2b17811efd 100644 --- a/opencga-server/pom.xml +++ b/opencga-server/pom.xml @@ -22,7 
+22,7 @@ org.opencb.opencga opencga - 3.0.1 + 3.0.2-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-app/pom.xml b/opencga-storage/opencga-storage-app/pom.xml index 7f5ba7e7ca..2ea3d81f94 100644 --- a/opencga-storage/opencga-storage-app/pom.xml +++ b/opencga-storage/opencga-storage-app/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage - 3.0.1 + 3.0.2-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-benchmark/pom.xml b/opencga-storage/opencga-storage-benchmark/pom.xml index 9de3546a3e..d9eac96485 100644 --- a/opencga-storage/opencga-storage-benchmark/pom.xml +++ b/opencga-storage/opencga-storage-benchmark/pom.xml @@ -22,7 +22,7 @@ opencga-storage org.opencb.opencga - 3.0.1 + 3.0.2-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-core/pom.xml b/opencga-storage/opencga-storage-core/pom.xml index 8182135dd4..4c272f5582 100644 --- a/opencga-storage/opencga-storage-core/pom.xml +++ b/opencga-storage/opencga-storage-core/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage - 3.0.1 + 3.0.2-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-api/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-api/pom.xml index 31dd086e2b..d8175c811e 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-api/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-api/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage-hadoop-compat - 3.0.1 + 3.0.2-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.0/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.0/pom.xml index e95b3ba1c0..b9497bf885 100644 --- 
a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.0/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.0/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage-hadoop-compat - 3.0.1 + 3.0.2-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.2/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.2/pom.xml index 66034e11b8..2e8fc54913 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.2/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.2/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage-hadoop-compat - 3.0.1 + 3.0.2-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.4/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.4/pom.xml index 27d4d67362..4c3be0aacf 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.4/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.4/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage-hadoop-compat - 3.0.1 + 3.0.2-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/pom.xml index 5ef22584c7..3f28c2c77a 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/pom.xml @@ -23,7 +23,7 @@ 
org.opencb.opencga opencga-storage-hadoop - 3.0.1 + 3.0.2-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/pom.xml index cedc3c90e9..016818d607 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/pom.xml @@ -23,7 +23,7 @@ org.opencb.opencga opencga-storage-hadoop - 3.0.1 + 3.0.2-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-emr6.1/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-emr6.1/pom.xml index d75d126306..fa81312adc 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-emr6.1/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-emr6.1/pom.xml @@ -7,7 +7,7 @@ org.opencb.opencga opencga-storage-hadoop-lib - 3.0.1 + 3.0.2-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-emr6.13/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-emr6.13/pom.xml index 5faf374ce1..94fd573d1c 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-emr6.13/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-emr6.13/pom.xml @@ -7,7 +7,7 @@ org.opencb.opencga opencga-storage-hadoop-lib - 3.0.1 + 3.0.2-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-hdi5.1/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-hdi5.1/pom.xml index e2dc27a8fd..78becbf80e 100644 --- 
a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-hdi5.1/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-hdi5.1/pom.xml @@ -7,7 +7,7 @@ org.opencb.opencga opencga-storage-hadoop-lib - 3.0.1 + 3.0.2-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-hdp3.1/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-hdp3.1/pom.xml index 87b718d629..5f7d8651c9 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-hdp3.1/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-hdp3.1/pom.xml @@ -7,7 +7,7 @@ org.opencb.opencga opencga-storage-hadoop-lib - 3.0.1 + 3.0.2-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/pom.xml index e2561199b0..01b610468f 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/pom.xml @@ -23,7 +23,7 @@ org.opencb.opencga opencga-storage-hadoop - 3.0.1 + 3.0.2-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/pom.xml b/opencga-storage/opencga-storage-hadoop/pom.xml index 1c008fe8fa..9ef4161339 100644 --- a/opencga-storage/opencga-storage-hadoop/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/pom.xml @@ -23,7 +23,7 @@ org.opencb.opencga opencga-storage - 3.0.1 + 3.0.2-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-server/pom.xml b/opencga-storage/opencga-storage-server/pom.xml index 0c614d54af..8fc3460884 100644 --- a/opencga-storage/opencga-storage-server/pom.xml +++ b/opencga-storage/opencga-storage-server/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga 
opencga-storage - 3.0.1 + 3.0.2-SNAPSHOT ../pom.xml diff --git a/opencga-storage/pom.xml b/opencga-storage/pom.xml index 7a177fdf3e..9298492d10 100644 --- a/opencga-storage/pom.xml +++ b/opencga-storage/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 3.0.1 + 3.0.2-SNAPSHOT ../pom.xml diff --git a/opencga-test/pom.xml b/opencga-test/pom.xml index 37d0589acb..923d0bc200 100644 --- a/opencga-test/pom.xml +++ b/opencga-test/pom.xml @@ -24,7 +24,7 @@ org.opencb.opencga opencga - 3.0.1 + 3.0.2-SNAPSHOT ../pom.xml diff --git a/pom.xml b/pom.xml index a9c566886a..39db4b7cfc 100644 --- a/pom.xml +++ b/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 3.0.1 + 3.0.2-SNAPSHOT pom OpenCGA From 345a6f2889f1f039e2c21f195482b0a9424807f0 Mon Sep 17 00:00:00 2001 From: JuanfeSanahuja Date: Wed, 30 Oct 2024 20:24:12 +0100 Subject: [PATCH 034/122] Prepare release 3.0.2 --- pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index 39db4b7cfc..f68aa475a8 100644 --- a/pom.xml +++ b/pom.xml @@ -43,8 +43,8 @@ - - 3.0.1 + 3.0.2 + 3.0.2 6.0.0 3.0.0 5.0.0 From 3863e5ecf597b7f922348f4dd2c0d43d24e0f5f4 Mon Sep 17 00:00:00 2001 From: JuanfeSanahuja Date: Wed, 30 Oct 2024 20:27:51 +0100 Subject: [PATCH 035/122] Prepare release 3.0.1 --- .github/workflows/release.yml | 26 +++++++++---------- opencga-analysis/pom.xml | 2 +- opencga-app/pom.xml | 2 +- opencga-catalog/pom.xml | 2 +- opencga-client/pom.xml | 2 +- opencga-clinical/pom.xml | 2 +- opencga-core/pom.xml | 2 +- opencga-master/pom.xml | 2 +- opencga-server/pom.xml | 2 +- opencga-storage/opencga-storage-app/pom.xml | 2 +- .../opencga-storage-benchmark/pom.xml | 2 +- opencga-storage/opencga-storage-core/pom.xml | 2 +- .../opencga-storage-hadoop-compat-api/pom.xml | 2 +- .../pom.xml | 2 +- .../pom.xml | 2 +- .../pom.xml | 2 +- .../opencga-storage-hadoop-compat/pom.xml | 2 +- .../opencga-storage-hadoop-core/pom.xml | 2 +- .../opencga-storage-hadoop-lib-emr6.1/pom.xml | 2 +- .../pom.xml | 2 
+- .../opencga-storage-hadoop-lib-hdi5.1/pom.xml | 2 +- .../opencga-storage-hadoop-lib-hdp3.1/pom.xml | 2 +- .../opencga-storage-hadoop-lib/pom.xml | 2 +- .../opencga-storage-hadoop/pom.xml | 2 +- .../opencga-storage-server/pom.xml | 2 +- opencga-storage/pom.xml | 2 +- opencga-test/pom.xml | 2 +- pom.xml | 6 ++--- 28 files changed, 42 insertions(+), 42 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 86bcb47197..33aa540ca2 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -12,19 +12,19 @@ jobs: with: maven_opts: -P hdp3.1,RClient -Dopencga.war.name=opencga -Dcheckstyle.skip - deploy-maven: - uses: opencb/java-common-libs/.github/workflows/deploy-maven-repository-workflow.yml@develop - needs: build - with: - maven_opts: -P hdp3.1 -Dopencga.war.name=opencga - secrets: inherit - - deploy-docker: - uses: opencb/java-common-libs/.github/workflows/deploy-docker-hub-workflow.yml@develop - needs: build - with: - cli: python3 ./build/cloud/docker/docker-build.py push --images base,init --tag ${{ needs.build.outputs.version }} - secrets: inherit +# deploy-maven: +# uses: opencb/java-common-libs/.github/workflows/deploy-maven-repository-workflow.yml@develop +# needs: build +# with: +# maven_opts: -P hdp3.1 -Dopencga.war.name=opencga +# secrets: inherit +# +# deploy-docker: +# uses: opencb/java-common-libs/.github/workflows/deploy-docker-hub-workflow.yml@develop +# needs: build +# with: +# cli: python3 ./build/cloud/docker/docker-build.py push --images base,init --tag ${{ needs.build.outputs.version }} +# secrets: inherit deploy-python: uses: opencb/java-common-libs/.github/workflows/deploy-python-workflow.yml@develop diff --git a/opencga-analysis/pom.xml b/opencga-analysis/pom.xml index b87b735227..43b1160fe2 100644 --- a/opencga-analysis/pom.xml +++ b/opencga-analysis/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 3.0.2-SNAPSHOT + 3.0.1 ../pom.xml diff --git a/opencga-app/pom.xml 
b/opencga-app/pom.xml index 9888d9eff1..c4dfcb85e4 100644 --- a/opencga-app/pom.xml +++ b/opencga-app/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 3.0.2-SNAPSHOT + 3.0.1 ../pom.xml diff --git a/opencga-catalog/pom.xml b/opencga-catalog/pom.xml index 9b90d1abe8..fadfdd92a1 100644 --- a/opencga-catalog/pom.xml +++ b/opencga-catalog/pom.xml @@ -23,7 +23,7 @@ org.opencb.opencga opencga - 3.0.2-SNAPSHOT + 3.0.1 ../pom.xml diff --git a/opencga-client/pom.xml b/opencga-client/pom.xml index e79ddeca00..0ca39ca098 100644 --- a/opencga-client/pom.xml +++ b/opencga-client/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 3.0.2-SNAPSHOT + 3.0.1 ../pom.xml diff --git a/opencga-clinical/pom.xml b/opencga-clinical/pom.xml index d14ae03e2d..8963d152c2 100644 --- a/opencga-clinical/pom.xml +++ b/opencga-clinical/pom.xml @@ -5,7 +5,7 @@ org.opencb.opencga opencga - 3.0.2-SNAPSHOT + 3.0.1 ../pom.xml 4.0.0 diff --git a/opencga-core/pom.xml b/opencga-core/pom.xml index 7fdcef9359..80cc54dcb2 100644 --- a/opencga-core/pom.xml +++ b/opencga-core/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 3.0.2-SNAPSHOT + 3.0.1 ../pom.xml diff --git a/opencga-master/pom.xml b/opencga-master/pom.xml index 4b28c21116..fce3eef301 100644 --- a/opencga-master/pom.xml +++ b/opencga-master/pom.xml @@ -22,7 +22,7 @@ opencga org.opencb.opencga - 3.0.2-SNAPSHOT + 3.0.1 ../pom.xml diff --git a/opencga-server/pom.xml b/opencga-server/pom.xml index 2b17811efd..c35e769d46 100644 --- a/opencga-server/pom.xml +++ b/opencga-server/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 3.0.2-SNAPSHOT + 3.0.1 ../pom.xml diff --git a/opencga-storage/opencga-storage-app/pom.xml b/opencga-storage/opencga-storage-app/pom.xml index 2ea3d81f94..7f5ba7e7ca 100644 --- a/opencga-storage/opencga-storage-app/pom.xml +++ b/opencga-storage/opencga-storage-app/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage - 3.0.2-SNAPSHOT + 3.0.1 ../pom.xml diff --git 
a/opencga-storage/opencga-storage-benchmark/pom.xml b/opencga-storage/opencga-storage-benchmark/pom.xml index d9eac96485..9de3546a3e 100644 --- a/opencga-storage/opencga-storage-benchmark/pom.xml +++ b/opencga-storage/opencga-storage-benchmark/pom.xml @@ -22,7 +22,7 @@ opencga-storage org.opencb.opencga - 3.0.2-SNAPSHOT + 3.0.1 ../pom.xml diff --git a/opencga-storage/opencga-storage-core/pom.xml b/opencga-storage/opencga-storage-core/pom.xml index 4c272f5582..8182135dd4 100644 --- a/opencga-storage/opencga-storage-core/pom.xml +++ b/opencga-storage/opencga-storage-core/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage - 3.0.2-SNAPSHOT + 3.0.1 ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-api/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-api/pom.xml index d8175c811e..31dd086e2b 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-api/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-api/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage-hadoop-compat - 3.0.2-SNAPSHOT + 3.0.1 ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.0/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.0/pom.xml index b9497bf885..e95b3ba1c0 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.0/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.0/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage-hadoop-compat - 3.0.2-SNAPSHOT + 3.0.1 ../pom.xml diff --git 
a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.2/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.2/pom.xml index 2e8fc54913..66034e11b8 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.2/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.2/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage-hadoop-compat - 3.0.2-SNAPSHOT + 3.0.1 ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.4/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.4/pom.xml index 4c3be0aacf..27d4d67362 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.4/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.4/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage-hadoop-compat - 3.0.2-SNAPSHOT + 3.0.1 ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/pom.xml index 3f28c2c77a..5ef22584c7 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/pom.xml @@ -23,7 +23,7 @@ org.opencb.opencga opencga-storage-hadoop - 3.0.2-SNAPSHOT + 3.0.1 ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/pom.xml index 016818d607..cedc3c90e9 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/pom.xml 
+++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/pom.xml @@ -23,7 +23,7 @@ org.opencb.opencga opencga-storage-hadoop - 3.0.2-SNAPSHOT + 3.0.1 ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-emr6.1/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-emr6.1/pom.xml index fa81312adc..d75d126306 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-emr6.1/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-emr6.1/pom.xml @@ -7,7 +7,7 @@ org.opencb.opencga opencga-storage-hadoop-lib - 3.0.2-SNAPSHOT + 3.0.1 ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-emr6.13/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-emr6.13/pom.xml index 94fd573d1c..5faf374ce1 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-emr6.13/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-emr6.13/pom.xml @@ -7,7 +7,7 @@ org.opencb.opencga opencga-storage-hadoop-lib - 3.0.2-SNAPSHOT + 3.0.1 ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-hdi5.1/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-hdi5.1/pom.xml index 78becbf80e..e2dc27a8fd 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-hdi5.1/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-hdi5.1/pom.xml @@ -7,7 +7,7 @@ org.opencb.opencga opencga-storage-hadoop-lib - 3.0.2-SNAPSHOT + 3.0.1 ../pom.xml diff --git 
a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-hdp3.1/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-hdp3.1/pom.xml index 5f7d8651c9..87b718d629 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-hdp3.1/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-hdp3.1/pom.xml @@ -7,7 +7,7 @@ org.opencb.opencga opencga-storage-hadoop-lib - 3.0.2-SNAPSHOT + 3.0.1 ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/pom.xml index 01b610468f..e2561199b0 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/pom.xml @@ -23,7 +23,7 @@ org.opencb.opencga opencga-storage-hadoop - 3.0.2-SNAPSHOT + 3.0.1 ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/pom.xml b/opencga-storage/opencga-storage-hadoop/pom.xml index 9ef4161339..1c008fe8fa 100644 --- a/opencga-storage/opencga-storage-hadoop/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/pom.xml @@ -23,7 +23,7 @@ org.opencb.opencga opencga-storage - 3.0.2-SNAPSHOT + 3.0.1 ../pom.xml diff --git a/opencga-storage/opencga-storage-server/pom.xml b/opencga-storage/opencga-storage-server/pom.xml index 8fc3460884..0c614d54af 100644 --- a/opencga-storage/opencga-storage-server/pom.xml +++ b/opencga-storage/opencga-storage-server/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage - 3.0.2-SNAPSHOT + 3.0.1 ../pom.xml diff --git a/opencga-storage/pom.xml b/opencga-storage/pom.xml index 9298492d10..7a177fdf3e 100644 --- a/opencga-storage/pom.xml +++ b/opencga-storage/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 3.0.2-SNAPSHOT + 3.0.1 ../pom.xml diff --git a/opencga-test/pom.xml 
b/opencga-test/pom.xml index 923d0bc200..37d0589acb 100644 --- a/opencga-test/pom.xml +++ b/opencga-test/pom.xml @@ -24,7 +24,7 @@ org.opencb.opencga opencga - 3.0.2-SNAPSHOT + 3.0.1 ../pom.xml diff --git a/pom.xml b/pom.xml index f68aa475a8..a19afedca3 100644 --- a/pom.xml +++ b/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 3.0.2-SNAPSHOT + 3.0.1 pom OpenCGA @@ -43,8 +43,8 @@ - 3.0.2 - 3.0.2 + 3.0.1 + 3.0.1 6.0.0 3.0.0 5.0.0 From 1269834c23a3ec3aa55d107f9d7941c983b1973c Mon Sep 17 00:00:00 2001 From: JuanfeSanahuja Date: Wed, 30 Oct 2024 20:31:57 +0100 Subject: [PATCH 036/122] Prepare release 3.0.1 --- .github/workflows/release.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 33aa540ca2..f283f1a003 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -36,7 +36,8 @@ jobs: release: uses: opencb/java-common-libs/.github/workflows/release-github-workflow.yml@develop - needs: [ build, deploy-maven, deploy-docker, deploy-python ] + # needs: [ build, deploy-maven, deploy-docker, deploy-python ] + needs: build with: artifact: build-folder file: | From 40484f00b63075f6415207c40ea95b75a04e1a70 Mon Sep 17 00:00:00 2001 From: JuanfeSanahuja Date: Wed, 30 Oct 2024 20:32:32 +0100 Subject: [PATCH 037/122] Prepare release 3.0.1 --- .github/workflows/release.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index f283f1a003..c02f0d3833 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -37,7 +37,7 @@ jobs: release: uses: opencb/java-common-libs/.github/workflows/release-github-workflow.yml@develop # needs: [ build, deploy-maven, deploy-docker, deploy-python ] - needs: build + needs: deploy-python with: artifact: build-folder file: | From 20423c3406b831ff821e0690d682e9385e98828c Mon Sep 17 00:00:00 2001 From: JuanfeSanahuja Date: Wed, 30 Oct 
2024 20:40:49 +0100 Subject: [PATCH 038/122] Prepare release 3.0.2 --- .github/workflows/release.yml | 29 +++++++++---------- opencga-analysis/pom.xml | 2 +- opencga-app/pom.xml | 2 +- opencga-catalog/pom.xml | 2 +- opencga-client/pom.xml | 2 +- opencga-clinical/pom.xml | 2 +- opencga-core/pom.xml | 2 +- opencga-master/pom.xml | 2 +- opencga-server/pom.xml | 2 +- opencga-storage/opencga-storage-app/pom.xml | 2 +- .../opencga-storage-benchmark/pom.xml | 2 +- opencga-storage/opencga-storage-core/pom.xml | 2 +- .../opencga-storage-hadoop-compat-api/pom.xml | 2 +- .../pom.xml | 2 +- .../pom.xml | 2 +- .../pom.xml | 2 +- .../opencga-storage-hadoop-compat/pom.xml | 2 +- .../opencga-storage-hadoop-core/pom.xml | 2 +- .../opencga-storage-hadoop-lib-emr6.1/pom.xml | 2 +- .../pom.xml | 2 +- .../opencga-storage-hadoop-lib-hdi5.1/pom.xml | 2 +- .../opencga-storage-hadoop-lib-hdp3.1/pom.xml | 2 +- .../opencga-storage-hadoop-lib/pom.xml | 2 +- .../opencga-storage-hadoop/pom.xml | 2 +- .../opencga-storage-server/pom.xml | 2 +- opencga-storage/pom.xml | 2 +- opencga-test/pom.xml | 2 +- pom.xml | 6 ++-- 28 files changed, 43 insertions(+), 44 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index c02f0d3833..86bcb47197 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -12,19 +12,19 @@ jobs: with: maven_opts: -P hdp3.1,RClient -Dopencga.war.name=opencga -Dcheckstyle.skip -# deploy-maven: -# uses: opencb/java-common-libs/.github/workflows/deploy-maven-repository-workflow.yml@develop -# needs: build -# with: -# maven_opts: -P hdp3.1 -Dopencga.war.name=opencga -# secrets: inherit -# -# deploy-docker: -# uses: opencb/java-common-libs/.github/workflows/deploy-docker-hub-workflow.yml@develop -# needs: build -# with: -# cli: python3 ./build/cloud/docker/docker-build.py push --images base,init --tag ${{ needs.build.outputs.version }} -# secrets: inherit + deploy-maven: + uses: 
opencb/java-common-libs/.github/workflows/deploy-maven-repository-workflow.yml@develop + needs: build + with: + maven_opts: -P hdp3.1 -Dopencga.war.name=opencga + secrets: inherit + + deploy-docker: + uses: opencb/java-common-libs/.github/workflows/deploy-docker-hub-workflow.yml@develop + needs: build + with: + cli: python3 ./build/cloud/docker/docker-build.py push --images base,init --tag ${{ needs.build.outputs.version }} + secrets: inherit deploy-python: uses: opencb/java-common-libs/.github/workflows/deploy-python-workflow.yml@develop @@ -36,8 +36,7 @@ jobs: release: uses: opencb/java-common-libs/.github/workflows/release-github-workflow.yml@develop - # needs: [ build, deploy-maven, deploy-docker, deploy-python ] - needs: deploy-python + needs: [ build, deploy-maven, deploy-docker, deploy-python ] with: artifact: build-folder file: | diff --git a/opencga-analysis/pom.xml b/opencga-analysis/pom.xml index 43b1160fe2..b87b735227 100644 --- a/opencga-analysis/pom.xml +++ b/opencga-analysis/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 3.0.1 + 3.0.2-SNAPSHOT ../pom.xml diff --git a/opencga-app/pom.xml b/opencga-app/pom.xml index c4dfcb85e4..9888d9eff1 100644 --- a/opencga-app/pom.xml +++ b/opencga-app/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 3.0.1 + 3.0.2-SNAPSHOT ../pom.xml diff --git a/opencga-catalog/pom.xml b/opencga-catalog/pom.xml index fadfdd92a1..9b90d1abe8 100644 --- a/opencga-catalog/pom.xml +++ b/opencga-catalog/pom.xml @@ -23,7 +23,7 @@ org.opencb.opencga opencga - 3.0.1 + 3.0.2-SNAPSHOT ../pom.xml diff --git a/opencga-client/pom.xml b/opencga-client/pom.xml index 0ca39ca098..e79ddeca00 100644 --- a/opencga-client/pom.xml +++ b/opencga-client/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 3.0.1 + 3.0.2-SNAPSHOT ../pom.xml diff --git a/opencga-clinical/pom.xml b/opencga-clinical/pom.xml index 8963d152c2..d14ae03e2d 100644 --- a/opencga-clinical/pom.xml +++ b/opencga-clinical/pom.xml @@ -5,7 +5,7 @@ org.opencb.opencga opencga 
- 3.0.1 + 3.0.2-SNAPSHOT ../pom.xml 4.0.0 diff --git a/opencga-core/pom.xml b/opencga-core/pom.xml index 80cc54dcb2..7fdcef9359 100644 --- a/opencga-core/pom.xml +++ b/opencga-core/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 3.0.1 + 3.0.2-SNAPSHOT ../pom.xml diff --git a/opencga-master/pom.xml b/opencga-master/pom.xml index fce3eef301..4b28c21116 100644 --- a/opencga-master/pom.xml +++ b/opencga-master/pom.xml @@ -22,7 +22,7 @@ opencga org.opencb.opencga - 3.0.1 + 3.0.2-SNAPSHOT ../pom.xml diff --git a/opencga-server/pom.xml b/opencga-server/pom.xml index c35e769d46..2b17811efd 100644 --- a/opencga-server/pom.xml +++ b/opencga-server/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 3.0.1 + 3.0.2-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-app/pom.xml b/opencga-storage/opencga-storage-app/pom.xml index 7f5ba7e7ca..2ea3d81f94 100644 --- a/opencga-storage/opencga-storage-app/pom.xml +++ b/opencga-storage/opencga-storage-app/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage - 3.0.1 + 3.0.2-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-benchmark/pom.xml b/opencga-storage/opencga-storage-benchmark/pom.xml index 9de3546a3e..d9eac96485 100644 --- a/opencga-storage/opencga-storage-benchmark/pom.xml +++ b/opencga-storage/opencga-storage-benchmark/pom.xml @@ -22,7 +22,7 @@ opencga-storage org.opencb.opencga - 3.0.1 + 3.0.2-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-core/pom.xml b/opencga-storage/opencga-storage-core/pom.xml index 8182135dd4..4c272f5582 100644 --- a/opencga-storage/opencga-storage-core/pom.xml +++ b/opencga-storage/opencga-storage-core/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage - 3.0.1 + 3.0.2-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-api/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-api/pom.xml index 
31dd086e2b..d8175c811e 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-api/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-api/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage-hadoop-compat - 3.0.1 + 3.0.2-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.0/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.0/pom.xml index e95b3ba1c0..b9497bf885 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.0/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.0/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage-hadoop-compat - 3.0.1 + 3.0.2-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.2/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.2/pom.xml index 66034e11b8..2e8fc54913 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.2/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.2/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage-hadoop-compat - 3.0.1 + 3.0.2-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.4/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.4/pom.xml index 27d4d67362..4c3be0aacf 100644 --- 
a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.4/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.4/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage-hadoop-compat - 3.0.1 + 3.0.2-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/pom.xml index 5ef22584c7..3f28c2c77a 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/pom.xml @@ -23,7 +23,7 @@ org.opencb.opencga opencga-storage-hadoop - 3.0.1 + 3.0.2-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/pom.xml index cedc3c90e9..016818d607 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/pom.xml @@ -23,7 +23,7 @@ org.opencb.opencga opencga-storage-hadoop - 3.0.1 + 3.0.2-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-emr6.1/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-emr6.1/pom.xml index d75d126306..fa81312adc 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-emr6.1/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-emr6.1/pom.xml @@ -7,7 +7,7 @@ org.opencb.opencga opencga-storage-hadoop-lib - 3.0.1 + 3.0.2-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-emr6.13/pom.xml 
b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-emr6.13/pom.xml index 5faf374ce1..94fd573d1c 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-emr6.13/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-emr6.13/pom.xml @@ -7,7 +7,7 @@ org.opencb.opencga opencga-storage-hadoop-lib - 3.0.1 + 3.0.2-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-hdi5.1/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-hdi5.1/pom.xml index e2dc27a8fd..78becbf80e 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-hdi5.1/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-hdi5.1/pom.xml @@ -7,7 +7,7 @@ org.opencb.opencga opencga-storage-hadoop-lib - 3.0.1 + 3.0.2-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-hdp3.1/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-hdp3.1/pom.xml index 87b718d629..5f7d8651c9 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-hdp3.1/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-hdp3.1/pom.xml @@ -7,7 +7,7 @@ org.opencb.opencga opencga-storage-hadoop-lib - 3.0.1 + 3.0.2-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/pom.xml index e2561199b0..01b610468f 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/pom.xml +++ 
b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/pom.xml @@ -23,7 +23,7 @@ org.opencb.opencga opencga-storage-hadoop - 3.0.1 + 3.0.2-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/pom.xml b/opencga-storage/opencga-storage-hadoop/pom.xml index 1c008fe8fa..9ef4161339 100644 --- a/opencga-storage/opencga-storage-hadoop/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/pom.xml @@ -23,7 +23,7 @@ org.opencb.opencga opencga-storage - 3.0.1 + 3.0.2-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-server/pom.xml b/opencga-storage/opencga-storage-server/pom.xml index 0c614d54af..8fc3460884 100644 --- a/opencga-storage/opencga-storage-server/pom.xml +++ b/opencga-storage/opencga-storage-server/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage - 3.0.1 + 3.0.2-SNAPSHOT ../pom.xml diff --git a/opencga-storage/pom.xml b/opencga-storage/pom.xml index 7a177fdf3e..9298492d10 100644 --- a/opencga-storage/pom.xml +++ b/opencga-storage/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 3.0.1 + 3.0.2-SNAPSHOT ../pom.xml diff --git a/opencga-test/pom.xml b/opencga-test/pom.xml index 37d0589acb..923d0bc200 100644 --- a/opencga-test/pom.xml +++ b/opencga-test/pom.xml @@ -24,7 +24,7 @@ org.opencb.opencga opencga - 3.0.1 + 3.0.2-SNAPSHOT ../pom.xml diff --git a/pom.xml b/pom.xml index a19afedca3..f68aa475a8 100644 --- a/pom.xml +++ b/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 3.0.1 + 3.0.2-SNAPSHOT pom OpenCGA @@ -43,8 +43,8 @@ - 3.0.1 - 3.0.1 + 3.0.2 + 3.0.2 6.0.0 3.0.0 5.0.0 From 100fecfcc93076cdf51767f2dd8f16d8f352c760 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Thu, 31 Oct 2024 13:49:44 +0000 Subject: [PATCH 039/122] storage: Replace ImmutableBytesWritable with VariantLocusKey as map output key. 
#TASK-6722 --- .../hadoop/variant/io/VariantDriver.java | 32 +++- .../variant/mr/StreamVariantDriver.java | 30 ++-- .../variant/mr/StreamVariantMapper.java | 37 ++--- .../variant/mr/StreamVariantPartitioner.java | 64 -------- .../variant/mr/StreamVariantReducer.java | 14 +- .../hadoop/variant/mr/VariantLocusKey.java | 146 +++++++++++++++++ .../mr/VariantLocusKeyPartitioner.java | 50 ++++++ .../variant/mr/StreamVariantMapperTest.java | 41 ----- .../mr/StreamVariantPartitionerTest.java | 57 ------- .../mr/VariantLocusKeyPartitionerTest.java | 56 +++++++ .../variant/mr/VariantLocusKeyTest.java | 152 ++++++++++++++++++ 11 files changed, 459 insertions(+), 220 deletions(-) delete mode 100644 opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantPartitioner.java create mode 100644 opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/VariantLocusKey.java create mode 100644 opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/VariantLocusKeyPartitioner.java delete mode 100644 opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantMapperTest.java delete mode 100644 opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantPartitionerTest.java create mode 100644 opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/mr/VariantLocusKeyPartitionerTest.java create mode 100644 opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/mr/VariantLocusKeyTest.java diff --git 
a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/io/VariantDriver.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/io/VariantDriver.java index d11b1824aa..223c0b9155 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/io/VariantDriver.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/io/VariantDriver.java @@ -3,8 +3,10 @@ import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.client.Scan; +import org.apache.hadoop.mapred.JobContext; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.OutputFormat; +import org.apache.hadoop.mapreduce.Partitioner; import org.apache.hadoop.mapreduce.Reducer; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import org.opencb.commons.datastore.core.Query; @@ -93,6 +95,10 @@ protected void parseAndValidateParameters() throws IOException { protected abstract Class getReducerClass(); + protected Class getPartitioner() { + return null; + } + protected abstract Class getOutputFormatClass(); protected abstract void setupJob(Job job) throws IOException; @@ -160,12 +166,30 @@ protected final Job setupJob(Job job, String archiveTable, String variantTable) } protected void setupReducer(Job job, String variantTable) throws IOException { - logger.info("Use one Reduce task to produce a single file"); - job.setReducerClass(getReducerClass()); - job.setNumReduceTasks(1); + Class partitionerClass = getPartitioner(); + if (partitionerClass == null) { + logger.info("Use one Reduce task to produce a single file"); + job.setReducerClass(getReducerClass()); + job.setNumReduceTasks(1); + } else { + String numReducersKey = getClass().getSimpleName() 
+ "." + JobContext.NUM_REDUCES; + String numReducersStr = getParam(numReducersKey); + int reduceTasks; + if (StringUtils.isNotEmpty(numReducersStr)) { + reduceTasks = Integer.parseInt(numReducersStr); + logger.info("Set reduce tasks to " + reduceTasks + " (derived from input parameter '" + numReducersKey + "')"); + } else { + int serversSize = getHBaseManager().act(variantTable, (table, admin) -> admin.getClusterStatus().getServersSize()); + // Set the number of reduce tasks to 2x times the number of servers + reduceTasks = serversSize * 2; + logger.info("Set reduce tasks to " + reduceTasks + " (derived from 'number_of_servers * 2')"); + } + job.setReducerClass(getReducerClass()); + job.setPartitionerClass(partitionerClass); + job.setNumReduceTasks(reduceTasks); + } } - @Override protected void postExecution(boolean succeed) throws IOException, StorageEngineException { super.postExecution(succeed); diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantDriver.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantDriver.java index ccf2007408..b6cedb5d48 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantDriver.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantDriver.java @@ -2,7 +2,6 @@ import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hbase.io.ImmutableBytesWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.compress.CompressionCodec; import org.apache.hadoop.io.compress.DeflateCodec; @@ -10,6 +9,7 @@ import org.apache.hadoop.mapred.JobContext; import org.apache.hadoop.mapreduce.Job; import 
org.apache.hadoop.mapreduce.OutputFormat; +import org.apache.hadoop.mapreduce.Partitioner; import org.apache.hadoop.mapreduce.Reducer; import org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat; import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs; @@ -113,6 +113,11 @@ protected Class getReducerClass() { return reducerClass; } + @Override + protected Class getPartitioner() { + return VariantLocusKeyPartitioner.class; + } + @Override protected Class getOutputFormatClass() { return outputFormatClass; @@ -124,9 +129,10 @@ protected void setupJob(Job job) throws IOException { job.getConfiguration().setBoolean(JobContext.MAP_OUTPUT_COMPRESS, true); job.getConfiguration().setClass(JobContext.MAP_OUTPUT_COMPRESS_CODEC, DeflateCodec.class, CompressionCodec.class); - Class keyClass = ImmutableBytesWritable.class; -// Class keyClass = NullWritable.class; -// Class keyClass = Text.class; + Class keyClass = VariantLocusKey.class; +// Class keyClass = ImmutableBytesWritable.class; +// Class keyClass = NullWritable.class; +// Class keyClass = Text.class; Class valueClass = Text.class; mapperClass = StreamVariantMapper.class; @@ -155,21 +161,7 @@ protected void setupJob(Job job) throws IOException { @Override protected void setupReducer(Job job, String variantTableName) throws IOException { - String numReducersKey = getClass().getSimpleName() + "." 
+ JobContext.NUM_REDUCES; - String numReducersStr = getParam(numReducersKey); - int reduceTasks; - if (StringUtils.isNotEmpty(numReducersStr)) { - reduceTasks = Integer.parseInt(numReducersStr); - logger.info("Set reduce tasks to " + reduceTasks + " (derived from input parameter '" + numReducersKey + "')"); - } else { - int serversSize = getHBaseManager().act(variantTableName, (table, admin) -> admin.getClusterStatus().getServersSize()); - // Set the number of reduce tasks to 2x the number of hosts - reduceTasks = serversSize * 2; - logger.info("Set reduce tasks to " + reduceTasks + " (derived from 'number_of_servers * 2')"); - } - job.setReducerClass(getReducerClass()); - job.setPartitionerClass(StreamVariantPartitioner.class); - job.setNumReduceTasks(reduceTasks); + super.setupReducer(job, variantTableName); // TODO: Use a grouping comparator to group by chromosome and position, ignoring the rest of the key? // job.setGroupingComparatorClass(StreamVariantGroupingComparator.class); // job.setSortComparatorClass(); diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantMapper.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantMapper.java index f6fc4d74c5..03c3aa6b0b 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantMapper.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantMapper.java @@ -31,7 +31,7 @@ import static org.opencb.opencga.storage.hadoop.variant.mr.VariantsTableMapReduceHelper.COUNTER_GROUP_NAME; -public class StreamVariantMapper extends VariantMapper { +public class StreamVariantMapper extends VariantMapper { private static final Log LOG = 
LogFactory.getLog(StreamVariantMapper.class); private static final int BUFFER_SIZE = 128 * 1024; @@ -72,12 +72,11 @@ public class StreamVariantMapper extends VariantMapper 01 - // 3 -> 03 - // 22 -> 22 - // If the first character is a digit, and the second is not, add a 0 at the beginning - // MT -> MT - // 1_KI270712v1_random -> 01_KI270712v1_random - if (Character.isDigit(chromosome.charAt(0)) && (chromosome.length() == 1 || !Character.isDigit(chromosome.charAt(1)))) { - chromosome = "0" + chromosome; - } - - return String.format("%s|%010d|", chromosome, start); - } - @Override public void run(Context context) throws IOException, InterruptedException { if (context.nextKeyValue()) { @@ -190,7 +174,7 @@ public void run(Context context) throws IOException, InterruptedException { throwExceptionIfAny(); } - private void restartProcess(Mapper.Context context, String reason) + private void restartProcess(Mapper.Context context, String reason) throws IOException, InterruptedException, StorageEngineException { context.getCounter(COUNTER_GROUP_NAME, "RESTARTED_PROCESS_" + reason).increment(1); closeProcess(context); @@ -263,7 +247,7 @@ private void throwExceptionIfAny() throws IOException { } @Override - protected void cleanup(Mapper.Context context) throws IOException, InterruptedException { + protected void cleanup(Mapper.Context context) throws IOException, InterruptedException { closeProcess(context); dockerPruneImages(); super.cleanup(context); @@ -356,7 +340,6 @@ private void startProcess(Context context) throws IOException, StorageEngineExce if (firstVariant == null) { firstVariant = variant.getChromosome() + ":" + variant.getStart(); } - outputKeyPrefix = buildOutputKeyPrefix(variant.getChromosome(), variant.getStart()); stdoutKeyNum = 0; stderrKeyNum = 0; @@ -448,7 +431,7 @@ public static void addEnvironment(Map env, Configuration conf) { private class MROutputThread extends Thread { - private final Mapper.Context context; + private final Mapper.Context context; 
private long lastStdoutReport = 0; private int numRecords = 0; @@ -486,15 +469,14 @@ public void run() { private void write(Text line) throws IOException, InterruptedException { numRecords++; - context.write(new ImmutableBytesWritable( - Bytes.toBytes(String.format("%s%s%08d", StreamVariantReducer.STDOUT_KEY, outputKeyPrefix, stdoutKeyNum++))), line); + context.write(new VariantLocusKey(currentChromosome, currentPosition, StreamVariantReducer.STDOUT_KEY + (stdoutKeyNum++)), line); } } private class MRErrorThread extends Thread { private final Configuration conf; - private final Mapper.Context context; + private final Mapper.Context context; private long lastStderrReport = 0; private final String reporterPrefix; private final String counterPrefix; @@ -521,7 +503,7 @@ public void run() { write("---------- " + context.getTaskAttemptID().toString() + " -----------"); write("Start time : " + TimeUtils.getTimeMillis()); write("Input split : " + firstVariant); - write("Batch start : " + currentChromosome + ":" + currentPosition + " -> " + outputKeyPrefix); + write("Batch start : " + currentChromosome + ":" + currentPosition); write("sub-process #" + processCount); write("--- START STDERR ---"); int numRecords = 0; @@ -568,8 +550,7 @@ private void write(String line) throws IOException, InterruptedException { } private void write(Text line) throws IOException, InterruptedException { - context.write(new ImmutableBytesWritable( - Bytes.toBytes(String.format("%s%s%08d", StreamVariantReducer.STDERR_KEY, outputKeyPrefix, stderrKeyNum++))), line); + context.write(new VariantLocusKey(currentChromosome, currentPosition, StreamVariantReducer.STDERR_KEY + (stderrKeyNum++)), line); } private boolean matchesReporter(String line) { diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantPartitioner.java 
b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantPartitioner.java deleted file mode 100644 index d2e1f0056c..0000000000 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantPartitioner.java +++ /dev/null @@ -1,64 +0,0 @@ -package org.opencb.opencga.storage.hadoop.variant.mr; - -import org.apache.hadoop.conf.Configurable; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hbase.io.ImmutableBytesWritable; -import org.apache.hadoop.hbase.util.Bytes; -import org.apache.hadoop.io.Text; -import org.apache.hadoop.mapreduce.Job; -import org.apache.hadoop.mapreduce.Partitioner; -import org.opencb.opencga.storage.hadoop.variant.GenomeHelper; - -import java.io.IOException; -import java.util.List; -import java.util.TreeMap; - -public class StreamVariantPartitioner extends Partitioner implements Configurable { - - private TreeMap regionSplitsMap = new TreeMap<>(); - private Configuration conf; - - @Override - public void setConf(Configuration conf) { - this.conf = conf; - try { - Job job = Job.getInstance(conf); - int numReduceTasks = job.getNumReduceTasks(); - setup(numReduceTasks); - } catch (IOException e) { - throw new RuntimeException(e); - } - } - - public TreeMap setup(int numPartitions) { - List splits = GenomeHelper.generateBootPreSplitsHuman( - numPartitions, StreamVariantMapper::buildOutputKeyPrefix, String::compareTo, false); - regionSplitsMap.put(StreamVariantMapper.buildOutputKeyPrefix("0", 0), 0); - for (int i = 0; i < splits.size(); i++) { - regionSplitsMap.put(splits.get(i), regionSplitsMap.size()); - } - return regionSplitsMap; - } - - @Override - public Configuration getConf() { - return conf; - } - - @Override - public int getPartition(ImmutableBytesWritable key, Text text, int numPartitions) { - int start = key.getOffset() + 
StreamVariantReducer.STDOUT_KEY_BYTES.length; - byte[] bytes = key.get(); - // Find last '|' - int idx = 0; - for (int i = key.getLength() + key.getOffset() - 1; i >= 0; i--) { - if (bytes[i] == '|') { - idx = i; - break; - } - } - String chrPos = Bytes.toString(bytes, start, idx - start); - return regionSplitsMap.floorEntry(chrPos).getValue(); - } - -} diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantReducer.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantReducer.java index a6684c2d07..25598e593f 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantReducer.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantReducer.java @@ -11,7 +11,7 @@ import java.io.IOException; -public class StreamVariantReducer extends Reducer { +public class StreamVariantReducer extends Reducer { public static final String STDOUT_KEY = "O:"; public static final byte[] STDOUT_KEY_BYTES = Bytes.toBytes(STDOUT_KEY); @@ -20,22 +20,22 @@ public class StreamVariantReducer extends Reducer mos; + private MultipleOutputs mos; private boolean headerWritten = false; @Override - protected void setup(Reducer.Context context) + protected void setup(Reducer.Context context) throws IOException, InterruptedException { super.setup(context); mos = new MultipleOutputs<>(context); } @Override - protected void reduce(ImmutableBytesWritable key, Iterable values, - Reducer.Context context) + protected void reduce(VariantLocusKey key, Iterable values, + Reducer.Context context) throws IOException, InterruptedException { for (Text value : values) { - if (hasPrefix(key, STDOUT_KEY_BYTES)) { + if 
(key.getOther().startsWith(STDOUT_KEY)) { if (hasPrefix(value, HEADER_PREFIX_BYTES)) { if (headerWritten) { // skip header @@ -81,7 +81,7 @@ private static boolean hasPrefix(byte[] key, int offset, int length, byte[] pref } @Override - protected void cleanup(Reducer.Context context) + protected void cleanup(Reducer.Context context) throws IOException, InterruptedException { super.cleanup(context); mos.close(); diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/VariantLocusKey.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/VariantLocusKey.java new file mode 100644 index 0000000000..a198e03de5 --- /dev/null +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/VariantLocusKey.java @@ -0,0 +1,146 @@ +package org.opencb.opencga.storage.hadoop.variant.mr; + +import org.apache.hadoop.io.WritableComparable; +import org.opencb.biodata.models.variant.Variant; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; +import java.util.Objects; + +/** + * Genomic locus key. 
+ */ +public class VariantLocusKey implements WritableComparable { + private String chromosome; + private int position; + private String other; + + public VariantLocusKey() { + } + + public VariantLocusKey(String chromosome, int position) { + this.chromosome = chromosome; + this.position = position; + this.other = null; + } + + public VariantLocusKey(Variant variant) { + this(variant.getChromosome(), variant.getStart(), variant.getReference() + "_" + variant.getAlternate()); + } + + public VariantLocusKey(String chromosome, int position, String other) { + this.chromosome = chromosome; + this.position = position; + this.other = other; + } + + @Override + public int compareTo(VariantLocusKey o) { + String chr1; + String chr2; + if (isSingleDigitChromosome(chromosome)) { + chr1 = "0" + chromosome; + } else { + chr1 = chromosome; + } + if (isSingleDigitChromosome(o.chromosome)) { + chr2 = "0" + o.chromosome; + } else { + chr2 = o.chromosome; + } + int i = chr1.compareTo(chr2); + if (i == 0) { + i = position - o.position; + } + if (i == 0) { + if (other == null) { + i = o.other == null ? 
0 : -1; + } else if (o.other == null) { + i = 1; + } else { + i = other.compareTo(o.other); + } + } + return i; + } + + public static boolean isSingleDigitChromosome(String chromosome) { + return Character.isDigit(chromosome.charAt(0)) && (chromosome.length() == 1 || !Character.isDigit(chromosome.charAt(1))); + } + + @Override + public void write(DataOutput out) throws IOException { + out.writeChars(chromosome); + out.writeChars("\n"); + out.writeInt(position); + if (other != null) { + out.writeChars(other); + } else { + out.writeChars(""); + } + } + + @Override + public void readFields(DataInput in) throws IOException { + chromosome = in.readLine(); + position = in.readInt(); + other = in.readLine(); + } + + public String getChromosome() { + return chromosome; + } + + public VariantLocusKey setChromosome(String chromosome) { + this.chromosome = chromosome; + return this; + } + + public int getPosition() { + return position; + } + + public VariantLocusKey setPosition(int position) { + this.position = position; + return this; + } + + public String getOther() { + return other; + } + + public VariantLocusKey setOther(String other) { + this.other = other; + return this; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + VariantLocusKey that = (VariantLocusKey) o; + return position == that.position + && Objects.equals(chromosome, that.chromosome) + && Objects.equals(other, that.other); + } + + @Override + public int hashCode() { + return Objects.hash(chromosome, position, other); + } + + @Override + public String toString() { + final StringBuilder sb = new StringBuilder("VariantLocusKey{"); + sb.append("chromosome='").append(chromosome).append('\''); + sb.append(", position=").append(position); + sb.append(", other='").append(other).append('\''); + sb.append('}'); + return sb.toString(); + } +} diff --git 
a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/VariantLocusKeyPartitioner.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/VariantLocusKeyPartitioner.java new file mode 100644 index 0000000000..7bb2a4dfa2 --- /dev/null +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/VariantLocusKeyPartitioner.java @@ -0,0 +1,50 @@ +package org.opencb.opencga.storage.hadoop.variant.mr; + +import org.apache.hadoop.conf.Configurable; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.mapreduce.Partitioner; +import org.opencb.opencga.storage.hadoop.variant.GenomeHelper; + +import java.io.IOException; +import java.util.List; +import java.util.TreeMap; + +public class VariantLocusKeyPartitioner extends Partitioner implements Configurable { + + private final TreeMap regionSplitsMap = new TreeMap<>(); + private Configuration conf; + + @Override + public void setConf(Configuration conf) { + this.conf = conf; + try { + Job job = Job.getInstance(conf); + int numReduceTasks = job.getNumReduceTasks(); + setup(numReduceTasks); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + public TreeMap setup(int numPartitions) { + List splits = GenomeHelper.generateBootPreSplitsHuman( + numPartitions, VariantLocusKey::new, VariantLocusKey::compareTo, false); + regionSplitsMap.put(new VariantLocusKey("0", 0), 0); + for (int i = 0; i < splits.size(); i++) { + regionSplitsMap.put(splits.get(i), regionSplitsMap.size()); + } + return regionSplitsMap; + } + + @Override + public Configuration getConf() { + return conf; + } + + @Override + public int getPartition(VariantLocusKey variantLocusKey, V v, int numPartitions) { + return regionSplitsMap.floorEntry(variantLocusKey).getValue(); + 
} + +} diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantMapperTest.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantMapperTest.java deleted file mode 100644 index 690a16df5f..0000000000 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantMapperTest.java +++ /dev/null @@ -1,41 +0,0 @@ -package org.opencb.opencga.storage.hadoop.variant.mr; - -import org.junit.Test; -import org.junit.experimental.categories.Category; -import org.opencb.opencga.core.testclassification.duration.ShortTests; - -import static org.junit.Assert.*; - - -@Category(ShortTests.class) -public class StreamVariantMapperTest { - @Test - public void buildOutputKeyPrefixSingleDigitChromosome() { - String result = StreamVariantMapper.buildOutputKeyPrefix("1", 100); - assertEquals("01|0000000100|", result); - } - - @Test - public void buildOutputKeyPrefixDoubleDigitChromosome() { - String result = StreamVariantMapper.buildOutputKeyPrefix("22", 100); - assertEquals("22|0000000100|", result); - } - - @Test - public void buildOutputKeyPrefixRandomChromosome() { - String result = StreamVariantMapper.buildOutputKeyPrefix("1_KI270712v1_random", 100); - assertEquals("01_KI270712v1_random|0000000100|", result); - } - - @Test - public void buildOutputKeyPrefixMTChromosome() { - String result = StreamVariantMapper.buildOutputKeyPrefix("MT", 100); - assertEquals("MT|0000000100|", result); - } - - @Test - public void buildOutputKeyPrefixXChromosome() { - String result = StreamVariantMapper.buildOutputKeyPrefix("X", 100); - assertEquals("X|0000000100|", result); - } -} \ No newline at end of file diff --git 
a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantPartitionerTest.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantPartitionerTest.java deleted file mode 100644 index bbce3cd5cf..0000000000 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantPartitionerTest.java +++ /dev/null @@ -1,57 +0,0 @@ -package org.opencb.opencga.storage.hadoop.variant.mr; - -import org.apache.hadoop.hbase.io.ImmutableBytesWritable; -import org.apache.hadoop.hbase.util.Bytes; -import org.junit.Before; -import org.junit.Test; -import org.junit.experimental.categories.Category; -import org.opencb.opencga.core.testclassification.duration.ShortTests; - -import static org.junit.Assert.assertEquals; - -@Category(ShortTests.class) -public class StreamVariantPartitionerTest { - - public static final int NUM_PARTITIONS = 10; - private StreamVariantPartitioner partitioner; - - @Before - public void setUp() { - partitioner = new StreamVariantPartitioner(); - partitioner.setup(NUM_PARTITIONS); - } - - @Test - public void partitionerTest() { - assertEquals(0, partitioner.getPartition(new ImmutableBytesWritable(Bytes.toBytes("o:00|0000000001|")), null, NUM_PARTITIONS)); - assertEquals(0, partitioner.getPartition(new ImmutableBytesWritable(Bytes.toBytes("o:01|0000000000|")), null, NUM_PARTITIONS)); - assertEquals(0, partitioner.getPartition(new ImmutableBytesWritable(Bytes.toBytes("o:02|0000000000|")), null, NUM_PARTITIONS)); - assertEquals(1, partitioner.getPartition(new ImmutableBytesWritable(Bytes.toBytes("o:03|0000000000|")), null, NUM_PARTITIONS)); - assertEquals(2, partitioner.getPartition(new ImmutableBytesWritable(Bytes.toBytes("o:04|0000000000|")), null, NUM_PARTITIONS)); - assertEquals(2, partitioner.getPartition(new 
ImmutableBytesWritable(Bytes.toBytes("o:05|0000000000|")), null, NUM_PARTITIONS)); - assertEquals(3, partitioner.getPartition(new ImmutableBytesWritable(Bytes.toBytes("o:06|0000000000|")), null, NUM_PARTITIONS)); - assertEquals(3, partitioner.getPartition(new ImmutableBytesWritable(Bytes.toBytes("o:07|0000000000|")), null, NUM_PARTITIONS)); - assertEquals(4, partitioner.getPartition(new ImmutableBytesWritable(Bytes.toBytes("o:08|0000000000|")), null, NUM_PARTITIONS)); - assertEquals(4, partitioner.getPartition(new ImmutableBytesWritable(Bytes.toBytes("o:09|0000000000|")), null, NUM_PARTITIONS)); - assertEquals(5, partitioner.getPartition(new ImmutableBytesWritable(Bytes.toBytes("o:10|0000000000|")), null, NUM_PARTITIONS)); - assertEquals(5, partitioner.getPartition(new ImmutableBytesWritable(Bytes.toBytes("o:11|0000000000|")), null, NUM_PARTITIONS)); - assertEquals(6, partitioner.getPartition(new ImmutableBytesWritable(Bytes.toBytes("o:12|0000000000|")), null, NUM_PARTITIONS)); - assertEquals(6, partitioner.getPartition(new ImmutableBytesWritable(Bytes.toBytes("o:13|0000000000|")), null, NUM_PARTITIONS)); - assertEquals(7, partitioner.getPartition(new ImmutableBytesWritable(Bytes.toBytes("o:14|0000000000|")), null, NUM_PARTITIONS)); - assertEquals(7, partitioner.getPartition(new ImmutableBytesWritable(Bytes.toBytes("o:15|0000000000|")), null, NUM_PARTITIONS)); - assertEquals(7, partitioner.getPartition(new ImmutableBytesWritable(Bytes.toBytes("o:16|0000000000|")), null, NUM_PARTITIONS)); - assertEquals(8, partitioner.getPartition(new ImmutableBytesWritable(Bytes.toBytes("o:17|0000000000|")), null, NUM_PARTITIONS)); - assertEquals(8, partitioner.getPartition(new ImmutableBytesWritable(Bytes.toBytes("o:17_random_contig|0000000000|")), null, NUM_PARTITIONS)); - assertEquals(8, partitioner.getPartition(new ImmutableBytesWritable(Bytes.toBytes("o:18|0000000000|")), null, NUM_PARTITIONS)); - assertEquals(8, partitioner.getPartition(new 
ImmutableBytesWritable(Bytes.toBytes("o:19|0000000000|")), null, NUM_PARTITIONS)); - assertEquals(8, partitioner.getPartition(new ImmutableBytesWritable(Bytes.toBytes("o:20|0000000000|")), null, NUM_PARTITIONS)); - assertEquals(8, partitioner.getPartition(new ImmutableBytesWritable(Bytes.toBytes("o:21|0000000000|")), null, NUM_PARTITIONS)); - assertEquals(9, partitioner.getPartition(new ImmutableBytesWritable(Bytes.toBytes("o:22|0000000000|")), null, NUM_PARTITIONS)); - assertEquals(9, partitioner.getPartition(new ImmutableBytesWritable(Bytes.toBytes("o:X|0000000000|")), null, NUM_PARTITIONS)); - assertEquals(9, partitioner.getPartition(new ImmutableBytesWritable(Bytes.toBytes("o:Y|0000000000|")), null, NUM_PARTITIONS)); - assertEquals(9, partitioner.getPartition(new ImmutableBytesWritable(Bytes.toBytes("o:MT|0000000000|")), null, NUM_PARTITIONS)); - assertEquals(9, partitioner.getPartition(new ImmutableBytesWritable(Bytes.toBytes("o:Z|0000000000|")), null, NUM_PARTITIONS)); - assertEquals(9, partitioner.getPartition(new ImmutableBytesWritable(Bytes.toBytes("o:Z_random_contig|0000000000|")), null, NUM_PARTITIONS)); - } - -} \ No newline at end of file diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/mr/VariantLocusKeyPartitionerTest.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/mr/VariantLocusKeyPartitionerTest.java new file mode 100644 index 0000000000..8c4a1966da --- /dev/null +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/mr/VariantLocusKeyPartitionerTest.java @@ -0,0 +1,56 @@ +package org.opencb.opencga.storage.hadoop.variant.mr; + +import org.junit.Before; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.opencb.opencga.core.testclassification.duration.ShortTests; + +import 
static org.junit.Assert.assertEquals; + +@Category(ShortTests.class) +public class VariantLocusKeyPartitionerTest { + + public static final int NUM_PARTITIONS = 10; + private VariantLocusKeyPartitioner partitioner; + + @Before + public void setUp() { + partitioner = new VariantLocusKeyPartitioner<>(); + partitioner.setup(NUM_PARTITIONS); + } + + @Test + public void partitionerTest() { + assertEquals(0, partitioner.getPartition(new VariantLocusKey("0",1), null, NUM_PARTITIONS)); + assertEquals(0, partitioner.getPartition(new VariantLocusKey("1",0), null, NUM_PARTITIONS)); + assertEquals(0, partitioner.getPartition(new VariantLocusKey("2",0), null, NUM_PARTITIONS)); + assertEquals(1, partitioner.getPartition(new VariantLocusKey("3",0), null, NUM_PARTITIONS)); + assertEquals(2, partitioner.getPartition(new VariantLocusKey("4",0), null, NUM_PARTITIONS)); + assertEquals(2, partitioner.getPartition(new VariantLocusKey("5",0), null, NUM_PARTITIONS)); + assertEquals(3, partitioner.getPartition(new VariantLocusKey("6",0), null, NUM_PARTITIONS)); + assertEquals(3, partitioner.getPartition(new VariantLocusKey("7",0), null, NUM_PARTITIONS)); + assertEquals(4, partitioner.getPartition(new VariantLocusKey("8",0), null, NUM_PARTITIONS)); + assertEquals(4, partitioner.getPartition(new VariantLocusKey("9",0), null, NUM_PARTITIONS)); + assertEquals(5, partitioner.getPartition(new VariantLocusKey("10",0), null, NUM_PARTITIONS)); + assertEquals(5, partitioner.getPartition(new VariantLocusKey("11",0), null, NUM_PARTITIONS)); + assertEquals(6, partitioner.getPartition(new VariantLocusKey("12",0), null, NUM_PARTITIONS)); + assertEquals(6, partitioner.getPartition(new VariantLocusKey("13",0), null, NUM_PARTITIONS)); + assertEquals(7, partitioner.getPartition(new VariantLocusKey("14",0), null, NUM_PARTITIONS)); + assertEquals(7, partitioner.getPartition(new VariantLocusKey("15",0), null, NUM_PARTITIONS)); + assertEquals(7, partitioner.getPartition(new VariantLocusKey("16",0), null, 
NUM_PARTITIONS)); + assertEquals(8, partitioner.getPartition(new VariantLocusKey("17",0), null, NUM_PARTITIONS)); + assertEquals(8, partitioner.getPartition(new VariantLocusKey("17_random_contig",0), null, NUM_PARTITIONS)); + assertEquals(8, partitioner.getPartition(new VariantLocusKey("18",0), null, NUM_PARTITIONS)); + assertEquals(8, partitioner.getPartition(new VariantLocusKey("18",70880000), null, NUM_PARTITIONS)); + assertEquals(8, partitioner.getPartition(new VariantLocusKey("19",0), null, NUM_PARTITIONS)); + assertEquals(8, partitioner.getPartition(new VariantLocusKey("20",0), null, NUM_PARTITIONS)); + assertEquals(8, partitioner.getPartition(new VariantLocusKey("21",0), null, NUM_PARTITIONS)); + assertEquals(9, partitioner.getPartition(new VariantLocusKey("22",0), null, NUM_PARTITIONS)); + assertEquals(9, partitioner.getPartition(new VariantLocusKey("X",0), null, NUM_PARTITIONS)); + assertEquals(9, partitioner.getPartition(new VariantLocusKey("Y",0), null, NUM_PARTITIONS)); + assertEquals(9, partitioner.getPartition(new VariantLocusKey("MT",0), null, NUM_PARTITIONS)); + assertEquals(9, partitioner.getPartition(new VariantLocusKey("Z",0), null, NUM_PARTITIONS)); + assertEquals(9, partitioner.getPartition(new VariantLocusKey("Z_random_contig",0), null, NUM_PARTITIONS)); + } + +} \ No newline at end of file diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/mr/VariantLocusKeyTest.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/mr/VariantLocusKeyTest.java new file mode 100644 index 0000000000..5263d74997 --- /dev/null +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/mr/VariantLocusKeyTest.java @@ -0,0 +1,152 @@ +package org.opencb.opencga.storage.hadoop.variant.mr; + +import org.junit.Test; +import 
org.junit.experimental.categories.Category; +import org.opencb.opencga.core.testclassification.duration.ShortTests; + +import java.util.Arrays; +import java.util.List; + +import static org.junit.Assert.*; + +@Category(ShortTests.class) +public class VariantLocusKeyTest { + + @Test + public void shouldReturnTrueForEqualVariantLocusKeys() { + VariantLocusKey key1 = new VariantLocusKey("1", 1000); + VariantLocusKey key2 = new VariantLocusKey("1", 1000); + assertTrue(key1.equals(key2)); + } + + @Test + public void shouldReturnFalseForDifferentVariantLocusKeys() { + VariantLocusKey key1 = new VariantLocusKey("1", 1000); + VariantLocusKey key2 = new VariantLocusKey("2", 1000); + assertFalse(key1.equals(key2)); + } + + @Test + public void shouldReturnFalseForNullVariantLocusKey() { + VariantLocusKey key1 = new VariantLocusKey("1", 1000); + assertFalse(key1.equals(null)); + } + + @Test + public void shouldReturnFalseForDifferentObjectType() { + VariantLocusKey key1 = new VariantLocusKey("1", 1000); + String otherObject = "someString"; + assertFalse(key1.equals(otherObject)); + } + + @Test + public void shouldReturnConsistentHashCodeForEqualVariantLocusKeys() { + VariantLocusKey key1 = new VariantLocusKey("1", 1000); + VariantLocusKey key2 = new VariantLocusKey("1", 1000); + assertEquals(key1.hashCode(), key2.hashCode()); + } + + @Test + public void shouldReturnDifferentHashCodeForDifferentVariantLocusKeys() { + VariantLocusKey key1 = new VariantLocusKey("1", 1000); + VariantLocusKey key2 = new VariantLocusKey("2", 1000); + assertNotEquals(key1.hashCode(), key2.hashCode()); + } + + @Test + public void shouldReturnZeroForEqualVariantLocusKeys() { + VariantLocusKey key1 = new VariantLocusKey("1", 1000, "A"); + VariantLocusKey key2 = new VariantLocusKey("1", 1000, "A"); + assertEquals(0, key1.compareTo(key2)); + } + + @Test + public void shouldReturnNegativeForSmallerChromosome() { + VariantLocusKey key1 = new VariantLocusKey("1", 1000, "A"); + VariantLocusKey key2 = new 
VariantLocusKey("2", 1000, "A"); + assertTrue(key1.compareTo(key2) < 0); + } + + @Test + public void shouldReturnPositiveForLargerChromosome() { + VariantLocusKey key1 = new VariantLocusKey("2", 1000, "A"); + VariantLocusKey key2 = new VariantLocusKey("1", 1000, "A"); + assertTrue(key1.compareTo(key2) > 0); + } + + @Test + public void shouldReturnNegativeForSmallerPosition() { + VariantLocusKey key1 = new VariantLocusKey("1", 999, "A"); + VariantLocusKey key2 = new VariantLocusKey("1", 1000, "A"); + assertTrue(key1.compareTo(key2) < 0); + } + + @Test + public void shouldReturnPositiveForLargerPosition() { + VariantLocusKey key1 = new VariantLocusKey("1", 1001, "A"); + VariantLocusKey key2 = new VariantLocusKey("1", 1000, "A"); + assertTrue(key1.compareTo(key2) > 0); + } + + @Test + public void shouldReturnNegativeForSmallerOther() { + VariantLocusKey key1 = new VariantLocusKey("1", 1000, "A"); + VariantLocusKey key2 = new VariantLocusKey("1", 1000, "B"); + assertTrue(key1.compareTo(key2) < 0); + } + + @Test + public void shouldReturnPositiveForLargerOther() { + VariantLocusKey key1 = new VariantLocusKey("1", 1000, "B"); + VariantLocusKey key2 = new VariantLocusKey("1", 1000, "A"); + assertTrue(key1.compareTo(key2) > 0); + } + + @Test + public void shouldReturnZeroWhenBothOtherAreNull() { + VariantLocusKey key1 = new VariantLocusKey("1", 1000, null); + VariantLocusKey key2 = new VariantLocusKey("1", 1000, null); + assertEquals(0, key1.compareTo(key2)); + } + + @Test + public void shouldReturnNegativeWhenOtherIsNull() { + VariantLocusKey key1 = new VariantLocusKey("1", 1000, null); + VariantLocusKey key2 = new VariantLocusKey("1", 1000, "A"); + assertTrue(key1.compareTo(key2) < 0); + } + + @Test + public void shouldReturnPositiveWhenOtherIsNotNull() { + VariantLocusKey key1 = new VariantLocusKey("1", 1000, "A"); + VariantLocusKey key2 = new VariantLocusKey("1", 1000, null); + assertTrue(key1.compareTo(key2) > 0); + } + + @Test + public void 
shouldCompareChromosomesCorrectly() { + List keys = Arrays.asList( + new VariantLocusKey("1", 1000, "A"), + new VariantLocusKey("1_random", 1000, "A"), + new VariantLocusKey("2", 1000, "A"), + new VariantLocusKey("9", 1000, "A"), + new VariantLocusKey("10", 1000, "A"), + new VariantLocusKey("10_random", 1000, "A"), + new VariantLocusKey("19", 1000, "A"), + new VariantLocusKey("20", 1000, "A"), + new VariantLocusKey("22", 1000, "A"), + new VariantLocusKey("X", 1000, "A"), + new VariantLocusKey("Y", 1000, "A") + ); + + VariantLocusKey prevKey = null; + for (VariantLocusKey key : keys) { + if (prevKey == null) { + prevKey = key; + } else { + assertTrue(prevKey + " < " + key, prevKey.compareTo(key) < 0); + prevKey = key; + } + } + } +} \ No newline at end of file From 0df69dcc8fd540110fb8fb281da30ecc6a227070 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Thu, 31 Oct 2024 13:51:13 +0000 Subject: [PATCH 040/122] storage: Use VariantLocusKey and VariantLocusPartitioner in VariantExportDirver. 
#TASK-6722 --- .../core/variant/io/VariantWriterFactory.java | 23 +++++++++++--- .../hadoop/utils/AbstractHBaseDriver.java | 5 +-- .../variant/io/HadoopVariantExporter.java | 4 +-- .../variant/io/VariantExporterDriver.java | 31 +++++++++++++------ .../variant/mr/StreamVariantMapper.java | 6 ++-- 5 files changed, 49 insertions(+), 20 deletions(-) diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/io/VariantWriterFactory.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/io/VariantWriterFactory.java index 61c2e6552d..157a9a8465 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/io/VariantWriterFactory.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/io/VariantWriterFactory.java @@ -76,11 +76,11 @@ public enum VariantOutputFormat { VCF_GZ("vcf.gz", false), JSON("json"), JSON_GZ("json.gz"), - AVRO("avro"), - AVRO_GZ("avro.gz"), - AVRO_SNAPPY("avro.snappy"), - PARQUET("parquet"), - PARQUET_GZ("parquet.gz"), + AVRO("avro", true, true), + AVRO_GZ("avro.gz", true, true), + AVRO_SNAPPY("avro.snappy", true, true), + PARQUET("parquet", true, true), + PARQUET_GZ("parquet.gz", true, true), STATS("stats.tsv", false), STATS_GZ("stats.tsv.gz", false), CELLBASE("frequencies.json"), @@ -90,16 +90,25 @@ public enum VariantOutputFormat { ENSEMBL_VEP_GZ("vep.txt.gz", false); private final boolean multiStudy; + private final boolean binary; private final String extension; VariantOutputFormat(String extension) { this.extension = extension; this.multiStudy = true; + this.binary = false; } VariantOutputFormat(String extension, boolean multiStudy) { this.multiStudy = multiStudy; this.extension = extension; + this.binary = false; + } + + VariantOutputFormat(String extension, boolean multiStudy, boolean binary) { + this.multiStudy = multiStudy; + this.extension = extension; + this.binary = 
binary; } public String getExtension() { @@ -122,6 +131,10 @@ public boolean isSnappy() { return extension.endsWith(".snappy"); } + public boolean isBinary() { + return binary; + } + public VariantOutputFormat inPlain() { if (!isPlain()) { return VariantOutputFormat.valueOf(name().replace("_GZ", "").replace("_SNAPPY", "")); diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/utils/AbstractHBaseDriver.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/utils/AbstractHBaseDriver.java index e86a8dd3fb..8ee2092ce0 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/utils/AbstractHBaseDriver.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/utils/AbstractHBaseDriver.java @@ -477,7 +477,7 @@ protected List concatMrOutputToLocal(Path mrOutdir, Path localOutput, bool LOGGER.info(" Target : {}", localOutput.toUri()); fileSystem.copyToLocalFile(false, paths.get(0), localOutput); } else { - LOGGER.info("Concat and copy to local " + paths.size()); + LOGGER.info("Concat and copy to local : " + paths.size() + " partial files"); LOGGER.info(" Source : " + mrOutdir.toUri()); LOGGER.info(" Target : " + localOutput.toUri()); LOGGER.info(" ---- "); @@ -487,7 +487,8 @@ protected List concatMrOutputToLocal(Path mrOutdir, Path localOutput, bool OutputStream os = gzOs == null ? fsOs : gzOs; for (int i = 0; i < paths.size(); i++) { Path path = paths.get(i); - LOGGER.info("Concat {} : '{}' ({}) ", + LOGGER.info("[{}] Concat {} : '{}' ({}) ", + i, isGzip ? 
"gzip file" : "file", path.toUri(), humanReadableByteCount(fileSystem.getFileStatus(path).getLen(), false)); diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/io/HadoopVariantExporter.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/io/HadoopVariantExporter.java index b5b0cb0e6e..2bc76ab5e9 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/io/HadoopVariantExporter.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/io/HadoopVariantExporter.java @@ -180,13 +180,13 @@ public List export(@Nullable URI outputFileUri, VariantWriterFactory.Varian logger.info("Query for approximately {} of {} variants, which is {}% of the total." + " Consider small query." + " Skip MapReduce", - count, totalCount, matchRate * 100); + count, totalCount, String.format("%.2f", matchRate * 100)); smallQuery = true; } else { logger.info("Query for approximately {} of {} variants, which is {}% of the total." + " Current variants threshold is {}, and matchRatioThreshold is {}% ." 
+ " Not a small query", - count, totalCount, matchRate * 100, variantsThreshold, matchRatioThreshold); + count, totalCount, String.format("%.2f", matchRate * 100), variantsThreshold, matchRatioThreshold); } } catch (VariantSearchException e) { logger.info("Unable to count variants from SearchEngine", e); diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/io/VariantExporterDriver.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/io/VariantExporterDriver.java index c44e686e4d..d2489fac1c 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/io/VariantExporterDriver.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/io/VariantExporterDriver.java @@ -12,6 +12,7 @@ import org.apache.hadoop.io.compress.SnappyCodec; import org.apache.hadoop.mapred.JobContext; import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.mapreduce.Partitioner; import org.apache.hadoop.mapreduce.Reducer; import org.apache.hadoop.mapreduce.TaskAttemptContext; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; @@ -25,6 +26,8 @@ import org.opencb.opencga.storage.core.variant.io.VariantWriterFactory; import org.opencb.opencga.storage.hadoop.variant.AbstractVariantsTableDriver; import org.opencb.opencga.storage.hadoop.variant.mr.VariantFileOutputFormat; +import org.opencb.opencga.storage.hadoop.variant.mr.VariantLocusKey; +import org.opencb.opencga.storage.hadoop.variant.mr.VariantLocusKeyPartitioner; import org.opencb.opencga.storage.hadoop.variant.mr.VariantMapper; import java.io.IOException; @@ -41,6 +44,7 @@ public class VariantExporterDriver extends VariantDriver { private Class mapperClass; private Class reducerClass; private Class 
outputFormatClass; + private Class partitioner; @Override protected void parseAndValidateParameters() throws IOException { @@ -59,6 +63,11 @@ protected Class getReducerClass() { return reducerClass; } + @Override + protected Class getPartitioner() { + return partitioner; + } + @Override protected Class getOutputFormatClass() { return outputFormatClass; @@ -76,7 +85,7 @@ protected void setupJob(Job job) throws IOException { case AVRO: outputFormatClass = AvroKeyOutputFormat.class; if (useReduceStep) { - job.setMapOutputKeyClass(NullWritable.class); + job.setMapOutputKeyClass(VariantLocusKey.class); AvroJob.setMapOutputValueSchema(job, VariantAvro.getClassSchema()); AvroJob.setOutputKeySchema(job, VariantAvro.getClassSchema()); job.setOutputValueClass(NullWritable.class); @@ -108,11 +117,15 @@ protected void setupJob(Job job) throws IOException { } break; default: + if (outputFormat.isBinary()) { + throw new IllegalArgumentException("Unexpected binary output format " + outputFormat); + } if (useReduceStep) { - job.setMapOutputKeyClass(NullWritable.class); + job.setMapOutputKeyClass(VariantLocusKey.class); AvroJob.setMapOutputValueSchema(job, VariantAvro.getClassSchema()); mapperClass = AvroVariantExporterMapper.class; reducerClass = VariantExporterReducer.class; + partitioner = VariantLocusKeyPartitioner.class; } else { AvroJob.setOutputKeySchema(job, VariantAvro.getClassSchema()); mapperClass = VariantExporterDirectMapper.class; @@ -182,7 +195,7 @@ protected void map(Object key, Variant value, Context context) throws IOExceptio * @see VariantExporterReducer * @see AvroKeyVariantExporterReducer */ - public static class AvroVariantExporterMapper extends VariantMapper> { + public static class AvroVariantExporterMapper extends VariantMapper> { @Override protected void setup(Context context) throws IOException, InterruptedException { super.setup(context); @@ -193,7 +206,7 @@ protected void setup(Context context) throws IOException, InterruptedException { protected void 
map(Object key, Variant value, Context context) throws IOException, InterruptedException { context.getCounter(COUNTER_GROUP_NAME, "variants").increment(1); removeNullsFromAvro(value.getImpl(), context); - context.write(NullWritable.get(), new AvroValue<>(value.getImpl())); + context.write(new VariantLocusKey(value), new AvroValue<>(value.getImpl())); } } @@ -203,9 +216,9 @@ protected void map(Object key, Variant value, Context context) throws IOExceptio * @see AvroVariantExporterMapper * @see VariantWriterFactory.VariantOutputFormat */ - public static class VariantExporterReducer extends Reducer, Variant, NullWritable> { + public static class VariantExporterReducer extends Reducer, Variant, NullWritable> { @Override - protected void reduce(NullWritable key, Iterable> values, Context context) + protected void reduce(T key, Iterable> values, Context context) throws IOException, InterruptedException { for (AvroValue value : values) { context.write(new Variant(value.datum()), NullWritable.get()); @@ -219,10 +232,10 @@ protected void reduce(NullWritable key, Iterable> values, * @see AvroVariantExporterMapper * @see AvroKeyOutputFormat */ - public static class AvroKeyVariantExporterReducer - extends Reducer, AvroKey, NullWritable> { + public static class AvroKeyVariantExporterReducer + extends Reducer, AvroKey, NullWritable> { @Override - protected void reduce(NullWritable key, Iterable> values, Context context) + protected void reduce(T key, Iterable> values, Context context) throws IOException, InterruptedException { for (AvroValue value : values) { context.write(new AvroKey<>(value.datum()), NullWritable.get()); diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantMapper.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantMapper.java index 03c3aa6b0b..048c3455e1 100644 --- 
a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantMapper.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantMapper.java @@ -469,7 +469,8 @@ public void run() { private void write(Text line) throws IOException, InterruptedException { numRecords++; - context.write(new VariantLocusKey(currentChromosome, currentPosition, StreamVariantReducer.STDOUT_KEY + (stdoutKeyNum++)), line); + context.write(new VariantLocusKey(currentChromosome, currentPosition, + StreamVariantReducer.STDOUT_KEY + (stdoutKeyNum++)), line); } } @@ -550,7 +551,8 @@ private void write(String line) throws IOException, InterruptedException { } private void write(Text line) throws IOException, InterruptedException { - context.write(new VariantLocusKey(currentChromosome, currentPosition, StreamVariantReducer.STDERR_KEY + (stderrKeyNum++)), line); + context.write(new VariantLocusKey(currentChromosome, currentPosition, + StreamVariantReducer.STDERR_KEY + (stderrKeyNum++)), line); } private boolean matchesReporter(String line) { From f6fd3d46642b763c84075d0f7799a0fd0ca26add Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Fri, 1 Nov 2024 09:47:07 +0000 Subject: [PATCH 041/122] storage: Fix VariantLocusKey serialization. 
#TASK-6722 --- .../hadoop/utils/AbstractHBaseDriver.java | 41 +++++++++++++++++-- .../variant/mr/StreamVariantMapper.java | 10 ++--- .../variant/mr/StreamVariantReducer.java | 8 +++- .../hadoop/variant/mr/VariantLocusKey.java | 11 +++-- .../variant/mr/VariantLocusKeyTest.java | 26 ++++++++++++ 5 files changed, 80 insertions(+), 16 deletions(-) diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/utils/AbstractHBaseDriver.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/utils/AbstractHBaseDriver.java index 8ee2092ce0..99a119264c 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/utils/AbstractHBaseDriver.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/utils/AbstractHBaseDriver.java @@ -6,6 +6,7 @@ import org.apache.commons.lang3.time.StopWatch; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configured; +import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.*; import org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.hbase.mapreduce.TableInputFormat; @@ -19,6 +20,10 @@ import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.api.records.ApplicationReport; +import org.apache.hadoop.yarn.api.records.YarnApplicationState; +import org.apache.hadoop.yarn.client.api.YarnClient; +import org.apache.hadoop.yarn.exceptions.YarnException; import org.apache.parquet.hadoop.ParquetFileWriter; import org.apache.phoenix.mapreduce.util.PhoenixConfigurationUtil; import org.opencb.commons.datastore.core.ObjectMap; @@ -34,11 +39,9 @@ import java.nio.charset.Charset; import java.nio.file.Files; import java.nio.file.Paths; 
-import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.Map; +import java.util.*; import java.util.function.Supplier; +import java.util.stream.Collectors; import java.util.zip.GZIPInputStream; import java.util.zip.GZIPOutputStream; @@ -193,6 +196,7 @@ public final int run(String[] args) throws Exception { LOGGER.info(" - Outdir : " + job.getConfiguration().get(FileOutputFormat.OUTDIR)); } LOGGER.info("================================================="); + reportRunningJobs(); boolean succeed = executeJob(job); if (!succeed) { LOGGER.error("error with job!"); @@ -215,6 +219,32 @@ public final int run(String[] args) throws Exception { return succeed ? 0 : 1; } + private void reportRunningJobs() { + // Get the number of pending or running jobs in yarn + try (YarnClient yarnClient = YarnClient.createYarnClient()) { + yarnClient.init(getConf()); + yarnClient.start(); + + List applications = yarnClient.getApplications(EnumSet.of( + YarnApplicationState.NEW, + YarnApplicationState.NEW_SAVING, + YarnApplicationState.SUBMITTED, + YarnApplicationState.ACCEPTED, + YarnApplicationState.RUNNING)); + if (applications.isEmpty()) { + LOGGER.info("No pending or running jobs in yarn"); + } else { + LOGGER.info("Found " + applications.size() + " pending or running jobs in yarn"); + for (Map.Entry> entry : applications.stream() + .collect(Collectors.groupingBy(ApplicationReport::getYarnApplicationState)).entrySet()) { + LOGGER.info(" * " + entry.getKey() + " : " + entry.getValue().size()); + } + } + } catch (IOException | YarnException e) { + LOGGER.error("Error getting list of pending jobs from YARN", e); + } + } + private boolean configFromArgs(String[] args) { int fixedSizeArgs = getFixedSizeArgs(); @@ -468,6 +498,8 @@ protected List concatMrOutputToLocal(Path mrOutdir, Path localOutput, bool } } } + StopWatch stopWatch = new StopWatch(); + stopWatch.start(); if (paths.isEmpty()) { LOGGER.warn("The MapReduce job didn't produce any output. 
This may not be expected."); } else if (paths.size() == 1) { @@ -517,6 +549,7 @@ protected List concatMrOutputToLocal(Path mrOutdir, Path localOutput, bool } } LOGGER.info("File size : " + humanReadableByteCount(Files.size(Paths.get(localOutput.toUri())), false)); + LOGGER.info("Time to copy from HDFS and concat : " + TimeUtils.durationToString(stopWatch)); } return paths; } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantMapper.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantMapper.java index 048c3455e1..f0a1de0f73 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantMapper.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantMapper.java @@ -131,12 +131,12 @@ public void run(Context context) throws IOException, InterruptedException { // or if the chromosome changes if (processedBytes > maxInputBytesPerProcess) { LOG.info("Processed bytes = " + processedBytes + " > " + maxInputBytesPerProcess + ". Restarting process."); - restartProcess(context, "BYTES_LIMIT"); + restartProcess(context, "bytes_limit"); } else if (!currentChromosome.equals(currentValue.getChromosome())) { // TODO: Should we change only when the chromosome change would produce a partition change? LOG.info("Chromosome changed from " + currentChromosome + " to " + currentValue.getChromosome() + ". 
Restarting process."); - restartProcess(context, "CHR_CHANGE"); + restartProcess(context, "chr_change"); } map(context.getCurrentKey(), currentValue, context); } while (!hasExceptions() && context.nextKeyValue()); @@ -169,14 +169,14 @@ public void run(Context context) throws IOException, InterruptedException { addException(th); } } else { - context.getCounter(COUNTER_GROUP_NAME, "EMPTY_INPUT_SPLIT").increment(1); + context.getCounter(COUNTER_GROUP_NAME, "empty_input_split").increment(1); } throwExceptionIfAny(); } private void restartProcess(Mapper.Context context, String reason) throws IOException, InterruptedException, StorageEngineException { - context.getCounter(COUNTER_GROUP_NAME, "RESTARTED_PROCESS_" + reason).increment(1); + context.getCounter(COUNTER_GROUP_NAME, "restarted_process_" + reason).increment(1); closeProcess(context); startProcess(context); } @@ -332,7 +332,7 @@ private void closeProcess(Context context) throws IOException, InterruptedExcept private void startProcess(Context context) throws IOException, StorageEngineException, InterruptedException { LOG.info("bash -ce '" + commandLine + "'"); - context.getCounter(COUNTER_GROUP_NAME, "START_PROCESS").increment(1); + context.getCounter(COUNTER_GROUP_NAME, "start_process").increment(1); Variant variant = context.getCurrentValue(); currentChromosome = variant.getChromosome(); diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantReducer.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantReducer.java index 25598e593f..c10bbcb259 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantReducer.java +++ 
b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantReducer.java @@ -57,9 +57,15 @@ protected void reduce(VariantLocusKey key, Iterable values, context.getCounter(VariantsTableMapReduceHelper.COUNTER_GROUP_NAME, "body_records").increment(1); } context.getCounter(VariantsTableMapReduceHelper.COUNTER_GROUP_NAME, "stdout_records").increment(1); - } else { + context.getCounter(VariantsTableMapReduceHelper.COUNTER_GROUP_NAME, "stdout_records_bytes") + .increment(value.getLength()); + } else if (key.getOther().startsWith(STDERR_KEY)) { mos.write("stderr", key, value); context.getCounter(VariantsTableMapReduceHelper.COUNTER_GROUP_NAME, "stderr_records").increment(1); + context.getCounter(VariantsTableMapReduceHelper.COUNTER_GROUP_NAME, "stderr_records_bytes") + .increment(value.getLength()); + } else { + throw new IllegalStateException("Unknown key " + key); } context.getCounter(VariantsTableMapReduceHelper.COUNTER_GROUP_NAME, "records").increment(1); } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/VariantLocusKey.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/VariantLocusKey.java index a198e03de5..ce6d492612 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/VariantLocusKey.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/VariantLocusKey.java @@ -71,21 +71,20 @@ public static boolean isSingleDigitChromosome(String chromosome) { @Override public void write(DataOutput out) throws IOException { - out.writeChars(chromosome); - out.writeChars("\n"); + out.writeUTF(chromosome); out.writeInt(position); if (other != null) { - 
out.writeChars(other); + out.writeUTF(other); } else { - out.writeChars(""); + out.writeUTF(""); } } @Override public void readFields(DataInput in) throws IOException { - chromosome = in.readLine(); + chromosome = in.readUTF(); position = in.readInt(); - other = in.readLine(); + other = in.readUTF(); } public String getChromosome() { diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/mr/VariantLocusKeyTest.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/mr/VariantLocusKeyTest.java index 5263d74997..74552d1f24 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/mr/VariantLocusKeyTest.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/mr/VariantLocusKeyTest.java @@ -4,6 +4,7 @@ import org.junit.experimental.categories.Category; import org.opencb.opencga.core.testclassification.duration.ShortTests; +import java.io.*; import java.util.Arrays; import java.util.List; @@ -149,4 +150,29 @@ public void shouldCompareChromosomesCorrectly() { } } } + + @Test + public void testWriteAndRead() throws IOException { + testWriteAndRead(new VariantLocusKey("1_random", 1000, "A")); + testWriteAndRead(new VariantLocusKey("1", 3541316, "O:31231")); + testWriteAndRead(new VariantLocusKey("0", 3541316, "O:31231")); + testWriteAndRead(new VariantLocusKey("", 3541316, "")); + testWriteAndRead(new VariantLocusKey("", -2, "")); + } + + private static void testWriteAndRead(VariantLocusKey originalKey) throws IOException { + // Write the object to a byte array output stream + ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(); + DataOutputStream dataOutputStream = new DataOutputStream(byteArrayOutputStream); + 
originalKey.write(dataOutputStream); + + // Read the object from a byte array input stream + ByteArrayInputStream byteArrayInputStream = new ByteArrayInputStream(byteArrayOutputStream.toByteArray()); + DataInputStream dataInputStream = new DataInputStream(byteArrayInputStream); + VariantLocusKey readKey = new VariantLocusKey(); + readKey.readFields(dataInputStream); + + // Assert that the read object is equal to the original object + assertEquals(originalKey, readKey); + } } \ No newline at end of file From fa3c9f2a17ddd8a5492abf4fcdfd5283b5c5924e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Mon, 4 Nov 2024 15:27:26 +0000 Subject: [PATCH 042/122] storage: Fix "Request body si too large" #TASK-6722 --- .../variant/io/json/VariantJsonWriter.java | 6 +- .../variant/io/MaxWriteBlockOutputStream.java | 42 +++++++++ .../variant/mr/VariantFileOutputFormat.java | 14 ++- .../io/MaxWriteBlockOutputStreamTest.java | 85 +++++++++++++++++++ 4 files changed, 138 insertions(+), 9 deletions(-) create mode 100644 opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/io/MaxWriteBlockOutputStream.java create mode 100644 opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/io/MaxWriteBlockOutputStreamTest.java diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/io/json/VariantJsonWriter.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/io/json/VariantJsonWriter.java index 6930ef05b0..f8828b83c3 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/io/json/VariantJsonWriter.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/io/json/VariantJsonWriter.java @@ -186,7 +186,11 @@ public boolean post() { 
fileGenerator.flush(); } } catch (IOException ex) { - close(); + try { + close(); + } catch (Exception ex1) { + ex.addSuppressed(ex1); + } throw new UncheckedIOException(ex); } return true; diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/io/MaxWriteBlockOutputStream.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/io/MaxWriteBlockOutputStream.java new file mode 100644 index 0000000000..55b4f82acc --- /dev/null +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/io/MaxWriteBlockOutputStream.java @@ -0,0 +1,42 @@ +package org.opencb.opencga.storage.hadoop.variant.io; + +import java.io.FilterOutputStream; +import java.io.IOException; +import java.io.OutputStream; + +/** + * MaxWriteBlockOutputStream is a {@link FilterOutputStream} that writes blocks of a maximum size. + *

+ * If the block size is greater than the maximum block size, it will split the block in smaller blocks of the maximum size. + *

+ * This class is used to avoid writing large blocks into Azure Blob Storage. Azure Blob Storage has a limit of 4MB per block. + * See + * Request body too large. + */ +public class MaxWriteBlockOutputStream extends FilterOutputStream { + + private final int maxBlockSize; + + public MaxWriteBlockOutputStream(OutputStream out) { + this(out, 1024 * 1024 * 2); + } + + public MaxWriteBlockOutputStream(OutputStream out, int maxBlockSize) { + super(out); + this.maxBlockSize = maxBlockSize; + } + + @Override + public synchronized void write(byte[] b, int off, int len) throws IOException { + if (len > maxBlockSize) { + int start = 0; + while (start < len) { + int blockLength = Math.min(maxBlockSize, len - start); + out.write(b, off + start, blockLength); + start += blockLength; + } + } else { + out.write(b, off, len); + } + } +} diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/VariantFileOutputFormat.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/VariantFileOutputFormat.java index 0903d498b8..5cc41ce2eb 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/VariantFileOutputFormat.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/VariantFileOutputFormat.java @@ -34,11 +34,10 @@ import org.opencb.opencga.storage.core.metadata.VariantStorageMetadataManager; import org.opencb.opencga.storage.core.variant.io.VariantWriterFactory; import org.opencb.opencga.storage.core.variant.io.VariantWriterFactory.VariantOutputFormat; +import org.opencb.opencga.storage.hadoop.variant.io.MaxWriteBlockOutputStream; import org.opencb.opencga.storage.hadoop.variant.metadata.HBaseVariantStorageMetadataDBAdaptorFactory; -import java.io.DataOutputStream; 
-import java.io.IOException; -import java.io.OutputStream; +import java.io.*; /** @@ -67,13 +66,12 @@ public RecordWriter getRecordWriter(TaskAttemptContext jo } Path file = this.getDefaultWorkFile(job, extension); FileSystem fs = file.getFileSystem(conf); - FSDataOutputStream fileOut = fs.create(file, false); + OutputStream out = fs.create(file, false); if (isCompressed) { - DataOutputStream out = new DataOutputStream(codec.createOutputStream(fileOut)); - return new VariantRecordWriter(configureWriter(job, out), out); - } else { - return new VariantRecordWriter(configureWriter(job, fileOut), fileOut); + out = new DataOutputStream(codec.createOutputStream(out)); } + out = new MaxWriteBlockOutputStream(out); + return new VariantRecordWriter(configureWriter(job, out), out); } private DataWriter configureWriter(final TaskAttemptContext job, OutputStream fileOut) throws IOException { diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/io/MaxWriteBlockOutputStreamTest.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/io/MaxWriteBlockOutputStreamTest.java new file mode 100644 index 0000000000..21712f0d1f --- /dev/null +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/io/MaxWriteBlockOutputStreamTest.java @@ -0,0 +1,85 @@ +package org.opencb.opencga.storage.hadoop.variant.io; + +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.mockito.Mockito; +import org.opencb.opencga.core.testclassification.duration.ShortTests; + +import java.io.*; +import java.util.Random; + +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertThrows; + +@Category(ShortTests.class) +public class MaxWriteBlockOutputStreamTest { + + @Test + public void shouldWriteAndReadDataCorrectly() throws 
IOException { + ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(); + MaxWriteBlockOutputStream outputStream = new MaxWriteBlockOutputStream(byteArrayOutputStream); + + byte[] data = "test data".getBytes(); + outputStream.write(data, 0, data.length); + outputStream.flush(); + + ByteArrayInputStream byteArrayInputStream = new ByteArrayInputStream(byteArrayOutputStream.toByteArray()); + DataInputStream dataInputStream = new DataInputStream(byteArrayInputStream); + byte[] readData = new byte[data.length]; + dataInputStream.readFully(readData); + + assertArrayEquals(data, readData); + } + + @Test + public void shouldHandleEmptyData() throws IOException { + ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(); + MaxWriteBlockOutputStream outputStream = new MaxWriteBlockOutputStream(byteArrayOutputStream); + + byte[] data = new byte[0]; + outputStream.write(data, 0, data.length); + outputStream.flush(); + + ByteArrayInputStream byteArrayInputStream = new ByteArrayInputStream(byteArrayOutputStream.toByteArray()); + DataInputStream dataInputStream = new DataInputStream(byteArrayInputStream); + byte[] readData = new byte[data.length]; + dataInputStream.readFully(readData); + + assertArrayEquals(data, readData); + } + + @Test + public void shouldHandleLargeData() throws IOException { + ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(); + byteArrayOutputStream = Mockito.spy(byteArrayOutputStream); + Mockito.verify(byteArrayOutputStream, Mockito.never()).write(Mockito.any(byte[].class), Mockito.anyInt(), Mockito.anyInt()); + MaxWriteBlockOutputStream outputStream = new MaxWriteBlockOutputStream(byteArrayOutputStream, 1024); + + byte[] data = new byte[1024 * 1024]; // 1 MB of data + new Random().nextBytes(data); + outputStream.write(data, 0, data.length); + outputStream.flush(); + + // Check that the write method was called multiple times + Mockito.verify(byteArrayOutputStream, 
Mockito.times(1024)).write(Mockito.any(byte[].class), Mockito.anyInt(), Mockito.anyInt()); + Mockito.verify(byteArrayOutputStream, Mockito.never()).write(Mockito.any(byte[].class)); + + ByteArrayInputStream byteArrayInputStream = new ByteArrayInputStream(byteArrayOutputStream.toByteArray()); + DataInputStream dataInputStream = new DataInputStream(byteArrayInputStream); + byte[] readData = new byte[data.length]; + dataInputStream.readFully(readData); + + assertArrayEquals(data, readData); + } + + @Test + public void shouldThrowExceptionForNullData() { + ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(); + MaxWriteBlockOutputStream outputStream = new MaxWriteBlockOutputStream(byteArrayOutputStream); + + assertThrows(NullPointerException.class, () -> { + outputStream.write(null, 0, 0); + }); + } + +} \ No newline at end of file From b528c033c962360c2a6f1787e695cba6e1ec8aca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Mon, 4 Nov 2024 15:41:29 +0000 Subject: [PATCH 043/122] analysis: Do not try to close twice the same ERM. #TASK-6722 --- .../opencga/analysis/tools/OpenCgaTool.java | 36 +++++++++++-------- .../variant/mr/VariantFileOutputFormat.java | 5 +-- 2 files changed, 24 insertions(+), 17 deletions(-) diff --git a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/tools/OpenCgaTool.java b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/tools/OpenCgaTool.java index 6024761d59..b5e01687b5 100644 --- a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/tools/OpenCgaTool.java +++ b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/tools/OpenCgaTool.java @@ -192,17 +192,11 @@ public final ExecutionResult start() throws ToolException { if (!erm.isClosed()) { String message = "Unexpected system shutdown. 
Job killed by the system."; privateLogger.error(message); + if (exception == null) { + exception = new RuntimeException(message); + } try { - if (scratchDir != null) { - deleteScratchDirectory(); - } - if (exception == null) { - exception = new RuntimeException(message); - } - logException(exception); - ExecutionResult result = erm.close(exception); - privateLogger.info("------- Tool '" + getId() + "' executed in " - + TimeUtils.durationToString(result.getEnd().getTime() - result.getStart().getTime()) + " -------"); + close(exception); } catch (ToolException e) { privateLogger.error("Error closing ExecutionResult", e); } @@ -271,13 +265,25 @@ public final ExecutionResult start() throws ToolException { } throw e; } finally { + // If the shutdown hook has been executed, the ExecutionResultManager is already closed + if (!erm.isClosed()) { + result = close(exception); + } else { + result = erm.read(); + } + } + return result; + } + + private ExecutionResult close(Throwable exception) throws ToolException { + if (scratchDir != null) { deleteScratchDirectory(); - stopMemoryMonitor(); - result = erm.close(exception); - logException(exception); - privateLogger.info("------- Tool '" + getId() + "' executed in " - + TimeUtils.durationToString(result.getEnd().getTime() - result.getStart().getTime()) + " -------"); } + logException(exception); + stopMemoryMonitor(); + ExecutionResult result = erm.close(exception); + privateLogger.info("------- Tool '" + getId() + "' executed in " + + TimeUtils.durationToString(result.getEnd().getTime() - result.getStart().getTime()) + " -------"); return result; } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/VariantFileOutputFormat.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/VariantFileOutputFormat.java index 5cc41ce2eb..3a4a2f7293 100644 --- 
a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/VariantFileOutputFormat.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/VariantFileOutputFormat.java @@ -17,7 +17,6 @@ package org.opencb.opencga.storage.hadoop.variant.mr; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.NullWritable; @@ -37,7 +36,9 @@ import org.opencb.opencga.storage.hadoop.variant.io.MaxWriteBlockOutputStream; import org.opencb.opencga.storage.hadoop.variant.metadata.HBaseVariantStorageMetadataDBAdaptorFactory; -import java.io.*; +import java.io.DataOutputStream; +import java.io.IOException; +import java.io.OutputStream; /** From 96e56795afdfaf7597d1648c1cd785da0a0a78d6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Thu, 7 Nov 2024 14:30:41 +0000 Subject: [PATCH 044/122] storage: Do not use flush on outputstream. 
HADOOP-16548 #TASK-6722 --- .../variant/io/json/VariantJsonWriter.java | 8 ++--- .../variant/io/CountingOutputStream.java | 36 +++++++++++++++++++ .../variant/mr/VariantFileOutputFormat.java | 10 +++--- 3 files changed, 46 insertions(+), 8 deletions(-) create mode 100644 opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/io/CountingOutputStream.java diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/io/json/VariantJsonWriter.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/io/json/VariantJsonWriter.java index f8828b83c3..2a8437099b 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/io/json/VariantJsonWriter.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/io/json/VariantJsonWriter.java @@ -177,13 +177,13 @@ public boolean write(Variant variant) { @Override public boolean post() { try { - variantsStream.flush(); - variantsGenerator.flush(); +// variantsStream.flush(); +// variantsGenerator.flush(); if (fileGenerator != null) { fileGenerator.writeObject(fileMetadata); - fileStream.flush(); - fileGenerator.flush(); +// fileStream.flush(); +// fileGenerator.flush(); } } catch (IOException ex) { try { diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/io/CountingOutputStream.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/io/CountingOutputStream.java new file mode 100644 index 0000000000..93f3dcd9bf --- /dev/null +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/io/CountingOutputStream.java @@ -0,0 +1,36 @@ +package 
org.opencb.opencga.storage.hadoop.variant.io; + +import java.io.FilterOutputStream; +import java.io.IOException; +import java.io.OutputStream; + +public class CountingOutputStream extends FilterOutputStream { + + private long count = 0; + + public CountingOutputStream(OutputStream os) { + super(os); + } + + @Override + public void write(int b) throws IOException { + out.write(b); + count++; + } + + @Override + public void write(byte[] b) throws IOException { + out.write(b); + count += b.length; + } + + @Override + public void write(byte[] b, int off, int len) throws IOException { + out.write(b, off, len); + count += len; + } + + public long getByteCount() { + return count; + } +} diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/VariantFileOutputFormat.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/VariantFileOutputFormat.java index 3a4a2f7293..16dd0fffa3 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/VariantFileOutputFormat.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/VariantFileOutputFormat.java @@ -33,13 +33,15 @@ import org.opencb.opencga.storage.core.metadata.VariantStorageMetadataManager; import org.opencb.opencga.storage.core.variant.io.VariantWriterFactory; import org.opencb.opencga.storage.core.variant.io.VariantWriterFactory.VariantOutputFormat; -import org.opencb.opencga.storage.hadoop.variant.io.MaxWriteBlockOutputStream; +import org.opencb.opencga.storage.hadoop.variant.io.CountingOutputStream; import org.opencb.opencga.storage.hadoop.variant.metadata.HBaseVariantStorageMetadataDBAdaptorFactory; import java.io.DataOutputStream; import java.io.IOException; import java.io.OutputStream; +import static 
org.opencb.opencga.storage.hadoop.variant.mr.VariantsTableMapReduceHelper.COUNTER_GROUP_NAME; + /** * Writes variants into any format supported by the {@link VariantWriterFactory}. @@ -71,7 +73,6 @@ public RecordWriter getRecordWriter(TaskAttemptContext jo if (isCompressed) { out = new DataOutputStream(codec.createOutputStream(out)); } - out = new MaxWriteBlockOutputStream(out); return new VariantRecordWriter(configureWriter(job, out), out); } @@ -100,11 +101,11 @@ private DataWriter configureWriter(final TaskAttemptContext job, Output protected static class VariantRecordWriter extends RecordWriter { private final DataWriter writer; - private final OutputStream outputStream; + private final CountingOutputStream outputStream; public VariantRecordWriter(DataWriter writer, OutputStream outputStream) { this.writer = writer; - this.outputStream = outputStream; + this.outputStream = new CountingOutputStream(outputStream); } @Override @@ -117,6 +118,7 @@ public void close(TaskAttemptContext taskAttemptContext) throws IOException, Int writer.post(); writer.close(); outputStream.close(); + taskAttemptContext.getCounter(COUNTER_GROUP_NAME, "bytes_written").increment(outputStream.getByteCount()); } } From bcd8185f1aab1121150524c3f7425cd023a020a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Thu, 7 Nov 2024 14:48:01 +0000 Subject: [PATCH 045/122] storage: Add VariantExporterDirectMultipleOutputsMapper to ensure sorted export without reduce step. 
#TASK-6722 --- .../hadoop/utils/AbstractHBaseDriver.java | 57 ++++++++++-- .../variant/executors/SshMRExecutor.java | 3 +- .../hadoop/variant/io/VariantDriver.java | 37 ++------ .../variant/io/VariantExporterDriver.java | 87 +++++++++++++++++-- .../variant/mr/StreamVariantDriver.java | 21 ++--- .../variant/mr/StreamVariantReducer.java | 6 +- .../utils/HBaseVariantTableNameGenerator.java | 4 + .../variant/HadoopVariantStorageTest.java | 1 + 8 files changed, 155 insertions(+), 61 deletions(-) diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/utils/AbstractHBaseDriver.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/utils/AbstractHBaseDriver.java index 99a119264c..7a069d6efa 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/utils/AbstractHBaseDriver.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/utils/AbstractHBaseDriver.java @@ -31,11 +31,13 @@ import org.opencb.opencga.core.common.TimeUtils; import org.opencb.opencga.storage.core.exceptions.StorageEngineException; import org.opencb.opencga.storage.hadoop.io.HDFSIOConnector; +import org.opencb.opencga.storage.hadoop.variant.executors.SshMRExecutor; import org.opencb.opencga.storage.hadoop.variant.mr.VariantMapReduceUtil; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.*; +import java.net.URI; import java.nio.charset.Charset; import java.nio.file.Files; import java.nio.file.Paths; @@ -220,6 +222,10 @@ public final int run(String[] args) throws Exception { } private void reportRunningJobs() { + if (getConf().getBoolean("storage.hadoop.mr.skipReportRunningJobs", false)) { + LOGGER.info("Skip report running jobs"); + return; + } // Get the number of pending or running jobs in yarn try 
(YarnClient yarnClient = YarnClient.createYarnClient()) { yarnClient.init(getConf()); @@ -362,10 +368,20 @@ protected Path getTempOutdir(String prefix, String suffix, boolean ensureHdfs) t } } } - LOGGER.info("Temporary directory: " + tmpDir.toUri()); + LOGGER.info("Temporary directory: " + toUri(tmpDir)); return new Path(tmpDir, fileName); } + private URI toUri(Path path) throws IOException { + URI tmpUri = path.toUri(); + if (tmpUri.getScheme() == null) { + // If the scheme is null, add the default scheme + FileSystem fileSystem = path.getFileSystem(getConf()); + tmpUri = fileSystem.getUri().resolve(tmpUri.getPath()); + } + return tmpUri; + } + protected Path getLocalOutput(Path outdir) throws IOException { return getLocalOutput(outdir, () -> null); } @@ -408,13 +424,23 @@ public class MapReduceOutputFile { private final Supplier nameGenerator; private final String tempFilePrefix; + private final Map extraFiles = new HashMap<>(); + private String namedOutput; protected Path localOutput; protected Path outdir; + public MapReduceOutputFile(String tempFilePrefix) throws IOException { + this.nameGenerator = () -> null; + this.tempFilePrefix = tempFilePrefix; + getOutputPath(); + namedOutput = null; + } + public MapReduceOutputFile(Supplier nameGenerator, String tempFilePrefix) throws IOException { this.nameGenerator = nameGenerator; this.tempFilePrefix = tempFilePrefix; getOutputPath(); + namedOutput = null; } protected void getOutputPath() throws IOException { @@ -428,10 +454,10 @@ protected void getOutputPath() throws IOException { outdir.getFileSystem(getConf()).deleteOnExit(outdir); } if (localOutput != null) { - LOGGER.info(" * Outdir file: " + localOutput.toUri()); - LOGGER.info(" * Temporary outdir file: " + outdir.toUri()); + LOGGER.info(" * Outdir file: " + toUri(localOutput)); + LOGGER.info(" * Temporary outdir file: " + toUri(outdir)); } else { - LOGGER.info(" * Outdir file: " + outdir.toUri()); + LOGGER.info(" * Outdir file: " + toUri(outdir)); } } } @@ 
-439,7 +465,7 @@ protected void getOutputPath() throws IOException { public void postExecute(boolean succeed) throws IOException { if (succeed) { if (localOutput != null) { - concatMrOutputToLocal(outdir, localOutput); + getConcatMrOutputToLocal(); } } if (localOutput != null) { @@ -447,6 +473,27 @@ public void postExecute(boolean succeed) throws IOException { } } + public MapReduceOutputFile setNamedOutput(String partFilePrefix) { + this.namedOutput = partFilePrefix; + return this; + } + + public void addExtraNamedOutput(String namedOutput, String localOutputPrefix) { + extraFiles.put(namedOutput, localOutputPrefix); + } + + protected void getConcatMrOutputToLocal() throws IOException { + concatMrOutputToLocal(outdir, localOutput, true, namedOutput); + + for (Map.Entry entry : extraFiles.entrySet()) { + String suffix = entry.getValue(); + String partFilePrefix = entry.getKey(); + Path extraOutput = localOutput.suffix(suffix); + concatMrOutputToLocal(outdir, extraOutput, true, partFilePrefix); + printKeyValue(SshMRExecutor.EXTRA_OUTPUT_PREFIX + partFilePrefix.toUpperCase(), extraOutput); + } + } + public Path getLocalOutput() { return localOutput; } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/executors/SshMRExecutor.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/executors/SshMRExecutor.java index 612f3183a9..faea918588 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/executors/SshMRExecutor.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/executors/SshMRExecutor.java @@ -35,6 +35,7 @@ public class SshMRExecutor extends MRExecutor { // env-var expected by "sshpass -e" private static final String SSHPASS_ENV = "SSHPASS"; 
public static final String PID = "PID"; + public static final String EXTRA_OUTPUT_PREFIX = "EXTRA_OUTPUT_"; private static Logger logger = LoggerFactory.getLogger(SshMRExecutor.class); @Override @@ -107,7 +108,7 @@ public Result run(String executable, String[] args) throws StorageEngineExceptio if (exitValue == 0) { copyOutputFiles(args, env); for (String key : result.keySet()) { - if (key.startsWith("EXTRA_OUTPUT_")) { + if (key.startsWith(EXTRA_OUTPUT_PREFIX)) { copyOutputFiles(result.getString(key), env); } } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/io/VariantDriver.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/io/VariantDriver.java index 223c0b9155..7a2324e17f 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/io/VariantDriver.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/io/VariantDriver.java @@ -1,7 +1,6 @@ package org.opencb.opencga.storage.hadoop.variant.io; import org.apache.commons.lang3.StringUtils; -import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.client.Scan; import org.apache.hadoop.mapred.JobContext; import org.apache.hadoop.mapreduce.Job; @@ -50,8 +49,7 @@ public abstract class VariantDriver extends AbstractVariantsTableDriver { public static final String OUTPUT_PARAM = "output"; public static final String CONCAT_OUTPUT_PARAM = "concat-output"; - protected Path outdir; - protected Path localOutput; + protected MapReduceOutputFile output; private final Query query = new Query(); private final QueryOptions options = new QueryOptions(); private static Logger logger = LoggerFactory.getLogger(VariantDriver.class); @@ -61,25 +59,9 @@ public abstract class VariantDriver extends 
AbstractVariantsTableDriver { protected void parseAndValidateParameters() throws IOException { setStudyId(-1); super.parseAndValidateParameters(); - String outdirStr = getParam(OUTPUT_PARAM); - if (StringUtils.isEmpty(outdirStr)) { - throw new IllegalArgumentException("Missing argument " + OUTPUT_PARAM); - } - useReduceStep = Boolean.valueOf(getParam(CONCAT_OUTPUT_PARAM)); - outdir = new Path(outdirStr); - if (isLocal(outdir)) { - localOutput = getLocalOutput(outdir); - outdir = getTempOutdir("opencga_export", localOutput.getName()); - outdir.getFileSystem(getConf()).deleteOnExit(outdir); - } - if (localOutput != null) { - useReduceStep = true; - logger.info(" * Outdir file: " + localOutput.toUri()); - logger.info(" * Temporary outdir file: " + outdir.toUri()); - } else { - logger.info(" * Outdir file: " + outdir.toUri()); - } +// useReduceStep = Boolean.valueOf(getParam(CONCAT_OUTPUT_PARAM)); + output = new MapReduceOutputFile(getTableNameGenerator().getDbName() + "_" + getClass().getSimpleName()); getQueryFromConfig(query, getConf()); getQueryOptionsFromConfig(options, getConf()); @@ -156,7 +138,7 @@ protected final Job setupJob(Job job, String archiveTable, String variantTable) setNoneTimestamp(job); - FileOutputFormat.setOutputPath(job, outdir); // set Path + FileOutputFormat.setOutputPath(job, output.getOutdir()); // set Path VariantMapReduceUtil.configureVariantConverter(job.getConfiguration(), false, true, true, query.getString(VariantQueryParam.UNKNOWN_GENOTYPE.key(), "./.")); @@ -193,16 +175,7 @@ protected void setupReducer(Job job, String variantTable) throws IOException { @Override protected void postExecution(boolean succeed) throws IOException, StorageEngineException { super.postExecution(succeed); - if (localOutput != null) { - if (succeed) { - copyMrOutputToLocal(); - } - deleteTemporaryFile(outdir); - } - } - - protected void copyMrOutputToLocal() throws IOException { - concatMrOutputToLocal(outdir, localOutput, true, null); + 
output.postExecute(succeed); } } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/io/VariantExporterDriver.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/io/VariantExporterDriver.java index d2489fac1c..f5c38ec7c7 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/io/VariantExporterDriver.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/io/VariantExporterDriver.java @@ -11,11 +11,11 @@ import org.apache.hadoop.io.compress.GzipCodec; import org.apache.hadoop.io.compress.SnappyCodec; import org.apache.hadoop.mapred.JobContext; -import org.apache.hadoop.mapreduce.Job; -import org.apache.hadoop.mapreduce.Partitioner; -import org.apache.hadoop.mapreduce.Reducer; -import org.apache.hadoop.mapreduce.TaskAttemptContext; +import org.apache.hadoop.mapreduce.*; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; +import org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat; +import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs; +import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; import org.apache.parquet.Log; import org.apache.parquet.avro.AvroParquetOutputFormat; import org.apache.parquet.hadoop.ParquetOutputFormat; @@ -43,7 +43,7 @@ public class VariantExporterDriver extends VariantDriver { private VariantWriterFactory.VariantOutputFormat outputFormat; private Class mapperClass; private Class reducerClass; - private Class outputFormatClass; + private Class outputFormatClass; private Class partitioner; @Override @@ -69,7 +69,7 @@ protected Class getPartitioner() { } @Override - protected Class getOutputFormatClass() { + protected Class getOutputFormatClass() { return outputFormatClass; } @@ -126,17 +126,25 
@@ protected void setupJob(Job job) throws IOException { mapperClass = AvroVariantExporterMapper.class; reducerClass = VariantExporterReducer.class; partitioner = VariantLocusKeyPartitioner.class; + outputFormatClass = VariantFileOutputFormat.class; } else { AvroJob.setOutputKeySchema(job, VariantAvro.getClassSchema()); - mapperClass = VariantExporterDirectMapper.class; + mapperClass = VariantExporterDirectMultipleOutputsMapper.class; +// mapperClass = VariantExporterDirectMapper.class; + reducerClass = null; + +// MultipleOutputs.setCountersEnabled(job, true); + MultipleOutputs.addNamedOutput(job, VariantExporterDirectMultipleOutputsMapper.NAMED_OUTPUT, + VariantFileOutputFormat.class, Variant.class, NullWritable.class); + LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class); + outputFormatClass = LazyOutputFormat.class; } if (outputFormat.isGzip()) { FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class); // compression } else if (outputFormat.isSnappy()) { FileOutputFormat.setOutputCompressorClass(job, SnappyCodec.class); // compression } - outputFormatClass = VariantFileOutputFormat.class; job.getConfiguration().set(VariantFileOutputFormat.VARIANT_OUTPUT_FORMAT, outputFormat.name()); job.setOutputKeyClass(Variant.class); break; @@ -169,6 +177,69 @@ protected void map(Object key, Variant value, Context context) throws IOExceptio } } + /** + * Mapper to convert to Variant. + * The output of this mapper should be connected directly to the {@link VariantWriterFactory.VariantOutputFormat} + * This mapper can not work with a reduce step. + * The output is written to multiple outputs, ensuring that generated files are sorted by chromosome and position. 
+ */ + public static class VariantExporterDirectMultipleOutputsMapper extends VariantMapper { + + public static final String NAMED_OUTPUT = "export"; + private String baseOutputPath; + private String chromosome; + + public static String buildOutputKeyPrefix(String chromosome, Integer start) { + // If it's a single digit chromosome, add a 0 at the beginning + // 1 -> 01 + // 3 -> 03 + // 22 -> 22 + // If the first character is a digit, and the second is not, add a 0 at the beginning + // MT -> MT + // 1_KI270712v1_random -> 01_KI270712v1_random + if (VariantLocusKey.isSingleDigitChromosome(chromosome)) { + chromosome = "0" + chromosome; + } + + return String.format("%s.%s.%010d.", NAMED_OUTPUT, chromosome, start); + } + + private MultipleOutputs mos; + @Override + protected void setup(Context context) throws IOException, InterruptedException { + super.setup(context); + mos = new MultipleOutputs<>(context); + context.getCounter(COUNTER_GROUP_NAME, "variants").increment(0); + } + + @Override + protected void map(Object key, Variant value, Context context) throws IOException, InterruptedException { + context.getCounter(COUNTER_GROUP_NAME, "variants").increment(1); + if (baseOutputPath == null || !consecutiveChromosomes(chromosome, value.getChromosome())) { + baseOutputPath = buildOutputKeyPrefix(value.getChromosome(), value.getStart()); + chromosome = value.getChromosome(); + } + mos.write(NAMED_OUTPUT, value, NullWritable.get(), baseOutputPath); + } + + private static boolean consecutiveChromosomes(String prevChromosome, String newChromosome) { + if (newChromosome.equals(prevChromosome)) { + return true; + } + if (VariantLocusKey.isSingleDigitChromosome(prevChromosome)) { + return VariantLocusKey.isSingleDigitChromosome(newChromosome); + } else { + return !VariantLocusKey.isSingleDigitChromosome(newChromosome); + } + } + + @Override + protected void cleanup(Mapper.Context context) throws IOException, InterruptedException { + super.cleanup(context); + mos.close(); + } 
+ } + /** * Mapper to convert to VariantAvro. * The output of this mapper should be connected directly to the {@link AvroKeyOutputFormat} diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantDriver.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantDriver.java index b6cedb5d48..c9a42c9be7 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantDriver.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantDriver.java @@ -1,7 +1,6 @@ package org.opencb.opencga.storage.hadoop.variant.mr; import org.apache.commons.lang3.StringUtils; -import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.compress.CompressionCodec; import org.apache.hadoop.io.compress.DeflateCodec; @@ -34,6 +33,8 @@ public class StreamVariantDriver extends VariantDriver { public static final String MAX_BYTES_PER_MAP_PARAM = "maxBytesPerMap"; public static final String ENVIRONMENT_VARIABLES = "envVars"; public static final String STDERR_TXT_GZ = ".stderr.txt.gz"; + public static final String STDOUT_NAMED_OUTPUT = "stdout"; + public static final String STDERR_NAMED_OUTPUT = "stderr"; private VariantWriterFactory.VariantOutputFormat format; private int maxBytesPerMap; @@ -101,6 +102,9 @@ protected void parseAndValidateParameters() throws IOException { if (StringUtils.isEmpty(outdirStr)) { throw new IllegalArgumentException("Missing argument " + OUTPUT_PARAM); } + + output.setNamedOutput(STDOUT_NAMED_OUTPUT); + output.addExtraNamedOutput(STDERR_NAMED_OUTPUT, STDERR_TXT_GZ); } @Override @@ -144,10 +148,12 @@ protected void setupJob(Job job) throws IOException { 
StreamVariantMapper.setMaxInputBytesPerProcess(job, maxBytesPerMap); StreamVariantMapper.setEnvironment(job, envVars); + // Current implementation only supports using the reduce step + useReduceStep = true; reducerClass = StreamVariantReducer.class; - MultipleOutputs.addNamedOutput(job, "stdout", ValueOnlyTextOutputFormat.class, keyClass, valueClass); - MultipleOutputs.addNamedOutput(job, "stderr", ValueOnlyTextOutputFormat.class, keyClass, valueClass); + MultipleOutputs.addNamedOutput(job, STDOUT_NAMED_OUTPUT, ValueOnlyTextOutputFormat.class, keyClass, valueClass); + MultipleOutputs.addNamedOutput(job, STDERR_NAMED_OUTPUT, ValueOnlyTextOutputFormat.class, keyClass, valueClass); LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class); outputFormatClass = LazyOutputFormat.class; @@ -172,15 +178,6 @@ protected String getJobOperationName() { return "stream-variants"; } - - @Override - protected void copyMrOutputToLocal() throws IOException { - concatMrOutputToLocal(outdir, localOutput, true, "stdout"); - Path stderrOutput = localOutput.suffix(STDERR_TXT_GZ); - concatMrOutputToLocal(outdir, stderrOutput, true, "stderr"); - printKeyValue("EXTRA_OUTPUT_STDERR", stderrOutput); - } - @SuppressWarnings("unchecked") public static void main(String[] args) { main(args, (Class) MethodHandles.lookup().lookupClass()); diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantReducer.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantReducer.java index c10bbcb259..695ed56e83 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantReducer.java +++ 
b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantReducer.java @@ -41,7 +41,7 @@ protected void reduce(VariantLocusKey key, Iterable values, // skip header context.getCounter(VariantsTableMapReduceHelper.COUNTER_GROUP_NAME, "header_records_skip").increment(1); } else { - mos.write("stdout", key, value); + mos.write(StreamVariantDriver.STDOUT_NAMED_OUTPUT, key, value); context.getCounter(VariantsTableMapReduceHelper.COUNTER_GROUP_NAME, "header_records").increment(1); } } else { @@ -53,14 +53,14 @@ protected void reduce(VariantLocusKey key, Iterable values, // No more header, assume all header is written headerWritten = true; } - mos.write("stdout", key, value); + mos.write(StreamVariantDriver.STDOUT_NAMED_OUTPUT, key, value); context.getCounter(VariantsTableMapReduceHelper.COUNTER_GROUP_NAME, "body_records").increment(1); } context.getCounter(VariantsTableMapReduceHelper.COUNTER_GROUP_NAME, "stdout_records").increment(1); context.getCounter(VariantsTableMapReduceHelper.COUNTER_GROUP_NAME, "stdout_records_bytes") .increment(value.getLength()); } else if (key.getOther().startsWith(STDERR_KEY)) { - mos.write("stderr", key, value); + mos.write(StreamVariantDriver.STDERR_NAMED_OUTPUT, key, value); context.getCounter(VariantsTableMapReduceHelper.COUNTER_GROUP_NAME, "stderr_records").increment(1); context.getCounter(VariantsTableMapReduceHelper.COUNTER_GROUP_NAME, "stderr_records_bytes") .increment(value.getLength()); diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/utils/HBaseVariantTableNameGenerator.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/utils/HBaseVariantTableNameGenerator.java index 39d8e3868f..4777c7a876 100644 --- 
a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/utils/HBaseVariantTableNameGenerator.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/utils/HBaseVariantTableNameGenerator.java @@ -58,6 +58,10 @@ public HBaseVariantTableNameGenerator(String namespace, String dbName) { pendingSecondaryIndexPruneTableName = getPendingSecondaryIndexPruneTableName(namespace, this.dbName); } + public String getDbName() { + return dbName; + } + public String getVariantTableName() { return variantTableName; } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantStorageTest.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantStorageTest.java index cf4d7984ad..4ddc5d03a0 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantStorageTest.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantStorageTest.java @@ -420,6 +420,7 @@ static StorageConfiguration updateStorageConfiguration(StorageConfiguration stor TestMRExecutor.setStaticConfiguration(conf); options.put(HadoopVariantStorageOptions.MR_ADD_DEPENDENCY_JARS.key(), false); + options.put("storage.hadoop.mr.skipReportRunningJobs", true); EnumSet supportedAlgorithms = EnumSet.of(Compression.Algorithm.NONE, HBaseTestingUtility.getSupportedCompressionAlgorithms()); options.put(HadoopVariantStorageOptions.ARCHIVE_TABLE_COMPRESSION.key(), supportedAlgorithms.contains(Compression.Algorithm.GZ) From c4c3d3b4ac0d25e068ae7ee0d9e81a8c4b06c167 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Thu, 
7 Nov 2024 15:40:15 +0000 Subject: [PATCH 046/122] storage: Do not use reduce step on variant-walker. #TASK-6722 --- .../variant/io/HadoopVariantExporter.java | 5 +- .../variant/io/VariantExporterDriver.java | 6 +- .../variant/mr/StreamVariantDriver.java | 10 +++- .../variant/mr/StreamVariantMapper.java | 56 ++++++++++++++++--- 4 files changed, 62 insertions(+), 15 deletions(-) diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/io/HadoopVariantExporter.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/io/HadoopVariantExporter.java index 2bc76ab5e9..b16ef09361 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/io/HadoopVariantExporter.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/io/HadoopVariantExporter.java @@ -178,15 +178,16 @@ public List export(@Nullable URI outputFileUri, VariantWriterFactory.Varian logger.info("Count {}/{} variants from query {}", count, totalCount, getSearchEngineQuery(query)); if (count < variantsThreshold || matchRate < matchRatioThreshold) { logger.info("Query for approximately {} of {} variants, which is {}% of the total." + + " Current variants threshold is {}, and matchRatioThreshold is {}% ." + " Consider small query." + " Skip MapReduce", - count, totalCount, String.format("%.2f", matchRate * 100)); + count, totalCount, String.format("%.4f", matchRate * 100), variantsThreshold, matchRatioThreshold); smallQuery = true; } else { logger.info("Query for approximately {} of {} variants, which is {}% of the total." + " Current variants threshold is {}, and matchRatioThreshold is {}% ." 
+ " Not a small query", - count, totalCount, String.format("%.2f", matchRate * 100), variantsThreshold, matchRatioThreshold); + count, totalCount, String.format("%.3f", matchRate * 100), variantsThreshold, matchRatioThreshold); } } catch (VariantSearchException e) { logger.info("Unable to count variants from SearchEngine", e); diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/io/VariantExporterDriver.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/io/VariantExporterDriver.java index f5c38ec7c7..4c7abd3286 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/io/VariantExporterDriver.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/io/VariantExporterDriver.java @@ -189,7 +189,7 @@ public static class VariantExporterDirectMultipleOutputsMapper extends VariantMa private String baseOutputPath; private String chromosome; - public static String buildOutputKeyPrefix(String chromosome, Integer start) { + public static String buildOutputKeyPrefix(String namedOutput, String chromosome, Integer start) { // If it's a single digit chromosome, add a 0 at the beginning // 1 -> 01 // 3 -> 03 @@ -201,7 +201,7 @@ public static String buildOutputKeyPrefix(String chromosome, Integer start) { chromosome = "0" + chromosome; } - return String.format("%s.%s.%010d.", NAMED_OUTPUT, chromosome, start); + return String.format("%s.%s.%010d.", namedOutput, chromosome, start); } private MultipleOutputs mos; @@ -216,7 +216,7 @@ protected void setup(Context context) throws IOException, InterruptedException { protected void map(Object key, Variant value, Context context) throws IOException, InterruptedException { context.getCounter(COUNTER_GROUP_NAME, 
"variants").increment(1); if (baseOutputPath == null || !consecutiveChromosomes(chromosome, value.getChromosome())) { - baseOutputPath = buildOutputKeyPrefix(value.getChromosome(), value.getStart()); + baseOutputPath = buildOutputKeyPrefix(NAMED_OUTPUT, value.getChromosome(), value.getStart()); chromosome = value.getChromosome(); } mos.write(NAMED_OUTPUT, value, NullWritable.get(), baseOutputPath); diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantDriver.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantDriver.java index c9a42c9be7..0985b6c0f6 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantDriver.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantDriver.java @@ -148,9 +148,13 @@ protected void setupJob(Job job) throws IOException { StreamVariantMapper.setMaxInputBytesPerProcess(job, maxBytesPerMap); StreamVariantMapper.setEnvironment(job, envVars); - // Current implementation only supports using the reduce step - useReduceStep = true; - reducerClass = StreamVariantReducer.class; + if (useReduceStep) { + reducerClass = StreamVariantReducer.class; + StreamVariantMapper.setHasReduce(job, true); + } else { + reducerClass = null; + StreamVariantMapper.setHasReduce(job, false); + } MultipleOutputs.addNamedOutput(job, STDOUT_NAMED_OUTPUT, ValueOnlyTextOutputFormat.class, keyClass, valueClass); MultipleOutputs.addNamedOutput(job, STDERR_NAMED_OUTPUT, ValueOnlyTextOutputFormat.class, keyClass, valueClass); diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantMapper.java 
b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantMapper.java index f0a1de0f73..4805e5fe6b 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantMapper.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantMapper.java @@ -10,6 +10,7 @@ import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.Mapper; +import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs; import org.apache.hadoop.util.LineReader; import org.apache.hadoop.util.StopWatch; import org.opencb.biodata.models.variant.Variant; @@ -29,16 +30,20 @@ import java.util.*; import java.util.concurrent.TimeUnit; +import static org.opencb.opencga.storage.hadoop.variant.io.VariantExporterDriver.VariantExporterDirectMultipleOutputsMapper.buildOutputKeyPrefix; +import static org.opencb.opencga.storage.hadoop.variant.mr.StreamVariantDriver.STDERR_NAMED_OUTPUT; +import static org.opencb.opencga.storage.hadoop.variant.mr.StreamVariantDriver.STDOUT_NAMED_OUTPUT; import static org.opencb.opencga.storage.hadoop.variant.mr.VariantsTableMapReduceHelper.COUNTER_GROUP_NAME; public class StreamVariantMapper extends VariantMapper { private static final Log LOG = LogFactory.getLog(StreamVariantMapper.class); private static final int BUFFER_SIZE = 128 * 1024; - public static final String MAX_INPUT_BYTES_PER_PROCESS = "stream.maxInputBytesPerProcess"; + public static final String MAX_INPUT_BYTES_PER_PROCESS = "opencga.variant.stream.maxInputBytesPerProcess"; public static final String VARIANT_FORMAT = "opencga.variant.stream.format"; - public static final String COMMANDLINE_BASE64 = "opencga.variant.commandline_base64"; - public static final String ADDENVIRONMENT_PARAM = 
"opencga.variant.addenvironment"; + public static final String COMMANDLINE_BASE64 = "opencga.variant.stream.commandline_base64"; + public static final String ADDENVIRONMENT_PARAM = "opencga.variant.stream.addenvironment"; + public static final String HAS_REDUCE = "opencga.variant.stream.hasReduce"; private final boolean verboseStdout = false; private static final long REPORTER_OUT_DELAY = 10 * 1000L; @@ -54,6 +59,7 @@ public class StreamVariantMapper extends VariantMapper { private Query query; private QueryOptions options; private String firstVariant; + private boolean multipleOutputs; private int processCount = 0; @@ -71,6 +77,7 @@ public class StreamVariantMapper extends VariantMapper { private int processedBytes = 0; private long numRecordsRead = 0; private long numRecordsWritten = 0; + private MultipleOutputs mos; // auto-incremental number for each produced record. // These are used with the VariantLocusKey to ensure a sorted output. private int stdoutKeyNum; @@ -94,6 +101,10 @@ public static void setMaxInputBytesPerProcess(Job job, int maxInputBytesPerProce job.getConfiguration().setInt(MAX_INPUT_BYTES_PER_PROCESS, maxInputBytesPerProcess); } + public static void setHasReduce(Job job, boolean hasReduce) { + job.getConfiguration().setBoolean(HAS_REDUCE, hasReduce); + } + @Override protected void setup(Context context) throws IOException, InterruptedException { super.setup(context); @@ -104,6 +115,14 @@ protected void setup(Context context) throws IOException, InterruptedException { if (!format.isPlain()) { format = format.inPlain(); } + if (conf.getBoolean(HAS_REDUCE, false)) { + // If the job has a reduce step, the output will be written by the reducer + // No need to write the output here + multipleOutputs = false; + } else { + // If the job does not have a reduce step, the output will be written by the mapper + multipleOutputs = true; + } envs = new HashMap<>(); addEnvironment(envs, conf); @@ -327,6 +346,15 @@ private void closeProcess(Context context) 
throws IOException, InterruptedExcept } catch (Throwable th) { addException(th); } + + try { + if (mos != null) { + mos.close(); + mos = null; + } + } catch (Throwable th) { + addException(th); + } // drainStdout(context); } @@ -340,6 +368,9 @@ private void startProcess(Context context) throws IOException, StorageEngineExce if (firstVariant == null) { firstVariant = variant.getChromosome() + ":" + variant.getStart(); } + if (multipleOutputs) { + mos = new MultipleOutputs<>(context); + } stdoutKeyNum = 0; stderrKeyNum = 0; @@ -469,8 +500,13 @@ public void run() { private void write(Text line) throws IOException, InterruptedException { numRecords++; - context.write(new VariantLocusKey(currentChromosome, currentPosition, - StreamVariantReducer.STDOUT_KEY + (stdoutKeyNum++)), line); + VariantLocusKey locusKey = new VariantLocusKey(currentChromosome, currentPosition, + StreamVariantReducer.STDOUT_KEY + (stdoutKeyNum++)); + if (multipleOutputs) { + mos.write(STDOUT_NAMED_OUTPUT, locusKey, line, buildOutputKeyPrefix(STDOUT_NAMED_OUTPUT, currentChromosome, currentPosition)); + } else { + context.write(locusKey, line); + } } } @@ -551,8 +587,14 @@ private void write(String line) throws IOException, InterruptedException { } private void write(Text line) throws IOException, InterruptedException { - context.write(new VariantLocusKey(currentChromosome, currentPosition, - StreamVariantReducer.STDERR_KEY + (stderrKeyNum++)), line); + VariantLocusKey locusKey = new VariantLocusKey(currentChromosome, currentPosition, + StreamVariantReducer.STDERR_KEY + (stderrKeyNum++)); + + if (multipleOutputs) { + mos.write(STDERR_NAMED_OUTPUT, locusKey, line, buildOutputKeyPrefix(STDERR_NAMED_OUTPUT, currentChromosome, currentPosition)); + } else { + context.write(locusKey, line); + } } private boolean matchesReporter(String line) { From 0100097cdfa5b1c4dc8398b93931062767f31c5f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Thu, 7 Nov 2024 16:14:29 +0000 Subject: 
[PATCH 047/122] storage: Fix VariantRecordWriter bytes_written counter. #TASK-6722 --- .../opencga/storage/hadoop/utils/AbstractHBaseDriver.java | 7 +++---- .../storage/hadoop/variant/io/HadoopVariantExporter.java | 4 ++-- .../storage/hadoop/variant/io/VariantExporterDriver.java | 2 +- .../storage/hadoop/variant/mr/StreamVariantMapper.java | 6 ++++-- .../storage/hadoop/variant/mr/VariantFileOutputFormat.java | 7 ++++--- 5 files changed, 14 insertions(+), 12 deletions(-) diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/utils/AbstractHBaseDriver.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/utils/AbstractHBaseDriver.java index 7a069d6efa..685ece6c45 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/utils/AbstractHBaseDriver.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/utils/AbstractHBaseDriver.java @@ -368,7 +368,6 @@ protected Path getTempOutdir(String prefix, String suffix, boolean ensureHdfs) t } } } - LOGGER.info("Temporary directory: " + toUri(tmpDir)); return new Path(tmpDir, fileName); } @@ -454,10 +453,10 @@ protected void getOutputPath() throws IOException { outdir.getFileSystem(getConf()).deleteOnExit(outdir); } if (localOutput != null) { - LOGGER.info(" * Outdir file: " + toUri(localOutput)); - LOGGER.info(" * Temporary outdir file: " + toUri(outdir)); + LOGGER.info(" * Output file : " + toUri(localOutput)); + LOGGER.info(" * Temporary outdir : " + toUri(outdir)); } else { - LOGGER.info(" * Outdir file: " + toUri(outdir)); + LOGGER.info(" * Outdir: " + toUri(outdir)); } } } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/io/HadoopVariantExporter.java 
b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/io/HadoopVariantExporter.java index b16ef09361..53511b8473 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/io/HadoopVariantExporter.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/io/HadoopVariantExporter.java @@ -181,13 +181,13 @@ public List export(@Nullable URI outputFileUri, VariantWriterFactory.Varian + " Current variants threshold is {}, and matchRatioThreshold is {}% ." + " Consider small query." + " Skip MapReduce", - count, totalCount, String.format("%.4f", matchRate * 100), variantsThreshold, matchRatioThreshold); + count, totalCount, String.format("%.4f", matchRate * 100), variantsThreshold, matchRatioThreshold * 100); smallQuery = true; } else { logger.info("Query for approximately {} of {} variants, which is {}% of the total." + " Current variants threshold is {}, and matchRatioThreshold is {}% ." 
+ " Not a small query", - count, totalCount, String.format("%.3f", matchRate * 100), variantsThreshold, matchRatioThreshold); + count, totalCount, String.format("%.3f", matchRate * 100), variantsThreshold, matchRatioThreshold * 100); } } catch (VariantSearchException e) { logger.info("Unable to count variants from SearchEngine", e); diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/io/VariantExporterDriver.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/io/VariantExporterDriver.java index 4c7abd3286..b29287a407 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/io/VariantExporterDriver.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/io/VariantExporterDriver.java @@ -201,7 +201,7 @@ public static String buildOutputKeyPrefix(String namedOutput, String chromosome, chromosome = "0" + chromosome; } - return String.format("%s.%s.%010d.", namedOutput, chromosome, start); + return String.format("%s.%s.%010d", namedOutput, chromosome, start); } private MultipleOutputs mos; diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantMapper.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantMapper.java index 4805e5fe6b..2af52990fb 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantMapper.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantMapper.java @@ -503,7 
+503,8 @@ private void write(Text line) throws IOException, InterruptedException { VariantLocusKey locusKey = new VariantLocusKey(currentChromosome, currentPosition, StreamVariantReducer.STDOUT_KEY + (stdoutKeyNum++)); if (multipleOutputs) { - mos.write(STDOUT_NAMED_OUTPUT, locusKey, line, buildOutputKeyPrefix(STDOUT_NAMED_OUTPUT, currentChromosome, currentPosition)); + mos.write(STDOUT_NAMED_OUTPUT, locusKey, line, + buildOutputKeyPrefix(STDOUT_NAMED_OUTPUT, currentChromosome, currentPosition)); } else { context.write(locusKey, line); } @@ -591,7 +592,8 @@ private void write(Text line) throws IOException, InterruptedException { StreamVariantReducer.STDERR_KEY + (stderrKeyNum++)); if (multipleOutputs) { - mos.write(STDERR_NAMED_OUTPUT, locusKey, line, buildOutputKeyPrefix(STDERR_NAMED_OUTPUT, currentChromosome, currentPosition)); + mos.write(STDERR_NAMED_OUTPUT, locusKey, line, + buildOutputKeyPrefix(STDERR_NAMED_OUTPUT, currentChromosome, currentPosition)); } else { context.write(locusKey, line); } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/VariantFileOutputFormat.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/VariantFileOutputFormat.java index 16dd0fffa3..ddff988a11 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/VariantFileOutputFormat.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/VariantFileOutputFormat.java @@ -73,7 +73,8 @@ public RecordWriter getRecordWriter(TaskAttemptContext jo if (isCompressed) { out = new DataOutputStream(codec.createOutputStream(out)); } - return new VariantRecordWriter(configureWriter(job, out), out); + CountingOutputStream countingOut = new CountingOutputStream(out); + return 
new VariantRecordWriter(configureWriter(job, countingOut), countingOut); } private DataWriter configureWriter(final TaskAttemptContext job, OutputStream fileOut) throws IOException { @@ -103,9 +104,9 @@ protected static class VariantRecordWriter extends RecordWriter writer; private final CountingOutputStream outputStream; - public VariantRecordWriter(DataWriter writer, OutputStream outputStream) { + public VariantRecordWriter(DataWriter writer, CountingOutputStream outputStream) { this.writer = writer; - this.outputStream = new CountingOutputStream(outputStream); + this.outputStream = outputStream; } @Override From 80943c36a43f5fa13b6e8d2aa34d58864a1c1c80 Mon Sep 17 00:00:00 2001 From: pfurio Date: Fri, 8 Nov 2024 13:56:10 +0100 Subject: [PATCH 048/122] catalog: fix sync user functionality, #TASK-7187 --- .../opencga/catalog/managers/UserManager.java | 9 +++----- .../catalog/managers/UserManagerTest.java | 21 +++++++++++++++++++ 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/UserManager.java b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/UserManager.java index 7e8e8cdffe..c5f35a3afa 100644 --- a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/UserManager.java +++ b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/UserManager.java @@ -303,9 +303,8 @@ public JwtPayload validateToken(String token) throws CatalogException { } public void syncAllUsersOfExternalGroup(String organizationId, String study, String authOrigin, String token) throws CatalogException { - if (!OPENCGA.equals(authenticationFactory.getUserId(organizationId, authOrigin, token))) { - throw new CatalogAuthorizationException("Only the root user can perform this action"); - } + JwtPayload payload = validateToken(token); + authorizationManager.checkIsOpencgaAdministrator(payload); OpenCGAResult allGroups = catalogManager.getStudyManager().getGroup(study, null, token); 
@@ -392,9 +391,7 @@ public void importRemoteGroupOfUsers(String organizationId, String authOrigin, S .append("sync", sync) .append("token", token); try { - if (!OPENCGA.equals(authenticationFactory.getUserId(organizationId, authOrigin, token))) { - throw new CatalogAuthorizationException("Only the root user can perform this action"); - } + authorizationManager.checkIsOpencgaAdministrator(payload); ParamUtils.checkParameter(authOrigin, "Authentication origin"); ParamUtils.checkParameter(remoteGroup, "Remote group"); diff --git a/opencga-catalog/src/test/java/org/opencb/opencga/catalog/managers/UserManagerTest.java b/opencga-catalog/src/test/java/org/opencb/opencga/catalog/managers/UserManagerTest.java index f23295fbd6..acd1fdaf5c 100644 --- a/opencga-catalog/src/test/java/org/opencb/opencga/catalog/managers/UserManagerTest.java +++ b/opencga-catalog/src/test/java/org/opencb/opencga/catalog/managers/UserManagerTest.java @@ -13,13 +13,17 @@ import org.opencb.commons.datastore.core.Query; import org.opencb.commons.datastore.core.QueryOptions; import org.opencb.opencga.TestParamConstants; +import org.opencb.opencga.catalog.db.api.OrganizationDBAdaptor; import org.opencb.opencga.catalog.db.api.UserDBAdaptor; import org.opencb.opencga.catalog.exceptions.*; +import org.opencb.opencga.catalog.utils.Constants; import org.opencb.opencga.catalog.utils.ParamUtils; import org.opencb.opencga.core.api.ParamConstants; import org.opencb.opencga.core.common.PasswordUtils; import org.opencb.opencga.core.common.TimeUtils; +import org.opencb.opencga.core.config.AuthenticationOrigin; import org.opencb.opencga.core.models.JwtPayload; +import org.opencb.opencga.core.models.organizations.OrganizationConfiguration; import org.opencb.opencga.core.models.organizations.OrganizationCreateParams; import org.opencb.opencga.core.models.organizations.OrganizationUpdateParams; import org.opencb.opencga.core.models.project.Project; @@ -660,5 +664,22 @@ public void importLdapGroups() throws 
CatalogException, IOException { catalogManager.getUserManager().importRemoteGroupOfUsers(organizationId, "ldap", remoteGroup, internalGroup, studyFqn, true, getAdminToken()); } + @Test + public void syncUsersTest() throws CatalogException { + Map actionMap = new HashMap<>(); + actionMap.put(OrganizationDBAdaptor.AUTH_ORIGINS_FIELD, ParamUtils.UpdateAction.ADD); + QueryOptions queryOptions = new QueryOptions(Constants.ACTIONS, actionMap); + + List authenticationOrigins = Collections.singletonList(new AuthenticationOrigin("CAS", + AuthenticationOrigin.AuthenticationType.SSO, null, null)); + OrganizationConfiguration organizationConfiguration = new OrganizationConfiguration() + .setAuthenticationOrigins(authenticationOrigins); + catalogManager.getOrganizationManager().updateConfiguration(organizationId, organizationConfiguration, queryOptions, orgAdminToken1); + + catalogManager.getUserManager().importRemoteGroupOfUsers(organizationId, "CAS", "opencb", "opencb", studyFqn, true, opencgaToken); + OpenCGAResult opencb = catalogManager.getStudyManager().getGroup(studyFqn, "opencb", studyAdminToken1); + assertEquals(1, opencb.getNumResults()); + assertEquals("@opencb", opencb.first().getId()); + } } From b52ca2738e62e9df3ec0bbe79a43255dc9dff53f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Fri, 8 Nov 2024 19:42:38 +0000 Subject: [PATCH 049/122] storage: Reduce number of intermediate mapper files. 
#TASK-6722 --- .../variant/io/VariantExporterDriver.java | 13 +---- .../variant/mr/StreamVariantMapper.java | 30 ++++++----- .../hadoop/variant/mr/VariantLocusKey.java | 54 +++++++++++++++++++ .../variant/mr/VariantLocusKeyTest.java | 22 ++++++++ 4 files changed, 93 insertions(+), 26 deletions(-) diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/io/VariantExporterDriver.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/io/VariantExporterDriver.java index b29287a407..7b1e96f22e 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/io/VariantExporterDriver.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/io/VariantExporterDriver.java @@ -215,24 +215,13 @@ protected void setup(Context context) throws IOException, InterruptedException { @Override protected void map(Object key, Variant value, Context context) throws IOException, InterruptedException { context.getCounter(COUNTER_GROUP_NAME, "variants").increment(1); - if (baseOutputPath == null || !consecutiveChromosomes(chromosome, value.getChromosome())) { + if (baseOutputPath == null || !VariantLocusKey.naturalConsecutiveChromosomes(chromosome, value.getChromosome())) { baseOutputPath = buildOutputKeyPrefix(NAMED_OUTPUT, value.getChromosome(), value.getStart()); chromosome = value.getChromosome(); } mos.write(NAMED_OUTPUT, value, NullWritable.get(), baseOutputPath); } - private static boolean consecutiveChromosomes(String prevChromosome, String newChromosome) { - if (newChromosome.equals(prevChromosome)) { - return true; - } - if (VariantLocusKey.isSingleDigitChromosome(prevChromosome)) { - return VariantLocusKey.isSingleDigitChromosome(newChromosome); - } else { - return 
!VariantLocusKey.isSingleDigitChromosome(newChromosome); - } - } - @Override protected void cleanup(Mapper.Context context) throws IOException, InterruptedException { super.cleanup(context); diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantMapper.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantMapper.java index 2af52990fb..b490f25150 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantMapper.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantMapper.java @@ -78,6 +78,8 @@ public class StreamVariantMapper extends VariantMapper { private long numRecordsRead = 0; private long numRecordsWritten = 0; private MultipleOutputs mos; + private String stdoutBaseOutputPath; + private String stderrBaseOutputPath; // auto-incremental number for each produced record. // These are used with the VariantLocusKey to ensure a sorted output. private int stdoutKeyNum; @@ -150,12 +152,11 @@ public void run(Context context) throws IOException, InterruptedException { // or if the chromosome changes if (processedBytes > maxInputBytesPerProcess) { LOG.info("Processed bytes = " + processedBytes + " > " + maxInputBytesPerProcess + ". Restarting process."); - restartProcess(context, "bytes_limit"); - } else if (!currentChromosome.equals(currentValue.getChromosome())) { - // TODO: Should we change only when the chromosome change would produce a partition change? 
+ restartProcess(context, "bytes_limit", false); + } else if (!VariantLocusKey.naturalConsecutiveChromosomes(currentChromosome, currentValue.getChromosome())) { LOG.info("Chromosome changed from " + currentChromosome + " to " + currentValue.getChromosome() + ". Restarting process."); - restartProcess(context, "chr_change"); + restartProcess(context, "chr_change", true); } map(context.getCurrentKey(), currentValue, context); } while (!hasExceptions() && context.nextKeyValue()); @@ -193,10 +194,10 @@ public void run(Context context) throws IOException, InterruptedException { throwExceptionIfAny(); } - private void restartProcess(Mapper.Context context, String reason) + private void restartProcess(Mapper.Context context, String reason, boolean restartOutput) throws IOException, InterruptedException, StorageEngineException { context.getCounter(COUNTER_GROUP_NAME, "restarted_process_" + reason).increment(1); - closeProcess(context); + closeProcess(context, restartOutput); startProcess(context); } @@ -267,7 +268,7 @@ private void throwExceptionIfAny() throws IOException { @Override protected void cleanup(Mapper.Context context) throws IOException, InterruptedException { - closeProcess(context); + closeProcess(context, true); dockerPruneImages(); super.cleanup(context); } @@ -300,7 +301,7 @@ protected void map(Object key, Variant value, Context context) throws IOExceptio processedBytes = stdin.size(); } - private void closeProcess(Context context) throws IOException, InterruptedException { + private void closeProcess(Context context, boolean closeOutputs) throws IOException, InterruptedException { try { if (variantDataWriter != null) { @@ -348,7 +349,8 @@ private void closeProcess(Context context) throws IOException, InterruptedExcept } try { - if (mos != null) { + // Close the MultipleOutputs if required + if (mos != null && closeOutputs) { mos.close(); mos = null; } @@ -368,8 +370,10 @@ private void startProcess(Context context) throws IOException, StorageEngineExce if 
(firstVariant == null) { firstVariant = variant.getChromosome() + ":" + variant.getStart(); } - if (multipleOutputs) { + if (multipleOutputs && mos == null) { mos = new MultipleOutputs<>(context); + stdoutBaseOutputPath = buildOutputKeyPrefix(STDOUT_NAMED_OUTPUT, currentChromosome, currentPosition); + stderrBaseOutputPath = buildOutputKeyPrefix(STDERR_NAMED_OUTPUT, currentChromosome, currentPosition); } stdoutKeyNum = 0; stderrKeyNum = 0; @@ -503,8 +507,7 @@ private void write(Text line) throws IOException, InterruptedException { VariantLocusKey locusKey = new VariantLocusKey(currentChromosome, currentPosition, StreamVariantReducer.STDOUT_KEY + (stdoutKeyNum++)); if (multipleOutputs) { - mos.write(STDOUT_NAMED_OUTPUT, locusKey, line, - buildOutputKeyPrefix(STDOUT_NAMED_OUTPUT, currentChromosome, currentPosition)); + mos.write(STDOUT_NAMED_OUTPUT, locusKey, line, stdoutBaseOutputPath); } else { context.write(locusKey, line); } @@ -592,8 +595,7 @@ private void write(Text line) throws IOException, InterruptedException { StreamVariantReducer.STDERR_KEY + (stderrKeyNum++)); if (multipleOutputs) { - mos.write(STDERR_NAMED_OUTPUT, locusKey, line, - buildOutputKeyPrefix(STDERR_NAMED_OUTPUT, currentChromosome, currentPosition)); + mos.write(STDERR_NAMED_OUTPUT, locusKey, line, stderrBaseOutputPath); } else { context.write(locusKey, line); } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/VariantLocusKey.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/VariantLocusKey.java index ce6d492612..b04f6fc601 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/VariantLocusKey.java +++ 
b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/VariantLocusKey.java @@ -35,6 +35,56 @@ public VariantLocusKey(String chromosome, int position, String other) { this.other = other; } + /** + * Check if two lexicographically ordered chromosomes are consecutive in natural order or if there + * might be other chromosomes in between. + * e.g. + * naturalConsecutiveChromosomes("1", "2") == true + * naturalConsecutiveChromosomes("1", "10") == false + * naturalConsecutiveChromosomes("1", "X") == true + * @param prevChromosome Previous chromosome + * @param newChromosome New chromosome + * @return True if the chromosomes are consecutive in natural order + */ + public static boolean naturalConsecutiveChromosomes(String prevChromosome, String newChromosome) { + if (newChromosome.equals(prevChromosome)) { + return true; + } + if (isDigitChromosome(prevChromosome)) { + // prevChromosome == 1 or 10 + if (isSingleDigitChromosome(prevChromosome)) { + // prevChromosome == 1 + if (isDigitChromosome(newChromosome)) { + // newChromosome == 2 or 10 + // 1 -> 2 : TRUE + // 1 -> 10 : FALSE + return isSingleDigitChromosome(newChromosome); + } else { + // newChromosome == X + // 1 -> X : FALSE + return false; + } + } else { + // prevChromosome == 10 + if (isDigitChromosome(newChromosome)) { + // newChromosome == 11 or 2 + // 10 -> 11 : TRUE + // 10 -> 2 : FALSE + return !isSingleDigitChromosome(newChromosome); + } else { + // newChromosome == X + // 10 -> X : FALSE + return false; + } + } + } else { + // prevChromosome == X + // X -> Y : TRUE + // X -> 1 : FALSE + return !isDigitChromosome(newChromosome); + } + } + @Override public int compareTo(VariantLocusKey o) { String chr1; @@ -69,6 +119,10 @@ public static boolean isSingleDigitChromosome(String chromosome) { return Character.isDigit(chromosome.charAt(0)) && (chromosome.length() == 1 || !Character.isDigit(chromosome.charAt(1))); } + private static boolean 
isDigitChromosome(String chromosome) { + return Character.isDigit(chromosome.charAt(0)); + } + @Override public void write(DataOutput out) throws IOException { out.writeUTF(chromosome); diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/mr/VariantLocusKeyTest.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/mr/VariantLocusKeyTest.java index 74552d1f24..0535d0e00d 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/mr/VariantLocusKeyTest.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/mr/VariantLocusKeyTest.java @@ -175,4 +175,26 @@ private static void testWriteAndRead(VariantLocusKey originalKey) throws IOExcep // Assert that the read object is equal to the original object assertEquals(originalKey, readKey); } + + @Test + public void shouldTestConsecutiveChromosomesWithAlternateConfigs() { + assertTrue(VariantLocusKey.naturalConsecutiveChromosomes("1", "1")); + assertTrue(VariantLocusKey.naturalConsecutiveChromosomes("1", "1_random")); + assertTrue(VariantLocusKey.naturalConsecutiveChromosomes("1", "2")); + assertTrue(VariantLocusKey.naturalConsecutiveChromosomes("1", "3")); + assertTrue(VariantLocusKey.naturalConsecutiveChromosomes("10", "11")); + assertFalse(VariantLocusKey.naturalConsecutiveChromosomes("1", "10")); + assertFalse(VariantLocusKey.naturalConsecutiveChromosomes("9", "10")); + assertFalse(VariantLocusKey.naturalConsecutiveChromosomes("2", "20")); + assertFalse(VariantLocusKey.naturalConsecutiveChromosomes("22", "X")); + assertFalse(VariantLocusKey.naturalConsecutiveChromosomes("1", "X")); + assertTrue(VariantLocusKey.naturalConsecutiveChromosomes("X", "Y")); + assertTrue(VariantLocusKey.naturalConsecutiveChromosomes("X", 
"Z")); + assertTrue(VariantLocusKey.naturalConsecutiveChromosomes("1_random", "1_random")); + assertTrue(VariantLocusKey.naturalConsecutiveChromosomes("1_randomA", "1_randomB")); + assertTrue(VariantLocusKey.naturalConsecutiveChromosomes("1_randomB", "1_randomA")); + assertTrue(VariantLocusKey.naturalConsecutiveChromosomes("1_random", "2_random")); + assertTrue(VariantLocusKey.naturalConsecutiveChromosomes("10_random", "11_random")); + assertFalse(VariantLocusKey.naturalConsecutiveChromosomes("1_random", "10_random")); + } } \ No newline at end of file From ad3521e0eb5d3f6265cb610a0bd2ece768623705 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Fri, 8 Nov 2024 23:04:51 +0000 Subject: [PATCH 050/122] storage: Use SNAPPY as intermediate compression algorithm. #TASK-6722 --- .../hadoop/utils/AbstractHBaseDriver.java | 106 ++++++++++++++---- .../variant/io/VariantExporterDriver.java | 10 +- .../variant/mr/StreamVariantDriver.java | 12 +- 3 files changed, 97 insertions(+), 31 deletions(-) diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/utils/AbstractHBaseDriver.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/utils/AbstractHBaseDriver.java index 685ece6c45..8fbd131b72 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/utils/AbstractHBaseDriver.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/utils/AbstractHBaseDriver.java @@ -7,16 +7,21 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configured; import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.*; +import org.apache.hadoop.fs.LocatedFileStatus; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.RemoteIterator; import 
org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.hbase.mapreduce.TableInputFormat; import org.apache.hadoop.hbase.mapreduce.TableOutputFormat; import org.apache.hadoop.io.IOUtils; +import org.apache.hadoop.io.compress.*; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapreduce.*; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; +import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; +import org.apache.hadoop.util.ReflectionUtils; import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; import org.apache.hadoop.yarn.api.records.ApplicationId; @@ -44,8 +49,6 @@ import java.util.*; import java.util.function.Supplier; import java.util.stream.Collectors; -import java.util.zip.GZIPInputStream; -import java.util.zip.GZIPOutputStream; import static org.opencb.opencga.core.common.IOUtils.humanReadableByteCount; import static org.opencb.opencga.storage.hadoop.variant.HadoopVariantStorageOptions.MR_EXECUTOR_SSH_PASSWORD; @@ -196,6 +199,11 @@ public final int run(String[] args) throws Exception { LOGGER.info(" - OutputTable : " + job.getConfiguration().get(TableOutputFormat.OUTPUT_TABLE)); } else if (StringUtils.isNotEmpty(job.getConfiguration().get(FileOutputFormat.OUTDIR))) { LOGGER.info(" - Outdir : " + job.getConfiguration().get(FileOutputFormat.OUTDIR)); + + if (TextOutputFormat.getCompressOutput(job)) { + Class compressorClass = TextOutputFormat.getOutputCompressorClass(job, GzipCodec.class); + LOGGER.info(" - Compress : " + compressorClass.getName()); + } } LOGGER.info("================================================="); reportRunningJobs(); @@ -431,18 +439,18 @@ public class MapReduceOutputFile { public MapReduceOutputFile(String tempFilePrefix) throws IOException { this.nameGenerator = () -> null; this.tempFilePrefix = tempFilePrefix; - getOutputPath(); + 
initOutputPath(); namedOutput = null; } public MapReduceOutputFile(Supplier nameGenerator, String tempFilePrefix) throws IOException { this.nameGenerator = nameGenerator; this.tempFilePrefix = tempFilePrefix; - getOutputPath(); + initOutputPath(); namedOutput = null; } - protected void getOutputPath() throws IOException { + private void initOutputPath() throws IOException { String outdirStr = getParam(OUTPUT_PARAM); if (StringUtils.isNotEmpty(outdirStr)) { outdir = new Path(outdirStr); @@ -452,7 +460,7 @@ protected void getOutputPath() throws IOException { outdir = getTempOutdir(tempFilePrefix, localOutput.getName()); outdir.getFileSystem(getConf()).deleteOnExit(outdir); } - if (localOutput != null) { + if (hasTempOutput()) { LOGGER.info(" * Output file : " + toUri(localOutput)); LOGGER.info(" * Temporary outdir : " + toUri(outdir)); } else { @@ -463,15 +471,19 @@ protected void getOutputPath() throws IOException { public void postExecute(boolean succeed) throws IOException { if (succeed) { - if (localOutput != null) { + if (hasTempOutput()) { getConcatMrOutputToLocal(); } } - if (localOutput != null) { + if (hasTempOutput()) { deleteTemporaryFile(outdir); } } + public boolean hasTempOutput() { + return localOutput != null; + } + public MapReduceOutputFile setNamedOutput(String partFilePrefix) { this.namedOutput = partFilePrefix; return this; @@ -556,27 +568,20 @@ protected List concatMrOutputToLocal(Path mrOutdir, Path localOutput, bool fileSystem.copyToLocalFile(false, paths.get(0), localOutput); } else { LOGGER.info("Concat and copy to local : " + paths.size() + " partial files"); - LOGGER.info(" Source : " + mrOutdir.toUri()); - LOGGER.info(" Target : " + localOutput.toUri()); + LOGGER.info(" Source {}: {}", getCompression(paths.get(0).getName()), mrOutdir.toUri()); + LOGGER.info(" Target {}: {}", getCompression(localOutput.getName()), localOutput.toUri()); LOGGER.info(" ---- "); - boolean isGzip = paths.get(0).getName().endsWith(".gz"); - try 
(FSDataOutputStream fsOs = localOutput.getFileSystem(getConf()).create(localOutput); - OutputStream gzOs = isGzip ? new GZIPOutputStream(fsOs) : null) { - OutputStream os = gzOs == null ? fsOs : gzOs; + + try (OutputStream os = getOutputStreamPlain(localOutput.getName(), localOutput.getFileSystem(getConf()).create(localOutput))) { for (int i = 0; i < paths.size(); i++) { Path path = paths.get(i); - LOGGER.info("[{}] Concat {} : '{}' ({}) ", + LOGGER.info("[{}] Concat {} file : '{}' ({}) ", i, - isGzip ? "gzip file" : "file", + getCompression(path.getName()), path.toUri(), humanReadableByteCount(fileSystem.getFileStatus(path).getLen(), false)); - try (FSDataInputStream fsIs = fileSystem.open(path)) { - InputStream is; - if (isGzip) { - is = new GZIPInputStream(fsIs); - } else { - is = fsIs; - } + try (InputStream isAux = getInputStream(path.getName(), fileSystem.open(path))) { + InputStream is = isAux; // Remove extra headers from all files but the first if (removeExtraHeaders && i != 0) { BufferedReader br = new BufferedReader(new InputStreamReader(is)); @@ -600,6 +605,59 @@ protected List concatMrOutputToLocal(Path mrOutdir, Path localOutput, bool return paths; } + private static String getCompression(String name) throws IOException { + if (name.endsWith(".gz")) { + return "gzip"; + } else if (name.endsWith(".snappy")) { + return "snappy"; + } else if (name.endsWith(".lz4")) { + return "lz4"; + } else if (name.endsWith(".zst")) { + return "ztandard"; + } else { + return "plain"; + } + } + + private OutputStream getOutputStreamPlain(String name, OutputStream fsOs) throws IOException { + CompressionCodec codec = getCompressionCodec(name); + if (codec == null) { + return fsOs; + } + return codec.createOutputStream(fsOs); + } + + private CompressionCodec getCompressionCodec(String name) throws IOException { + Class codecClass; + switch (getCompression(name)) { + case "gzip": + codecClass = GzipCodec.class; + break; + case "snappy": + codecClass = SnappyCodec.class; + 
break; + case "lz4": + codecClass = Lz4Codec.class; + break; + case "ztandard": + codecClass = ZStandardCodec.class; + break; + case "plain": + return null; + default: + throw new IOException("Unknown compression codec for file " + name); + } + return ReflectionUtils.newInstance(codecClass, getConf()); + } + + private InputStream getInputStream(String name, InputStream is) throws IOException { + CompressionCodec codec = getCompressionCodec(name); + if (codec == null) { + return is; + } + return codec.createInputStream(is); + } + protected final int getServersSize(String table) throws IOException { int serversSize; try (HBaseManager hBaseManager = new HBaseManager(getConf())) { diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/io/VariantExporterDriver.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/io/VariantExporterDriver.java index 7b1e96f22e..93a75006fb 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/io/VariantExporterDriver.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/io/VariantExporterDriver.java @@ -140,10 +140,12 @@ protected void setupJob(Job job) throws IOException { LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class); outputFormatClass = LazyOutputFormat.class; } - if (outputFormat.isGzip()) { - FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class); // compression - } else if (outputFormat.isSnappy()) { - FileOutputFormat.setOutputCompressorClass(job, SnappyCodec.class); // compression + if (SnappyCodec.isNativeCodeLoaded()) { + FileOutputFormat.setCompressOutput(job, true); + FileOutputFormat.setOutputCompressorClass(job, SnappyCodec.class); + } else { + 
FileOutputFormat.setCompressOutput(job, true); + FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class); } job.getConfiguration().set(VariantFileOutputFormat.VARIANT_OUTPUT_FORMAT, outputFormat.name()); job.setOutputKeyClass(Variant.class); diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantDriver.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantDriver.java index 0985b6c0f6..91ac57391d 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantDriver.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantDriver.java @@ -5,11 +5,13 @@ import org.apache.hadoop.io.compress.CompressionCodec; import org.apache.hadoop.io.compress.DeflateCodec; import org.apache.hadoop.io.compress.GzipCodec; +import org.apache.hadoop.io.compress.SnappyCodec; import org.apache.hadoop.mapred.JobContext; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.OutputFormat; import org.apache.hadoop.mapreduce.Partitioner; import org.apache.hadoop.mapreduce.Reducer; +import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat; import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs; import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; @@ -162,9 +164,13 @@ protected void setupJob(Job job) throws IOException { outputFormatClass = LazyOutputFormat.class; job.setOutputFormatClass(ValueOnlyTextOutputFormat.class); - TextOutputFormat.setCompressOutput(job, true); - TextOutputFormat.setOutputCompressorClass(job, GzipCodec.class); -// TextOutputFormat.setOutputCompressorClass(job, DeflateCodec.class); + if 
(SnappyCodec.isNativeCodeLoaded()) { + FileOutputFormat.setCompressOutput(job, true); + FileOutputFormat.setOutputCompressorClass(job, SnappyCodec.class); + } else { + FileOutputFormat.setCompressOutput(job, true); + FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class); + } job.setOutputKeyClass(keyClass); job.setOutputValueClass(valueClass); } From ab50d6ef738e4c5cd6687c07d84161e23b744a00 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Mon, 11 Nov 2024 09:08:32 +0000 Subject: [PATCH 051/122] storage: Disable flush on AbfsOutputStream. HADOOP-16548 #TASK-6722 --- .../hadoop/variant/io/CountingOutputStream.java | 5 +++++ .../variant/mr/VariantFileOutputFormat.java | 17 ++++++++++++++++- 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/io/CountingOutputStream.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/io/CountingOutputStream.java index 93f3dcd9bf..5a50d3293a 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/io/CountingOutputStream.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/io/CountingOutputStream.java @@ -30,6 +30,11 @@ public void write(byte[] b, int off, int len) throws IOException { count += len; } + @Override + public void close() throws IOException { + out.close(); + } + public long getByteCount() { return count; } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/VariantFileOutputFormat.java 
b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/VariantFileOutputFormat.java index ddff988a11..248bcc5d16 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/VariantFileOutputFormat.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/VariantFileOutputFormat.java @@ -17,8 +17,10 @@ package org.opencb.opencga.storage.hadoop.variant.mr; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.azurebfs.services.AbfsOutputStream; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.io.compress.CompressionCodec; import org.apache.hadoop.io.compress.GzipCodec; @@ -37,6 +39,7 @@ import org.opencb.opencga.storage.hadoop.variant.metadata.HBaseVariantStorageMetadataDBAdaptorFactory; import java.io.DataOutputStream; +import java.io.FilterOutputStream; import java.io.IOException; import java.io.OutputStream; @@ -69,7 +72,19 @@ public RecordWriter getRecordWriter(TaskAttemptContext jo } Path file = this.getDefaultWorkFile(job, extension); FileSystem fs = file.getFileSystem(conf); - OutputStream out = fs.create(file, false); + FSDataOutputStream fsOs = fs.create(file, false); + OutputStream out; + if (fsOs.getWrappedStream() instanceof AbfsOutputStream) { + // Disable flush on ABFS. 
See HADOOP-16548 + out = new FilterOutputStream(fsOs) { + @Override + public void flush() throws IOException { + // Do nothing + } + }; + } else { + out = fsOs; + } if (isCompressed) { out = new DataOutputStream(codec.createOutputStream(out)); } From 212f8ce32503059da25a489a9b9850584b6beeb2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Mon, 11 Nov 2024 10:34:23 +0000 Subject: [PATCH 052/122] storage: Centralize variantMapperJob initialitation. #TASK-6722 --- .../hadoop/utils/AbstractHBaseDriver.java | 9 ++++ .../analysis/julie/JulieToolDriver.java | 2 +- .../hadoop/variant/io/VariantDriver.java | 35 +----------- .../AbstractHBaseVariantTableInputFormat.java | 1 + .../mr/SampleIndexTableRecordReader.java | 8 +++ .../variant/mr/VariantMapReduceUtil.java | 53 ++++++++++--------- 6 files changed, 48 insertions(+), 60 deletions(-) diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/utils/AbstractHBaseDriver.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/utils/AbstractHBaseDriver.java index 8fbd131b72..d2d7a682eb 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/utils/AbstractHBaseDriver.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/utils/AbstractHBaseDriver.java @@ -37,6 +37,7 @@ import org.opencb.opencga.storage.core.exceptions.StorageEngineException; import org.opencb.opencga.storage.hadoop.io.HDFSIOConnector; import org.opencb.opencga.storage.hadoop.variant.executors.SshMRExecutor; +import org.opencb.opencga.storage.hadoop.variant.mr.AbstractHBaseVariantTableInputFormat; import org.opencb.opencga.storage.hadoop.variant.mr.VariantMapReduceUtil; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -170,6 +171,14 @@ public final 
int run(String[] args) throws Exception { LOGGER.info(" * InputFormat : " + job.getInputFormatClass().getName()); if (StringUtils.isNotEmpty(job.getConfiguration().get(TableInputFormat.INPUT_TABLE))) { LOGGER.info(" - InputTable : " + job.getConfiguration().get(TableInputFormat.INPUT_TABLE)); + if (job.getConfiguration().getBoolean(AbstractHBaseVariantTableInputFormat.USE_SAMPLE_INDEX_TABLE_INPUT_FORMAT, false)) { + String sampleIndexTable = job.getConfiguration().get(AbstractHBaseVariantTableInputFormat.SAMPLE_INDEX_TABLE); + if (StringUtils.isNotEmpty(sampleIndexTable)) { + LOGGER.info(" - SecondarySampleIndexTable : " + sampleIndexTable); + } else { + LOGGER.info(" - SecondarySampleIndexTable : (not set)"); + } + } } else if (StringUtils.isNotEmpty(job.getConfiguration().get(PhoenixConfigurationUtil.INPUT_TABLE_NAME))) { LOGGER.info(" - InputPTable : " + job.getConfiguration().get(PhoenixConfigurationUtil.INPUT_TABLE_NAME)); } else if (StringUtils.isNotEmpty(job.getConfiguration().get(FileInputFormat.INPUT_DIR))) { diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/analysis/julie/JulieToolDriver.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/analysis/julie/JulieToolDriver.java index 5192e7356f..25b80a7ef2 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/analysis/julie/JulieToolDriver.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/analysis/julie/JulieToolDriver.java @@ -104,7 +104,7 @@ protected Job setupJob(Job job, String archiveTable, String variantTable) throws VariantMapReduceUtil.configureMapReduceScan(scan, getConf()); logger.info("Scan: " + scan); - VariantMapReduceUtil.initVariantRowMapperJobFromHBase(job, 
variantTable, scan, JulieToolMapper.class, false); + VariantMapReduceUtil.initVariantRowMapperJobFromHBase(job, variantTable, scan, JulieToolMapper.class); VariantMapReduceUtil.setOutputHBaseTable(job, variantTable); VariantMapReduceUtil.setNoneReduce(job); VariantMapReduceUtil.setNoneTimestamp(job); diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/io/VariantDriver.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/io/VariantDriver.java index 7a2324e17f..4a78cb70f6 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/io/VariantDriver.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/io/VariantDriver.java @@ -1,7 +1,6 @@ package org.opencb.opencga.storage.hadoop.variant.io; import org.apache.commons.lang3.StringUtils; -import org.apache.hadoop.hbase.client.Scan; import org.apache.hadoop.mapred.JobContext; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.OutputFormat; @@ -12,21 +11,13 @@ import org.opencb.commons.datastore.core.QueryOptions; import org.opencb.opencga.storage.core.exceptions.StorageEngineException; import org.opencb.opencga.storage.core.variant.adaptors.VariantQueryParam; -import org.opencb.opencga.storage.core.variant.query.ParsedVariantQuery; -import org.opencb.opencga.storage.core.variant.query.VariantQueryParser; import org.opencb.opencga.storage.hadoop.variant.AbstractVariantsTableDriver; -import org.opencb.opencga.storage.hadoop.variant.HadoopVariantQueryParser; -import org.opencb.opencga.storage.hadoop.variant.adaptors.VariantHBaseQueryParser; -import org.opencb.opencga.storage.hadoop.variant.adaptors.phoenix.VariantSqlQueryParser; -import 
org.opencb.opencga.storage.hadoop.variant.index.sample.SampleIndexDBAdaptor; -import org.opencb.opencga.storage.hadoop.variant.index.sample.SampleIndexQueryParser; import org.opencb.opencga.storage.hadoop.variant.mr.VariantMapReduceUtil; import org.opencb.opencga.storage.hadoop.variant.mr.VariantMapper; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.IOException; -import java.util.List; import java.util.Map; import static org.opencb.opencga.storage.hadoop.variant.mr.VariantMapReduceUtil.getQueryFromConfig; @@ -110,31 +101,7 @@ protected final Job setupJob(Job job, String archiveTable, String variantTable) VariantMapReduceUtil.setNoneReduce(job); } - VariantQueryParser variantQueryParser = new HadoopVariantQueryParser(null, getMetadataManager()); - ParsedVariantQuery variantQuery = variantQueryParser.parseQuery(query, options); - Query query = variantQuery.getQuery(); - if (VariantHBaseQueryParser.isSupportedQuery(query)) { - logger.info("Init MapReduce job reading from HBase"); - boolean useSampleIndex = !getConf().getBoolean("skipSampleIndex", false) && SampleIndexQueryParser.validSampleIndexQuery(query); - if (useSampleIndex) { - // Remove extra fields from the query - new SampleIndexDBAdaptor(getHBaseManager(), getTableNameGenerator(), getMetadataManager()).parseSampleIndexQuery(query); - - logger.info("Use sample index to read from HBase"); - } - - VariantHBaseQueryParser parser = new VariantHBaseQueryParser(getMetadataManager()); - List scans = parser.parseQueryMultiRegion(variantQuery, options); - VariantMapReduceUtil.configureMapReduceScans(scans, getConf()); - - VariantMapReduceUtil.initVariantMapperJobFromHBase(job, variantTable, scans, mapperClass, useSampleIndex); - } else { - logger.info("Init MapReduce job reading from Phoenix"); - String sql = new VariantSqlQueryParser(variantTable, getMetadataManager(), getHelper().getConf()) - .parse(variantQuery, options); - - VariantMapReduceUtil.initVariantMapperJobFromPhoenix(job, 
variantTable, sql, mapperClass); - } + VariantMapReduceUtil.initVariantMapperJob(job, mapperClass, variantTable, getMetadataManager(), query, options, false); setNoneTimestamp(job); diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/AbstractHBaseVariantTableInputFormat.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/AbstractHBaseVariantTableInputFormat.java index a1d8bc1695..35965e8d87 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/AbstractHBaseVariantTableInputFormat.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/AbstractHBaseVariantTableInputFormat.java @@ -14,6 +14,7 @@ public abstract class AbstractHBaseVariantTableInputFormat extends TransformInputFormat { + public static final String SAMPLE_INDEX_TABLE = "hbase_variant_table_input_format.sample_index_table"; public static final String USE_SAMPLE_INDEX_TABLE_INPUT_FORMAT = "hbase_variant_table_input_format.use_sample_index_table_input_format"; public static final String MULTI_SCANS = "hbase_variant_table_input_format.multi_scans"; private Function converter; diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/SampleIndexTableRecordReader.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/SampleIndexTableRecordReader.java index 7e747813b0..06df8d3015 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/SampleIndexTableRecordReader.java +++ 
b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/SampleIndexTableRecordReader.java @@ -72,6 +72,14 @@ public SampleIndexTableRecordReader(Configuration conf) { Query query = VariantMapReduceUtil.getQueryFromConfig(conf); sampleIndexQuery = sampleIndexDBAdaptor.parseSampleIndexQuery(query); + String sampleIndexTableName = sampleIndexDBAdaptor.getSampleIndexTableName(sampleIndexQuery); + String expectedSampleIndexTable = conf.get(AbstractHBaseVariantTableInputFormat.SAMPLE_INDEX_TABLE); + if (expectedSampleIndexTable != null) { + if (!expectedSampleIndexTable.equals(sampleIndexTableName)) { + throw new IllegalArgumentException("Expected SampleIndexTable " + + expectedSampleIndexTable + " but got " + sampleIndexTableName); + } + } StudyMetadata studyMetadata = metadataManager.getStudyMetadata(sampleIndexQuery.getStudy()); allChromosomes = new TreeSet<>(VariantPhoenixKeyFactory.HBASE_KEY_CHROMOSOME_COMPARATOR); diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/VariantMapReduceUtil.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/VariantMapReduceUtil.java index 0c3ab30a69..afaa220977 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/VariantMapReduceUtil.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/VariantMapReduceUtil.java @@ -31,6 +31,8 @@ import org.opencb.opencga.core.config.storage.SampleIndexConfiguration; import org.opencb.opencga.storage.core.metadata.VariantStorageMetadataManager; import org.opencb.opencga.storage.core.variant.adaptors.VariantQueryParam; +import org.opencb.opencga.storage.core.variant.query.ParsedVariantQuery; +import 
org.opencb.opencga.storage.core.variant.query.VariantQueryParser; import org.opencb.opencga.storage.core.variant.query.VariantQueryUtils; import org.opencb.opencga.storage.hadoop.utils.AbstractHBaseDriver; import org.opencb.opencga.storage.hadoop.variant.AbstractVariantsTableDriver; @@ -158,7 +160,10 @@ public static void initTableMapperJobFromPhoenix(Job job, String variantTable, S public static void initVariantMapperJob(Job job, Class mapperClass, String variantTable, VariantStorageMetadataManager metadataManager, Query query, QueryOptions queryOptions, boolean skipSampleIndex) throws IOException { - query = new HadoopVariantQueryParser(null, metadataManager).preProcessQuery(query, queryOptions); + VariantQueryParser variantQueryParser = new HadoopVariantQueryParser(null, metadataManager); + ParsedVariantQuery variantQuery = variantQueryParser.parseQuery(query, queryOptions); + query = variantQuery.getQuery(); + queryOptions = variantQuery.getInputOptions(); setQuery(job, query); setQueryOptions(job, queryOptions); @@ -166,26 +171,29 @@ public static void initVariantMapperJob(Job job, Class LOGGER.info("Init MapReduce job reading from HBase"); boolean useSampleIndex = !skipSampleIndex && SampleIndexQueryParser.validSampleIndexQuery(query); + String sampleIndexTable = null; if (useSampleIndex) { Object regions = query.get(VariantQueryParam.REGION.key()); Object geneRegions = query.get(VariantQueryUtils.ANNOT_GENE_REGIONS.key()); // Remove extra fields from the query - SampleIndexQuery sampleIndexQuery = new SampleIndexDBAdaptor(null, null, metadataManager).parseSampleIndexQuery(query); + SampleIndexDBAdaptor sampleIndexDBAdaptor = new SampleIndexDBAdaptor(null, null, metadataManager); + SampleIndexQuery sampleIndexQuery = sampleIndexDBAdaptor.parseSampleIndexQuery(query); setSampleIndexConfiguration(job, sampleIndexQuery.getSchema().getConfiguration(), sampleIndexQuery.getSchema().getVersion()); + sampleIndexTable = 
sampleIndexDBAdaptor.getSampleIndexTableName(sampleIndexQuery); // Preserve regions and gene_regions query.put(VariantQueryParam.REGION.key(), regions); query.put(VariantQueryUtils.ANNOT_GENE_REGIONS.key(), geneRegions); - LOGGER.info("Use sample index to read from HBase"); + LOGGER.info("Use sample index to read from HBase from table '{}'", sampleIndexTable); } VariantHBaseQueryParser parser = new VariantHBaseQueryParser(metadataManager); - List scans = parser.parseQueryMultiRegion(query, queryOptions); + List scans = parser.parseQueryMultiRegion(variantQuery, queryOptions); configureMapReduceScans(scans, job.getConfiguration()); - initVariantMapperJobFromHBase(job, variantTable, scans, mapperClass, useSampleIndex); + initVariantMapperJobFromHBase(job, variantTable, scans, mapperClass, useSampleIndex, sampleIndexTable); int i = 0; for (Scan scan : scans) { @@ -194,7 +202,7 @@ public static void initVariantMapperJob(Job job, Class } else { LOGGER.info("Init MapReduce job reading from Phoenix"); String sql = new VariantSqlQueryParser(variantTable, metadataManager, job.getConfiguration()) - .parse(query, queryOptions); + .parse(variantQuery, queryOptions); initVariantMapperJobFromPhoenix(job, variantTable, sql, mapperClass); } @@ -203,17 +211,12 @@ public static void initVariantMapperJob(Job job, Class public static void initVariantMapperJobFromHBase(Job job, String variantTableName, Scan scan, Class variantMapperClass) throws IOException { - initVariantMapperJobFromHBase(job, variantTableName, scan, variantMapperClass, false); - } - - public static void initVariantMapperJobFromHBase(Job job, String variantTableName, Scan scan, - Class variantMapperClass, boolean useSampleIndex) - throws IOException { - initVariantMapperJobFromHBase(job, variantTableName, Collections.singletonList(scan), variantMapperClass, useSampleIndex); + initVariantMapperJobFromHBase(job, variantTableName, Collections.singletonList(scan), variantMapperClass, false, null); } public static void 
initVariantMapperJobFromHBase(Job job, String variantTableName, List scans, - Class variantMapperClass, boolean useSampleIndex) + Class variantMapperClass, boolean useSampleIndex, + String sampleIndexTable) throws IOException { initTableMapperJob(job, variantTableName, scans, TableMapper.class); @@ -223,6 +226,7 @@ public static void initVariantMapperJobFromHBase(Job job, String variantTableNam job.setInputFormatClass(HBaseVariantTableInputFormat.class); job.getConfiguration().setBoolean(HBaseVariantTableInputFormat.MULTI_SCANS, scans.size() > 1); job.getConfiguration().setBoolean(HBaseVariantTableInputFormat.USE_SAMPLE_INDEX_TABLE_INPUT_FORMAT, useSampleIndex); + job.getConfiguration().set(HBaseVariantTableInputFormat.SAMPLE_INDEX_TABLE, sampleIndexTable); } public static void initVariantMapperJobFromPhoenix(Job job, VariantHadoopDBAdaptor dbAdaptor, @@ -279,21 +283,24 @@ public static void initVariantRowMapperJob(Job job, Class scans = parser.parseQueryMultiRegion(query, queryOptions); configureMapReduceScans(scans, job.getConfiguration()); - initVariantRowMapperJobFromHBase(job, variantTable, scans, mapperClass, useSampleIndex); + initVariantRowMapperJobFromHBase(job, variantTable, scans, mapperClass, useSampleIndex, sampleIndexTable); int i = 0; for (Scan scan : scans) { @@ -328,17 +335,12 @@ public static void setObjectMap(Job job, ObjectMap objectMap) { public static void initVariantRowMapperJobFromHBase(Job job, String variantTableName, Scan scan, Class variantMapperClass) throws IOException { - initVariantRowMapperJobFromHBase(job, variantTableName, scan, variantMapperClass, false); - } - - public static void initVariantRowMapperJobFromHBase(Job job, String variantTableName, Scan scan, - Class variantMapperClass, boolean useSampleIndex) - throws IOException { - initVariantRowMapperJobFromHBase(job, variantTableName, Collections.singletonList(scan), variantMapperClass, useSampleIndex); + initVariantRowMapperJobFromHBase(job, variantTableName, 
Collections.singletonList(scan), variantMapperClass, false, null); } public static void initVariantRowMapperJobFromHBase(Job job, String variantTableName, List scans, - Class variantMapperClass, boolean useSampleIndex) + Class variantMapperClass, boolean useSampleIndex, + String sampleIndexTable) throws IOException { initTableMapperJob(job, variantTableName, scans, TableMapper.class); @@ -348,6 +350,7 @@ public static void initVariantRowMapperJobFromHBase(Job job, String variantTable job.setInputFormatClass(HBaseVariantRowTableInputFormat.class); job.getConfiguration().setBoolean(HBaseVariantRowTableInputFormat.MULTI_SCANS, scans.size() > 1); job.getConfiguration().setBoolean(HBaseVariantRowTableInputFormat.USE_SAMPLE_INDEX_TABLE_INPUT_FORMAT, useSampleIndex); + job.getConfiguration().set(HBaseVariantRowTableInputFormat.SAMPLE_INDEX_TABLE, sampleIndexTable); } public static void initVariantRowMapperJobFromPhoenix(Job job, VariantHadoopDBAdaptor dbAdaptor, From 2a39303f1e39c0a980adffbd877748baf44e6b94 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Tue, 12 Nov 2024 10:09:58 +0000 Subject: [PATCH 053/122] storage: Fix NoClassDefFoundError tephra. 
#TASK-7194 #TASK-6722 --- opencga-app/app/conf/opencga-env.sh | 4 +- .../opencga-storage-hadoop-core/pom.xml | 6 +++ .../hadoop/utils/AbstractHBaseDriver.java | 1 + .../variant/AbstractVariantsTableDriver.java | 12 ++--- .../variant/mr/VariantMapReduceUtil.java | 45 ++++++++++--------- 5 files changed, 41 insertions(+), 27 deletions(-) diff --git a/opencga-app/app/conf/opencga-env.sh b/opencga-app/app/conf/opencga-env.sh index d2aa76b11e..475dddb8f3 100644 --- a/opencga-app/app/conf/opencga-env.sh +++ b/opencga-app/app/conf/opencga-env.sh @@ -104,6 +104,8 @@ if ( ls "${BASEDIR}"/libs/opencga-storage-hadoop-core-*.jar >/dev/null 2>&1 ) ; jackson=$(find "${BASEDIR}/libs/" -name "jackson-*-2.[0-9].[0-9].jar" | tr "\n" ":") proto=$(find "${BASEDIR}/libs/" -name "protobuf-java-*.jar" | tr "\n" ":") avro=$(find "${BASEDIR}/libs/" -name "avro-*.jar" | tr "\n" ":") - export HADOOP_CLASSPATH="${phoenix}:${proto}:${avro}:${jackson}:${CLASSPATH_PREFIX}" + tephra=$(find ${BASEDIR}/libs/ -name "tephra-*.jar" | tr "\n" ":") + disruptor=$(find ${BASEDIR}/libs/ -name "disruptor-*.jar" | tr "\n" ":") + export HADOOP_CLASSPATH="${phoenix}:${proto}:${avro}:${jackson}:${tephra}:${disruptor}:${CLASSPATH_PREFIX}" export HADOOP_USER_CLASSPATH_FIRST=true fi \ No newline at end of file diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/pom.xml index ddc5d8e979..a024b4ec3f 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/pom.xml @@ -441,6 +441,12 @@ commons-lang:commons-lang + + org.apache.tephra:tephra-core + + + com.lmax:disruptor + diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/utils/AbstractHBaseDriver.java 
b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/utils/AbstractHBaseDriver.java index d2d7a682eb..b374703990 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/utils/AbstractHBaseDriver.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/utils/AbstractHBaseDriver.java @@ -215,6 +215,7 @@ public final int run(String[] args) throws Exception { } } LOGGER.info("================================================="); + LOGGER.info("tmpjars=" + Arrays.toString(job.getConfiguration().getStrings("tmpjars"))); reportRunningJobs(); boolean succeed = executeJob(job); if (!succeed) { diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/AbstractVariantsTableDriver.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/AbstractVariantsTableDriver.java index 35305dd4fd..356df234d3 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/AbstractVariantsTableDriver.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/AbstractVariantsTableDriver.java @@ -81,11 +81,6 @@ protected void parseAndValidateParameters() throws IOException { String archiveTable = getArchiveTable(); String variantTable = getVariantsTable(); - int maxKeyValueSize = conf.getInt(HadoopVariantStorageOptions.MR_HBASE_KEYVALUE_SIZE_MAX.key(), - HadoopVariantStorageOptions.MR_HBASE_KEYVALUE_SIZE_MAX.defaultValue()); - logger.info("HBASE: set " + ConnectionConfiguration.MAX_KEYVALUE_SIZE_KEY + " to " + maxKeyValueSize); - conf.setInt(ConnectionConfiguration.MAX_KEYVALUE_SIZE_KEY, 
maxKeyValueSize); // always overwrite server default (usually 1MB) - /* -------------------------------*/ // Validate parameters CHECK // if (StringUtils.isEmpty(archiveTable)) { @@ -114,11 +109,16 @@ protected void parseAndValidateParameters() throws IOException { checkTablesExist(hBaseManager, variantTable); } + int maxKeyValueSize = conf.getInt(HadoopVariantStorageOptions.MR_HBASE_KEYVALUE_SIZE_MAX.key(), + HadoopVariantStorageOptions.MR_HBASE_KEYVALUE_SIZE_MAX.defaultValue()); + logger.info("HBASE: set " + ConnectionConfiguration.MAX_KEYVALUE_SIZE_KEY + " to " + maxKeyValueSize); + conf.setInt(ConnectionConfiguration.MAX_KEYVALUE_SIZE_KEY, maxKeyValueSize); // always overwrite server default (usually 1MB) + // Increase the ScannerTimeoutPeriod to avoid ScannerTimeoutExceptions // See opencb/opencga#352 for more info. int scannerTimeout = getConf().getInt(HadoopVariantStorageOptions.MR_HBASE_SCANNER_TIMEOUT.key(), getConf().getInt(HConstants.HBASE_CLIENT_SCANNER_TIMEOUT_PERIOD, HConstants.DEFAULT_HBASE_CLIENT_SCANNER_TIMEOUT_PERIOD)); - logger.info("Set Scanner timeout to " + scannerTimeout + " ..."); + logger.info("HBASE: set Scanner timeout to " + scannerTimeout + " ..."); conf.setInt(HConstants.HBASE_CLIENT_SCANNER_TIMEOUT_PERIOD, scannerTimeout); } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/VariantMapReduceUtil.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/VariantMapReduceUtil.java index afaa220977..d6949e460d 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/VariantMapReduceUtil.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/VariantMapReduceUtil.java @@ -3,6 +3,7 @@ import 
com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; +import com.lmax.disruptor.EventFactory; import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.client.Mutation; @@ -21,7 +22,9 @@ import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.TaskType; +import org.apache.hadoop.mapreduce.lib.db.DBWritable; import org.apache.phoenix.mapreduce.util.PhoenixMapReduceUtil; +import org.apache.tephra.TransactionSystemClient; import org.opencb.commons.datastore.core.ObjectMap; import org.opencb.commons.datastore.core.Query; import org.opencb.commons.datastore.core.QueryOptions; @@ -47,6 +50,7 @@ import org.opencb.opencga.storage.hadoop.variant.index.sample.SampleIndexDBAdaptor; import org.opencb.opencga.storage.hadoop.variant.index.sample.SampleIndexQueryParser; import org.opencb.opencga.storage.hadoop.variant.index.sample.SampleIndexSchema; +import org.opencb.opencga.storage.hadoop.variant.utils.HBaseVariantTableNameGenerator; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -147,16 +151,6 @@ public static void initTableMapperJob(Job job, String inTable, List scans, } } - public static void initTableMapperJobFromPhoenix(Job job, String variantTable, String sql, - Class mapper) { - job.setInputFormatClass(CustomPhoenixInputFormat.class); - - LOGGER.info(sql); - PhoenixMapReduceUtil.setInput(job, ExposedResultSetDBWritable.class, variantTable, sql); - job.setMapperClass(mapper); - - } - public static void initVariantMapperJob(Job job, Class mapperClass, String variantTable, VariantStorageMetadataManager metadataManager, Query query, QueryOptions queryOptions, boolean skipSampleIndex) throws IOException { @@ -251,12 +245,8 @@ public static void initVariantMapperJobFromPhoenix(Job job, String variantTableN Class variantMapperClass) throws IOException { // VariantDBWritable is the DBWritable 
class that enables us to process the Result of the query - PhoenixMapReduceUtil.setInput(job, PhoenixVariantTableInputFormat.VariantDBWritable.class, variantTableName, sqlQuery); - - LOGGER.info(sqlQuery); - job.setMapperClass(variantMapperClass); - - job.setInputFormatClass(PhoenixVariantTableInputFormat.class); + initVariantMapperJobFromPhoenix(job, variantTableName, sqlQuery, variantMapperClass, + PhoenixVariantTableInputFormat.VariantDBWritable.class, PhoenixVariantTableInputFormat.class); } public static void initVariantRowMapperJob(Job job, Class mapperClass, String variantTable, @@ -374,13 +364,28 @@ public static void initVariantRowMapperJobFromPhoenix(Job job, VariantHadoopDBAd public static void initVariantRowMapperJobFromPhoenix(Job job, String variantTableName, String sqlQuery, Class variantMapperClass) throws IOException { + initVariantMapperJobFromPhoenix(job, variantTableName, sqlQuery, variantMapperClass, + ExposedResultSetDBWritable.class, PhoenixVariantRowTableInputFormat.class); + } - LOGGER.info(sqlQuery); - // VariantDBWritable is the DBWritable class that enables us to process the Result of the query - PhoenixMapReduceUtil.setInput(job, ExposedResultSetDBWritable.class, variantTableName, sqlQuery); + private static void initVariantMapperJobFromPhoenix(Job job, String variantTableName, String sqlQuery, + Class variantMapperClass, Class inputClass, + Class inputFormatClass) throws IOException { + boolean addDependencyJar = job.getConfiguration().getBoolean( + HadoopVariantStorageOptions.MR_ADD_DEPENDENCY_JARS.key(), + HadoopVariantStorageOptions.MR_ADD_DEPENDENCY_JARS.defaultValue()); + if (addDependencyJar) { + TableMapReduceUtil.addDependencyJars(job); + TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(), + TransactionSystemClient.class, + EventFactory.class); + } + + LOGGER.info(sqlQuery); + PhoenixMapReduceUtil.setInput(job, inputClass, variantTableName, sqlQuery); job.setMapperClass(variantMapperClass); - 
job.setInputFormatClass(PhoenixVariantRowTableInputFormat.class); + job.setInputFormatClass(inputFormatClass); } public static void setNoneReduce(Job job) throws IOException { From ae26598d606a50002b357668f075d3d130111ae8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Tue, 12 Nov 2024 10:13:56 +0000 Subject: [PATCH 054/122] storage: Fix NPE exporting from sampleindex. #TASK-6722 --- .../storage/core/variant/adaptors/VariantField.java | 9 +++++++-- .../storage/hadoop/variant/mr/VariantMapReduceUtil.java | 8 ++++++-- .../variant/utils/HBaseVariantTableNameGenerator.java | 5 +++++ 3 files changed, 18 insertions(+), 4 deletions(-) diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/adaptors/VariantField.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/adaptors/VariantField.java index 8c208b1cd3..72abce21c7 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/adaptors/VariantField.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/adaptors/VariantField.java @@ -16,6 +16,7 @@ package org.opencb.opencga.storage.core.variant.adaptors; +import org.apache.solr.common.StringUtils; import org.opencb.commons.datastore.core.QueryOptions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -197,11 +198,11 @@ public static Set getIncludeFields(QueryOptions options) { } List includeList = options.getAsStringList(QueryOptions.INCLUDE); - if (includeList != null && !includeList.isEmpty()) { + if (!isEmpty(includeList)) { includeFields = parseInclude(includeList); } else { List excludeList = options.getAsStringList(QueryOptions.EXCLUDE); - if (excludeList != null && !excludeList.isEmpty()) { + if (!isEmpty(excludeList)) { includeFields = parseExclude(excludeList); } else { includeFields = new HashSet<>(Arrays.asList(values())); @@ -214,6 +215,10 
@@ public static Set getIncludeFields(QueryOptions options) { return includeFields; } + private static boolean isEmpty(List stringList) { + return stringList == null || stringList.isEmpty() || (stringList.size() == 1 && StringUtils.isEmpty(stringList.get(0))); + } + public static Set parseInclude(String... includeList) { return parseInclude(Arrays.asList(includeList)); } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/VariantMapReduceUtil.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/VariantMapReduceUtil.java index d6949e460d..c55dfa6282 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/VariantMapReduceUtil.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/VariantMapReduceUtil.java @@ -170,7 +170,9 @@ public static void initVariantMapperJob(Job job, Class Object regions = query.get(VariantQueryParam.REGION.key()); Object geneRegions = query.get(VariantQueryUtils.ANNOT_GENE_REGIONS.key()); // Remove extra fields from the query - SampleIndexDBAdaptor sampleIndexDBAdaptor = new SampleIndexDBAdaptor(null, null, metadataManager); + HBaseVariantTableNameGenerator tableNameGenerator = HBaseVariantTableNameGenerator + .fromVariantsTable(variantTable, job.getConfiguration()); + SampleIndexDBAdaptor sampleIndexDBAdaptor = new SampleIndexDBAdaptor(null, tableNameGenerator, metadataManager); SampleIndexQuery sampleIndexQuery = sampleIndexDBAdaptor.parseSampleIndexQuery(query); setSampleIndexConfiguration(job, sampleIndexQuery.getSchema().getConfiguration(), @@ -276,7 +278,9 @@ public static void initVariantRowMapperJob(Job job, Class Date: Fri, 15 Nov 2024 12:49:24 +0100 Subject: [PATCH 055/122] app: ensure all jobs have an 
attributes field, #TASK-7207 --- .../migrations/v3/v3_0_0/OrganizationMigration.java | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/opencga-app/src/main/java/org/opencb/opencga/app/migrations/v3/v3_0_0/OrganizationMigration.java b/opencga-app/src/main/java/org/opencb/opencga/app/migrations/v3/v3_0_0/OrganizationMigration.java index 0ff69f25a0..cf9675bfd7 100644 --- a/opencga-app/src/main/java/org/opencb/opencga/app/migrations/v3/v3_0_0/OrganizationMigration.java +++ b/opencga-app/src/main/java/org/opencb/opencga/app/migrations/v3/v3_0_0/OrganizationMigration.java @@ -579,9 +579,15 @@ private void changeFqns() throws CatalogDBException, MigrationException { )) ); + // Ensure all jobs have attributes field + Bson jobQuery = Filters.eq("attributes", null); + Bson update = new Document("$set", new Document("attributes", new Document())); + jobCollection.updateMany(jobQuery, update); + jobDeletedCollection.updateMany(jobQuery, update); + // Change fqn in all jobs that were pointing to this study - Bson jobQuery = Filters.eq("studyUid", studyUid); - Bson update = new Document("$set", new Document() + jobQuery = Filters.eq("studyUid", studyUid); + update = new Document("$set", new Document() .append("study.id", newFqn) .append("attributes.OPENCGA.3_0_0", new Document() .append("date", date) From d30dac985e292e01f3410c0e0fc8bfdfcd559db9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Fri, 15 Nov 2024 15:08:21 +0000 Subject: [PATCH 056/122] app: Always rename studies from storage when changing organization id. 
#TASK-7207 --- .../v3/v3_0_0/OrganizationMigration.java | 58 +++++++++++-------- .../core/models/project/DataStore.java | 1 + .../hadoop/app/VariantMetadataMain.java | 5 ++ 3 files changed, 40 insertions(+), 24 deletions(-) diff --git a/opencga-app/src/main/java/org/opencb/opencga/app/migrations/v3/v3_0_0/OrganizationMigration.java b/opencga-app/src/main/java/org/opencb/opencga/app/migrations/v3/v3_0_0/OrganizationMigration.java index cf9675bfd7..9d6fb0b73c 100644 --- a/opencga-app/src/main/java/org/opencb/opencga/app/migrations/v3/v3_0_0/OrganizationMigration.java +++ b/opencga-app/src/main/java/org/opencb/opencga/app/migrations/v3/v3_0_0/OrganizationMigration.java @@ -7,6 +7,7 @@ import org.apache.commons.lang3.StringUtils; import org.bson.Document; import org.bson.conversions.Bson; +import org.opencb.commons.datastore.core.ObjectMap; import org.opencb.commons.datastore.mongodb.MongoDataStore; import org.opencb.commons.utils.CryptoUtils; import org.opencb.commons.utils.FileUtils; @@ -27,6 +28,7 @@ import org.opencb.opencga.catalog.utils.FqnUtils; import org.opencb.opencga.catalog.utils.ParamUtils; import org.opencb.opencga.core.api.ParamConstants; +import org.opencb.opencga.core.common.JacksonUtils; import org.opencb.opencga.core.common.TimeUtils; import org.opencb.opencga.core.config.AuthenticationOrigin; import org.opencb.opencga.core.config.Configuration; @@ -510,38 +512,46 @@ private void changeFqns() throws CatalogDBException, MigrationException { Document datastores = internal.get("datastores", Document.class); if (datastores != null) { Document variant = datastores.get("variant", Document.class); + DataStore dataStore; + boolean updateVariant = false; if (variant == null) { - DataStore dataStore = VariantStorageManager.defaultDataStore(configuration.getDatabasePrefix(), oldProjectFqn); - logger.info("Undefined variant \"internal.datastores.variant\" at project '{}'.", oldProjectFqn); - - // Update only if the project exists in the variant storage - try 
(VariantStorageEngine variantStorageEngine = storageEngineFactory - .getVariantStorageEngine(dataStore.getStorageEngine(), dataStore.getDbName())) { - if (variantStorageEngine.getMetadataManager().exists()) { - logger.info("Project exists in the variant storage. Setting variant data store: {}", dataStore); + dataStore = VariantStorageManager.defaultDataStore(configuration.getDatabasePrefix(), oldProjectFqn); + updateVariant = true; + } else { + dataStore = JacksonUtils.getDefaultObjectMapper().convertValue(variant, DataStore.class); + } + // Update only if the project exists in the variant storage + try (VariantStorageEngine variantStorageEngine = storageEngineFactory + .getVariantStorageEngine(dataStore.getStorageEngine(), dataStore.getDbName())) { + logger.info("Project '{}' exists in the variant storage.", oldProjectFqn); + if (variantStorageEngine.getMetadataManager().exists()) { + if (updateVariant) { + logger.info("Undefined variant \"internal.datastores.variant\" at project '{}'.", oldProjectFqn); set.append("internal.datastores.variant", new Document() .append("storageEngine", dataStore.getStorageEngine()) .append("dbName", dataStore.getDbName()) .append("options", new Document())); - - for (String oldStudyFqn : variantStorageEngine.getMetadataManager().getStudies().keySet()) { - String newStudyFqn = FqnUtils.buildFqn(this.organizationId, projectId, FqnUtils.parse(oldStudyFqn).getStudy()); - logger.info("Changing study fqn from '{}' to '{}'", oldStudyFqn, newStudyFqn); - variantStorageEngine.getMetadataManager().updateStudyMetadata(oldStudyFqn, studyMetadata -> { - studyMetadata.setName(newStudyFqn); - studyMetadata.getAttributes().put("OPENCGA.3_0_0", new Document() - .append("date", date) - .append("oldFqn", oldStudyFqn) - ); - }); - } } else { - logger.info("Project does not exist in the variant storage. 
Skipping"); + logger.info("Datastore variant at project '{}': {}", oldProjectFqn, datastores); } - } catch (StorageEngineException | IOException e) { - throw new RuntimeException(e); - } + for (String oldStudyFqn : variantStorageEngine.getMetadataManager().getStudies().keySet()) { + String study = FqnUtils.parse(oldStudyFqn).getStudy(); + String newStudyFqn = FqnUtils.buildFqn(this.organizationId, projectId, study); + logger.info("Changing study fqn from '{}' to '{}'", oldStudyFqn, newStudyFqn); + variantStorageEngine.getMetadataManager().updateStudyMetadata(oldStudyFqn, studyMetadata -> { + studyMetadata.setName(newStudyFqn); + studyMetadata.getAttributes().put("OPENCGA_3_0_0", new ObjectMap() + .append("date", date) + .append("oldFqn", oldStudyFqn) + ); + }); + } + } else { + logger.info("Project '{}' does not exist in the variant storage. Skipping", oldProjectFqn); + } + } catch (StorageEngineException | IOException e) { + throw new RuntimeException(e); } } } diff --git a/opencga-core/src/main/java/org/opencb/opencga/core/models/project/DataStore.java b/opencga-core/src/main/java/org/opencb/opencga/core/models/project/DataStore.java index ac515e31b0..fa2534122a 100644 --- a/opencga-core/src/main/java/org/opencb/opencga/core/models/project/DataStore.java +++ b/opencga-core/src/main/java/org/opencb/opencga/core/models/project/DataStore.java @@ -64,6 +64,7 @@ public String toString() { final StringBuilder sb = new StringBuilder("DataStore{"); sb.append("storageEngine='").append(storageEngine).append('\''); sb.append(", dbName='").append(dbName).append('\''); + sb.append(", options=").append(options); sb.append('}'); return sb.toString(); } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/app/VariantMetadataMain.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/app/VariantMetadataMain.java index 202ab39f0b..de51bc4a99 
100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/app/VariantMetadataMain.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/app/VariantMetadataMain.java @@ -8,6 +8,7 @@ import org.opencb.biodata.models.variant.VariantFileMetadata; import org.opencb.commons.datastore.core.ObjectMap; import org.opencb.commons.datastore.core.QueryOptions; +import org.opencb.opencga.core.common.TimeUtils; import org.opencb.opencga.core.tools.ToolParams; import org.opencb.opencga.storage.core.exceptions.StorageEngineException; import org.opencb.opencga.storage.core.metadata.VariantStorageMetadataManager; @@ -151,6 +152,10 @@ private void rename(String currentStudyName, String newStudyName) throws Storage int studyId = mm.getStudyId(currentStudyName); mm.updateStudyMetadata(studyId, studyMetadata -> { studyMetadata.setName(newStudyName); + studyMetadata.getAttributes().put("rename_" + TimeUtils.getTime(), new ObjectMap() + .append("newName", newStudyName) + .append("oldName", currentStudyName) + ); }); } } From b000ec7d20cdfed639f8783da7ed406df0d9a822 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Mon, 18 Nov 2024 15:47:54 +0000 Subject: [PATCH 057/122] storage: Ensure variant-exports are sorted even from Phoenix. 
#TASK-6722 --- .../opencga/core/common/ExceptionUtils.java | 25 +++++++++- .../hadoop/utils/AbstractHBaseDriver.java | 47 +++++++++++++++++-- .../hadoop/variant/executors/MRExecutor.java | 2 +- .../hadoop/variant/io/VariantDriver.java | 5 ++ .../variant/mr/StreamVariantMapper.java | 14 +++++- .../variant/mr/VariantMapReduceUtil.java | 1 + 6 files changed, 86 insertions(+), 8 deletions(-) diff --git a/opencga-core/src/main/java/org/opencb/opencga/core/common/ExceptionUtils.java b/opencga-core/src/main/java/org/opencb/opencga/core/common/ExceptionUtils.java index 89e3a6fba4..d17034d553 100644 --- a/opencga-core/src/main/java/org/opencb/opencga/core/common/ExceptionUtils.java +++ b/opencga-core/src/main/java/org/opencb/opencga/core/common/ExceptionUtils.java @@ -58,7 +58,30 @@ private static StringBuilder prettyExceptionMessage(Throwable exception, StringB if (includeClassName) { message.append("[").append(exception.getClass().getSimpleName()).append("] "); } - message.append(exMessage); + String[] exMessageSubLines; + if (exMessage != null) { + exMessageSubLines = exMessage.split("\n"); + } else { + exMessageSubLines = new String[]{"null"}; + } + if (multiline) { + for (int i = 0; i < exMessageSubLines.length; i++) { + String exMessageSubLine = exMessageSubLines[i]; + if (i == 0) { + message.append(exMessageSubLine); + } else { + message.append(separator); + if (includeClassName) { + message.append(StringUtils.repeat(" ", exception.getClass().getSimpleName().length() + 3)); + } + message.append(exMessageSubLine); + } + } + } else { + for (String exMessageSubLine : exMessageSubLines) { + message.append(exMessageSubLine).append(" ; "); + } + } if (exception.getSuppressed().length > 0) { StringBuilder sb = new StringBuilder(); String intraSeparator = multiline ? 
separator + " " : separator; diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/utils/AbstractHBaseDriver.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/utils/AbstractHBaseDriver.java index b374703990..b3bcbadbe8 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/utils/AbstractHBaseDriver.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/utils/AbstractHBaseDriver.java @@ -221,10 +221,11 @@ public final int run(String[] args) throws Exception { if (!succeed) { LOGGER.error("error with job!"); if (!"NA".equals(job.getStatus().getFailureInfo())) { - LOGGER.error("Failure info: " + job.getStatus().getFailureInfo()); - printKeyValue(ERROR_MESSAGE, job.getStatus().getFailureInfo()); + String errorMessage = job.getStatus().getFailureInfo().replace("\n", "\\n"); + errorMessage += getExtendedTaskErrorMessage(job); + LOGGER.error("Failure info: " + errorMessage.replace("\\n", "\n")); + printKeyValue(ERROR_MESSAGE, errorMessage); } - } LOGGER.info("================================================="); LOGGER.info("Finish job " + getJobName()); @@ -239,6 +240,43 @@ public final int run(String[] args) throws Exception { return succeed ? 
0 : 1; } + private static String getExtendedTaskErrorMessage(Job job) { + try { + StringBuilder sb = new StringBuilder(); + int eventCounter = 0; + TaskCompletionEvent[] events; + do { + events = job.getTaskCompletionEvents(eventCounter, 10); + eventCounter += events.length; + for (TaskCompletionEvent event : events) { + if (event.getStatus() == TaskCompletionEvent.Status.FAILED) { + LOGGER.info(event.toString()); + // Displaying the task diagnostic information + TaskAttemptID taskId = event.getTaskAttemptId(); + String[] taskDiagnostics = job.getTaskDiagnostics(taskId); + if (taskDiagnostics != null) { + for (String diagnostics : taskDiagnostics) { + for (String diagnosticLine : diagnostics.split("\n")) { + if (diagnosticLine.contains("Error:") + || diagnosticLine.contains("Caused by:") + || diagnosticLine.contains("Suppressed:")) { + sb.append(diagnosticLine); + sb.append("\\n"); + } + } + } + } + } + } + } while (events.length > 0); + return sb.toString(); + } catch (Exception e) { + // Ignore + LOGGER.error("Error getting task diagnostics", e); + } + return ""; + } + private void reportRunningJobs() { if (getConf().getBoolean("storage.hadoop.mr.skipReportRunningJobs", false)) { LOGGER.info("Skip report running jobs"); @@ -316,8 +354,9 @@ private boolean executeJob(Job job) throws IOException, InterruptedException, Cl } }); try { - Runtime.getRuntime().addShutdownHook(hook); job.submit(); + // Add shutdown hook after successfully submitting the job. 
+ Runtime.getRuntime().addShutdownHook(hook); JobID jobID = job.getJobID(); String applicationId = jobID.appendTo(new StringBuilder(ApplicationId.appIdStrPrefix)).toString(); printKeyValue(MR_APPLICATION_ID, applicationId); diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/executors/MRExecutor.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/executors/MRExecutor.java index a8d9b03745..f8c85a813f 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/executors/MRExecutor.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/executors/MRExecutor.java @@ -121,7 +121,7 @@ public ObjectMap run(Class execClass, String[] args, String if (exitValue != 0) { String message = "Error executing MapReduce for : \"" + taskDescription + "\""; if (StringUtils.isNotEmpty(result.getErrorMessage())) { - message += " : " + result.getErrorMessage(); + message += " : " + result.getErrorMessage().replace("\\n", "\n"); } else { message += " : Unidentified error executing MapReduce job. 
Check logs for more information."; } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/io/VariantDriver.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/io/VariantDriver.java index 4a78cb70f6..8916c5242a 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/io/VariantDriver.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/io/VariantDriver.java @@ -56,6 +56,11 @@ protected void parseAndValidateParameters() throws IOException { getQueryFromConfig(query, getConf()); getQueryOptionsFromConfig(options, getConf()); + if (!options.getBoolean(QueryOptions.SORT)) { + // Unsorted results might break the file generation. + // Results from HBase are always sorted, but when reading from Phoenix, some results might be out of order. 
+ options.put(QueryOptions.SORT, true); + } logger.info(" * Query:"); for (Map.Entry entry : query.entrySet()) { diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantMapper.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantMapper.java index b490f25150..5132d49a7b 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantMapper.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantMapper.java @@ -365,8 +365,18 @@ private void startProcess(Context context) throws IOException, StorageEngineExce context.getCounter(COUNTER_GROUP_NAME, "start_process").increment(1); Variant variant = context.getCurrentValue(); - currentChromosome = variant.getChromosome(); - currentPosition = variant.getStart(); + if (variant.getChromosome().equals(currentChromosome)) { + if (currentPosition >= variant.getStart()) { + // Multiple variants might point to the same locus + // In that case, simply increment the position + currentPosition++; + } else { + currentPosition = variant.getStart(); + } + } else { + currentChromosome = variant.getChromosome(); + currentPosition = variant.getStart(); + } if (firstVariant == null) { firstVariant = variant.getChromosome() + ":" + variant.getStart(); } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/VariantMapReduceUtil.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/VariantMapReduceUtil.java index c55dfa6282..77786498ed 100644 --- 
a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/VariantMapReduceUtil.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/VariantMapReduceUtil.java @@ -496,6 +496,7 @@ public static QueryOptions getQueryOptionsFromConfig(Configuration conf) { public static void getQueryOptionsFromConfig(QueryOptions options, Configuration conf) { options.put(QueryOptions.INCLUDE, conf.get(QueryOptions.INCLUDE)); options.put(QueryOptions.EXCLUDE, conf.get(QueryOptions.EXCLUDE)); + options.put(QueryOptions.SORT, conf.get(QueryOptions.SORT)); } public static Query getQueryFromConfig(Configuration conf) { From 3e7089a8a0efe2994c21630520c3cd2bb6b66893 Mon Sep 17 00:00:00 2001 From: JuanfeSanahuja Date: Thu, 21 Nov 2024 17:52:50 +0100 Subject: [PATCH 058/122] Prepare release 3.0.2 --- opencga-analysis/pom.xml | 2 +- opencga-app/pom.xml | 2 +- opencga-catalog/pom.xml | 2 +- opencga-client/pom.xml | 2 +- opencga-clinical/pom.xml | 2 +- opencga-core/pom.xml | 2 +- opencga-master/pom.xml | 2 +- opencga-server/pom.xml | 2 +- opencga-storage/opencga-storage-app/pom.xml | 2 +- opencga-storage/opencga-storage-benchmark/pom.xml | 2 +- opencga-storage/opencga-storage-core/pom.xml | 2 +- .../opencga-storage-hadoop-compat-api/pom.xml | 2 +- .../opencga-storage-hadoop-compat-hbase2.0/pom.xml | 2 +- .../opencga-storage-hadoop-compat-hbase2.2/pom.xml | 2 +- .../opencga-storage-hadoop-compat-hbase2.4/pom.xml | 2 +- .../opencga-storage-hadoop-compat/pom.xml | 2 +- .../opencga-storage-hadoop-core/pom.xml | 2 +- .../opencga-storage-hadoop-lib-emr6.1/pom.xml | 2 +- .../opencga-storage-hadoop-lib-emr6.13/pom.xml | 2 +- .../opencga-storage-hadoop-lib-hdi5.1/pom.xml | 2 +- .../opencga-storage-hadoop-lib-hdp3.1/pom.xml | 2 +- .../opencga-storage-hadoop/opencga-storage-hadoop-lib/pom.xml | 2 +- opencga-storage/opencga-storage-hadoop/pom.xml | 2 +- 
opencga-storage/opencga-storage-server/pom.xml | 2 +- opencga-storage/pom.xml | 2 +- opencga-test/pom.xml | 2 +- pom.xml | 4 ++-- 27 files changed, 28 insertions(+), 28 deletions(-) diff --git a/opencga-analysis/pom.xml b/opencga-analysis/pom.xml index b87b735227..99cdde7b21 100644 --- a/opencga-analysis/pom.xml +++ b/opencga-analysis/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 3.0.2-SNAPSHOT + 3.0.2 ../pom.xml diff --git a/opencga-app/pom.xml b/opencga-app/pom.xml index 9888d9eff1..9c5743a6c3 100644 --- a/opencga-app/pom.xml +++ b/opencga-app/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 3.0.2-SNAPSHOT + 3.0.2 ../pom.xml diff --git a/opencga-catalog/pom.xml b/opencga-catalog/pom.xml index 9b90d1abe8..449d5a615b 100644 --- a/opencga-catalog/pom.xml +++ b/opencga-catalog/pom.xml @@ -23,7 +23,7 @@ org.opencb.opencga opencga - 3.0.2-SNAPSHOT + 3.0.2 ../pom.xml diff --git a/opencga-client/pom.xml b/opencga-client/pom.xml index e79ddeca00..aeff862c5c 100644 --- a/opencga-client/pom.xml +++ b/opencga-client/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 3.0.2-SNAPSHOT + 3.0.2 ../pom.xml diff --git a/opencga-clinical/pom.xml b/opencga-clinical/pom.xml index d14ae03e2d..244255c3e0 100644 --- a/opencga-clinical/pom.xml +++ b/opencga-clinical/pom.xml @@ -5,7 +5,7 @@ org.opencb.opencga opencga - 3.0.2-SNAPSHOT + 3.0.2 ../pom.xml 4.0.0 diff --git a/opencga-core/pom.xml b/opencga-core/pom.xml index 7fdcef9359..f82141f153 100644 --- a/opencga-core/pom.xml +++ b/opencga-core/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 3.0.2-SNAPSHOT + 3.0.2 ../pom.xml diff --git a/opencga-master/pom.xml b/opencga-master/pom.xml index 4b28c21116..e6ada2e6d2 100644 --- a/opencga-master/pom.xml +++ b/opencga-master/pom.xml @@ -22,7 +22,7 @@ opencga org.opencb.opencga - 3.0.2-SNAPSHOT + 3.0.2 ../pom.xml diff --git a/opencga-server/pom.xml b/opencga-server/pom.xml index 2b17811efd..083058f829 100644 --- a/opencga-server/pom.xml +++ b/opencga-server/pom.xml @@ 
-22,7 +22,7 @@ org.opencb.opencga opencga - 3.0.2-SNAPSHOT + 3.0.2 ../pom.xml diff --git a/opencga-storage/opencga-storage-app/pom.xml b/opencga-storage/opencga-storage-app/pom.xml index 2ea3d81f94..da3d0f0118 100644 --- a/opencga-storage/opencga-storage-app/pom.xml +++ b/opencga-storage/opencga-storage-app/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage - 3.0.2-SNAPSHOT + 3.0.2 ../pom.xml diff --git a/opencga-storage/opencga-storage-benchmark/pom.xml b/opencga-storage/opencga-storage-benchmark/pom.xml index d9eac96485..6f7a1d0473 100644 --- a/opencga-storage/opencga-storage-benchmark/pom.xml +++ b/opencga-storage/opencga-storage-benchmark/pom.xml @@ -22,7 +22,7 @@ opencga-storage org.opencb.opencga - 3.0.2-SNAPSHOT + 3.0.2 ../pom.xml diff --git a/opencga-storage/opencga-storage-core/pom.xml b/opencga-storage/opencga-storage-core/pom.xml index 4c272f5582..4476c2ec63 100644 --- a/opencga-storage/opencga-storage-core/pom.xml +++ b/opencga-storage/opencga-storage-core/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage - 3.0.2-SNAPSHOT + 3.0.2 ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-api/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-api/pom.xml index d8175c811e..ca885fa290 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-api/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-api/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage-hadoop-compat - 3.0.2-SNAPSHOT + 3.0.2 ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.0/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.0/pom.xml index b9497bf885..90de830d5e 100644 --- 
a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.0/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.0/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage-hadoop-compat - 3.0.2-SNAPSHOT + 3.0.2 ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.2/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.2/pom.xml index 2e8fc54913..2d4470a7c9 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.2/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.2/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage-hadoop-compat - 3.0.2-SNAPSHOT + 3.0.2 ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.4/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.4/pom.xml index 4c3be0aacf..9cebc36634 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.4/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.4/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage-hadoop-compat - 3.0.2-SNAPSHOT + 3.0.2 ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/pom.xml index 3f28c2c77a..4e53543f6c 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/pom.xml @@ -23,7 +23,7 @@ 
org.opencb.opencga opencga-storage-hadoop - 3.0.2-SNAPSHOT + 3.0.2 ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/pom.xml index 016818d607..3d2d25a078 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/pom.xml @@ -23,7 +23,7 @@ org.opencb.opencga opencga-storage-hadoop - 3.0.2-SNAPSHOT + 3.0.2 ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-emr6.1/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-emr6.1/pom.xml index fa81312adc..1fb373a58b 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-emr6.1/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-emr6.1/pom.xml @@ -7,7 +7,7 @@ org.opencb.opencga opencga-storage-hadoop-lib - 3.0.2-SNAPSHOT + 3.0.2 ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-emr6.13/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-emr6.13/pom.xml index 94fd573d1c..976b6942c1 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-emr6.13/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-emr6.13/pom.xml @@ -7,7 +7,7 @@ org.opencb.opencga opencga-storage-hadoop-lib - 3.0.2-SNAPSHOT + 3.0.2 ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-hdi5.1/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-hdi5.1/pom.xml index 78becbf80e..e90d48ec48 100644 --- 
a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-hdi5.1/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-hdi5.1/pom.xml @@ -7,7 +7,7 @@ org.opencb.opencga opencga-storage-hadoop-lib - 3.0.2-SNAPSHOT + 3.0.2 ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-hdp3.1/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-hdp3.1/pom.xml index 5f7d8651c9..893010fb72 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-hdp3.1/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-hdp3.1/pom.xml @@ -7,7 +7,7 @@ org.opencb.opencga opencga-storage-hadoop-lib - 3.0.2-SNAPSHOT + 3.0.2 ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/pom.xml index 01b610468f..0e15f9c746 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/pom.xml @@ -23,7 +23,7 @@ org.opencb.opencga opencga-storage-hadoop - 3.0.2-SNAPSHOT + 3.0.2 ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/pom.xml b/opencga-storage/opencga-storage-hadoop/pom.xml index 9ef4161339..29b54b3404 100644 --- a/opencga-storage/opencga-storage-hadoop/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/pom.xml @@ -23,7 +23,7 @@ org.opencb.opencga opencga-storage - 3.0.2-SNAPSHOT + 3.0.2 ../pom.xml diff --git a/opencga-storage/opencga-storage-server/pom.xml b/opencga-storage/opencga-storage-server/pom.xml index 8fc3460884..e91230d5af 100644 --- a/opencga-storage/opencga-storage-server/pom.xml +++ b/opencga-storage/opencga-storage-server/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga 
opencga-storage - 3.0.2-SNAPSHOT + 3.0.2 ../pom.xml diff --git a/opencga-storage/pom.xml b/opencga-storage/pom.xml index 9298492d10..46b8ac2126 100644 --- a/opencga-storage/pom.xml +++ b/opencga-storage/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 3.0.2-SNAPSHOT + 3.0.2 ../pom.xml diff --git a/opencga-test/pom.xml b/opencga-test/pom.xml index 923d0bc200..6d68a97aed 100644 --- a/opencga-test/pom.xml +++ b/opencga-test/pom.xml @@ -24,7 +24,7 @@ org.opencb.opencga opencga - 3.0.2-SNAPSHOT + 3.0.2 ../pom.xml diff --git a/pom.xml b/pom.xml index f68aa475a8..829c7358ee 100644 --- a/pom.xml +++ b/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 3.0.2-SNAPSHOT + 3.0.2 pom OpenCGA @@ -43,7 +43,7 @@ - 3.0.2 + 3.0.2 6.0.0 3.0.0 From 0a741d5a1c3acd2eadca5614261bad965a2837d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Mon, 25 Nov 2024 13:14:11 +0000 Subject: [PATCH 059/122] storage: Use HDFS to store intermediate MapReduce files. Concat locally. #TASK-6722 --- .../main/resources/storage-configuration.yml | 2 +- .../opencga-storage-hadoop-core/pom.xml | 4 + .../storage/hadoop/app/HadoopMain.java | 139 +++++ .../opencga/storage/hadoop/app/Main.java | 3 + .../hadoop/utils/AbstractHBaseDriver.java | 338 +------------ .../hadoop/utils/DeleteHBaseColumnDriver.java | 2 +- .../hadoop/utils/MapReduceOutputFile.java | 478 ++++++++++++++++++ .../variant/AbstractVariantsTableDriver.java | 23 + .../variant/HadoopVariantStorageEngine.java | 2 +- .../VariantTableAggregationDriver.java | 40 +- .../analysis/gwas/FisherTestDriver.java | 40 +- .../hadoop/variant/executors/MRExecutor.java | 7 +- .../variant/executors/MRExecutorFactory.java | 19 +- .../variant/executors/SshMRExecutor.java | 91 +++- .../variant/io/HadoopVariantExporter.java | 5 +- .../hadoop/variant/io/VariantDriver.java | 5 +- .../variant/io/VariantExporterDriver.java | 4 +- .../variant/mr/StreamVariantDriver.java | 4 +- .../variant/mr/VariantMapReduceUtil.java | 4 +- 
.../variant/prune/VariantPruneDriver.java | 7 +- .../stats/CohortVariantStatsDriver.java | 3 - .../stats/SampleVariantStatsDriver.java | 2 +- .../variant/stats/VariantStatsDriver.java | 11 +- .../variant/executors/SshMRExecutorTest.java | 7 +- pom.xml | 2 +- 25 files changed, 796 insertions(+), 446 deletions(-) create mode 100644 opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/app/HadoopMain.java create mode 100644 opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/utils/MapReduceOutputFile.java diff --git a/opencga-storage/opencga-storage-core/src/main/resources/storage-configuration.yml b/opencga-storage/opencga-storage-core/src/main/resources/storage-configuration.yml index f422770d9b..dfa6865eb4 100644 --- a/opencga-storage/opencga-storage-core/src/main/resources/storage-configuration.yml +++ b/opencga-storage/opencga-storage-core/src/main/resources/storage-configuration.yml @@ -177,7 +177,7 @@ variant: storage.hadoop.mr.executor.ssh.user: "" # Hadoop edge node user name #storage.hadoop.mr.executor.ssh.key: "~/.ssh/id_rsa" # Hadoop edge node ssh-key file storage.hadoop.mr.executor.ssh.password: "" # Hadoop edge node password. Only if ssh-key is not present. Requires sshpass to run - storage.hadoop.mr.executor.ssh.remoteOpenCgaHome: # Remote opencga home location. Only if different than local location. + storage.hadoop.mr.executor.ssh.remoteOpenCgaHome: # Remote opencga home location. Only if different from local location. storage.hadoop.mr.executor.ssh.terminationGracePeriodSeconds: 120 # Termination grace period in seconds for the ssh executor. 
# Increase the ScannerTimeoutPeriod from 60000 (1min) to 300000 (5min) to avoid ScannerTimeoutExceptions diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/pom.xml index a024b4ec3f..1d0a1cb302 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/pom.xml @@ -212,6 +212,10 @@ com.google.guava guava + + org.xerial.snappy + snappy-java + org.apache.parquet parquet-avro diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/app/HadoopMain.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/app/HadoopMain.java new file mode 100644 index 0000000000..c5035c575c --- /dev/null +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/app/HadoopMain.java @@ -0,0 +1,139 @@ +package org.opencb.opencga.storage.hadoop.app; + +import org.apache.commons.lang3.RandomStringUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.*; +import org.apache.hadoop.io.compress.CompressionCodec; +import org.apache.hadoop.util.ReflectionUtils; +import org.opencb.commons.datastore.core.ObjectMap; +import org.opencb.opencga.core.common.IOUtils; +import org.opencb.opencga.storage.hadoop.utils.MapReduceOutputFile; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.InputStream; +import java.io.OutputStream; +import java.nio.charset.StandardCharsets; +import java.util.Arrays; +import java.util.Collections; +import java.util.Map; + +public class HadoopMain extends AbstractMain { + + + @Override + protected void run(String[] args) throws Exception { + new HadoopCommandExecutor().exec(args); + } + + + public static class 
HadoopCommandExecutor extends NestedCommandExecutor { +// private HBaseManager hBaseManager; + private Configuration conf; + + public HadoopCommandExecutor() { + this(""); + } + + public HadoopCommandExecutor(String context) { + super(context); + addSubCommand(Arrays.asList("hdfs-ls", "ls"), + " [-f ] [-D key=value] : List the content of an hdfs path", this::hdfsLs); + addSubCommand(Arrays.asList("hdfs-info", "info", "st"), + " [-f ] [-D key=value] : FS information", this::info); + addSubCommand(Collections.singletonList("codec-info"), + " [-c ] [-D key=value] : Codec information", this::codecInfo); + } + + @Override + protected void setup(String command, String[] args) throws Exception { + conf = new Configuration(); + } + + @Override + protected void cleanup(String command, String[] args) throws Exception { + } + + private void hdfsLs(String[] args) throws Exception { + ObjectMap map = getArgsMap(args, "f", "D"); + String path = map.getString("f", FileSystem.getDefaultUri(conf).toString()); + addDynamic(map); + + try (FileSystem fs = FileSystem.get(new Path(path).toUri(), conf)) { + RemoteIterator iterator = fs.listFiles(new Path(path), true); + while (iterator.hasNext()) { + LocatedFileStatus file = iterator.next(); + println("- " + file.getPath().toUri() + " : " + IOUtils.humanReadableByteCount(file.getLen(), false)); + } + } + } + + private void info(String[] args) throws Exception { + ObjectMap map = getArgsMap(args, "f", "D"); + String path = map.getString("f", FileSystem.getDefaultUri(conf).toString()); + addDynamic(map); + + try (FileSystem fs = FileSystem.get(new Path(path).toUri(), conf)) { + info(fs); + } + } + + private void addDynamic(ObjectMap map) { + Map dynamic = map.getMap("D", Collections.emptyMap()); + if (dynamic != null) { + for (Map.Entry entry : dynamic.entrySet()) { + conf.set(entry.getKey(), entry.getValue().toString()); + } + } + } + + private void info(FileSystem fs) throws Exception { + println("fs.getScheme() = " + fs.getScheme()); + 
println("fs.getUri() = " + fs.getUri()); + println("fs.getHomeDirectory() = " + fs.getHomeDirectory()); + println("fs.getWorkingDirectory() = " + fs.getWorkingDirectory()); + println("fs.getConf() = " + fs.getConf()); + println("fs.getCanonicalServiceName() = " + fs.getCanonicalServiceName()); + FsStatus status = fs.getStatus(); + println("status.getCapacity() = " + IOUtils.humanReadableByteCount(status.getCapacity(), false)); + println("status.getRemaining() = " + IOUtils.humanReadableByteCount(status.getRemaining(), false)); + println("status.getUsed() = " + IOUtils.humanReadableByteCount(status.getUsed(), false)); + } + + private void codecInfo(String[] args) throws Exception { + ObjectMap map = getArgsMap(args, "c", "D"); + String codecName = map.getString("c", "deflate"); + addDynamic(map); + + CompressionCodec codec; + try { + Class aClass = Class.forName(codecName); + codec = (CompressionCodec) ReflectionUtils.newInstance(aClass, conf); + } catch (ClassNotFoundException | ClassCastException e) { + codec = MapReduceOutputFile.getCompressionCodec(codecName, conf); + } + println("Codec name : " + codecName); + if (codec == null) { + println("Codec not found!"); + } else { + println("Codec class : " + codec.getClass()); + println("Default extension : " + codec.getDefaultExtension()); + println("Compressor type : " + codec.getCompressorType()); + println("Decompressor type : " + codec.getDecompressorType()); + int rawSize = 1024 * 1024 * 10; + InputStream is = new ByteArrayInputStream(RandomStringUtils.randomAlphanumeric(rawSize).getBytes(StandardCharsets.UTF_8)); + ByteArrayOutputStream byteOs = new ByteArrayOutputStream(rawSize); + OutputStream os = codec.createOutputStream(byteOs); + org.apache.commons.io.IOUtils.copy(is, os); + int compressedSize = byteOs.size(); + + println("Compression rate : " + + IOUtils.humanReadableByteCount(rawSize, false) + "(" + rawSize + "B) " + + "-> " + + IOUtils.humanReadableByteCount(compressedSize, false) + "(" + compressedSize 
+ "B) " + + String.format("%.3f", ((double) compressedSize) / ((double) rawSize))); + os.close(); + } + } + } +} diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/app/Main.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/app/Main.java index 017590c3f8..d5d829c6cf 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/app/Main.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/app/Main.java @@ -28,6 +28,9 @@ public static void main(String[] mainArgs) throws Exception { executor.addSubCommand(Arrays.asList("convertintovirtual", "ConvertIntoVirtual"), "Migrate into virtual file", args -> { new ConvertIntoVirtual().run(args); }); + executor.addSubCommand(Arrays.asList("hadoop", "hdfs"), "Run hadoop commands", args -> { + new HadoopMain().run(args); + }); executor.exec(mainArgs); } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/utils/AbstractHBaseDriver.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/utils/AbstractHBaseDriver.java index b3bcbadbe8..787b10648d 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/utils/AbstractHBaseDriver.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/utils/AbstractHBaseDriver.java @@ -1,27 +1,22 @@ package org.opencb.opencga.storage.hadoop.utils; -import org.apache.commons.io.input.ReaderInputStream; import org.apache.commons.lang3.ArrayUtils; import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.time.StopWatch; import 
org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configured; import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.LocatedFileStatus; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.RemoteIterator; import org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.hbase.mapreduce.TableInputFormat; import org.apache.hadoop.hbase.mapreduce.TableOutputFormat; -import org.apache.hadoop.io.IOUtils; -import org.apache.hadoop.io.compress.*; +import org.apache.hadoop.io.compress.CompressionCodec; +import org.apache.hadoop.io.compress.GzipCodec; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapreduce.*; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; -import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; -import org.apache.hadoop.util.ReflectionUtils; import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; import org.apache.hadoop.yarn.api.records.ApplicationId; @@ -29,29 +24,20 @@ import org.apache.hadoop.yarn.api.records.YarnApplicationState; import org.apache.hadoop.yarn.client.api.YarnClient; import org.apache.hadoop.yarn.exceptions.YarnException; -import org.apache.parquet.hadoop.ParquetFileWriter; import org.apache.phoenix.mapreduce.util.PhoenixConfigurationUtil; import org.opencb.commons.datastore.core.ObjectMap; import org.opencb.opencga.core.common.ExceptionUtils; import org.opencb.opencga.core.common.TimeUtils; import org.opencb.opencga.storage.core.exceptions.StorageEngineException; -import org.opencb.opencga.storage.hadoop.io.HDFSIOConnector; -import org.opencb.opencga.storage.hadoop.variant.executors.SshMRExecutor; import org.opencb.opencga.storage.hadoop.variant.mr.AbstractHBaseVariantTableInputFormat; import org.opencb.opencga.storage.hadoop.variant.mr.VariantMapReduceUtil; import org.slf4j.Logger; import 
org.slf4j.LoggerFactory; -import java.io.*; -import java.net.URI; -import java.nio.charset.Charset; -import java.nio.file.Files; -import java.nio.file.Paths; +import java.io.IOException; import java.util.*; -import java.util.function.Supplier; import java.util.stream.Collectors; -import static org.opencb.opencga.core.common.IOUtils.humanReadableByteCount; import static org.opencb.opencga.storage.hadoop.variant.HadoopVariantStorageOptions.MR_EXECUTOR_SSH_PASSWORD; /** @@ -64,6 +50,7 @@ public abstract class AbstractHBaseDriver extends Configured implements Tool { public static final String COLUMNS_TO_COUNT = "columns_to_count"; public static final String MR_APPLICATION_ID = "MR_APPLICATION_ID"; public static final String ERROR_MESSAGE = "ERROR_MESSAGE"; + public static final String OUTPUT_PARAM = "output"; private static final Logger LOGGER = LoggerFactory.getLogger(AbstractHBaseDriver.class); protected String table; @@ -382,91 +369,6 @@ protected static void printKeyValue(String key, Object value) { System.err.println(key + "=" + value); } - protected boolean isLocal(Path path) { - return HDFSIOConnector.isLocal(path.toUri(), getConf()); - } - - protected Path getTempOutdir(String prefix) throws IOException { - return getTempOutdir(prefix, ""); - } - - protected Path getTempOutdir(String prefix, String suffix) throws IOException { - return getTempOutdir(prefix, suffix, false); - } - - protected Path getTempOutdir(String prefix, String suffix, boolean ensureHdfs) throws IOException { - if (StringUtils.isEmpty(suffix)) { - suffix = ""; - } else if (!suffix.startsWith(".")) { - suffix = "." + suffix; - } - // Be aware that - // > ABFS does not allow files or directories to end with a dot. - String fileName = prefix + "." 
+ TimeUtils.getTime() + suffix; - - Path tmpDir = new Path(getConf().get("hadoop.tmp.dir")); - if (ensureHdfs) { - FileSystem fileSystem = tmpDir.getFileSystem(getConf()); - if (!fileSystem.getScheme().equals("hdfs")) { - LOGGER.info("Temporary directory is not in hdfs:// . Hdfs is required for this temporary file."); - LOGGER.info(" Default file system : " + fileSystem.getUri()); - for (String nameServiceId : getConf().getTrimmedStringCollection("dfs.nameservices")) { - try { - Path hdfsTmpPath = new Path("hdfs", nameServiceId, "/tmp/"); - FileSystem hdfsFileSystem = hdfsTmpPath.getFileSystem(getConf()); - if (hdfsFileSystem != null) { - LOGGER.info("Change to file system : " + hdfsFileSystem.getUri()); - tmpDir = hdfsTmpPath; - break; - } - } catch (Exception e) { - LOGGER.debug("This file system is not hdfs:// . Skip!", e); - } - } - } - } - return new Path(tmpDir, fileName); - } - - private URI toUri(Path path) throws IOException { - URI tmpUri = path.toUri(); - if (tmpUri.getScheme() == null) { - // If the scheme is null, add the default scheme - FileSystem fileSystem = path.getFileSystem(getConf()); - tmpUri = fileSystem.getUri().resolve(tmpUri.getPath()); - } - return tmpUri; - } - - protected Path getLocalOutput(Path outdir) throws IOException { - return getLocalOutput(outdir, () -> null); - } - - protected Path getLocalOutput(Path outdir, Supplier nameGenerator) throws IOException { - if (!isLocal(outdir)) { - throw new IllegalArgumentException("Outdir " + outdir + " is not in the local filesystem"); - } - Path localOutput = outdir; - FileSystem localFs = localOutput.getFileSystem(getConf()); - if (localFs.exists(localOutput)) { - if (localFs.isDirectory(localOutput)) { - String name = nameGenerator.get(); - if (StringUtils.isEmpty(name)) { - throw new IllegalArgumentException("Local output '" + localOutput + "' is a directory"); - } - localOutput = new Path(localOutput, name); - } else { - throw new IllegalArgumentException("File '" + localOutput + "' 
already exists!"); - } - } else { - if (!localFs.exists(localOutput.getParent())) { - Files.createDirectories(Paths.get(localOutput.getParent().toUri())); -// throw new IOException("No such file or directory: " + localOutput); - } - } - return localOutput; - } - protected void deleteTemporaryFile(Path outdir) throws IOException { LOGGER.info("Delete temporary file " + outdir.toUri()); FileSystem fileSystem = outdir.getFileSystem(getConf()); @@ -475,238 +377,6 @@ protected void deleteTemporaryFile(Path outdir) throws IOException { LOGGER.info("Temporary file deleted!"); } - public class MapReduceOutputFile { - public static final String OUTPUT_PARAM = "output"; - - private final Supplier nameGenerator; - private final String tempFilePrefix; - private final Map extraFiles = new HashMap<>(); - private String namedOutput; - protected Path localOutput; - protected Path outdir; - - public MapReduceOutputFile(String tempFilePrefix) throws IOException { - this.nameGenerator = () -> null; - this.tempFilePrefix = tempFilePrefix; - initOutputPath(); - namedOutput = null; - } - - public MapReduceOutputFile(Supplier nameGenerator, String tempFilePrefix) throws IOException { - this.nameGenerator = nameGenerator; - this.tempFilePrefix = tempFilePrefix; - initOutputPath(); - namedOutput = null; - } - - private void initOutputPath() throws IOException { - String outdirStr = getParam(OUTPUT_PARAM); - if (StringUtils.isNotEmpty(outdirStr)) { - outdir = new Path(outdirStr); - - if (isLocal(outdir)) { - localOutput = AbstractHBaseDriver.this.getLocalOutput(outdir, nameGenerator); - outdir = getTempOutdir(tempFilePrefix, localOutput.getName()); - outdir.getFileSystem(getConf()).deleteOnExit(outdir); - } - if (hasTempOutput()) { - LOGGER.info(" * Output file : " + toUri(localOutput)); - LOGGER.info(" * Temporary outdir : " + toUri(outdir)); - } else { - LOGGER.info(" * Outdir: " + toUri(outdir)); - } - } - } - - public void postExecute(boolean succeed) throws IOException { - if (succeed) 
{ - if (hasTempOutput()) { - getConcatMrOutputToLocal(); - } - } - if (hasTempOutput()) { - deleteTemporaryFile(outdir); - } - } - - public boolean hasTempOutput() { - return localOutput != null; - } - - public MapReduceOutputFile setNamedOutput(String partFilePrefix) { - this.namedOutput = partFilePrefix; - return this; - } - - public void addExtraNamedOutput(String namedOutput, String localOutputPrefix) { - extraFiles.put(namedOutput, localOutputPrefix); - } - - protected void getConcatMrOutputToLocal() throws IOException { - concatMrOutputToLocal(outdir, localOutput, true, namedOutput); - - for (Map.Entry entry : extraFiles.entrySet()) { - String suffix = entry.getValue(); - String partFilePrefix = entry.getKey(); - Path extraOutput = localOutput.suffix(suffix); - concatMrOutputToLocal(outdir, extraOutput, true, partFilePrefix); - printKeyValue(SshMRExecutor.EXTRA_OUTPUT_PREFIX + partFilePrefix.toUpperCase(), extraOutput); - } - } - - public Path getLocalOutput() { - return localOutput; - } - - public Path getOutdir() { - return outdir; - } - } - - /** - * Concatenate all generated files from a MapReduce job into one single local file. - * - * @param mrOutdir MapReduce output directory - * @param localOutput Local file - * @throws IOException on IOException - * @return List of copied files from HDFS - */ - protected List concatMrOutputToLocal(Path mrOutdir, Path localOutput) throws IOException { - return concatMrOutputToLocal(mrOutdir, localOutput, true, null); - } - - /** - * Concatenate all generated files from a MapReduce job into one single local file. - * - * @param mrOutdir MapReduce output directory - * @param localOutput Local file - * @param removeExtraHeaders Remove header lines starting with "#" from all files but the first - * @param partFilePrefix Filter partial files with specific prefix. Otherwise, concat them all. 
- * @throws IOException on IOException - * @return List of copied files from HDFS - */ - protected List concatMrOutputToLocal(Path mrOutdir, Path localOutput, boolean removeExtraHeaders, String partFilePrefix) - throws IOException { - // TODO: Allow copy output to any IOConnector - FileSystem fileSystem = mrOutdir.getFileSystem(getConf()); - RemoteIterator it = fileSystem.listFiles(mrOutdir, false); - List paths = new ArrayList<>(); - while (it.hasNext()) { - LocatedFileStatus status = it.next(); - Path path = status.getPath(); - if (status.isFile() - && !path.getName().equals(FileOutputCommitter.SUCCEEDED_FILE_NAME) - && !path.getName().equals(FileOutputCommitter.PENDING_DIR_NAME) - && !path.getName().equals(ParquetFileWriter.PARQUET_METADATA_FILE) - && !path.getName().equals(ParquetFileWriter.PARQUET_COMMON_METADATA_FILE) - && status.getLen() > 0) { - if (partFilePrefix == null || path.getName().startsWith(partFilePrefix)) { - paths.add(path); - } - } - } - StopWatch stopWatch = new StopWatch(); - stopWatch.start(); - if (paths.isEmpty()) { - LOGGER.warn("The MapReduce job didn't produce any output. 
This may not be expected."); - } else if (paths.size() == 1) { - LOGGER.info("Copy to local file"); - LOGGER.info(" Source : {} ({})", - paths.get(0).toUri(), humanReadableByteCount(fileSystem.getFileStatus(paths.get(0)).getLen(), false)); - LOGGER.info(" Target : {}", localOutput.toUri()); - fileSystem.copyToLocalFile(false, paths.get(0), localOutput); - } else { - LOGGER.info("Concat and copy to local : " + paths.size() + " partial files"); - LOGGER.info(" Source {}: {}", getCompression(paths.get(0).getName()), mrOutdir.toUri()); - LOGGER.info(" Target {}: {}", getCompression(localOutput.getName()), localOutput.toUri()); - LOGGER.info(" ---- "); - - try (OutputStream os = getOutputStreamPlain(localOutput.getName(), localOutput.getFileSystem(getConf()).create(localOutput))) { - for (int i = 0; i < paths.size(); i++) { - Path path = paths.get(i); - LOGGER.info("[{}] Concat {} file : '{}' ({}) ", - i, - getCompression(path.getName()), - path.toUri(), - humanReadableByteCount(fileSystem.getFileStatus(path).getLen(), false)); - try (InputStream isAux = getInputStream(path.getName(), fileSystem.open(path))) { - InputStream is = isAux; - // Remove extra headers from all files but the first - if (removeExtraHeaders && i != 0) { - BufferedReader br = new BufferedReader(new InputStreamReader(is)); - String line; - do { - br.mark(10 * 1024 * 1024); //10MB - line = br.readLine(); - // Skip blank lines and - } while (line != null && (StringUtils.isBlank(line) || line.startsWith("#"))); - br.reset(); - is = new ReaderInputStream(br, Charset.defaultCharset()); - } - - IOUtils.copyBytes(is, os, getConf(), false); - } - } - } - LOGGER.info("File size : " + humanReadableByteCount(Files.size(Paths.get(localOutput.toUri())), false)); - LOGGER.info("Time to copy from HDFS and concat : " + TimeUtils.durationToString(stopWatch)); - } - return paths; - } - - private static String getCompression(String name) throws IOException { - if (name.endsWith(".gz")) { - return "gzip"; - } else if 
(name.endsWith(".snappy")) { - return "snappy"; - } else if (name.endsWith(".lz4")) { - return "lz4"; - } else if (name.endsWith(".zst")) { - return "ztandard"; - } else { - return "plain"; - } - } - - private OutputStream getOutputStreamPlain(String name, OutputStream fsOs) throws IOException { - CompressionCodec codec = getCompressionCodec(name); - if (codec == null) { - return fsOs; - } - return codec.createOutputStream(fsOs); - } - - private CompressionCodec getCompressionCodec(String name) throws IOException { - Class codecClass; - switch (getCompression(name)) { - case "gzip": - codecClass = GzipCodec.class; - break; - case "snappy": - codecClass = SnappyCodec.class; - break; - case "lz4": - codecClass = Lz4Codec.class; - break; - case "ztandard": - codecClass = ZStandardCodec.class; - break; - case "plain": - return null; - default: - throw new IOException("Unknown compression codec for file " + name); - } - return ReflectionUtils.newInstance(codecClass, getConf()); - } - - private InputStream getInputStream(String name, InputStream is) throws IOException { - CompressionCodec codec = getCompressionCodec(name); - if (codec == null) { - return is; - } - return codec.createInputStream(is); - } - protected final int getServersSize(String table) throws IOException { int serversSize; try (HBaseManager hBaseManager = new HBaseManager(getConf())) { diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/utils/DeleteHBaseColumnDriver.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/utils/DeleteHBaseColumnDriver.java index 53473a977b..f9b3b57646 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/utils/DeleteHBaseColumnDriver.java +++ 
b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/utils/DeleteHBaseColumnDriver.java @@ -109,7 +109,7 @@ public void setupJob(Job job, String table) throws IOException { VariantMapReduceUtil.setNoneReduce(job); } else { VariantMapReduceUtil.initTableMapperJob(job, table, scans, DeleteHBaseColumnToProtoMapper.class); - outdir = getTempOutdir("opencga_delete", table, true); + outdir = MapReduceOutputFile.getTempOutdir("opencga_delete", table, true, getConf()); outdir.getFileSystem(getConf()).deleteOnExit(outdir); LOGGER.info(" * Temporary outdir file: " + outdir.toUri()); diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/utils/MapReduceOutputFile.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/utils/MapReduceOutputFile.java new file mode 100644 index 0000000000..e91be76f97 --- /dev/null +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/utils/MapReduceOutputFile.java @@ -0,0 +1,478 @@ +package org.opencb.opencga.storage.hadoop.utils; + +import org.apache.commons.io.input.ReaderInputStream; +import org.apache.commons.lang3.StringUtils; +import org.apache.commons.lang3.time.StopWatch; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.LocatedFileStatus; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.RemoteIterator; +import org.apache.hadoop.io.IOUtils; +import org.apache.hadoop.io.compress.*; +import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter; +import org.apache.hadoop.util.ReflectionUtils; +import org.apache.parquet.hadoop.ParquetFileWriter; +import org.opencb.commons.datastore.core.ObjectMap; +import org.opencb.opencga.core.common.TimeUtils; +import org.slf4j.Logger; +import 
org.slf4j.LoggerFactory; +import org.xerial.snappy.SnappyInputStream; +import org.xerial.snappy.SnappyOutputStream; + +import java.io.*; +import java.net.URI; +import java.nio.charset.Charset; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.function.Supplier; + +import static org.opencb.opencga.core.common.IOUtils.humanReadableByteCount; + +public class MapReduceOutputFile { + + public static final String EXTRA_OUTPUT_PREFIX = "EXTRA_OUTPUT_"; + public static final String NAMED_OUTPUT = "NAMED_OUTPUT"; + public static final String EXTRA_NAMED_OUTPUT_PREFIX = "EXTRA_NAMED_OUTPUT_"; + private static final Logger LOGGER = LoggerFactory.getLogger(MapReduceOutputFile.class); + + private final Configuration conf; + private final Supplier nameGenerator; + private final Map extraFiles = new HashMap<>(); + private String namedOutput; + protected Path localOutput; + protected Path outdir; + + public MapReduceOutputFile(String outdirStr, String tempFilePrefix, Configuration conf) throws IOException { + this(outdirStr, null, tempFilePrefix, conf); + } + + public MapReduceOutputFile(String outdirStr, Supplier nameGenerator, String tempFilePrefix, + Configuration conf) throws IOException { + this(outdirStr, nameGenerator, tempFilePrefix, false, conf); + } + + public MapReduceOutputFile(String outdirStr, Supplier nameGenerator, String tempFilePrefix, boolean ensureHdfs, + Configuration conf) throws IOException { + this.conf = conf; + this.nameGenerator = nameGenerator == null ? 
() -> null : nameGenerator; + namedOutput = null; + + outdir = new Path(outdirStr); + + if (isLocal(outdir)) { + localOutput = getLocalOutput(outdir); + outdir = getTempOutdir(tempFilePrefix, localOutput.getName(), ensureHdfs, conf); + outdir.getFileSystem(conf).deleteOnExit(outdir); + } + if (hasTempOutput()) { + LOGGER.info(" * Output file : " + toUri(localOutput)); + LOGGER.info(" * MapReduce outdir : " + toUri(outdir)); + } else { + LOGGER.info(" * MapReduce outdir : " + toUri(outdir)); + } + } + + public static Path getTempOutdir(String prefix, String suffix, boolean ensureHdfs, Configuration conf) throws IOException { + if (StringUtils.isEmpty(suffix)) { + suffix = ""; + } else if (!suffix.startsWith(".")) { + suffix = "." + suffix; + } + // Be aware that + // > ABFS does not allow files or directories to end with a dot. + String fileName = prefix + "." + TimeUtils.getTime() + suffix; + + Path tmpDir = new Path(conf.get("hadoop.tmp.dir")); + if (ensureHdfs) { + if (!isHdfs(tmpDir, conf)) { + LOGGER.info("Temporary directory is not in hdfs:// . Hdfs is required for this temporary file."); + LOGGER.info(" Default file system : " + FileSystem.getDefaultUri(conf)); + for (String nameServiceId : conf.getTrimmedStringCollection("dfs.nameservices")) { + try { + Path hdfsTmpPath = new Path("hdfs", nameServiceId, "/tmp/"); + FileSystem hdfsFileSystem = hdfsTmpPath.getFileSystem(conf); + if (hdfsFileSystem != null) { + LOGGER.info("Change to file system : " + hdfsFileSystem.getUri()); + tmpDir = hdfsTmpPath; + break; + } + } catch (Exception e) { + LOGGER.debug("This file system is not hdfs:// . Skip!", e); + } + } + } + } + return new Path(tmpDir, fileName); + } + + /** + * Check if a given Hadoop path is local. + * If the scheme is null, it will check the default hadoop file system. 
+ * @param path Hadoop path + * @return true if the path is local + */ + protected boolean isLocal(Path path) { + URI uri = path.toUri(); + String scheme = uri.getScheme(); + if (StringUtils.isEmpty(scheme)) { + scheme = FileSystem.getDefaultUri(conf).getScheme(); + } + return "file".equals(scheme); + } + + /** + * Check if a given URI is local. + * If the scheme is null, it assumes it is local. + * @param uri URI + * @return true if the URI is local + */ + public static boolean isLocal(URI uri) { + String scheme = uri.getScheme(); + if (StringUtils.isEmpty(scheme)) { + scheme = "file"; + } + return StringUtils.isEmpty(scheme) || "file".equals(scheme); + } + + public static boolean isHdfs(Path dir, Configuration conf) { + try { + String scheme = dir.toUri().getScheme(); + if (StringUtils.isEmpty(scheme)) { + scheme = FileSystem.getDefaultUri(conf).getScheme(); + return scheme.equals("hdfs"); + } + FileSystem fileSystem = dir.getFileSystem(conf); + return fileSystem.getScheme().equals("hdfs"); + } catch (IOException e) { + LOGGER.error("Error checking if " + dir + " is HDFS : " + e.getMessage()); + return false; + } + } + + public void postExecute(ObjectMap result, boolean succeed) throws IOException { + readKeyValues(result); + postExecute(succeed); + } + + public void postExecute(boolean succeed) throws IOException { + printKeyValue(); + if (succeed) { + if (hasTempOutput()) { + getConcatMrOutputToLocal(); + } + } + if (hasTempOutput()) { + deleteTemporaryFile(outdir); + } + } + + private void readKeyValues(ObjectMap result) { + for (String key : result.keySet()) { + if (key.equals(MapReduceOutputFile.NAMED_OUTPUT)) { + setNamedOutput(result.getString(key)); + } else if (key.startsWith(MapReduceOutputFile.EXTRA_NAMED_OUTPUT_PREFIX)) { + addExtraNamedOutput(key.substring(MapReduceOutputFile.EXTRA_NAMED_OUTPUT_PREFIX.length()), result.getString(key)); + } + } + } + + private void printKeyValue() { + // Print keyValues only if this method is being called from an 
instance of AbstractHBaseDriver + // Check the stacktrace + boolean found = false; + StackTraceElement[] stackTrace = Thread.currentThread().getStackTrace(); + for (StackTraceElement stackTraceElement : stackTrace) { + try { + Class aClass = Class.forName(stackTraceElement.getClassName()); + if (AbstractHBaseDriver.class.isAssignableFrom(aClass)) { + found = true; + break; + } + } catch (ClassNotFoundException e) { + // This should never happen + throw new RuntimeException(e); + } + } + if (!found) { + return; + } + + if (namedOutput != null) { + AbstractHBaseDriver.printKeyValue(NAMED_OUTPUT, namedOutput); + } + for (Map.Entry entry : extraFiles.entrySet()) { + String suffix = entry.getValue(); + String partFilePrefix = entry.getKey(); + if (hasTempOutput()) { + Path extraOutput = localOutput.suffix(suffix); + AbstractHBaseDriver.printKeyValue(EXTRA_OUTPUT_PREFIX + partFilePrefix, extraOutput); + } else { + AbstractHBaseDriver.printKeyValue(EXTRA_NAMED_OUTPUT_PREFIX + partFilePrefix, suffix); + } + } + } + + public boolean hasTempOutput() { + return localOutput != null; + } + + public MapReduceOutputFile setNamedOutput(String partFilePrefix) { + this.namedOutput = partFilePrefix; + return this; + } + + public void addExtraNamedOutput(String namedOutput, String localOutputPrefix) { + extraFiles.put(namedOutput, localOutputPrefix); + } + + protected void getConcatMrOutputToLocal() throws IOException { + concatMrOutputToLocal(outdir, localOutput, true, namedOutput); + + for (Map.Entry entry : extraFiles.entrySet()) { + String partFilePrefix = entry.getKey(); + String suffix = entry.getValue(); + Path extraOutput = localOutput.suffix(suffix); + concatMrOutputToLocal(outdir, extraOutput, true, partFilePrefix); + AbstractHBaseDriver.printKeyValue(EXTRA_OUTPUT_PREFIX + partFilePrefix.toUpperCase(), extraOutput); + } + } + + /** + * Get the local output file. Might be null if the destination is HDFS. 
+ * @return Local output file + */ + public Path getLocalOutput() { + return localOutput; + } + + /** + * Get the actual output directory for the MapReduce job. + * @return Output directory + */ + public Path getOutdir() { + return outdir; + } + + public Configuration getConf() { + return conf; + } + + private URI toUri(Path path) throws IOException { + URI tmpUri = path.toUri(); + if (tmpUri.getScheme() == null) { + // If the scheme is null, add the default scheme + FileSystem fileSystem = path.getFileSystem(conf); + tmpUri = fileSystem.getUri().resolve(tmpUri.getPath()); + } + return tmpUri; + } + + protected Path getLocalOutput(Path outdir) throws IOException { + if (!isLocal(outdir)) { + throw new IllegalArgumentException("Outdir " + outdir + " is not in the local filesystem"); + } + Path localOutput = outdir; + FileSystem localFs = localOutput.getFileSystem(conf); + if (localFs.exists(localOutput)) { + if (localFs.isDirectory(localOutput)) { + String name = nameGenerator.get(); + if (StringUtils.isEmpty(name)) { + throw new IllegalArgumentException("Local output '" + localOutput + "' is a directory"); + } + localOutput = new Path(localOutput, name); + } else { + throw new IllegalArgumentException("File '" + localOutput + "' already exists!"); + } + } else { + if (!localFs.exists(localOutput.getParent())) { + Files.createDirectories(Paths.get(localOutput.getParent().toUri())); +// throw new IOException("No such file or directory: " + localOutput); + } + } + return localOutput; + } + + protected void deleteTemporaryFile(Path outdir) throws IOException { + LOGGER.info("Delete temporary file " + outdir.toUri()); + FileSystem fileSystem = outdir.getFileSystem(conf); + fileSystem.delete(outdir, true); + fileSystem.cancelDeleteOnExit(outdir); + LOGGER.info("Temporary file deleted!"); + } + + /** + * Concatenate all generated files from a MapReduce job into one single local file. 
+ * + * @param mrOutdir MapReduce output directory + * @param localOutput Local file + * @return List of copied files from HDFS + * @throws IOException on IOException + */ + protected List concatMrOutputToLocal(Path mrOutdir, Path localOutput) throws IOException { + return concatMrOutputToLocal(mrOutdir, localOutput, true, null); + } + + /** + * Concatenate all generated files from a MapReduce job into one single local file. + * + * @param mrOutdir MapReduce output directory + * @param localOutput Local file + * @param removeExtraHeaders Remove header lines starting with "#" from all files but the first + * @param partFilePrefix Filter partial files with specific prefix. Otherwise, concat them all. + * @return List of copied files from HDFS + * @throws IOException on IOException + */ + protected List concatMrOutputToLocal(Path mrOutdir, Path localOutput, boolean removeExtraHeaders, String partFilePrefix) + throws IOException { + // TODO: Allow copy output to any IOConnector + FileSystem fileSystem = mrOutdir.getFileSystem(getConf()); + RemoteIterator it = fileSystem.listFiles(mrOutdir, false); + List paths = new ArrayList<>(); + while (it.hasNext()) { + LocatedFileStatus status = it.next(); + Path path = status.getPath(); + if (status.isFile() + && !path.getName().equals(FileOutputCommitter.SUCCEEDED_FILE_NAME) + && !path.getName().equals(FileOutputCommitter.PENDING_DIR_NAME) + && !path.getName().equals(ParquetFileWriter.PARQUET_METADATA_FILE) + && !path.getName().equals(ParquetFileWriter.PARQUET_COMMON_METADATA_FILE) + && status.getLen() > 0) { + if (partFilePrefix == null || path.getName().startsWith(partFilePrefix)) { + paths.add(path); + } + } + } + StopWatch stopWatch = new StopWatch(); + stopWatch.start(); + if (paths.isEmpty()) { + LOGGER.warn("The MapReduce job didn't produce any output. 
This may not be expected."); + } else if (paths.size() == 1) { + LOGGER.info("Copy to local file"); + LOGGER.info(" Source : {} ({})", + paths.get(0).toUri(), humanReadableByteCount(fileSystem.getFileStatus(paths.get(0)).getLen(), false)); + LOGGER.info(" Target : {}", localOutput.toUri()); + fileSystem.copyToLocalFile(false, paths.get(0), localOutput); + } else { + LOGGER.info("Concat and copy to local : " + paths.size() + " partial files"); + LOGGER.info(" Source {}: {}", getCompression(paths.get(0).getName()), mrOutdir.toUri()); + LOGGER.info(" Target {}: {}", getCompression(localOutput.getName()), localOutput.toUri()); + LOGGER.info(" ---- "); + + try (OutputStream os = getOutputStreamPlain(localOutput.getName(), localOutput.getFileSystem(getConf()).create(localOutput))) { + for (int i = 0; i < paths.size(); i++) { + Path path = paths.get(i); + LOGGER.info("[{}] Concat {} file : '{}' ({}) ", + i, + getCompression(path.getName()), + path.toUri(), + humanReadableByteCount(fileSystem.getFileStatus(path).getLen(), false)); + try (InputStream isAux = getInputStream(path.getName(), fileSystem.open(path))) { + InputStream is = isAux; + // Remove extra headers from all files but the first + if (removeExtraHeaders && i != 0) { + BufferedReader br = new BufferedReader(new InputStreamReader(is)); + String line; + do { + br.mark(10 * 1024 * 1024); //10MB + line = br.readLine(); + // Skip blank lines and + } while (line != null && (StringUtils.isBlank(line) || line.startsWith("#"))); + br.reset(); + is = new ReaderInputStream(br, Charset.defaultCharset()); + } + + IOUtils.copyBytes(is, os, getConf(), false); + } + } + } + LOGGER.info("File size : " + humanReadableByteCount(Files.size(Paths.get(localOutput.toUri())), false)); + LOGGER.info("Time to copy from HDFS and concat : " + TimeUtils.durationToString(stopWatch)); + } + return paths; + } + + private static String getCompression(String name) throws IOException { + if (name.endsWith(".gz")) { + return "gzip"; + } else if 
(name.endsWith(".snappy")) { + return "snappy"; + } else if (name.endsWith(".lz4")) { + return "lz4"; + } else if (name.endsWith(".zst")) { + return "zstandard"; + } else { + return "plain"; + } + } + + private OutputStream getOutputStreamPlain(String name, OutputStream os) throws IOException { + CompressionCodec codec = getCompressionCodec(name); + if (codec == null) { + return os; + } + try { + return codec.createOutputStream(os); + } catch (UnsatisfiedLinkError error) { + if (codec instanceof SnappyCodec) { + return new SnappyOutputStream(os); + } else { + throw error; + } + } + } + + private CompressionCodec getCompressionCodec(String name) throws IOException { + return getCompressionCodec(getCompression(name), getConf()); + } + + public static CompressionCodec getCompressionCodec(String codecName, Configuration conf) throws IOException { + Class codecClass; + switch (codecName) { + case "deflate": + codecClass = DeflateCodec.class; + break; + case "gz": + case "gzip": + codecClass = GzipCodec.class; + break; + case "snappy": + codecClass = SnappyCodec.class; + break; + case "lz4": + codecClass = Lz4Codec.class; + break; + case "zstandard": case "ztandard": + codecClass = ZStandardCodec.class; + break; + case "bz": + codecClass = BZip2Codec.class; + break; + case "plain": + return null; + default: + throw new IOException("Unknown compression codec " + codecName); + } + return ReflectionUtils.newInstance(codecClass, conf); + } + + private InputStream getInputStream(String name, InputStream is) throws IOException { + CompressionCodec codec = getCompressionCodec(name); + if (codec == null) { + return is; + } + try { + return codec.createInputStream(is); + } catch (UnsatisfiedLinkError error) { + if (codec instanceof SnappyCodec) { + return new SnappyInputStream(is); + } else { + throw error; + } + } + } +} diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/AbstractVariantsTableDriver.java
b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/AbstractVariantsTableDriver.java index 356df234d3..b72c4d9d45 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/AbstractVariantsTableDriver.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/AbstractVariantsTableDriver.java @@ -33,6 +33,7 @@ import org.opencb.opencga.storage.core.variant.VariantStorageOptions; import org.opencb.opencga.storage.hadoop.utils.AbstractHBaseDriver; import org.opencb.opencga.storage.hadoop.utils.HBaseManager; +import org.opencb.opencga.storage.hadoop.utils.MapReduceOutputFile; import org.opencb.opencga.storage.hadoop.variant.archive.ArchiveTableHelper; import org.opencb.opencga.storage.hadoop.variant.gaps.FillMissingFromArchiveTask; import org.opencb.opencga.storage.hadoop.variant.metadata.HBaseVariantStorageMetadataDBAdaptorFactory; @@ -44,6 +45,7 @@ import java.io.IOException; import java.io.UncheckedIOException; import java.util.*; +import java.util.function.Supplier; import java.util.stream.Collectors; import static org.opencb.opencga.storage.hadoop.variant.HadoopVariantStorageEngine.FILE_ID; @@ -267,6 +269,27 @@ protected String getArchiveTable() { return getConf().get(ArchiveTableHelper.CONFIG_ARCHIVE_TABLE_NAME, StringUtils.EMPTY); } + protected MapReduceOutputFile initMapReduceOutputFile() throws IOException { + return initMapReduceOutputFile(null); + } + + protected MapReduceOutputFile initMapReduceOutputFile(Supplier nameGenerator) throws IOException { + return initMapReduceOutputFile(nameGenerator, false); + } + + protected MapReduceOutputFile initMapReduceOutputFile(Supplier nameGenerator, boolean optional) throws IOException { + String output = getParam(OUTPUT_PARAM); + if (StringUtils.isEmpty(output)) { + if (optional) { + return null; 
+ } else { + throw new IllegalArgumentException("Expected param " + OUTPUT_PARAM); + } + } + return new MapReduceOutputFile(output, nameGenerator, + getTableNameGenerator().getDbName() + "_" + getClass().getSimpleName(), getConf()); + } + protected HBaseVariantTableNameGenerator getTableNameGenerator() { String dbName = HBaseVariantTableNameGenerator.getDBNameFromVariantsTableName(getVariantsTable()); return new HBaseVariantTableNameGenerator(dbName, getConf()); diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantStorageEngine.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantStorageEngine.java index 023dbbaeec..3affa2e3b9 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantStorageEngine.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantStorageEngine.java @@ -1309,7 +1309,7 @@ private Configuration getHadoopConfiguration(ObjectMap options) { public MRExecutor getMRExecutor() throws StorageEngineException { if (mrExecutor == null) { - mrExecutor = MRExecutorFactory.getMRExecutor(getOptions()); + mrExecutor = MRExecutorFactory.getMRExecutor(getDBName(), getOptions(), getConf()); } return mrExecutor; } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/VariantTableAggregationDriver.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/VariantTableAggregationDriver.java index 0d471c8387..32da542bfe 100644 --- 
a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/VariantTableAggregationDriver.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/VariantTableAggregationDriver.java @@ -1,9 +1,8 @@ package org.opencb.opencga.storage.hadoop.variant; -import org.apache.commons.lang3.StringUtils; -import org.apache.hadoop.fs.Path; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.Reducer; +import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat; import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; import org.opencb.commons.datastore.core.Query; @@ -11,6 +10,7 @@ import org.opencb.opencga.storage.core.exceptions.StorageEngineException; import org.opencb.opencga.storage.core.metadata.VariantStorageMetadataManager; import org.opencb.opencga.storage.core.variant.VariantStorageOptions; +import org.opencb.opencga.storage.hadoop.utils.MapReduceOutputFile; import org.opencb.opencga.storage.hadoop.variant.mr.VariantMapReduceUtil; import org.opencb.opencga.storage.hadoop.variant.mr.VariantRowMapper; import org.slf4j.Logger; @@ -28,10 +28,8 @@ public abstract class VariantTableAggregationDriver extends AbstractVariantsTableDriver { private static final Logger LOGGER = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); - public static final String OUTPUT = "output"; - - protected Path outdir; - protected Path localOutput; + public static final String OUTPUT = OUTPUT_PARAM; + protected MapReduceOutputFile output; @Override @@ -51,16 +49,7 @@ protected void parseAndValidateParameters() throws IOException { throw new IllegalArgumentException("Missing study"); } - String outdirStr = getParam(OUTPUT); - if (StringUtils.isNotEmpty(outdirStr)) { - outdir = new Path(outdirStr); - - if (isLocal(outdir)) { - localOutput = 
getLocalOutput(outdir, this::generateOutputFileName); - outdir = getTempOutdir("opencga_sample_variant_stats", localOutput.getName()); - outdir.getFileSystem(getConf()).deleteOnExit(outdir); - } - } + output = initMapReduceOutputFile(this::generateOutputFileName, true); } @@ -118,17 +107,11 @@ protected Job setupJob(Job job, String archiveTable, String variantTable) throws job.setOutputKeyClass(getOutputKeyClass()); job.setOutputValueClass(getOutputValueClass()); - if (outdir == null) { + if (output == null) { job.setOutputFormatClass(NullOutputFormat.class); } else { job.setOutputFormatClass(TextOutputFormat.class); - TextOutputFormat.setOutputPath(job, outdir); - if (localOutput == null) { - LOGGER.info("Output directory : " + outdir); - } else { - LOGGER.info("Temporary output directory : " + outdir); - LOGGER.info("Local output file : " + localOutput); - } + FileOutputFormat.setOutputPath(job, output.getOutdir()); // set Path } int numReduceTasks = getNumReduceTasks(); @@ -142,13 +125,8 @@ protected Job setupJob(Job job, String archiveTable, String variantTable) throws @Override protected void postExecution(boolean succeed) throws IOException, StorageEngineException { super.postExecution(succeed); - if (succeed) { - if (localOutput != null) { - concatMrOutputToLocal(outdir, localOutput, isOutputWithHeaders(), null); - } - } - if (localOutput != null) { - deleteTemporaryFile(outdir); + if (output != null) { + output.postExecute(succeed); } } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/analysis/gwas/FisherTestDriver.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/analysis/gwas/FisherTestDriver.java index 0e11259da3..14398d925a 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/analysis/gwas/FisherTestDriver.java +++
b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/analysis/gwas/FisherTestDriver.java @@ -3,7 +3,6 @@ import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.tuple.Pair; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.compress.GzipCodec; @@ -16,7 +15,6 @@ import org.opencb.biodata.models.variant.stats.VariantStats; import org.opencb.commons.datastore.core.Query; import org.opencb.commons.datastore.core.QueryOptions; -import org.opencb.opencga.core.common.TimeUtils; import org.opencb.opencga.storage.core.exceptions.StorageEngineException; import org.opencb.opencga.storage.core.metadata.VariantStorageMetadataManager; import org.opencb.opencga.storage.core.metadata.models.ProjectMetadata; @@ -25,6 +23,7 @@ import org.opencb.opencga.storage.core.variant.adaptors.VariantField; import org.opencb.opencga.storage.core.variant.adaptors.VariantQueryException; import org.opencb.opencga.storage.core.variant.adaptors.VariantQueryParam; +import org.opencb.opencga.storage.hadoop.utils.MapReduceOutputFile; import org.opencb.opencga.storage.hadoop.variant.AbstractVariantsTableDriver; import org.opencb.opencga.storage.hadoop.variant.converters.VariantRow; import org.opencb.opencga.storage.hadoop.variant.converters.annotation.HBaseToVariantAnnotationConverter; @@ -50,7 +49,7 @@ public class FisherTestDriver extends AbstractVariantsTableDriver { private final Logger logger = LoggerFactory.getLogger(FisherTestDriver.class); // Output directory within DFS - public static final String OUTPUT = "output"; + public static final String OUTPUT = OUTPUT_PARAM; // // Move to local directory (remove from DFS) // public static final String MOVE_TO_LOCAL = "move-to-local"; public static final String CASE_COHORT = "caseCohort"; @@ -64,8 +63,7 @@ public class 
FisherTestDriver extends AbstractVariantsTableDriver { private Integer controlCohortId; private List caseCohort; private List controlCohort; - private Path outdir; - private Path localOutput; + private MapReduceOutputFile output; private Query query; private QueryOptions queryOptions; @@ -138,25 +136,10 @@ protected void parseAndValidateParameters() throws IOException { VariantField.STUDIES_SECONDARY_ALTERNATES, VariantField.STUDIES_STATS)); - String outdirStr = getConf().get(OUTPUT); - if (StringUtils.isEmpty(outdirStr)) { - outdir = new Path("fisher." + TimeUtils.getTime() + ".tsv"); - } else { - outdir = new Path(outdirStr); - if (isLocal(outdir)) { - localOutput = getLocalOutput(outdir, () -> "fisher_test." + TimeUtils.getTime() + ".tsv.gz"); - outdir = getTempOutdir("opencga_fisher_test", "." + localOutput.getName()); - outdir.getFileSystem(getConf()).deleteOnExit(outdir); - } - if (localOutput != null) { - logger.info(" * Outdir file: " + localOutput.toUri()); - logger.info(" * Temporary outdir file: " + outdir.toUri()); - } else { - logger.info(" * Outdir file: " + outdir.toUri()); - } - } + output = initMapReduceOutputFile(); } + private Pair> parseCohort(String cohortStr, String cohortDescription) throws IOException { VariantStorageMetadataManager metadataManager = getMetadataManager(); int studyId = getStudyId(); @@ -202,11 +185,11 @@ protected Job setupJob(Job job, String archiveTable, String variantTable) throws job.getConfiguration().set(CONTROL_COHORT_IDS, controlCohort.stream().map(Objects::toString).collect(Collectors.joining(","))); job.setOutputFormatClass(TextOutputFormat.class); - if (outdir.toString().toLowerCase().endsWith(".gz")) { + if (output.getOutdir().toString().toLowerCase().endsWith(".gz")) { TextOutputFormat.setCompressOutput(job, true); TextOutputFormat.setOutputCompressorClass(job, GzipCodec.class); } - TextOutputFormat.setOutputPath(job, outdir); + TextOutputFormat.setOutputPath(job, output.getOutdir()); 
job.setReducerClass(FisherTestReducer.class); job.setMapOutputKeyClass(NullWritable.class); @@ -227,14 +210,7 @@ protected String getJobOperationName() { @Override protected void postExecution(boolean succeed) throws IOException, StorageEngineException { super.postExecution(succeed); - if (succeed) { - if (localOutput != null) { - concatMrOutputToLocal(outdir, localOutput); - } - } - if (localOutput != null) { - deleteTemporaryFile(outdir); - } + output.postExecute(succeed); } public static class FisherTestMapper extends VariantRowMapper { diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/executors/MRExecutor.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/executors/MRExecutor.java index f8c85a813f..fd0c805d1d 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/executors/MRExecutor.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/executors/MRExecutor.java @@ -18,6 +18,7 @@ import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.time.StopWatch; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.util.Tool; import org.opencb.commons.datastore.core.ObjectMap; import org.opencb.opencga.core.common.GitRepositoryState; @@ -46,6 +47,8 @@ public abstract class MRExecutor { public static final String HADOOP_LIB_VERSION_PROPERTIES = "org/opencb/opencga/storage/hadoop/lib/version.properties"; + protected String dbName; + protected Configuration conf; private ObjectMap options; private List env; private static Logger logger = LoggerFactory.getLogger(MRExecutor.class); @@ -74,8 +77,10 @@ public String getErrorMessage() { } } - public MRExecutor init(ObjectMap options) { + public MRExecutor init(String 
dbName, Configuration conf, ObjectMap options) { + this.dbName = dbName; this.options = options; + this.conf = conf; env = options.getAsStringList(MR_HADOOP_ENV.key()); return this; } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/executors/MRExecutorFactory.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/executors/MRExecutorFactory.java index 2966683177..a51e4d8cde 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/executors/MRExecutorFactory.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/executors/MRExecutorFactory.java @@ -1,5 +1,6 @@ package org.opencb.opencga.storage.hadoop.variant.executors; +import org.apache.hadoop.conf.Configuration; import org.opencb.commons.datastore.core.ObjectMap; import org.opencb.opencga.storage.core.exceptions.StorageEngineException; @@ -15,33 +16,29 @@ public final class MRExecutorFactory { private MRExecutorFactory() { } - public static MRExecutor getMRExecutor(ObjectMap options) throws StorageEngineException { + public static MRExecutor getMRExecutor(String dbName, ObjectMap options, Configuration conf) throws StorageEngineException { MRExecutor mrExecutor; - Class aClass; String executor = options.getString(MR_EXECUTOR.key(), MR_EXECUTOR.defaultValue()); switch (executor.toLowerCase()) { case "system": - aClass = SystemMRExecutor.class; + mrExecutor = new SystemMRExecutor(); break; case "ssh": - aClass = SshMRExecutor.class; + mrExecutor = new SshMRExecutor(); break; default: try { + Class aClass; aClass = Class.forName(executor).asSubclass(MRExecutor.class); - } catch (ClassNotFoundException | ClassCastException e) { + mrExecutor = aClass.newInstance(); + } catch (InstantiationException | 
IllegalAccessException | ClassNotFoundException | ClassCastException e) { throw new StorageEngineException("Error creating MRExecutor '" + executor + "'", e); } break; } - try { - mrExecutor = aClass.newInstance(); - } catch (InstantiationException | IllegalAccessException e) { - throw new StorageEngineException("Error creating MRExecutor '" + executor + "'", e); - } // configure MRExecutor - mrExecutor.init(options); + mrExecutor.init(dbName, conf, options); return mrExecutor; } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/executors/SshMRExecutor.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/executors/SshMRExecutor.java index faea918588..e410e21265 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/executors/SshMRExecutor.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/executors/SshMRExecutor.java @@ -1,6 +1,7 @@ package org.opencb.opencga.storage.hadoop.variant.executors; import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.util.RunJar; import org.apache.tools.ant.types.Commandline; import org.opencb.commons.datastore.core.ObjectMap; @@ -8,10 +9,13 @@ import org.opencb.opencga.core.common.UriUtils; import org.opencb.opencga.storage.core.exceptions.StorageEngineException; import org.opencb.opencga.storage.hadoop.utils.AbstractHBaseDriver; +import org.opencb.opencga.storage.hadoop.utils.MapReduceOutputFile; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.net.URI; import java.nio.charset.Charset; import java.nio.file.Path; import java.nio.file.Paths; @@ -35,17 +39,17 @@ public 
class SshMRExecutor extends MRExecutor { // env-var expected by "sshpass -e" private static final String SSHPASS_ENV = "SSHPASS"; public static final String PID = "PID"; - public static final String EXTRA_OUTPUT_PREFIX = "EXTRA_OUTPUT_"; private static Logger logger = LoggerFactory.getLogger(SshMRExecutor.class); @Override - public SshMRExecutor init(ObjectMap options) { - super.init(options); + public SshMRExecutor init(String dbName, Configuration conf, ObjectMap options) { + super.init(dbName, conf, options); return this; } @Override public Result run(String executable, String[] args) throws StorageEngineException { + MapReduceOutputFile mrOutput = initMrOutput(executable, args); String commandLine = buildCommand(executable, args); List env = buildEnv(); @@ -105,17 +109,72 @@ public Result run(String executable, String[] args) throws StorageEngineExceptio int exitValue = command.getExitValue(); Runtime.getRuntime().removeShutdownHook(hook); ObjectMap result = readResult(new String(outputStream.toByteArray(), Charset.defaultCharset())); - if (exitValue == 0) { - copyOutputFiles(args, env); - for (String key : result.keySet()) { - if (key.startsWith(EXTRA_OUTPUT_PREFIX)) { - copyOutputFiles(result.getString(key), env); + boolean succeed = exitValue == 0; + try { + if (succeed) { + if (mrOutput != null) { + mrOutput.postExecute(result, succeed); + } else { + copyOutputFiles(args, env); } + // Copy extra output files + for (String key : result.keySet()) { + if (key.startsWith(MapReduceOutputFile.EXTRA_OUTPUT_PREFIX)) { + copyOutputFiles(result.getString(key), env); + } + } + } else { + if (mrOutput != null) { + mrOutput.postExecute(result, succeed); + } // else // should delete remote output files?
} + } catch (IOException e) { + throw new StorageEngineException(e.getMessage(), e); } return new Result(exitValue, result); } + /** + * If the MapReduce to be executed is writing to a local filesystem, change the output to a temporary HDFS path. + * The output will be copied to the local filesystem after the execution. + *

+ * This method will look for the {@link AbstractHBaseDriver#OUTPUT_PARAM} argument in the args array. + * + * @param executable Executable + * @param args Arguments passed to the executable. Might be modified + * @return MapReduceOutputFile if any + * @throws StorageEngineException if there is an issue creating the temporary output path + */ + private MapReduceOutputFile initMrOutput(String executable, String[] args) throws StorageEngineException { + MapReduceOutputFile mrOutput = null; + List argsList = Arrays.asList(args); + int outputIdx = argsList.indexOf(AbstractHBaseDriver.OUTPUT_PARAM); + if (outputIdx > 0 && argsList.size() > outputIdx + 1) { + String output = argsList.get(outputIdx + 1); + URI outputUri = UriUtils.createUriSafe(output); + if (MapReduceOutputFile.isLocal(outputUri)) { + try { + int i = executable.lastIndexOf('.'); + String tempFilePrefix; + if (i > 0) { + String className = executable.substring(i + 1); + tempFilePrefix = dbName + "_" + className; + } else { + tempFilePrefix = dbName; + } + mrOutput = new MapReduceOutputFile(outputUri.toString(), null, + tempFilePrefix, true, conf); + } catch (IOException e) { + throw new StorageEngineException(e.getMessage(), e); + } + logger.info("Change output from file:// to hdfs://. Using MapReduceOutputFile: " + mrOutput.getOutdir()); + // Replace output path with the temporary path + argsList.set(outputIdx + 1, mrOutput.getOutdir().toString()); + } + } + return mrOutput; + } + /** * Copy output files from remote server to local filesystem. *

@@ -129,7 +195,7 @@ public Result run(String executable, String[] args) throws StorageEngineExceptio */ private Path copyOutputFiles(String[] args, List env) throws StorageEngineException { List argsList = Arrays.asList(args); - int outputIdx = argsList.indexOf("output"); + int outputIdx = argsList.indexOf(AbstractHBaseDriver.OUTPUT_PARAM); if (outputIdx > 0 && argsList.size() > outputIdx + 1) { return copyOutputFiles(argsList.get(outputIdx + 1), env); } @@ -138,7 +204,12 @@ private Path copyOutputFiles(String[] args, List env) throws StorageEngi } private Path copyOutputFiles(String output, List env) throws StorageEngineException { - String targetOutput = UriUtils.createUriSafe(output).getPath(); + URI targetOutputUri = UriUtils.createUriSafe(output); + if (!MapReduceOutputFile.isLocal(targetOutputUri)) { + logger.info("Output is not a file:// URI. Skipping copy file {}", targetOutputUri); + return null; + } + String targetOutput = targetOutputUri.getPath(); if (StringUtils.isNotEmpty(targetOutput)) { String remoteOpencgaHome = getOptions().getString(MR_EXECUTOR_SSH_REMOTE_OPENCGA_HOME.key()); String srcOutput; diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/io/HadoopVariantExporter.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/io/HadoopVariantExporter.java index 53511b8473..f0dd9ed787 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/io/HadoopVariantExporter.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/io/HadoopVariantExporter.java @@ -75,7 +75,7 @@ public HadoopVariantExporter(HadoopVariantStorageEngine engine, VariantMetadataF public List export(@Nullable URI outputFileUri, VariantWriterFactory.VariantOutputFormat
outputFormat, URI variantsFile, ParsedVariantQuery variantQuery) throws IOException, StorageEngineException { - VariantHadoopDBAdaptor dbAdaptor = ((VariantHadoopDBAdaptor) engine.getDBAdaptor()); + VariantHadoopDBAdaptor dbAdaptor = engine.getDBAdaptor(); IOConnector ioConnector = ioConnectorProvider.get(outputFileUri); // Use pre-processed query instead of input query @@ -199,7 +199,8 @@ public List export(@Nullable URI outputFileUri, VariantWriterFactory.Varian || (variantsFile != null) || smallQuery || queryOptions.getBoolean("skipMapReduce", false) - || (!(ioConnector instanceof HDFSIOConnector) && !(ioConnector instanceof LocalIOConnector))) { + // Mapreduce can only use HDFS or Local IOConnectors. When using other IOConnectors, skip mapreduce + || !(ioConnector instanceof HDFSIOConnector || ioConnector instanceof LocalIOConnector)) { return super.export(outputFileUri, outputFormat, variantsFile, variantQuery); } else { outputFileUri = VariantWriterFactory.checkOutput(outputFileUri, outputFormat); diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/io/VariantDriver.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/io/VariantDriver.java index 8916c5242a..2658435b80 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/io/VariantDriver.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/io/VariantDriver.java @@ -11,6 +11,7 @@ import org.opencb.commons.datastore.core.QueryOptions; import org.opencb.opencga.storage.core.exceptions.StorageEngineException; import org.opencb.opencga.storage.core.variant.adaptors.VariantQueryParam; +import org.opencb.opencga.storage.hadoop.utils.MapReduceOutputFile; import 
org.opencb.opencga.storage.hadoop.variant.AbstractVariantsTableDriver; import org.opencb.opencga.storage.hadoop.variant.mr.VariantMapReduceUtil; import org.opencb.opencga.storage.hadoop.variant.mr.VariantMapper; @@ -38,8 +39,6 @@ */ public abstract class VariantDriver extends AbstractVariantsTableDriver { - public static final String OUTPUT_PARAM = "output"; - public static final String CONCAT_OUTPUT_PARAM = "concat-output"; protected MapReduceOutputFile output; private final Query query = new Query(); private final QueryOptions options = new QueryOptions(); @@ -52,7 +51,7 @@ protected void parseAndValidateParameters() throws IOException { super.parseAndValidateParameters(); // useReduceStep = Boolean.valueOf(getParam(CONCAT_OUTPUT_PARAM)); - output = new MapReduceOutputFile(getTableNameGenerator().getDbName() + "_" + getClass().getSimpleName()); + output = initMapReduceOutputFile(); getQueryFromConfig(query, getConf()); getQueryOptionsFromConfig(options, getConf()); diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/io/VariantExporterDriver.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/io/VariantExporterDriver.java index 93a75006fb..eea7f69d5b 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/io/VariantExporterDriver.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/io/VariantExporterDriver.java @@ -142,7 +142,9 @@ protected void setupJob(Job job) throws IOException { } if (SnappyCodec.isNativeCodeLoaded()) { FileOutputFormat.setCompressOutput(job, true); - FileOutputFormat.setOutputCompressorClass(job, SnappyCodec.class); + // FIXME: SnappyCodec might not be available in client side +// 
FileOutputFormat.setOutputCompressorClass(job, SnappyCodec.class); + FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class); } else { FileOutputFormat.setCompressOutput(job, true); FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class); diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantDriver.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantDriver.java index 91ac57391d..960196f6f1 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantDriver.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantDriver.java @@ -166,7 +166,9 @@ protected void setupJob(Job job) throws IOException { job.setOutputFormatClass(ValueOnlyTextOutputFormat.class); if (SnappyCodec.isNativeCodeLoaded()) { FileOutputFormat.setCompressOutput(job, true); - FileOutputFormat.setOutputCompressorClass(job, SnappyCodec.class); + // FIXME: SnappyCodec might not be available in client side +// FileOutputFormat.setOutputCompressorClass(job, SnappyCodec.class); + FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class); } else { FileOutputFormat.setCompressOutput(job, true); FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class); diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/VariantMapReduceUtil.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/VariantMapReduceUtil.java index 77786498ed..46b059e05c 100644 --- 
a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/VariantMapReduceUtil.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/VariantMapReduceUtil.java @@ -222,7 +222,9 @@ public static void initVariantMapperJobFromHBase(Job job, String variantTableNam job.setInputFormatClass(HBaseVariantTableInputFormat.class); job.getConfiguration().setBoolean(HBaseVariantTableInputFormat.MULTI_SCANS, scans.size() > 1); job.getConfiguration().setBoolean(HBaseVariantTableInputFormat.USE_SAMPLE_INDEX_TABLE_INPUT_FORMAT, useSampleIndex); - job.getConfiguration().set(HBaseVariantTableInputFormat.SAMPLE_INDEX_TABLE, sampleIndexTable); + if (sampleIndexTable != null) { + job.getConfiguration().set(HBaseVariantTableInputFormat.SAMPLE_INDEX_TABLE, sampleIndexTable); + } } public static void initVariantMapperJobFromPhoenix(Job job, VariantHadoopDBAdaptor dbAdaptor, diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/prune/VariantPruneDriver.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/prune/VariantPruneDriver.java index fddaa7c189..239331c0ac 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/prune/VariantPruneDriver.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/prune/VariantPruneDriver.java @@ -21,6 +21,7 @@ import org.opencb.opencga.storage.core.exceptions.StorageEngineException; import org.opencb.opencga.storage.core.metadata.VariantStorageMetadataManager; import org.opencb.opencga.storage.core.metadata.models.VariantScoreMetadata; +import 
org.opencb.opencga.storage.hadoop.utils.MapReduceOutputFile; import org.opencb.opencga.storage.hadoop.variant.AbstractVariantsTableDriver; import org.opencb.opencga.storage.hadoop.variant.adaptors.phoenix.PhoenixHelper; import org.opencb.opencga.storage.hadoop.variant.adaptors.phoenix.VariantPhoenixSchema; @@ -60,7 +61,7 @@ protected Class getMapperClass() { @Override protected String getJobOperationName() { - return "vairants-prune"; + return "variants-prune"; } @Override @@ -82,9 +83,7 @@ protected void parseAndValidateParameters() throws IOException { params.updateParams(new HashMap<>(Collections.singletonMap(key, value))); } } - output = new MapReduceOutputFile( - () -> "variant_prune_report." + TimeUtils.getTime() + ".txt", - "variant_prune_report"); + output = initMapReduceOutputFile(() -> "variant_prune_report." + TimeUtils.getTime() + ".txt"); } @Override diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/stats/CohortVariantStatsDriver.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/stats/CohortVariantStatsDriver.java index e9dfa8ee28..b633508455 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/stats/CohortVariantStatsDriver.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/stats/CohortVariantStatsDriver.java @@ -63,9 +63,6 @@ protected void parseAndValidateParameters() throws IOException { String samples = getParam(SAMPLES); String cohort = getParam(COHORT); - if (outdir == null) { - throw new IllegalArgumentException("Expected param " + OUTPUT); - } if (samples == null && cohort == null) { throw new IllegalArgumentException("Expected param " + SAMPLES + " or " + COHORT); } diff --git 
a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/stats/SampleVariantStatsDriver.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/stats/SampleVariantStatsDriver.java index 33f4e07675..7ff4049afc 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/stats/SampleVariantStatsDriver.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/stats/SampleVariantStatsDriver.java @@ -252,7 +252,7 @@ protected Job setupJob(Job job, String archiveTable, String variantTable) throws } job.getConfiguration().setInt(STUDY_ID, getStudyId()); job.getConfiguration().set(TRIOS, trios); - if (outdir != null) { + if (output != null) { job.getConfiguration().setBoolean(WRITE_TO_DISK, true); } return job; diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/stats/VariantStatsDriver.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/stats/VariantStatsDriver.java index c1dc34e2e2..20d1720531 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/stats/VariantStatsDriver.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/stats/VariantStatsDriver.java @@ -19,6 +19,7 @@ import org.opencb.opencga.storage.core.variant.adaptors.VariantQueryParam; import org.opencb.opencga.storage.core.variant.query.VariantQueryUtils; import org.opencb.opencga.storage.core.variant.stats.VariantStatisticsManager; +import org.opencb.opencga.storage.hadoop.utils.MapReduceOutputFile; 
import org.opencb.opencga.storage.hadoop.variant.AbstractVariantsTableDriver; import org.opencb.opencga.storage.hadoop.variant.GenomeHelper; import org.opencb.opencga.storage.hadoop.variant.adaptors.VariantHBaseQueryParser; @@ -101,9 +102,9 @@ protected void parseAndValidateParameters() throws IOException { logger.info(" * " + VariantStorageOptions.STATS_DEFAULT_GENOTYPE.key() + ": " + statsDefaultGenotype); - output = new MapReduceOutputFile(() -> "variant_stats." + output = initMapReduceOutputFile(() -> "variant_stats." + (cohorts.size() < 10 ? "." + String.join("_", cohortNames) : "") - + TimeUtils.getTime() + ".json", "opencga_sample_variant_stats"); + + TimeUtils.getTime() + ".json", true); } @Override @@ -129,7 +130,7 @@ protected Job setupJob(Job job, String archiveTableName, String variantTableName query.put(VariantQueryParam.INCLUDE_FILE.key(), VariantQueryUtils.NONE); } - if (output.getOutdir() != null) { + if (output != null) { // Do not index stats. // Allow any input query. // Write stats to file. 
@@ -212,7 +213,9 @@ protected Job setupJob(Job job, String archiveTableName, String variantTableName @Override protected void postExecution(boolean succeed) throws IOException, StorageEngineException { super.postExecution(succeed); - output.postExecute(succeed); + if (output != null) { + output.postExecute(succeed); + } } @Override diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/executors/SshMRExecutorTest.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/executors/SshMRExecutorTest.java index 99f109eff1..5d3018deee 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/executors/SshMRExecutorTest.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/executors/SshMRExecutorTest.java @@ -1,5 +1,6 @@ package org.opencb.opencga.storage.hadoop.variant.executors; +import org.apache.hadoop.conf.Configuration; import org.junit.Before; import org.junit.Test; import org.junit.experimental.categories.Category; @@ -36,14 +37,14 @@ public void setUp() throws Exception { @Test public void testFactory() throws StorageEngineException { - MRExecutor mrExecutor = MRExecutorFactory.getMRExecutor(options); + MRExecutor mrExecutor = MRExecutorFactory.getMRExecutor("", options, new Configuration()); assertThat(mrExecutor, instanceOf(SshMRExecutor.class)); } @Test public void testRun() throws StorageEngineException { SshMRExecutor sshMRExecutor = new SshMRExecutor(); - sshMRExecutor.init(options); + sshMRExecutor.init("", new Configuration(), options); String cmd = sshMRExecutor.buildCommand("echo", "hello world", HadoopVariantStorageOptions.MR_EXECUTOR_SSH_PASSWORD.key(), "password"); assertEquals("/opt/opencga/misc/scripts/hadoop-ssh.sh echo \"hello world\" " + 
HadoopVariantStorageOptions.MR_EXECUTOR_SSH_PASSWORD.key() + " _redacted_", cmd); @@ -57,7 +58,7 @@ public void testRun() throws StorageEngineException { @Test public void testChangeRemoteOpenCGAHome() throws StorageEngineException { SshMRExecutor sshMRExecutor = new SshMRExecutor(); - sshMRExecutor.init(options.append(HadoopVariantStorageOptions.MR_EXECUTOR_SSH_REMOTE_OPENCGA_HOME.key(), "/home/user/opencga")); + sshMRExecutor.init("", new Configuration(), options.append(HadoopVariantStorageOptions.MR_EXECUTOR_SSH_REMOTE_OPENCGA_HOME.key(), "/home/user/opencga")); String hadoopClasspath = "/opt/opencga/libs/myLib.jar::/opt/opencga/libs/myLibOther.jar:/opt/opencga/conf/hadoop"; String expectedHadoopClasspath = "/home/user/opencga/libs/myLib.jar:/home/user/opencga/libs/myLibOther.jar:/home/user/opencga/conf/hadoop"; diff --git a/pom.xml b/pom.xml index e073746344..840bbb15ec 100644 --- a/pom.xml +++ b/pom.xml @@ -113,7 +113,7 @@ 2.2.0 2.1.0 1.0.0 - 1.1.8.2 + 1.1.10.4 ${parquet-common.version} 5.0 From cd50a3c483deace1e2f45bdec88ae1028862c4b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Mon, 25 Nov 2024 13:53:39 +0000 Subject: [PATCH 060/122] storage: Improve MapReduceOutputFile concatMrOutputToLocal. 
#TASK-6722 --- .../opencb/opencga/core/common/IOUtils.java | 115 ++++++++++++++++++ .../opencga/core/common/IOUtilsTest.java | 19 ++- .../hadoop/utils/MapReduceOutputFile.java | 43 +++++-- 3 files changed, 163 insertions(+), 14 deletions(-) diff --git a/opencga-core/src/main/java/org/opencb/opencga/core/common/IOUtils.java b/opencga-core/src/main/java/org/opencb/opencga/core/common/IOUtils.java index eb0cdeaf29..bfc9f3524b 100644 --- a/opencga-core/src/main/java/org/opencb/opencga/core/common/IOUtils.java +++ b/opencga-core/src/main/java/org/opencb/opencga/core/common/IOUtils.java @@ -16,12 +16,21 @@ package org.opencb.opencga.core.common; +import org.opencb.commons.run.ParallelTaskRunner; + import java.io.*; +import java.nio.ByteBuffer; import java.nio.charset.Charset; import java.nio.file.*; import java.nio.file.attribute.BasicFileAttributes; import java.util.ArrayList; +import java.util.Collections; +import java.util.LinkedList; import java.util.List; +import java.util.concurrent.ArrayBlockingQueue; +import java.util.concurrent.BlockingQueue; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.atomic.AtomicReference; import java.util.regex.Pattern; import java.util.zip.ZipEntry; import java.util.zip.ZipOutputStream; @@ -407,4 +416,110 @@ public static long fromHumanReadableToByte(String value, boolean assumeBinary) { } return (long) (Double.parseDouble(value) * Math.pow(unit, exp)); } + + public static void copyBytesParallel(InputStream is, OutputStream os) throws IOException { + copyBytesParallel(is, os, 4096); + } + public static void copyBytesParallel(InputStream is, OutputStream os, int bufferSize) throws IOException { + List buffersPool = Collections.synchronizedList(new LinkedList<>()); + ArrayBlockingQueue buffersQueue = new ArrayBlockingQueue<>(5); + AtomicReference exception = new AtomicReference<>(); + + Thread readerThread = new Thread(() -> { + try { + while (true) { + // Take a buffer from the pool or create a new one + 
ByteBuffer buf = buffersPool.isEmpty() ? ByteBuffer.allocate(bufferSize) : buffersPool.remove(0); + int bytesRead = is.read(buf.array()); + if (bytesRead == -1) { + buffersQueue.put(ByteBuffer.allocate(0)); // Signal end of stream + break; + } + buf.limit(bytesRead); + buffersQueue.put(buf); + } + } catch (Exception e) { + if (!exception.compareAndSet(null, e)) { + exception.get().addSuppressed(e); + } + } + }); + + Thread writerThread = new Thread(() -> { + try { + while (true) { + ByteBuffer buf = buffersQueue.take(); + if (buf.limit() == 0) { + break; // End of stream signal + } + os.write(buf.array(), 0, buf.limit()); + buf.clear(); + // Return the buffer to the pool + buffersPool.add(buf); + } + } catch (Exception e) { + if (!exception.compareAndSet(null, e)) { + exception.get().addSuppressed(e); + } + } + }); + + readerThread.start(); + writerThread.start(); + + try { + readerThread.join(); + writerThread.join(); + } catch (InterruptedException e) { + throw new IOException(e); + } + + if (exception.get() != null) { + throw new IOException(exception.get()); + } + } + + public static void copyBytesParallel2(InputStream is, OutputStream os, int bufferSize) throws IOException { + + List buffersPool = Collections.synchronizedList(new LinkedList<>()); + ParallelTaskRunner.Config config = ParallelTaskRunner.Config.builder() + .setNumTasks(1) + .setCapacity(5) + .setSorted(true) + .build(); + ParallelTaskRunner runner = new ParallelTaskRunner<>(batchSize -> { + try { + ByteBuffer buf = buffersPool.isEmpty() ? 
ByteBuffer.allocate(bufferSize) : buffersPool.remove(0); + int bytesRead = is.read(buf.array()); + if (bytesRead > 0) { + if (bytesRead != buf.array().length) { + buf.limit(bytesRead); + buf.rewind(); + } + return Collections.singletonList(buf); + } else { + return Collections.emptyList(); + } + } catch (IOException e) { + throw new UncheckedIOException(e); + } + }, t -> t, batch -> { + try { + for (ByteBuffer buf : batch) { + os.write(buf.array(), 0, buf.limit()); + // Return the buffer to the pool + buf.clear(); + buffersPool.add(buf); + } + } catch (IOException e1) { + throw new UncheckedIOException(e1); + } + return true; + }, config); + try { + runner.run(); + } catch (ExecutionException e) { + throw new IOException(e); + } + } } diff --git a/opencga-core/src/test/java/org/opencb/opencga/core/common/IOUtilsTest.java b/opencga-core/src/test/java/org/opencb/opencga/core/common/IOUtilsTest.java index f8d85da1ac..9da0d25a40 100644 --- a/opencga-core/src/test/java/org/opencb/opencga/core/common/IOUtilsTest.java +++ b/opencga-core/src/test/java/org/opencb/opencga/core/common/IOUtilsTest.java @@ -16,14 +16,14 @@ package org.opencb.opencga.core.common; +import org.junit.Assert; import org.junit.Test; import org.junit.experimental.categories.Category; import org.opencb.opencga.core.testclassification.duration.ShortTests; -import java.io.BufferedReader; -import java.io.InputStream; -import java.io.InputStreamReader; +import java.io.*; import java.nio.file.Paths; +import java.util.Random; @Category(ShortTests.class) public class IOUtilsTest { @@ -68,4 +68,17 @@ public void testGrepFile() throws Exception { in.close(); } + + @Test + public void copyBytesHandlesBufferSizeSmallerThanInput() throws Exception { +// byte[] inputData = "Hello, World!".getBytes(); + byte[] inputData = new byte[10 * 1024 * 1024 + 5]; // 10 MB + new Random().nextBytes(inputData); + InputStream is = new ByteArrayInputStream(inputData); + ByteArrayOutputStream os = new ByteArrayOutputStream(); + + 
IOUtils.copyBytesParallel(is, os, 4096); + + Assert.assertArrayEquals(inputData, os.toByteArray()); + } } \ No newline at end of file diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/utils/MapReduceOutputFile.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/utils/MapReduceOutputFile.java index e91be76f97..0510f38f46 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/utils/MapReduceOutputFile.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/utils/MapReduceOutputFile.java @@ -5,10 +5,7 @@ import org.apache.commons.lang3.time.StopWatch; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.LocatedFileStatus; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.RemoteIterator; -import org.apache.hadoop.io.IOUtils; +import org.apache.hadoop.fs.*; import org.apache.hadoop.io.compress.*; import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter; import org.apache.hadoop.util.ReflectionUtils; @@ -365,14 +362,17 @@ protected List concatMrOutputToLocal(Path mrOutdir, Path localOutput, bool try (OutputStream os = getOutputStreamPlain(localOutput.getName(), localOutput.getFileSystem(getConf()).create(localOutput))) { for (int i = 0; i < paths.size(); i++) { - Path path = paths.get(i); + Path partFile = paths.get(i); + long partFileSize = fileSystem.getFileStatus(partFile).getLen(); LOGGER.info("[{}] Concat {} file : '{}' ({}) ", i, - getCompression(path.getName()), - path.toUri(), - humanReadableByteCount(fileSystem.getFileStatus(path).getLen(), false)); - try (InputStream isAux = getInputStream(path.getName(), fileSystem.open(path))) { - InputStream is = isAux; + getCompression(partFile.getName()), 
+ partFile.toUri(), + humanReadableByteCount(partFileSize, false)); + InputStream is = null; + Throwable e = null; + try { + is = getInputStream(partFile.getName(), fileSystem.open(partFile)); // Remove extra headers from all files but the first if (removeExtraHeaders && i != 0) { BufferedReader br = new BufferedReader(new InputStreamReader(is)); @@ -386,7 +386,28 @@ protected List concatMrOutputToLocal(Path mrOutdir, Path localOutput, bool is = new ReaderInputStream(br, Charset.defaultCharset()); } - IOUtils.copyBytes(is, os, getConf(), false); + if (partFileSize > 50 * 1024 * 1024) { + org.opencb.opencga.core.common.IOUtils.copyBytesParallel(is, os, getConf().getInt( + CommonConfigurationKeysPublic.IO_FILE_BUFFER_SIZE_KEY, + CommonConfigurationKeysPublic.IO_FILE_BUFFER_SIZE_DEFAULT)); + } else { + org.apache.hadoop.io.IOUtils.copyBytes(is, os, getConf(), false); + } + } catch (Throwable throwable) { + e = throwable; + throw throwable; + } finally { + if (is != null) { + try { + is.close(); + } catch (IOException ex) { + if (e == null) { + throw ex; + } else { + e.addSuppressed(ex); + } + } + } } } } From d430391dfcf7afea27d228dc2135fb25274bf1e6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Mon, 25 Nov 2024 17:28:28 +0000 Subject: [PATCH 061/122] storage: Increase mapreduce.task.timeout to 30min #TASK-6722 --- .../src/main/resources/storage-configuration.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/opencga-storage/opencga-storage-core/src/main/resources/storage-configuration.yml b/opencga-storage/opencga-storage-core/src/main/resources/storage-configuration.yml index dfa6865eb4..c80d7ffb92 100644 --- a/opencga-storage/opencga-storage-core/src/main/resources/storage-configuration.yml +++ b/opencga-storage/opencga-storage-core/src/main/resources/storage-configuration.yml @@ -187,6 +187,7 @@ variant: # DOCKER_HOST environment variable to be used by the docker executor inside the MapReduce job 
storage.hadoop.mr.stream.docker.host: "" + mapreduce.task.timeout: 1800000 mapreduce.map.memory.mb: 2048 DeleteHBaseColumnDriver: storage.hadoop.write.mappers.limit.factor: 4 From e35ee834c3f6653a69bc3f7a263ee342022086e2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Mon, 25 Nov 2024 17:58:55 +0000 Subject: [PATCH 062/122] storage: Fix temporary mapreduce outdir. #TASK-6722 --- .../opencga/storage/hadoop/variant/executors/SshMRExecutor.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/executors/SshMRExecutor.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/executors/SshMRExecutor.java index e410e21265..a4c974394b 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/executors/SshMRExecutor.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/executors/SshMRExecutor.java @@ -165,7 +165,7 @@ private MapReduceOutputFile initMrOutput(String executable, String[] args) throw String tempFilePrefix; if (i > 0) { String className = executable.substring(i); - tempFilePrefix = dbName + "_" + className; + tempFilePrefix = dbName + className; } else { tempFilePrefix = dbName; } From 0c486033762de4229fc13f7dc73ff6788eaa8912 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Tue, 26 Nov 2024 08:30:32 +0000 Subject: [PATCH 063/122] storage: Do not double copy hdfs files #TASK-6722 --- .../storage/hadoop/variant/executors/SshMRExecutor.java | 7 ------- 1 file changed, 7 deletions(-) diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/executors/SshMRExecutor.java 
b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/executors/SshMRExecutor.java index a4c974394b..af90a7144f 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/executors/SshMRExecutor.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/executors/SshMRExecutor.java @@ -110,13 +110,6 @@ public Result run(String executable, String[] args) throws StorageEngineExceptio Runtime.getRuntime().removeShutdownHook(hook); ObjectMap result = readResult(new String(outputStream.toByteArray(), Charset.defaultCharset())); boolean succeed = exitValue == 0; - if (mrOutput != null) { - try { - mrOutput.postExecute(result, succeed); - } catch (IOException e) { - throw new StorageEngineException(e.getMessage(), e); - } - } try { if (succeed) { if (mrOutput != null) { From ccf7438be171c55c7130398eb8d53c6e3fcd60be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Tue, 26 Nov 2024 11:41:19 +0000 Subject: [PATCH 064/122] storage: Use reducer to concat binary files #TASK-6722 --- .../hadoop/utils/MapReduceOutputFile.java | 2 +- .../variant/executors/MRExecutorFactory.java | 5 ++ .../variant/executors/SshMRExecutor.java | 18 ++++--- .../variant/io/VariantExporterDriver.java | 4 ++ .../variant/HadoopVariantStorageTest.java | 35 ++++++++++-- .../HadoopVariantAnnotationManagerTest.java | 2 +- .../variant/index/sample/SampleIndexTest.java | 8 +-- .../variant/io/HadoopVariantExporterTest.java | 53 ++++++++++++++++--- 8 files changed, 103 insertions(+), 24 deletions(-) diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/utils/MapReduceOutputFile.java 
b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/utils/MapReduceOutputFile.java index 0510f38f46..e80a2d07da 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/utils/MapReduceOutputFile.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/utils/MapReduceOutputFile.java @@ -133,7 +133,7 @@ public static boolean isLocal(URI uri) { if (StringUtils.isEmpty(scheme)) { scheme = "file"; } - return StringUtils.isEmpty(scheme) || "file".equals(scheme); + return "file".equals(scheme); } public static boolean isHdfs(Path dir, Configuration conf) { diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/executors/MRExecutorFactory.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/executors/MRExecutorFactory.java index a51e4d8cde..335bbeb14a 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/executors/MRExecutorFactory.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/executors/MRExecutorFactory.java @@ -3,6 +3,8 @@ import org.apache.hadoop.conf.Configuration; import org.opencb.commons.datastore.core.ObjectMap; import org.opencb.opencga.storage.core.exceptions.StorageEngineException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import static org.opencb.opencga.storage.hadoop.variant.HadoopVariantStorageOptions.MR_EXECUTOR; @@ -13,6 +15,8 @@ */ public final class MRExecutorFactory { + private static Logger logger = LoggerFactory.getLogger(SshMRExecutor.class); + private MRExecutorFactory() { } @@ -28,6 +32,7 @@ public static 
MRExecutor getMRExecutor(String dbName, ObjectMap options, Configu break; default: try { + logger.info("Creating new instance of MRExecutor '{}'", executor); Class aClass; aClass = Class.forName(executor).asSubclass(MRExecutor.class); mrExecutor = aClass.newInstance(); diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/executors/SshMRExecutor.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/executors/SshMRExecutor.java index af90a7144f..57a391a24d 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/executors/SshMRExecutor.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/executors/SshMRExecutor.java @@ -50,7 +50,6 @@ public SshMRExecutor init(String dbName, Configuration conf, ObjectMap options) @Override public Result run(String executable, String[] args) throws StorageEngineException { MapReduceOutputFile mrOutput = initMrOutput(executable, args); - String commandLine = buildCommand(executable, args); List env = buildEnv(); ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); @@ -103,13 +102,10 @@ public Result run(String executable, String[] args) throws StorageEngineExceptio } }); Runtime.getRuntime().addShutdownHook(hook); - Command command = new Command(commandLine, env); - command.setErrorOutputStream(outputStream); - command.run(); - int exitValue = command.getExitValue(); + int exitValue = runRemote(executable, args, env, outputStream); + boolean succeed = exitValue == 0; Runtime.getRuntime().removeShutdownHook(hook); ObjectMap result = readResult(new String(outputStream.toByteArray(), Charset.defaultCharset())); - boolean succeed = exitValue == 0; try { if (succeed) { if (mrOutput != null) { @@ -134,6 +130,14 @@ 
public Result run(String executable, String[] args) throws StorageEngineExceptio return new Result(exitValue, result); } + protected int runRemote(String executable, String[] args, List env, ByteArrayOutputStream outputStream) { + String commandLine = buildCommand(executable, args); + Command command = new Command(commandLine, env); + command.setErrorOutputStream(outputStream); + command.run(); + return command.getExitValue(); + } + /** * If the MapReduce to be executed is writing to a local filesystem, change the output to a temporary HDFS path. * The output will be copied to the local filesystem after the execution. @@ -198,7 +202,7 @@ private Path copyOutputFiles(String[] args, List env) throws StorageEngi private Path copyOutputFiles(String output, List env) throws StorageEngineException { URI targetOutputUri = UriUtils.createUriSafe(output); - if (MapReduceOutputFile.isLocal(targetOutputUri)) { + if (!MapReduceOutputFile.isLocal(targetOutputUri)) { logger.info("Output is not a file:// URI. 
Skipping copy file {}", targetOutputUri); return null; } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/io/VariantExporterDriver.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/io/VariantExporterDriver.java index eea7f69d5b..a26dd84d6b 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/io/VariantExporterDriver.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/io/VariantExporterDriver.java @@ -51,6 +51,10 @@ protected void parseAndValidateParameters() throws IOException { super.parseAndValidateParameters(); outputFormat = VariantWriterFactory.VariantOutputFormat.valueOf(getParam(OUTPUT_FORMAT_PARAM, "avro").toUpperCase()); + if (outputFormat.isBinary()) { + // Binary outputs should be concatenated in a reduce step + useReduceStep = true; + } } @Override diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantStorageTest.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantStorageTest.java index 4ddc5d03a0..faa05ef988 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantStorageTest.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantStorageTest.java @@ -90,14 +90,17 @@ import org.opencb.opencga.storage.hadoop.utils.HBaseManager; import org.opencb.opencga.storage.hadoop.variant.adaptors.phoenix.VariantPhoenixSchemaManager; import 
org.opencb.opencga.storage.hadoop.variant.executors.MRExecutor; +import org.opencb.opencga.storage.hadoop.variant.executors.SshMRExecutor; import org.opencb.opencga.storage.hadoop.variant.index.IndexUtils; import org.opencb.opencga.storage.hadoop.variant.index.sample.SampleIndexSchema; import org.opencb.opencga.storage.hadoop.variant.utils.HBaseVariantTableNameGenerator; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.InputStream; +import java.io.PrintStream; import java.lang.reflect.Method; import java.util.*; import java.util.concurrent.atomic.AtomicReference; @@ -394,13 +397,13 @@ static HadoopVariantStorageEngine getHadoopVariantStorageEngine(Map o } engine.setConfiguration(storageConfiguration, HadoopVariantStorageEngine.STORAGE_ENGINE_ID, VariantStorageBaseTest.DB_NAME); - engine.mrExecutor = new TestMRExecutor(configuration.get()); + engine.mrExecutor = null; engine.conf = conf; return engine; } - default TestMRExecutor getMrExecutor() { - return new TestMRExecutor(configuration.get()); + default MRExecutor getMrExecutor() throws StorageEngineException { + return HadoopVariantStorageTest.manager.get().getMRExecutor(); } static StorageConfiguration getStorageConfiguration(Configuration conf) throws IOException { @@ -416,7 +419,8 @@ static StorageConfiguration updateStorageConfiguration(StorageConfiguration stor StorageEngineConfiguration variantConfiguration = storageConfiguration.getVariantEngine(HadoopVariantStorageEngine.STORAGE_ENGINE_ID); ObjectMap options = variantConfiguration.getOptions(); - options.put(HadoopVariantStorageOptions.MR_EXECUTOR.key(), TestMRExecutor.class.getName()); + options.put(HadoopVariantStorageOptions.MR_JAR_WITH_DEPENDENCIES.key(), "dummy-test-jar-with-depepdencies.jar"); + options.put(HadoopVariantStorageOptions.MR_EXECUTOR.key(), TestSshMrExecutor.class.getName()); TestMRExecutor.setStaticConfiguration(conf); 
options.put(HadoopVariantStorageOptions.MR_ADD_DEPENDENCY_JARS.key(), false); @@ -517,6 +521,29 @@ default int getExpectedNumLoadedVariants(VariantFileMetadata fileMetadata) { return numRecords; } + class TestSshMrExecutor extends SshMRExecutor { + private final Configuration configuration; + + public TestSshMrExecutor() { + this.configuration = new Configuration(TestMRExecutor.staticConfiguration); + } + + @Override + protected int runRemote(String executable, String[] args, List env, ByteArrayOutputStream outputStream) { + PrintStream out = System.out; + try { + return new TestMRExecutor(conf).run(executable, args).getExitValue(); + } finally { + System.setOut(out); + } + } + + @Override + protected List buildEnv() { + return new LinkedList<>(); + } + } + class TestMRExecutor extends MRExecutor { private static Configuration staticConfiguration; diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/annotation/HadoopVariantAnnotationManagerTest.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/annotation/HadoopVariantAnnotationManagerTest.java index 253a24fc77..fa09dad879 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/annotation/HadoopVariantAnnotationManagerTest.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/annotation/HadoopVariantAnnotationManagerTest.java @@ -48,7 +48,7 @@ public void incrementalAnnotationTest() throws Exception { .append(VariantStorageOptions.STATS_CALCULATE.key(), false)); // Update pending variants - new TestMRExecutor().run(DiscoverPendingVariantsDriver.class, + getMrExecutor().run(DiscoverPendingVariantsDriver.class, DiscoverPendingVariantsDriver.buildArgs(engine.getDBAdaptor().getVariantTable(), 
AnnotationPendingVariantsDescriptor.class, new ObjectMap()), "Prepare variants to annotate"); diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexTest.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexTest.java index 98062c27b8..2e64c65229 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexTest.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexTest.java @@ -324,13 +324,13 @@ public void regenerateSampleIndex() throws Exception { .append(SampleIndexDriver.SAMPLE_INDEX_VERSION, version) .append(SampleIndexDriver.OUTPUT, copy) .append(SampleIndexDriver.SAMPLES, "all"); - new TestMRExecutor().run(SampleIndexDriver.class, SampleIndexDriver.buildArgs( + getMrExecutor().run(SampleIndexDriver.class, SampleIndexDriver.buildArgs( dbAdaptor.getArchiveTableName(studyId), dbAdaptor.getVariantTable(), studyId, Collections.emptySet(), options), ""); - new TestMRExecutor().run(SampleIndexAnnotationLoaderDriver.class, SampleIndexAnnotationLoaderDriver.buildArgs( + getMrExecutor().run(SampleIndexAnnotationLoaderDriver.class, SampleIndexAnnotationLoaderDriver.buildArgs( dbAdaptor.getArchiveTableName(studyId), dbAdaptor.getVariantTable(), studyId, @@ -339,7 +339,7 @@ public void regenerateSampleIndex() throws Exception { if (sampleNames.get(study).containsAll(trios.get(0).toList())) { options.put(FamilyIndexDriver.TRIOS, trios.stream().map(Trio::serialize).collect(Collectors.joining(";"))); options.put(FamilyIndexDriver.OVERWRITE, true); - new TestMRExecutor().run(FamilyIndexDriver.class, FamilyIndexDriver.buildArgs( + getMrExecutor().run(FamilyIndexDriver.class, 
FamilyIndexDriver.buildArgs( dbAdaptor.getArchiveTableName(studyId), dbAdaptor.getVariantTable(), studyId, @@ -347,7 +347,7 @@ public void regenerateSampleIndex() throws Exception { } else if (study.equals(STUDY_NAME_3)) { options.put(FamilyIndexDriver.TRIOS, triosPlatinum.stream().map(Trio::serialize).collect(Collectors.joining(";"))); options.put(FamilyIndexDriver.OVERWRITE, true); - new TestMRExecutor().run(FamilyIndexDriver.class, FamilyIndexDriver.buildArgs( + getMrExecutor().run(FamilyIndexDriver.class, FamilyIndexDriver.buildArgs( dbAdaptor.getArchiveTableName(studyId), dbAdaptor.getVariantTable(), studyId, diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/io/HadoopVariantExporterTest.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/io/HadoopVariantExporterTest.java index 32f0151d67..d787315182 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/io/HadoopVariantExporterTest.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/io/HadoopVariantExporterTest.java @@ -1,11 +1,17 @@ package org.opencb.opencga.storage.hadoop.variant.io; +import com.fasterxml.jackson.databind.MapperFeature; +import com.fasterxml.jackson.databind.ObjectMapper; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.junit.*; import org.junit.experimental.categories.Category; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; +import org.opencb.biodata.models.metadata.Individual; +import org.opencb.biodata.models.variant.Variant; +import org.opencb.biodata.models.variant.metadata.VariantMetadata; +import org.opencb.biodata.models.variant.metadata.VariantStudyMetadata; import 
org.opencb.commons.datastore.core.ObjectMap; import org.opencb.commons.datastore.core.Query; import org.opencb.commons.datastore.core.QueryOptions; @@ -17,6 +23,7 @@ import org.opencb.opencga.storage.core.variant.adaptors.VariantQuery; import org.opencb.opencga.storage.core.variant.io.VariantExporter; import org.opencb.opencga.storage.core.variant.io.VariantWriterFactory; +import org.opencb.opencga.storage.core.variant.io.avro.VariantAvroReader; import org.opencb.opencga.storage.core.variant.solr.VariantSolrExternalResource; import org.opencb.opencga.storage.hadoop.HBaseCompat; import org.opencb.opencga.storage.hadoop.variant.HadoopVariantStorageEngine; @@ -24,8 +31,14 @@ import org.opencb.opencga.storage.hadoop.variant.VariantHbaseTestUtils; import java.io.IOException; +import java.io.InputStream; import java.net.URI; import java.nio.file.Paths; +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; import static org.opencb.opencga.storage.core.variant.adaptors.VariantQueryParam.*; @@ -140,9 +153,28 @@ public void exportMultiRegion() throws Exception { public void exportAvroGz() throws Exception { String fileName = "variants.avro_gz"; URI uri = getOutputUri(fileName); - uri = variantStorageEngine.exportData(uri, VariantWriterFactory.VariantOutputFormat.AVRO_GZ, null, new Query(STUDY.key(), study1), new QueryOptions()).get(0); + List uris = variantStorageEngine.exportData(uri, VariantWriterFactory.VariantOutputFormat.AVRO_GZ, null, new Query(STUDY.key(), study1), new QueryOptions()); - copyToLocal(uri); + URI outputUri = copyToLocal(uris.get(0)); + if (exportToLocal) { + URI metaUri = copyToLocal(uris.get(1)); + + ObjectMapper objectMapper = new ObjectMapper().configure(MapperFeature.REQUIRE_SETTERS_FOR_GETTERS, true); + VariantMetadata metadata; + try (InputStream is = ioConnectorProvider.newInputStream(metaUri)) { + metadata = objectMapper.readValue(is, 
VariantMetadata.class); + } + + Map> samplesPositions = new HashMap<>(); + for (VariantStudyMetadata study : metadata.getStudies()) { + LinkedHashMap samples = samplesPositions.put(study.getId(), new LinkedHashMap<>()); + for (Individual individual : study.getIndividuals()) { + samples.put(individual.getId(), samples.size()); + } + } + List variants = new VariantAvroReader(Paths.get(outputUri).toFile(), samplesPositions).stream().collect(Collectors.toList()); + System.out.println("variants.size() = " + variants.size()); + } } @Test @@ -158,6 +190,7 @@ public void exportVcf() throws Exception { public void exportVcfGz() throws Exception { String fileName = "variants.vcf.gz"; URI uri = getOutputUri(fileName); + System.out.println("variantStorageEngine.getMRExecutor() = " + variantStorageEngine.getMRExecutor()); variantStorageEngine.exportData(uri, VariantWriterFactory.VariantOutputFormat.VCF_GZ, null, new Query(STUDY.key(), study1), new QueryOptions()); copyToLocal(fileName, uri); @@ -281,11 +314,11 @@ public void exportWithGenes() throws Exception { copyToLocal(fileName, uri); } - protected void copyToLocal(URI uri) throws IOException { - copyToLocal(Paths.get(uri.getPath()).getFileName().toString(), uri); + protected URI copyToLocal(URI uri) throws IOException { + return copyToLocal(Paths.get(uri.getPath()).getFileName().toString(), uri); } - protected void copyToLocal(String fileName, URI uri) throws IOException { + protected URI copyToLocal(String fileName, URI uri) throws IOException { if (!exportToLocal) { System.out.println("Copy file " + uri); FileSystem.get(externalResource.getConf()).copyToLocalFile(true, @@ -293,14 +326,20 @@ protected void copyToLocal(String fileName, URI uri) throws IOException { new Path(outputUri.resolve(fileName))); if (fileName.endsWith(VariantExporter.TPED_FILE_EXTENSION)) { + Path dst = new Path(outputUri.resolve(fileName.replace(VariantExporter.TPED_FILE_EXTENSION, VariantExporter.TFAM_FILE_EXTENSION))); 
FileSystem.get(externalResource.getConf()).copyToLocalFile(true, new Path(uri.toString().replace(VariantExporter.TPED_FILE_EXTENSION, VariantExporter.TFAM_FILE_EXTENSION)), - new Path(outputUri.resolve(fileName.replace(VariantExporter.TPED_FILE_EXTENSION, VariantExporter.TFAM_FILE_EXTENSION)))); + dst); + return dst.toUri(); } else { + Path dst = new Path(outputUri.resolve(fileName + VariantExporter.METADATA_FILE_EXTENSION)); FileSystem.get(externalResource.getConf()).copyToLocalFile(true, new Path(uri.toString() + VariantExporter.METADATA_FILE_EXTENSION), - new Path(outputUri.resolve(fileName + VariantExporter.METADATA_FILE_EXTENSION))); + dst); + return dst.toUri(); } + } else { + return uri; } } From 76ff1e9b2d5b7bfbfc5ad3381ac8cd1e258fb761 Mon Sep 17 00:00:00 2001 From: JuanfeSanahuja Date: Wed, 27 Nov 2024 13:15:38 +0100 Subject: [PATCH 065/122] Prepare Port Patch 3.0.2 -> 3.4.0 Xetabase 2.0.2 -> 2.4.0 #TASK-7213 --- opencga-analysis/pom.xml | 2 +- opencga-app/pom.xml | 2 +- opencga-catalog/pom.xml | 2 +- opencga-client/pom.xml | 2 +- opencga-clinical/pom.xml | 2 +- opencga-core/pom.xml | 2 +- opencga-master/pom.xml | 2 +- opencga-server/pom.xml | 2 +- opencga-storage/opencga-storage-app/pom.xml | 2 +- opencga-storage/opencga-storage-benchmark/pom.xml | 2 +- opencga-storage/opencga-storage-core/pom.xml | 2 +- .../opencga-storage-hadoop-compat-api/pom.xml | 2 +- .../opencga-storage-hadoop-compat-hbase2.0/pom.xml | 2 +- .../opencga-storage-hadoop-compat-hbase2.2/pom.xml | 2 +- .../opencga-storage-hadoop-compat-hbase2.4/pom.xml | 2 +- .../opencga-storage-hadoop-compat/pom.xml | 2 +- .../opencga-storage-hadoop/opencga-storage-hadoop-core/pom.xml | 2 +- .../opencga-storage-hadoop-lib-emr6.1/pom.xml | 2 +- .../opencga-storage-hadoop-lib-emr6.13/pom.xml | 2 +- .../opencga-storage-hadoop-lib-hdi5.1/pom.xml | 2 +- .../opencga-storage-hadoop-lib-hdp3.1/pom.xml | 2 +- .../opencga-storage-hadoop/opencga-storage-hadoop-lib/pom.xml | 2 +- 
opencga-storage/opencga-storage-hadoop/pom.xml | 2 +- opencga-storage/opencga-storage-server/pom.xml | 2 +- opencga-storage/pom.xml | 2 +- opencga-test/pom.xml | 2 +- pom.xml | 2 +- 27 files changed, 27 insertions(+), 27 deletions(-) diff --git a/opencga-analysis/pom.xml b/opencga-analysis/pom.xml index 99cdde7b21..7ac4af0923 100644 --- a/opencga-analysis/pom.xml +++ b/opencga-analysis/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 3.0.2 + 3.4.0-SNAPSHOT ../pom.xml diff --git a/opencga-app/pom.xml b/opencga-app/pom.xml index 9c5743a6c3..7aed855b51 100644 --- a/opencga-app/pom.xml +++ b/opencga-app/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 3.0.2 + 3.4.0-SNAPSHOT ../pom.xml diff --git a/opencga-catalog/pom.xml b/opencga-catalog/pom.xml index 449d5a615b..e28e69b462 100644 --- a/opencga-catalog/pom.xml +++ b/opencga-catalog/pom.xml @@ -23,7 +23,7 @@ org.opencb.opencga opencga - 3.0.2 + 3.4.0-SNAPSHOT ../pom.xml diff --git a/opencga-client/pom.xml b/opencga-client/pom.xml index aeff862c5c..68d77922ca 100644 --- a/opencga-client/pom.xml +++ b/opencga-client/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 3.0.2 + 3.4.0-SNAPSHOT ../pom.xml diff --git a/opencga-clinical/pom.xml b/opencga-clinical/pom.xml index 244255c3e0..a43011299c 100644 --- a/opencga-clinical/pom.xml +++ b/opencga-clinical/pom.xml @@ -5,7 +5,7 @@ org.opencb.opencga opencga - 3.0.2 + 3.4.0-SNAPSHOT ../pom.xml 4.0.0 diff --git a/opencga-core/pom.xml b/opencga-core/pom.xml index f82141f153..6707b15204 100644 --- a/opencga-core/pom.xml +++ b/opencga-core/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 3.0.2 + 3.4.0-SNAPSHOT ../pom.xml diff --git a/opencga-master/pom.xml b/opencga-master/pom.xml index e6ada2e6d2..6b57d5a3f3 100644 --- a/opencga-master/pom.xml +++ b/opencga-master/pom.xml @@ -22,7 +22,7 @@ opencga org.opencb.opencga - 3.0.2 + 3.4.0-SNAPSHOT ../pom.xml diff --git a/opencga-server/pom.xml b/opencga-server/pom.xml index 083058f829..053c5892bb 100644 --- 
a/opencga-server/pom.xml +++ b/opencga-server/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 3.0.2 + 3.4.0-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-app/pom.xml b/opencga-storage/opencga-storage-app/pom.xml index da3d0f0118..9d3555928b 100644 --- a/opencga-storage/opencga-storage-app/pom.xml +++ b/opencga-storage/opencga-storage-app/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage - 3.0.2 + 3.4.0-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-benchmark/pom.xml b/opencga-storage/opencga-storage-benchmark/pom.xml index 6f7a1d0473..c9bd1a8856 100644 --- a/opencga-storage/opencga-storage-benchmark/pom.xml +++ b/opencga-storage/opencga-storage-benchmark/pom.xml @@ -22,7 +22,7 @@ opencga-storage org.opencb.opencga - 3.0.2 + 3.4.0-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-core/pom.xml b/opencga-storage/opencga-storage-core/pom.xml index 4476c2ec63..7aa897d4e9 100644 --- a/opencga-storage/opencga-storage-core/pom.xml +++ b/opencga-storage/opencga-storage-core/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage - 3.0.2 + 3.4.0-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-api/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-api/pom.xml index ca885fa290..864aac19c1 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-api/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-api/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage-hadoop-compat - 3.0.2 + 3.4.0-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.0/pom.xml 
b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.0/pom.xml index 90de830d5e..e98d6d2626 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.0/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.0/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage-hadoop-compat - 3.0.2 + 3.4.0-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.2/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.2/pom.xml index 2d4470a7c9..9f92738cf9 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.2/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.2/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage-hadoop-compat - 3.0.2 + 3.4.0-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.4/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.4/pom.xml index 9cebc36634..f2bdb5f8e9 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.4/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.4/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage-hadoop-compat - 3.0.2 + 3.4.0-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/pom.xml index 4e53543f6c..f79e2ca383 100644 --- 
a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/pom.xml @@ -23,7 +23,7 @@ org.opencb.opencga opencga-storage-hadoop - 3.0.2 + 3.4.0-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/pom.xml index 3d2d25a078..2e243830f9 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/pom.xml @@ -23,7 +23,7 @@ org.opencb.opencga opencga-storage-hadoop - 3.0.2 + 3.4.0-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-emr6.1/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-emr6.1/pom.xml index 1fb373a58b..1c5876905e 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-emr6.1/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-emr6.1/pom.xml @@ -7,7 +7,7 @@ org.opencb.opencga opencga-storage-hadoop-lib - 3.0.2 + 3.4.0-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-emr6.13/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-emr6.13/pom.xml index 976b6942c1..991c499dc8 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-emr6.13/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-emr6.13/pom.xml @@ -7,7 +7,7 @@ org.opencb.opencga opencga-storage-hadoop-lib - 3.0.2 + 3.4.0-SNAPSHOT ../pom.xml diff --git 
a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-hdi5.1/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-hdi5.1/pom.xml index e90d48ec48..a960e1a068 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-hdi5.1/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-hdi5.1/pom.xml @@ -7,7 +7,7 @@ org.opencb.opencga opencga-storage-hadoop-lib - 3.0.2 + 3.4.0-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-hdp3.1/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-hdp3.1/pom.xml index 893010fb72..ae6332feb3 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-hdp3.1/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-hdp3.1/pom.xml @@ -7,7 +7,7 @@ org.opencb.opencga opencga-storage-hadoop-lib - 3.0.2 + 3.4.0-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/pom.xml index 0e15f9c746..db8d5b9aec 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/pom.xml @@ -23,7 +23,7 @@ org.opencb.opencga opencga-storage-hadoop - 3.0.2 + 3.4.0-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/pom.xml b/opencga-storage/opencga-storage-hadoop/pom.xml index 29b54b3404..80807b54c5 100644 --- a/opencga-storage/opencga-storage-hadoop/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/pom.xml @@ -23,7 +23,7 @@ org.opencb.opencga opencga-storage - 3.0.2 + 3.4.0-SNAPSHOT ../pom.xml diff --git 
a/opencga-storage/opencga-storage-server/pom.xml b/opencga-storage/opencga-storage-server/pom.xml index e91230d5af..3dd2a91dd4 100644 --- a/opencga-storage/opencga-storage-server/pom.xml +++ b/opencga-storage/opencga-storage-server/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage - 3.0.2 + 3.4.0-SNAPSHOT ../pom.xml diff --git a/opencga-storage/pom.xml b/opencga-storage/pom.xml index 46b8ac2126..40b18f7559 100644 --- a/opencga-storage/pom.xml +++ b/opencga-storage/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 3.0.2 + 3.4.0-SNAPSHOT ../pom.xml diff --git a/opencga-test/pom.xml b/opencga-test/pom.xml index 6d68a97aed..8b5c1157ef 100644 --- a/opencga-test/pom.xml +++ b/opencga-test/pom.xml @@ -24,7 +24,7 @@ org.opencb.opencga opencga - 3.0.2 + 3.4.0-SNAPSHOT ../pom.xml diff --git a/pom.xml b/pom.xml index 829c7358ee..7ccc6e562a 100644 --- a/pom.xml +++ b/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 3.0.2 + 3.4.0-SNAPSHOT pom OpenCGA From 3b39feb837e9807c6d8f314c5916ab5575af3012 Mon Sep 17 00:00:00 2001 From: pfurio Date: Wed, 27 Nov 2024 14:46:54 +0100 Subject: [PATCH 066/122] app: fix OrgMigration merge issues, #TASK-7213 --- .../v3/v3_0_0/OrganizationMigration.java | 669 +----------------- 1 file changed, 3 insertions(+), 666 deletions(-) diff --git a/opencga-app/src/main/java/org/opencb/opencga/app/migrations/v3/v3_0_0/OrganizationMigration.java b/opencga-app/src/main/java/org/opencb/opencga/app/migrations/v3/v3_0_0/OrganizationMigration.java index 2787e25428..5dbc16b44a 100644 --- a/opencga-app/src/main/java/org/opencb/opencga/app/migrations/v3/v3_0_0/OrganizationMigration.java +++ b/opencga-app/src/main/java/org/opencb/opencga/app/migrations/v3/v3_0_0/OrganizationMigration.java @@ -1,685 +1,22 @@ package org.opencb.opencga.app.migrations.v3.v3_0_0; -<<<<<<< HEAD -import com.mongodb.client.*; -import com.mongodb.client.model.*; -import com.mongodb.client.result.DeleteResult; -import org.apache.commons.collections4.CollectionUtils; 
-import org.apache.commons.lang3.StringUtils; -import org.bson.Document; -import org.bson.conversions.Bson; -import org.opencb.commons.datastore.core.ObjectMap; -import org.opencb.commons.datastore.mongodb.MongoDataStore; -import org.opencb.commons.utils.CryptoUtils; -import org.opencb.commons.utils.FileUtils; -import org.opencb.opencga.analysis.variant.manager.VariantStorageManager; -import org.opencb.opencga.catalog.auth.authentication.CatalogAuthenticationManager; -import org.opencb.opencga.catalog.db.api.StudyDBAdaptor; -import org.opencb.opencga.catalog.db.mongodb.MongoDBAdaptorFactory; -import org.opencb.opencga.catalog.db.mongodb.OrganizationMongoDBAdaptorFactory; -import org.opencb.opencga.catalog.exceptions.CatalogAuthorizationException; -import org.opencb.opencga.catalog.exceptions.CatalogDBException; -import org.opencb.opencga.catalog.exceptions.CatalogException; -import org.opencb.opencga.catalog.exceptions.CatalogIOException; -import org.opencb.opencga.catalog.io.CatalogIOManager; -import org.opencb.opencga.catalog.managers.CatalogManager; -======= ->>>>>>> release-3.x.x import org.opencb.opencga.catalog.migration.Migration; -import org.opencb.opencga.catalog.migration.MigrationException; import org.opencb.opencga.catalog.migration.MigrationTool; -<<<<<<< HEAD -import org.opencb.opencga.catalog.utils.FqnUtils; -import org.opencb.opencga.catalog.utils.ParamUtils; -import org.opencb.opencga.core.api.ParamConstants; -import org.opencb.opencga.core.common.JacksonUtils; -import org.opencb.opencga.core.common.TimeUtils; -import org.opencb.opencga.core.config.AuthenticationOrigin; import org.opencb.opencga.core.config.Configuration; -import org.opencb.opencga.core.models.migration.MigrationRun; -import org.opencb.opencga.core.models.organizations.OrganizationCreateParams; -import org.opencb.opencga.core.models.project.DataStore; -import org.opencb.opencga.storage.core.StorageEngineFactory; -import 
org.opencb.opencga.storage.core.exceptions.StorageEngineException; -import org.opencb.opencga.storage.core.variant.VariantStorageEngine; -import org.reflections.Reflections; -import org.reflections.scanners.SubTypesScanner; -import org.reflections.scanners.TypeAnnotationsScanner; -import org.reflections.util.ClasspathHelper; -import org.reflections.util.ConfigurationBuilder; -import java.io.IOException; -import java.lang.reflect.Modifier; -import java.net.URL; -import java.nio.file.Files; import java.nio.file.Path; -import java.nio.file.Paths; -import java.security.NoSuchAlgorithmException; -import java.util.*; -import java.util.stream.Collectors; -======= -import org.opencb.opencga.core.config.Configuration; ->>>>>>> release-3.x.x - -import static org.opencb.opencga.core.config.storage.StorageConfiguration.Mode.READ_ONLY; @Migration(id = "add_organizations", description = "Add new Organization layer #TASK-4389", version = "3.0.0", -<<<<<<< HEAD - language = Migration.MigrationLanguage.JAVA, domain = Migration.MigrationDomain.CATALOG, date = 20231212, manual = true) -======= - language = Migration.MigrationLanguage.JAVA, domain = Migration.MigrationDomain.CATALOG, date = 20231212, + language = Migration.MigrationLanguage.JAVA, domain = Migration.MigrationDomain.CATALOG, date = 20231212, manual = true, deprecatedSince = "3.1.0") ->>>>>>> release-3.x.x -public class OrganizationMigration extends MigrationTool { - -<<<<<<< HEAD - private MongoDBAdaptorFactory mongoDBAdaptorFactory; - private String oldDatabase; - private MongoDataStore oldDatastore; - private Set userIdsToDiscardData; - - private MigrationStatus status; - private boolean changeOrganizationId; - - private enum MigrationStatus { - MIGRATED, - PENDING_MIGRATION, - ERROR - } - - public OrganizationMigration(Configuration configuration, String adminPassword, String userId, String organizationId, Path appHome) - throws CatalogException, IOException { - this.configuration = configuration; - 
this.adminPassword = adminPassword; - this.userId = userId; - this.organizationId = organizationId; - this.appHome = appHome; - - this.status = checkAndInit(); - } - - private MigrationStatus checkAndInit() throws CatalogException, IOException { - this.oldDatabase = configuration.getDatabasePrefix() + "_catalog"; - this.mongoDBAdaptorFactory = new MongoDBAdaptorFactory(configuration); - this.oldDatastore = mongoDBAdaptorFactory.getMongoManager().get(oldDatabase, mongoDBAdaptorFactory.getMongoDbConfiguration()); - - FileUtils.checkDirectory(appHome); - readStorageConfiguration(); - - MongoCollection userCol = oldDatastore.getDb().getCollection(OrganizationMongoDBAdaptorFactory.USER_COLLECTION); - FindIterable iterable = userCol.find(Filters.eq("id", ParamConstants.OPENCGA_USER_ID)); - try (MongoCursor cursor = iterable.cursor()) { - if (!cursor.hasNext()) { - MongoIterable collectionNames = oldDatastore.getDb().listCollectionNames(); - try (MongoCursor tmpCursor = collectionNames.cursor()) { - if (!tmpCursor.hasNext()) { - logger.info("Database '{}' not found. 
Database already migrated.", this.oldDatabase); - return MigrationStatus.MIGRATED; - } else { - List collections = new LinkedList<>(); - tmpCursor.forEachRemaining(collections::add); - logger.debug("Found '{}' collections in '{}' database", StringUtils.join(collections, ", "), this.oldDatabase); - return MigrationStatus.ERROR; - } - } - } - Document userDoc = cursor.next(); - String password = userDoc.getString("_password"); - // Check admin password - try { - if (!CryptoUtils.sha1(adminPassword).equals(password)) { - throw CatalogAuthorizationException.opencgaAdminOnlySupportedOperation(); - } - } catch (NoSuchAlgorithmException e) { - throw new RuntimeException(e); - } - } - - // Check write access to file system - if (!Files.isWritable(Paths.get(configuration.getWorkspace()))) { - throw new CatalogException("Please grant write access to path '" + configuration.getWorkspace() + "'"); - } - - Set> availableMigrations = getAvailableMigrations(); - - // Check all previous v2.x migrations have been run successfully - MongoCollection migrationCol = oldDatastore.getDb().getCollection(OrganizationMongoDBAdaptorFactory.MIGRATION_COLLECTION); - long count = migrationCol.countDocuments(); - // We take out 1 to the availableMigrations.size() because it will consider the present migration, which is not part of the check - logger.debug("Found '{}' Java migrations", availableMigrations.size() - 1); - logger.debug("Found '{}' migrations registered in the database", count); - if (count < availableMigrations.size() - 1) { - Set migrations = new HashSet<>(); - for (Class availableMigration : availableMigrations) { - Migration annotation = availableMigration.getAnnotation(Migration.class); - // Only consider previous migrations, not the present one - if (!annotation.version().equals("3.0.0")) { - migrations.add(annotation.id() + ":v" + annotation.version()); - } - } - - Set processedMigrations = new HashSet<>(); - Set onlyInJava = new HashSet<>(); - Set onlyInDB = new HashSet<>(); 
- migrationCol.find().forEach((document) -> { - String migrationId = document.getString("id") + ":v" + document.getString("version"); - processedMigrations.add(migrationId); - if (!migrations.contains(migrationId)) { - onlyInDB.add(migrationId); - } - }); - for (String migration : migrations) { - if (!processedMigrations.contains(migration)) { - onlyInJava.add(migration); - } - } - logger.debug("Migrations not registered in the database: {}", String.join(", ", onlyInJava)); - logger.debug("Migrations only found in the DB: {}", String.join(", ", onlyInDB)); - - throw new CatalogException("Please check past migrations before moving to v3.0.0. Found " - + (availableMigrations.size() -1 - count) + " missing migrations available."); - } - - count = migrationCol.countDocuments(Filters.in("status", MigrationRun.MigrationStatus.PENDING, - MigrationRun.MigrationStatus.ON_HOLD, MigrationRun.MigrationStatus.ERROR)); - if (count > 0) { - throw new CatalogException("Please check past migrations. Found " + count + " migrations with status '" - + MigrationRun.MigrationStatus.PENDING + "', '" + MigrationRun.MigrationStatus.ON_HOLD + "', or '" - + MigrationRun.MigrationStatus.ERROR + "'."); - } - // Retrieve all users with data - DistinctIterable userIdIterable = userCol.distinct("id", - Filters.and( - Filters.ne("projects", null), - Filters.ne("projects", Collections.emptyList())), - String.class); - List userIds = new LinkedList<>(); - try (MongoCursor iterator = userIdIterable.iterator()) { - while (iterator.hasNext()) { - String tmpUserId = iterator.next(); - if (!ParamConstants.OPENCGA_USER_ID.equals(tmpUserId)) { - userIds.add(tmpUserId); - } - } - } - if (StringUtils.isNotEmpty(userId)) { - if (!userIds.contains(userId)) { - throw new CatalogException("User '" + userId + "' does not have any data. 
Available users to migrate are " - + userIds.stream() - .collect(Collectors.joining(", "))); - } - // Extract users that will need to remove data from - this.userIdsToDiscardData = userIds.stream() - .filter(u -> !userId.equals(u)) - .collect(Collectors.toSet()); - } else { - if (userIds.size() > 1) { - throw new CatalogException("More than 1 user containing data found. Available users to migrate are " - + StringUtils.join(userIds, ", ") + ". Please, choose which one to migrate."); - } else if (userIds.isEmpty()) { - throw new CatalogException("No users found to migrate."); - } - this.userId = userIds.get(0); - this.userIdsToDiscardData = new HashSet<>(); - } - - if (StringUtils.isEmpty(this.organizationId)) { - this.organizationId = this.userId; - } - changeOrganizationId = !this.organizationId.equals(this.userId); - if (changeOrganizationId && readStorageConfiguration().getMode() == READ_ONLY) { - throw new CatalogException("Cannot change organization id when storage is in read-only mode"); - } +public class OrganizationMigration extends MigrationTool { - ParamUtils.checkIdentifier(this.organizationId, "Organization id"); - this.catalogManager = new CatalogManager(configuration); - return MigrationStatus.PENDING_MIGRATION; -======= - public OrganizationMigration(Configuration configuration, String adminPassword, String userId) { ->>>>>>> release-3.x.x + public OrganizationMigration(Configuration configuration, String adminPassword, String userId, String organizationId, Path appHome) { } @Override protected void run() throws Exception { -<<<<<<< HEAD - if (this.status == MigrationStatus.ERROR) { - throw new CatalogException("Corrupted database '" + this.oldDatabase + "' found. 
Could not migrate."); - } else if (this.status == MigrationStatus.MIGRATED) { - return; - } - - MongoCollection metadataCol = oldDatastore.getDb().getCollection(OrganizationMongoDBAdaptorFactory.METADATA_COLLECTION); - FindIterable iterable = metadataCol.find(new Document()); - Document metaDoc; - long counter; - String secretKey; - String algorithm; - try (MongoCursor cursor = iterable.cursor()) { - metaDoc = cursor.next(); - counter = metaDoc.get("idCounter", Number.class).longValue(); - Document admin = metaDoc.get("admin", Document.class); - secretKey = admin.getString("secretKey"); - algorithm = admin.getString("algorithm"); - } - - if (!userIdsToDiscardData.isEmpty()) { - MongoCollection studyCol = oldDatastore.getDb().getCollection(OrganizationMongoDBAdaptorFactory.STUDY_COLLECTION); - // First, remove unnecessary data - queryMongo(studyCol, Filters.in("_ownerId", userIdsToDiscardData), Projections.include("uid"), studyDoc -> { - long studyUid = studyDoc.get("uid", Number.class).longValue(); - Bson query = Filters.eq("studyUid", studyUid); - - // Delete data associated to the study - oldDatastore.getDb().getCollection(OrganizationMongoDBAdaptorFactory.FILE_COLLECTION).deleteMany(query); - oldDatastore.getDb().getCollection(OrganizationMongoDBAdaptorFactory.JOB_COLLECTION).deleteMany(query); - oldDatastore.getDb().getCollection(OrganizationMongoDBAdaptorFactory.SAMPLE_COLLECTION).deleteMany(query); - oldDatastore.getDb().getCollection(OrganizationMongoDBAdaptorFactory.INDIVIDUAL_COLLECTION).deleteMany(query); - oldDatastore.getDb().getCollection(OrganizationMongoDBAdaptorFactory.COHORT_COLLECTION).deleteMany(query); - oldDatastore.getDb().getCollection(OrganizationMongoDBAdaptorFactory.FAMILY_COLLECTION).deleteMany(query); - oldDatastore.getDb().getCollection(OrganizationMongoDBAdaptorFactory.PANEL_COLLECTION).deleteMany(query); - oldDatastore.getDb().getCollection(OrganizationMongoDBAdaptorFactory.CLINICAL_ANALYSIS_COLLECTION).deleteMany(query); - 
oldDatastore.getDb().getCollection(OrganizationMongoDBAdaptorFactory.INTERPRETATION_COLLECTION).deleteMany(query); - oldDatastore.getDb().getCollection(OrganizationMongoDBAdaptorFactory.SAMPLE_ARCHIVE_COLLECTION).deleteMany(query); - oldDatastore.getDb().getCollection(OrganizationMongoDBAdaptorFactory.INDIVIDUAL_ARCHIVE_COLLECTION).deleteMany(query); - oldDatastore.getDb().getCollection(OrganizationMongoDBAdaptorFactory.FAMILY_ARCHIVE_COLLECTION).deleteMany(query); - oldDatastore.getDb().getCollection(OrganizationMongoDBAdaptorFactory.PANEL_ARCHIVE_COLLECTION).deleteMany(query); - oldDatastore.getDb().getCollection(OrganizationMongoDBAdaptorFactory.INTERPRETATION_ARCHIVE_COLLECTION).deleteMany(query); - oldDatastore.getDb().getCollection(OrganizationMongoDBAdaptorFactory.DELETED_FILE_COLLECTION).deleteMany(query); - oldDatastore.getDb().getCollection(OrganizationMongoDBAdaptorFactory.DELETED_JOB_COLLECTION).deleteMany(query); - oldDatastore.getDb().getCollection(OrganizationMongoDBAdaptorFactory.DELETED_SAMPLE_COLLECTION).deleteMany(query); - oldDatastore.getDb().getCollection(OrganizationMongoDBAdaptorFactory.DELETED_INDIVIDUAL_COLLECTION).deleteMany(query); - oldDatastore.getDb().getCollection(OrganizationMongoDBAdaptorFactory.DELETED_COHORT_COLLECTION).deleteMany(query); - oldDatastore.getDb().getCollection(OrganizationMongoDBAdaptorFactory.DELETED_FAMILY_COLLECTION).deleteMany(query); - oldDatastore.getDb().getCollection(OrganizationMongoDBAdaptorFactory.DELETED_PANEL_COLLECTION).deleteMany(query); - }); - - // Delete studies - DeleteResult result = studyCol.deleteMany(Filters.in("_ownerId", userIdsToDiscardData)); - if (result.getDeletedCount() > 0) { - logger.info("Deleted {} unnecessary studies", result.getDeletedCount()); - } - - // Remove projects from users - oldDatastore.getDb().getCollection(OrganizationMongoDBAdaptorFactory.USER_COLLECTION).updateMany( - Filters.in("id", userIdsToDiscardData), - Updates.set("projects", Collections.emptyList()) - ); - } 
- - // Create admin organization - catalogManager.installCatalogDB(algorithm, secretKey, "MyStr0NgT!mP0r4lP4sWSoRd", "tempmail@tempmail.com", false); - - // Create admin organization - MongoCollection oldUserCollection = oldDatastore.getDb().getCollection(OrganizationMongoDBAdaptorFactory.USER_COLLECTION); - queryMongo(oldUserCollection, Filters.eq("id", ParamConstants.OPENCGA_USER_ID), Projections.exclude("_id"), document -> { - String organizationId = ParamConstants.ADMIN_ORGANIZATION; - try { - MongoDatabase orgDatabase = mongoDBAdaptorFactory.getMongoDataStore(organizationId).getDb(); - - MongoCollection userCollection = orgDatabase.getCollection(OrganizationMongoDBAdaptorFactory.USER_COLLECTION); - MongoCollection projectCollection = orgDatabase.getCollection(OrganizationMongoDBAdaptorFactory.PROJECT_COLLECTION); - MongoCollection studyCollection = orgDatabase.getCollection(OrganizationMongoDBAdaptorFactory.STUDY_COLLECTION); - MongoCollection fileCollection = orgDatabase.getCollection(OrganizationMongoDBAdaptorFactory.FILE_COLLECTION); - MongoCollection migrationCollection = orgDatabase.getCollection(OrganizationMongoDBAdaptorFactory.MIGRATION_COLLECTION); - // Empty data from collections with default data - userCollection.deleteMany(new Document()); - projectCollection.deleteMany(new Document()); - studyCollection.deleteMany(new Document()); - fileCollection.deleteMany(new Document()); - migrationCollection.deleteMany(new Document()); - - // Replace user in organization - userCollection.insertOne(document); - - // Extract projects - List projects = document.getList("projects", Document.class); - for (Document project : projects) { - long projectUid = project.get("uid", Number.class).longValue(); - - // Look for studies - MongoCollection oldStudyCol = oldDatastore.getDb().getCollection(OrganizationMongoDBAdaptorFactory.STUDY_COLLECTION); - MongoCollection orgStudyCol = orgDatabase.getCollection(OrganizationMongoDBAdaptorFactory.STUDY_COLLECTION); - - 
queryMongo(oldStudyCol, Filters.eq("_project.uid", projectUid), Projections.exclude("_id"), studyDoc -> { - // Insert study in new organization collection - orgStudyCol.insertOne(studyDoc); - - long studyUid = studyDoc.get("uid", Number.class).longValue(); - - // Move data belonging to the study - replicateData(studyUid, oldDatastore.getDb().getCollection(OrganizationMongoDBAdaptorFactory.SAMPLE_COLLECTION), - orgDatabase.getCollection(OrganizationMongoDBAdaptorFactory.SAMPLE_COLLECTION)); - replicateData(studyUid, oldDatastore.getDb().getCollection(OrganizationMongoDBAdaptorFactory.INDIVIDUAL_COLLECTION), - orgDatabase.getCollection(OrganizationMongoDBAdaptorFactory.INDIVIDUAL_COLLECTION)); - replicateData(studyUid, oldDatastore.getDb().getCollection(OrganizationMongoDBAdaptorFactory.COHORT_COLLECTION), - orgDatabase.getCollection(OrganizationMongoDBAdaptorFactory.COHORT_COLLECTION)); - replicateData(studyUid, oldDatastore.getDb().getCollection(OrganizationMongoDBAdaptorFactory.FILE_COLLECTION), - orgDatabase.getCollection(OrganizationMongoDBAdaptorFactory.FILE_COLLECTION)); - replicateData(studyUid, oldDatastore.getDb().getCollection(OrganizationMongoDBAdaptorFactory.FAMILY_COLLECTION), - orgDatabase.getCollection(OrganizationMongoDBAdaptorFactory.FAMILY_COLLECTION)); - replicateData(studyUid, oldDatastore.getDb().getCollection(OrganizationMongoDBAdaptorFactory.PANEL_COLLECTION), - orgDatabase.getCollection(OrganizationMongoDBAdaptorFactory.PANEL_COLLECTION)); - replicateData(studyUid, oldDatastore.getDb().getCollection(OrganizationMongoDBAdaptorFactory.JOB_COLLECTION), - orgDatabase.getCollection(OrganizationMongoDBAdaptorFactory.JOB_COLLECTION)); - replicateData(studyUid, oldDatastore.getDb().getCollection(OrganizationMongoDBAdaptorFactory.CLINICAL_ANALYSIS_COLLECTION), - orgDatabase.getCollection(OrganizationMongoDBAdaptorFactory.CLINICAL_ANALYSIS_COLLECTION)); - replicateData(studyUid, 
oldDatastore.getDb().getCollection(OrganizationMongoDBAdaptorFactory.INTERPRETATION_COLLECTION), - orgDatabase.getCollection(OrganizationMongoDBAdaptorFactory.INTERPRETATION_COLLECTION)); - - oldStudyCol.deleteOne(Filters.eq("uid", studyUid)); - }); - } - - // Copy migration history - MongoCollection oldMigrationCollection = oldDatastore.getDb().getCollection(OrganizationMongoDBAdaptorFactory.MIGRATION_COLLECTION); - logger.info("Copying Migration data from {} to {}", oldMigrationCollection.getNamespace(), migrationCollection.getNamespace()); - migrateCollection(oldMigrationCollection, migrationCollection, new Document(), Projections.exclude("_id"), - (tmpDocument, bulk) -> bulk.add(new InsertOneModel<>(tmpDocument))); - - // Remove user from the source database - oldUserCollection.deleteOne(Filters.eq("id", ParamConstants.OPENCGA_USER_ID)); - } catch (CatalogException e) { - throw new RuntimeException(e); - } - }); - - // Create new organization - String opencgaToken = catalogManager.getUserManager().login(ParamConstants.ADMIN_ORGANIZATION, ParamConstants.OPENCGA_USER_ID, adminPassword).getToken(); - catalogManager.getOrganizationManager().create(new OrganizationCreateParams().setId(organizationId), null, opencgaToken); - OrganizationMongoDBAdaptorFactory orgFactory = mongoDBAdaptorFactory.getOrganizationMongoDBAdaptorFactory(organizationId); - String newDatabase = orgFactory.getMongoDataStore().getDatabaseName(); - - // Rename database to main organization database - MongoDataStore adminDatastore = mongoDBAdaptorFactory.getMongoManager().get("admin", mongoDBAdaptorFactory.getMongoDbConfiguration()); - for (String collectionName : oldDatastore.getCollectionNames()) { - logger.info("Renaming collection {} to {}", oldDatabase + "." + collectionName, newDatabase + "." + collectionName); - adminDatastore.getDb().runCommand(new Document() - .append("renameCollection", oldDatabase + "." + collectionName) - .append("to", newDatabase + "." 
+ collectionName) - .append("dropTarget", true) - ); - } - - CatalogIOManager ioManager = new CatalogIOManager(configuration); - - Map organizationOwnerMap = new HashMap<>(); - organizationOwnerMap.put(ParamConstants.ADMIN_ORGANIZATION, ParamConstants.OPENCGA_USER_ID); - organizationOwnerMap.put(this.organizationId, this.userId); - - // Loop over all organizations to perform additional data model changes - for (String organizationId : mongoDBAdaptorFactory.getOrganizationIds()) { - ioManager.createOrganization(organizationId); - - MongoDatabase database = mongoDBAdaptorFactory.getMongoDataStore(organizationId).getDb(); - MongoCollection userCollection = database.getCollection(OrganizationMongoDBAdaptorFactory.USER_COLLECTION); - - // Extract projects from users - queryMongo(userCollection, new Document(), Projections.exclude("_id"), document -> { - List projects = document.getList("projects", Document.class); - if (CollectionUtils.isNotEmpty(projects)) { - // Create project directory - for (Document project : projects) { - Long projectUid = project.get("uid", Long.class); - try { - ioManager.createProject(organizationId, Long.toString(projectUid)); - } catch (CatalogIOException e) { - throw new RuntimeException("Couldn't create project folder for project '" + project.getString("fqn") + "'.", e); - } - } - - MongoCollection projectCol = database.getCollection(OrganizationMongoDBAdaptorFactory.PROJECT_COLLECTION); - projectCol.insertMany(projects); - } - }); - - // Remove account type, projects and sharedProjects from User - for (String collection : Arrays.asList(OrganizationMongoDBAdaptorFactory.USER_COLLECTION, OrganizationMongoDBAdaptorFactory.DELETED_USER_COLLECTION)) { - MongoCollection mongoCollection = database.getCollection(collection); - mongoCollection.updateMany(new Document(), - Updates.combine( - Updates.set("projects", Collections.emptyList()), - Updates.set("organization", organizationId), - Updates.unset("sharedProjects"), - 
Updates.unset("account.type") - ) - ); - } - - // Add owner as admin of every study and remove _ownerId field - String ownerId = organizationOwnerMap.get(organizationId); - for (String collection : Arrays.asList(OrganizationMongoDBAdaptorFactory.STUDY_COLLECTION, OrganizationMongoDBAdaptorFactory.DELETED_STUDY_COLLECTION)) { - MongoCollection mongoCollection = database.getCollection(collection); - mongoCollection.updateMany( - Filters.eq(StudyDBAdaptor.QueryParams.GROUP_ID.key(), ParamConstants.ADMINS_GROUP), - Updates.combine( - Updates.unset("_ownerId"), - Updates.push("groups.$.userIds", ownerId) - )); - } - - // Set organization counter - MongoCollection orgCol = database.getCollection(OrganizationMongoDBAdaptorFactory.ORGANIZATION_COLLECTION); - - List authOrigins = new ArrayList<>(); - // Super admins organization will only have one authentication origin - internal - if (!ParamConstants.ADMIN_ORGANIZATION.equals(organizationId)) { - if (configuration.getAuthentication() != null && CollectionUtils.isNotEmpty(configuration.getAuthentication().getAuthenticationOrigins())) { - for (AuthenticationOrigin authenticationOrigin : configuration.getAuthentication().getAuthenticationOrigins()) { - if (authenticationOrigin.getType().equals(AuthenticationOrigin.AuthenticationType.OPENCGA) - && authenticationOrigin.getId().equals(CatalogAuthenticationManager.INTERNAL)) { - continue; - } - authenticationOrigin.setAlgorithm(algorithm); - authenticationOrigin.setSecretKey(secretKey); - authenticationOrigin.setExpiration(3600); - authOrigins.add(convertToDocument(authenticationOrigin)); - } - } - } - authOrigins.add(convertToDocument(CatalogAuthenticationManager.createRandomInternalAuthenticationOrigin())); - - // Set organization counter, owner and authOrigins - orgCol.updateOne(Filters.eq("id", organizationId), Updates.combine( - Updates.set("_idCounter", counter), - Updates.set("owner", ownerId), - Updates.set("configuration.authenticationOrigins", authOrigins) - )); - } - - 
// If the user didn't want to use the userId as the new organization id, we then need to change all the fqn's - if (changeOrganizationId) { - logger.info("New organization id '{}' is different from original userId '{}'. Changing FQN's from projects and studies" - , this.organizationId, this.userId); - changeFqns(); - } - - // Skip current migration for both organizations - catalogManager.getMigrationManager().skipPendingMigrations(ParamConstants.ADMIN_ORGANIZATION, opencgaToken); - catalogManager.getMigrationManager().skipPendingMigrations(organizationId, opencgaToken); - } - - private void changeFqns() throws CatalogDBException, MigrationException { - this.dbAdaptorFactory = this.mongoDBAdaptorFactory; - String date = TimeUtils.getTime(); - - StorageEngineFactory storageEngineFactory = StorageEngineFactory.get(readStorageConfiguration()); - - // Change project fqn's - for (String projectCol : Arrays.asList(OrganizationMongoDBAdaptorFactory.PROJECT_COLLECTION, - OrganizationMongoDBAdaptorFactory.DELETED_PROJECT_COLLECTION)) { - migrateCollection(projectCol, new Document(), Projections.include("_id", "id", "fqn", "internal.datastores.variant"), (document, bulk) -> { - String projectId = document.getString("id"); - String oldProjectFqn = document.getString("fqn"); - String newProjectFqn = FqnUtils.buildFqn(this.organizationId, projectId); - logger.info("Changing project fqn from '{}' to '{}'", oldProjectFqn, newProjectFqn); - - Document set = new Document() - .append("fqn", newProjectFqn) - .append("attributes.OPENCGA.3_0_0", new Document() - .append("date", date) - .append("oldFqn", oldProjectFqn) - ); - - Document internal = document.get("internal", Document.class); - if (internal != null) { - Document datastores = internal.get("datastores", Document.class); - if (datastores != null) { - Document variant = datastores.get("variant", Document.class); - DataStore dataStore; - boolean updateVariant = false; - if (variant == null) { - dataStore = 
VariantStorageManager.defaultDataStore(configuration.getDatabasePrefix(), oldProjectFqn); - updateVariant = true; - } else { - dataStore = JacksonUtils.getDefaultObjectMapper().convertValue(variant, DataStore.class); - } - // Update only if the project exists in the variant storage - try (VariantStorageEngine variantStorageEngine = storageEngineFactory - .getVariantStorageEngine(dataStore.getStorageEngine(), dataStore.getDbName())) { - logger.info("Project '{}' exists in the variant storage.", oldProjectFqn); - if (variantStorageEngine.getMetadataManager().exists()) { - if (updateVariant) { - logger.info("Undefined variant \"internal.datastores.variant\" at project '{}'.", oldProjectFqn); - set.append("internal.datastores.variant", new Document() - .append("storageEngine", dataStore.getStorageEngine()) - .append("dbName", dataStore.getDbName()) - .append("options", new Document())); - } else { - logger.info("Datastore variant at project '{}': {}", oldProjectFqn, datastores); - } - - for (String oldStudyFqn : variantStorageEngine.getMetadataManager().getStudies().keySet()) { - String study = FqnUtils.parse(oldStudyFqn).getStudy(); - String newStudyFqn = FqnUtils.buildFqn(this.organizationId, projectId, study); - logger.info("Changing study fqn from '{}' to '{}'", oldStudyFqn, newStudyFqn); - variantStorageEngine.getMetadataManager().updateStudyMetadata(oldStudyFqn, studyMetadata -> { - studyMetadata.setName(newStudyFqn); - studyMetadata.getAttributes().put("OPENCGA_3_0_0", new ObjectMap() - .append("date", date) - .append("oldFqn", oldStudyFqn) - ); - }); - } - } else { - logger.info("Project '{}' does not exist in the variant storage. 
Skipping", oldProjectFqn); - } - } catch (StorageEngineException | IOException e) { - throw new RuntimeException(e); - } - } - } - - bulk.add(new UpdateOneModel<>( - Filters.eq("_id", document.get("_id")), - new Document("$set", set)) - ); - logger.info("-------"); - }); - } - - MongoDatabase database = mongoDBAdaptorFactory.getMongoDataStore(organizationId).getDb(); - MongoCollection jobCollection = database.getCollection(OrganizationMongoDBAdaptorFactory.JOB_COLLECTION); - MongoCollection jobDeletedCollection = database.getCollection(OrganizationMongoDBAdaptorFactory.DELETED_JOB_COLLECTION); - - // Change study fqn's - for (String studyCol : Arrays.asList(OrganizationMongoDBAdaptorFactory.STUDY_COLLECTION, - OrganizationMongoDBAdaptorFactory.DELETED_STUDY_COLLECTION)) { - migrateCollection(studyCol, new Document(), Projections.include("_id", "uid", "fqn"), (document, bulk) -> { - long studyUid = document.get("uid", Number.class).longValue(); - - String oldStudyFqn = document.getString("fqn"); - FqnUtils.FQN oldFqnInstance = FqnUtils.parse(oldStudyFqn); - String newFqn = FqnUtils.buildFqn(this.organizationId, oldFqnInstance.getProject(), oldFqnInstance.getStudy()); - logger.info("Changing study fqn from '{}' to '{}'", oldStudyFqn, newFqn); - bulk.add(new UpdateOneModel<>( - Filters.eq("_id", document.get("_id")), - new Document("$set", new Document() - .append("fqn", newFqn) - .append("attributes.OPENCGA.3_0_0", new Document() - .append("date", date) - .append("oldFqn", oldStudyFqn) - ) - )) - ); - - // Ensure all jobs have attributes field - Bson jobQuery = Filters.eq("attributes", null); - Bson update = new Document("$set", new Document("attributes", new Document())); - jobCollection.updateMany(jobQuery, update); - jobDeletedCollection.updateMany(jobQuery, update); - - // Change fqn in all jobs that were pointing to this study - jobQuery = Filters.eq("studyUid", studyUid); - update = new Document("$set", new Document() - .append("study.id", newFqn) - 
.append("attributes.OPENCGA.3_0_0", new Document() - .append("date", date) - .append("oldStudyFqn", oldStudyFqn) - ) - ); - jobCollection.updateMany(jobQuery, update); - jobDeletedCollection.updateMany(jobQuery, update); - }); - } - } - - Set> getAvailableMigrations() { - Reflections reflections = new Reflections(new ConfigurationBuilder() - .setScanners( - new SubTypesScanner(), - new TypeAnnotationsScanner().filterResultsBy(s -> StringUtils.equals(s, Migration.class.getName())) - ) - .addUrls(getUrls()) - .filterInputsBy(input -> input != null && input.endsWith(".class")) - ); - - Set> migrations = reflections.getSubTypesOf(MigrationTool.class); - migrations.removeIf(c -> Modifier.isAbstract(c.getModifiers())); - - // Validate unique ids and rank - Map> versionIdMap = new HashMap<>(); - - for (Class migration : migrations) { - Migration annotation = migration.getAnnotation(Migration.class); - - if (!versionIdMap.containsKey(annotation.version())) { - versionIdMap.put(annotation.version(), new HashSet<>()); - } - if (versionIdMap.get(annotation.version()).contains(annotation.id())) { - throw new IllegalStateException("Found duplicated migration id '" + annotation.id() + "' in version " - + annotation.version()); - } - if (String.valueOf(annotation.date()).length() != 8) { - throw new IllegalStateException("Found unexpected date '" + annotation.date() + "' in migration '" + annotation.id() - + "' from version " + annotation.version() + ". 
Date format is YYYYMMDD."); - } - versionIdMap.get(annotation.version()).add(annotation.id()); - } - - return migrations; - } - - private Collection getUrls() { - Collection urls = new LinkedList<>(); - for (URL url : ClasspathHelper.forPackage("org.opencb.opencga")) { - String name = url.getPath().substring(url.getPath().lastIndexOf('/') + 1); - if (name.isEmpty() || (name.contains("opencga") && !name.contains("opencga-hadoop-shaded"))) { - urls.add(url); - } - } - return urls; - } - - private void replicateData(long studyUid, MongoCollection sourceCol, MongoCollection targetCol) { - // Move data to the new collection - logger.info("Moving data from {} to {}", sourceCol.getNamespace(), targetCol.getNamespace()); - migrateCollection(sourceCol, targetCol, Filters.eq("studyUid", studyUid), Projections.exclude("_id"), - (document, bulk) -> bulk.add(new InsertOneModel<>(document))); - // Remove data from the source collection - sourceCol.deleteMany(Filters.eq("studyUid", studyUid)); -======= ->>>>>>> release-3.x.x } } From f87686e43cb6914d34d93f9e534509471ccf0090 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Wed, 27 Nov 2024 17:15:12 +0000 Subject: [PATCH 067/122] storage: Do not fail vairant-walker if no output is produced. 
#TASK-6722 --- .../analysis/variant/VariantWalkerTool.java | 12 ++++++----- .../variant/HadoopVariantStorageEngine.java | 20 ++++++++++++++++++- 2 files changed, 26 insertions(+), 6 deletions(-) diff --git a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/VariantWalkerTool.java b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/VariantWalkerTool.java index 68ad63d354..a37ca18486 100644 --- a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/VariantWalkerTool.java +++ b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/VariantWalkerTool.java @@ -87,11 +87,13 @@ protected void run() throws Exception { }); step("move-files", () -> { // Move files to final directory - IOManager ioManager = catalogManager.getIoManagerFactory().get(uris.get(0)); - for (URI uri : uris) { - String fileName = UriUtils.fileName(uri); - logger.info("Moving file -- " + fileName); - ioManager.move(uri, getOutDir().resolve(fileName).toUri()); + if (!uris.isEmpty()) { + IOManager ioManager = catalogManager.getIoManagerFactory().get(uris.get(0)); + for (URI uri : uris) { + String fileName = UriUtils.fileName(uri); + logger.info("Moving file -- " + fileName); + ioManager.move(uri, getOutDir().resolve(fileName).toUri()); + } } }); } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantStorageEngine.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantStorageEngine.java index 3affa2e3b9..c5d924e769 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantStorageEngine.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantStorageEngine.java @@ -44,6 +44,7 @@ import 
org.opencb.opencga.storage.core.exceptions.StorageEngineException; import org.opencb.opencga.storage.core.exceptions.StoragePipelineException; import org.opencb.opencga.storage.core.exceptions.VariantSearchException; +import org.opencb.opencga.storage.core.io.managers.IOConnector; import org.opencb.opencga.storage.core.io.managers.IOConnectorProvider; import org.opencb.opencga.storage.core.metadata.VariantMetadataFactory; import org.opencb.opencga.storage.core.metadata.VariantStorageMetadataManager; @@ -342,7 +343,24 @@ public List walkData(URI outputFile, VariantWriterFactory.VariantOutputForm .append(StreamVariantDriver.INPUT_FORMAT_PARAM, format.toString()) .append(StreamVariantDriver.OUTPUT_PARAM, outputFile) ), "Walk data"); - return Arrays.asList(outputFile, UriUtils.createUriSafe(outputFile.toString() + StreamVariantDriver.STDERR_TXT_GZ)); + List uris = new ArrayList<>(); + URI stderrFile = UriUtils.createUriSafe(outputFile.toString() + StreamVariantDriver.STDERR_TXT_GZ); + try { + IOConnector ioConnector = ioConnectorProvider.get(outputFile); + if (ioConnector.exists(outputFile)) { + uris.add(outputFile); + } else { + logger.warn("Output file not found: {}", outputFile); + } + if (ioConnector.exists(stderrFile)) { + uris.add(stderrFile); + } else { + logger.warn("Stderr file not found: {}", stderrFile); + } + } catch (IOException e) { + throw new StorageEngineException("Error checking output file", e); + } + return uris; } @Override From a389e10115d75e9a023cebf63ac28e5e1313bf16 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Wed, 27 Nov 2024 17:16:52 +0000 Subject: [PATCH 068/122] storage: Split PhoenixInputSplits into smaller splits. 
#TASK-6722 --- .../variant/HadoopVariantStorageOptions.java | 1 + .../variant/mr/CustomPhoenixInputFormat.java | 52 +++++++++++++++++-- 2 files changed, 50 insertions(+), 3 deletions(-) diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantStorageOptions.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantStorageOptions.java index 268caaf925..37331233aa 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantStorageOptions.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantStorageOptions.java @@ -42,6 +42,7 @@ public enum HadoopVariantStorageOptions implements ConfigurationOption { MR_HBASE_SCAN_CACHING("storage.hadoop.mr.scan.caching", 50), MR_HBASE_SCAN_MAX_COLUMNS("storage.hadoop.mr.scan.maxColumns", 25000), MR_HBASE_SCAN_MAX_FILTERS("storage.hadoop.mr.scan.maxFilters", 2000), + MR_HBASE_PHOENIX_SCAN_SPLIT("storage.hadoop.mr.phoenix.scanSplit", 5), /** * MapReduce executor. Could be either 'system' or 'ssh'. 
diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/CustomPhoenixInputFormat.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/CustomPhoenixInputFormat.java index 5b280facb0..b8e34933c9 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/CustomPhoenixInputFormat.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/CustomPhoenixInputFormat.java @@ -7,6 +7,7 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.client.Scan; +import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.mapreduce.*; import org.apache.hadoop.mapreduce.lib.db.DBWritable; @@ -21,11 +22,16 @@ import org.apache.phoenix.query.QueryServices; import org.apache.phoenix.util.PhoenixRuntime; import org.opencb.opencga.storage.hadoop.HBaseCompat; +import org.opencb.opencga.storage.hadoop.variant.HadoopVariantStorageOptions; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.io.Closeable; import java.io.IOException; import java.sql.Connection; import java.sql.Statement; +import java.util.ArrayList; +import java.util.Collections; import java.util.List; import java.util.Properties; @@ -39,6 +45,7 @@ */ public class CustomPhoenixInputFormat extends InputFormat { private static final Log LOG = LogFactory.getLog(CustomPhoenixInputFormat.class); + private static Logger logger = LoggerFactory.getLogger(CustomPhoenixInputFormat.class); @Override public RecordReader createRecordReader(InputSplit split, TaskAttemptContext context) @@ -58,6 +65,20 @@ public CloseValueRecordReader(RecordReader recordReader) { super(recordReader, v -> v); } 
+ @Override + public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException { + super.initialize(split, context); + if (split instanceof PhoenixInputSplit) { + PhoenixInputSplit phoenixInputSplit = (PhoenixInputSplit) split; + logger.info("Key range : " + phoenixInputSplit.getKeyRange()); + logger.info("Split: " + phoenixInputSplit.getScans().size() + " scans"); + int i = 0; + for (Scan scan : phoenixInputSplit.getScans()) { + logger.info("[{}] Scan: {}", ++i, scan); + } + } + } + @Override public void close() throws IOException { V currentValue; @@ -78,16 +99,41 @@ public List getSplits(JobContext context) throws IOException, Interr final Configuration configuration = context.getConfiguration(); final QueryPlan queryPlan = getQueryPlan(context, configuration); final List allSplits = queryPlan.getSplits(); - final List splits = generateSplits(queryPlan, allSplits); + final List splits = generateSplits(queryPlan, allSplits, configuration); return splits; } - private List generateSplits(final QueryPlan qplan, final List splits) throws IOException { + private List generateSplits(final QueryPlan qplan, final List splits, Configuration configuration) + throws IOException { Preconditions.checkNotNull(qplan); Preconditions.checkNotNull(splits); final List psplits = Lists.newArrayListWithExpectedSize(splits.size()); for (List scans : qplan.getScans()) { - psplits.add(new PhoenixInputSplit(scans)); + if (scans.size() == 1) { + // Split scans into multiple smaller scans + int numScans = configuration.getInt(HadoopVariantStorageOptions.MR_HBASE_PHOENIX_SCAN_SPLIT.key(), + HadoopVariantStorageOptions.MR_HBASE_PHOENIX_SCAN_SPLIT.defaultValue()); + List splitScans = new ArrayList<>(numScans); + Scan scan = scans.get(0); + byte[] startRow = scan.getStartRow(); + byte[] stopRow = scan.getStopRow(); + if (startRow != null && startRow.length != 0 && stopRow != null && stopRow.length != 0) { + byte[][] ranges = Bytes.split(startRow, 
stopRow, numScans - 1); + for (int i = 1; i < ranges.length; i++) { + Scan splitScan = new Scan(scan); + splitScan.withStartRow(ranges[i - 1]); + splitScan.withStopRow(ranges[i], false); + splitScans.add(splitScan); + } + } else { + splitScans.add(scan); + } + for (Scan splitScan : splitScans) { + psplits.add(new PhoenixInputSplit(Collections.singletonList(splitScan))); + } + } else { + psplits.add(new PhoenixInputSplit(scans)); + } } return psplits; } From f4530908d26dca9e4acd14baa7d50a3ef9766205 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Wed, 27 Nov 2024 17:32:49 +0000 Subject: [PATCH 069/122] storage: Improve log message. #TASK-6722 --- .../storage/hadoop/variant/executors/SshMRExecutor.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/executors/SshMRExecutor.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/executors/SshMRExecutor.java index 57a391a24d..cd61a522a9 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/executors/SshMRExecutor.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/executors/SshMRExecutor.java @@ -157,6 +157,8 @@ private MapReduceOutputFile initMrOutput(String executable, String[] args) throw String output = argsList.get(outputIdx + 1); URI outputUri = UriUtils.createUriSafe(output); if (MapReduceOutputFile.isLocal(outputUri)) { + logger.info("This MapReduce will produce some output. 
Change output location from file:// to a temporary hdfs:// file" + + " so it can be copied to the local filesystem after the execution"); try { int i = executable.lastIndexOf('.'); String tempFilePrefix; @@ -171,7 +173,6 @@ private MapReduceOutputFile initMrOutput(String executable, String[] args) throw } catch (IOException e) { throw new StorageEngineException(e.getMessage(), e); } - logger.info("Change output from file:// to hdfs://. Using MapReduceOutputFile: " + mrOutput.getOutdir()); // Replace output path with the temporary path argsList.set(outputIdx + 1, mrOutput.getOutdir().toString()); } From 47535c1a2ee74e3f810434507c894adca0c5b0cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Thu, 28 Nov 2024 10:38:27 +0000 Subject: [PATCH 070/122] storage: Add HadoopVariantWalkerTest. #TASK-6722 --- .../analysis/variant/VariantWalkerTool.java | 3 + .../variant/HadoopVariantStorageEngine.java | 7 +- .../variant/executors/MRExecutorFactory.java | 2 +- .../variant/mr/StreamVariantMapper.java | 2 +- .../src/main/python/requirements.txt | 0 .../VariantHadoopStoragePipelineTest.java | 49 ------- .../walker/HadoopVariantWalkerTest.java | 128 ++++++++++++++++++ .../test/resources/variantWalker/Dockerfile | 17 +++ 8 files changed, 156 insertions(+), 52 deletions(-) create mode 100644 opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/python/requirements.txt create mode 100644 opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/walker/HadoopVariantWalkerTest.java create mode 100644 opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/resources/variantWalker/Dockerfile diff --git a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/VariantWalkerTool.java b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/VariantWalkerTool.java index a37ca18486..0dd6f10c34 100644 --- 
a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/VariantWalkerTool.java +++ b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/VariantWalkerTool.java @@ -54,6 +54,9 @@ protected void check() throws Exception { } format = VariantWriterFactory.toOutputFormat(toolParams.getFileFormat(), toolParams.getOutputFileName()); + if (format.isBinary()) { + throw new IllegalArgumentException("Binary format not supported for VariantWalkerTool"); + } if (!format.isPlain()) { format = format.inPlain(); } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantStorageEngine.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantStorageEngine.java index c5d924e769..137ee95566 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantStorageEngine.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantStorageEngine.java @@ -321,7 +321,12 @@ protected VariantExporter newVariantExporter(VariantMetadataFactory metadataFact public List walkData(URI outputFile, VariantWriterFactory.VariantOutputFormat format, Query query, QueryOptions queryOptions, String commandLine) throws StorageEngineException { ParsedVariantQuery variantQuery = parseQuery(query, queryOptions); - int studyId = variantQuery.getStudyQuery().getDefaultStudy().getId(); + int studyId; + if (variantQuery.getStudyQuery().getDefaultStudy() == null) { + studyId = -1; + } else { + studyId = variantQuery.getStudyQuery().getDefaultStudy().getId(); + } ObjectMap params = new ObjectMap(getOptions()).appendAll(variantQuery.getQuery()).appendAll(variantQuery.getInputOptions()); params.remove(StreamVariantDriver.COMMAND_LINE_PARAM); diff 
--git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/executors/MRExecutorFactory.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/executors/MRExecutorFactory.java index 335bbeb14a..8560847e13 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/executors/MRExecutorFactory.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/executors/MRExecutorFactory.java @@ -15,7 +15,7 @@ */ public final class MRExecutorFactory { - private static Logger logger = LoggerFactory.getLogger(SshMRExecutor.class); + private static Logger logger = LoggerFactory.getLogger(MRExecutorFactory.class); private MRExecutorFactory() { } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantMapper.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantMapper.java index 5132d49a7b..95d0e0fb8c 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantMapper.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantMapper.java @@ -278,7 +278,7 @@ private void dockerPruneImages() { LOG.info("Pruning docker images"); int maxImages = 5; Command command = new Command(new String[]{"bash", "-c", "[ $(docker image ls --format json | wc -l) -gt " + maxImages + " ] " - + "&& echo 'Run docker image prune' && docker image prune -f -a " + + "&& echo 'Run docker image prune' && docker image prune -f --all --filter 
label!=storage='do_not_delete'" + "|| echo 'Skipping docker image prune. Less than " + maxImages + " images.'"}, Collections.emptyMap()); command.run(); int ecode = command.getExitValue(); diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/python/requirements.txt b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/python/requirements.txt new file mode 100644 index 0000000000..e69de29bb2 diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/VariantHadoopStoragePipelineTest.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/VariantHadoopStoragePipelineTest.java index ac01326b1c..773f017a67 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/VariantHadoopStoragePipelineTest.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/VariantHadoopStoragePipelineTest.java @@ -40,7 +40,6 @@ import org.opencb.opencga.storage.core.variant.VariantStorageOptions; import org.opencb.opencga.storage.core.variant.adaptors.VariantQueryParam; import org.opencb.opencga.storage.core.variant.adaptors.iterators.VariantDBIterator; -import org.opencb.opencga.storage.core.variant.io.VariantWriterFactory; import org.opencb.opencga.storage.hadoop.utils.HBaseManager; import org.opencb.opencga.storage.hadoop.variant.adaptors.VariantHadoopDBAdaptor; import org.opencb.opencga.storage.hadoop.variant.adaptors.phoenix.VariantPhoenixKeyFactory; @@ -285,52 +284,4 @@ public void printVariants() throws Exception { VariantHbaseTestUtils.printVariants(studyMetadata, dbAdaptor, outDir); } - - @Test - public void exportCommand() throws Exception { - URI outdir = newOutputUri(); - List cmdList = Arrays.asList( - "export 
NUM_VARIANTS=0 ;", - "function setup() {", - " echo \"#SETUP\" ;", - " echo '## Something in single quotes' ; ", - "} ;", - "function map() {", -// " echo \"[$NUM_VARIANTS] $1\" 1>&2 ;", - " echo \"[$NUM_VARIANTS] \" 1>&2 ;", - " echo \"$1\" | jq .id ;", - " NUM_VARIANTS=$((NUM_VARIANTS+1)) ;", - "};", - "function cleanup() {", - " echo \"CLEANUP\" ;", - " echo \"NumVariants = $NUM_VARIANTS\" ;", - "};", - "setup;", - "while read -r i ; do ", - " map \"$i\" ; ", - "done; ", - "cleanup;"); - - // TODO: Add docker prune - - // String cmd = "bash -c '" + String.join("\n", cmdList) + "'"; - String cmd = String.join("\n", cmdList); - String cmdBash = "bash -ce '" + cmd.replace("'", "'\"'\"'") + "'"; - String cmdDocker = "docker run --rm -i opencb/opencga-base bash -ce '" + cmd.replace("'", "'\"'\"'") + "'"; - String cmdPython1 = "python variant_walker.py walker_example Cut --length 30"; -// String cmdPython2 = "python /home/jacobo/appl/opencga/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/python/* opencga-storage-hadoop-walker-example MyWalker --length 30"; - - -// variantStorageEngine.walkData(outdir.resolve("variant3.txt.gz"), VariantWriterFactory.VariantOutputFormat.JSON, new Query(), new QueryOptions(), cmdDocker); -// variantStorageEngine.walkData(outdir.resolve("variant2.txt.gz"), VariantWriterFactory.VariantOutputFormat.JSON, new Query(), new QueryOptions(), cmdBash); -// variantStorageEngine.walkData(outdir.resolve("variant1.txt.gz"), VariantWriterFactory.VariantOutputFormat.JSON, new Query(), new QueryOptions(), cmd); -// variantStorageEngine.walkData(outdir.resolve("variant5.txt.gz"), VariantWriterFactory.VariantOutputFormat.JSON, new Query(), new QueryOptions(), cmdPython1); -// variantStorageEngine.walkData(outdir.resolve("variant8.txt.gz"), VariantWriterFactory.VariantOutputFormat.JSON, new Query(), new QueryOptions(), cmdPython2); -// variantStorageEngine.walkData(outdir.resolve("variant6.txt.gz"), 
VariantWriterFactory.VariantOutputFormat.VCF, new Query(), new QueryOptions(), cmdPython); -// variantStorageEngine.walkData(outdir.resolve("variant4.txt.gz"), VariantWriterFactory.VariantOutputFormat.JSON, new Query(), new QueryOptions(), "opencb/opencga-base", cmd); -// variantStorageEngine.walkData(outdir.resolve("variant4.txt.gz"), VariantWriterFactory.VariantOutputFormat.JSON, new Query(), new QueryOptions(), "opencb/opencga-base", cmdPython1); - variantStorageEngine.walkData(outdir.resolve("variant4.txt.gz"), VariantWriterFactory.VariantOutputFormat.JSON, new Query(), new QueryOptions(), "jcoll/my-python-app:latest", cmdPython1); - - } - } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/walker/HadoopVariantWalkerTest.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/walker/HadoopVariantWalkerTest.java new file mode 100644 index 0000000000..02606a01ff --- /dev/null +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/walker/HadoopVariantWalkerTest.java @@ -0,0 +1,128 @@ +package org.opencb.opencga.storage.hadoop.variant.walker; + + +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.ClassRule; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.opencb.commons.datastore.core.ObjectMap; +import org.opencb.commons.datastore.core.Query; +import org.opencb.commons.datastore.core.QueryOptions; +import org.opencb.commons.exec.Command; +import org.opencb.opencga.core.testclassification.duration.LongTests; +import org.opencb.opencga.storage.core.metadata.models.StudyMetadata; +import org.opencb.opencga.storage.core.variant.VariantStorageBaseTest; +import org.opencb.opencga.storage.core.variant.VariantStorageOptions; +import 
org.opencb.opencga.storage.core.variant.io.VariantWriterFactory; +import org.opencb.opencga.storage.hadoop.variant.HadoopVariantStorageEngine; +import org.opencb.opencga.storage.hadoop.variant.HadoopVariantStorageTest; +import org.opencb.opencga.storage.hadoop.variant.VariantHbaseTestUtils; + +import java.io.IOException; +import java.net.URI; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; + +import static org.junit.Assert.assertEquals; + +@Category(LongTests.class) +public class HadoopVariantWalkerTest extends VariantStorageBaseTest implements HadoopVariantStorageTest { + + @ClassRule + public static HadoopExternalResource externalResource = new HadoopExternalResource(); + private static String _dockerImage; + + @Before + public void before() throws Exception { + // Do not clear DB for each test + } + + @BeforeClass + public static void beforeClass() throws Exception { + HadoopVariantStorageEngine variantStorageManager = externalResource.getVariantStorageEngine(); + externalResource.clearDB(variantStorageManager.getDBName()); + +// URI inputUri = VariantStorageBaseTest.getResourceUri("sample1.genome.vcf"); +// URI inputUri = VariantStorageBaseTest.getResourceUri("platinum/1K.end.platinum-genomes-vcf-NA12877_S1.genome.vcf.gz"); + URI inputUri = VariantStorageBaseTest.getResourceUri("variant-test-file.vcf.gz"); + + StudyMetadata studyMetadata = VariantStorageBaseTest.newStudyMetadata(); + VariantStorageBaseTest.runDefaultETL(inputUri, variantStorageManager, studyMetadata, + new ObjectMap(VariantStorageOptions.TRANSFORM_FORMAT.key(), "avro") + .append(VariantStorageOptions.ANNOTATE.key(), true) + .append(VariantStorageOptions.STATS_CALCULATE.key(), false) + ); + + VariantHbaseTestUtils.printVariants(variantStorageManager.getDBAdaptor(), newOutputUri()); + } + + @Test + public void exportCommand() throws Exception { + URI outdir = newOutputUri(); + + List cmdList = Arrays.asList( 
+ "export NUM_VARIANTS=0 ;", + "function setup() {", + " echo \"#SETUP\" ;", + " echo '## Something in single quotes' ; ", + "} ;", + "function map() {", +// " echo \"[$NUM_VARIANTS] $1\" 1>&2 ;", + " echo \"[$NUM_VARIANTS] \" 1>&2 ;", + " echo \"$1\" | jq .id ;", + " NUM_VARIANTS=$((NUM_VARIANTS+1)) ;", + "};", + "function cleanup() {", + " echo \"CLEANUP\" ;", + " echo \"NumVariants = $NUM_VARIANTS\" ;", + "};", + "setup;", + "while read -r i ; do ", + " map \"$i\" ; ", + "done; ", + "cleanup;"); + + // String cmd = "bash -c '" + String.join("\n", cmdList) + "'"; + String cmd = String.join("\n", cmdList); + +// variantStorageEngine.walkData(outdir.resolve("variant3.txt.gz"), VariantWriterFactory.VariantOutputFormat.JSON, new Query(), new QueryOptions(), cmdDocker); +// variantStorageEngine.walkData(outdir.resolve("variant2.txt.gz"), VariantWriterFactory.VariantOutputFormat.JSON, new Query(), new QueryOptions(), cmdBash); + variantStorageEngine.walkData(outdir.resolve("variant1.txt.gz"), VariantWriterFactory.VariantOutputFormat.JSON, new Query(), new QueryOptions(), cmd); +// variantStorageEngine.walkData(outdir.resolve("variant5.txt.gz"), VariantWriterFactory.VariantOutputFormat.JSON, new Query(), new QueryOptions(), cmdPython1); +// variantStorageEngine.walkData(outdir.resolve("variant8.txt.gz"), VariantWriterFactory.VariantOutputFormat.JSON, new Query(), new QueryOptions(), cmdPython2); +// variantStorageEngine.walkData(outdir.resolve("variant6.txt.gz"), VariantWriterFactory.VariantOutputFormat.VCF, new Query(), new QueryOptions(), cmdPython); +// variantStorageEngine.walkData(outdir.resolve("variant4.txt.gz"), VariantWriterFactory.VariantOutputFormat.JSON, new Query(), new QueryOptions(), "opencb/opencga-base", cmd); +// variantStorageEngine.walkData(outdir.resolve("variant4.txt.gz"), VariantWriterFactory.VariantOutputFormat.JSON, new Query(), new QueryOptions(), "opencb/opencga-base", cmdPython1); + } + + @Test + public void exportDocker() throws Exception { 
+ URI outdir = newOutputUri(); + String dockerImage = buildDocker(); + + String cmdPython1 = "python variant_walker.py walker_example Cut --length 30"; + + variantStorageEngine.walkData(outdir.resolve("variant4.txt.gz"), VariantWriterFactory.VariantOutputFormat.JSON, new Query(), new QueryOptions(), dockerImage, cmdPython1); + } + + private static String buildDocker() throws IOException { + if (HadoopVariantWalkerTest._dockerImage != null) { + return HadoopVariantWalkerTest._dockerImage; + } + String dockerImage = "local/variant-walker-test:latest"; + Path dockerFile = Paths.get(getResourceUri("variantWalker/Dockerfile").getPath()); +// Path pythonDir = Paths.get("../../opencga-storage-core/src/main/python").toAbsolutePath(); + Path pythonDir = Paths.get("src/main/python").toAbsolutePath(); + Command dockerBuild = new Command(new String[]{"docker", "build", "-t", dockerImage, "-f", dockerFile.toString(), pythonDir.toString()}, Collections.emptyMap()); + dockerBuild.run(); + assertEquals(0, dockerBuild.getExitValue()); + HadoopVariantWalkerTest._dockerImage = dockerImage; + return dockerImage; + } + + +} diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/resources/variantWalker/Dockerfile b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/resources/variantWalker/Dockerfile new file mode 100644 index 0000000000..bd9f5511ad --- /dev/null +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/resources/variantWalker/Dockerfile @@ -0,0 +1,17 @@ +# Use an official Python runtime as a parent image +FROM python:3.8-slim-buster + +# Set the working directory in the container to /app +WORKDIR /app + +ARG PYTHON_PATH="." 
+ +LABEL storage="do_not_delete" + +RUN echo ${PYTHON_PATH} +# Copy the python directory contents into the container at /app +COPY ${PYTHON_PATH} /app + +# Install any needed packages specified in requirements.txt +RUN ls -la /app +RUN pip install --no-cache-dir -r requirements.txt From 003e467f57a8dbc8660f0735f214fd2766b198b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Thu, 28 Nov 2024 14:33:53 +0000 Subject: [PATCH 071/122] storage: Rename some variant-walker params. Add descriptions #TASK-6722 --- .../analysis/variant/VariantWalkerTool.java | 6 ++-- .../AnalysisVariantCommandExecutor.java | 3 +- .../AnalysisVariantCommandOptions.java | 17 +++++------ .../models/variant/VariantWalkerParams.java | 30 +++++++++---------- .../walker/HadoopVariantWalkerTest.java | 28 ++++++++++------- 5 files changed, 43 insertions(+), 41 deletions(-) diff --git a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/VariantWalkerTool.java b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/VariantWalkerTool.java index 0dd6f10c34..5ab80d4f57 100644 --- a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/VariantWalkerTool.java +++ b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/VariantWalkerTool.java @@ -49,11 +49,11 @@ public class VariantWalkerTool extends OpenCgaTool { protected void check() throws Exception { super.check(); - if (StringUtils.isEmpty(toolParams.getFileFormat())) { - toolParams.setFileFormat(VariantWriterFactory.VariantOutputFormat.VCF.toString()); + if (StringUtils.isEmpty(toolParams.getInputFormat())) { + toolParams.setInputFormat(VariantWriterFactory.VariantOutputFormat.VCF.toString()); } - format = VariantWriterFactory.toOutputFormat(toolParams.getFileFormat(), toolParams.getOutputFileName()); + format = VariantWriterFactory.toOutputFormat(toolParams.getInputFormat(), toolParams.getOutputFileName()); if (format.isBinary()) { throw new 
IllegalArgumentException("Binary format not supported for VariantWalkerTool"); } diff --git a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/AnalysisVariantCommandExecutor.java b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/AnalysisVariantCommandExecutor.java index 3e3d5cd752..1410934a49 100644 --- a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/AnalysisVariantCommandExecutor.java +++ b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/AnalysisVariantCommandExecutor.java @@ -1990,9 +1990,8 @@ private RestResponse runWalker() throws Exception { putNestedIfNotEmpty(beanParams, "unknownGenotype", commandOptions.unknownGenotype, true); putNestedIfNotNull(beanParams, "sampleMetadata", commandOptions.sampleMetadata, true); putNestedIfNotNull(beanParams, "sort", commandOptions.sort, true); - putNestedIfNotEmpty(beanParams, "outdir", commandOptions.outdir, true); putNestedIfNotEmpty(beanParams, "outputFileName", commandOptions.outputFileName, true); - putNestedIfNotEmpty(beanParams, "fileFormat", commandOptions.fileFormat, true); + putNestedIfNotEmpty(beanParams, "inputFormat", commandOptions.inputFormat, true); putNestedIfNotEmpty(beanParams, "dockerImage", commandOptions.dockerImage, true); putNestedIfNotEmpty(beanParams, "commandLine", commandOptions.commandLine, true); putNestedIfNotEmpty(beanParams, "include", commandOptions.bodyInclude, true); diff --git a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/options/AnalysisVariantCommandOptions.java b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/options/AnalysisVariantCommandOptions.java index 998a7dc510..24a37e0422 100644 --- a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/options/AnalysisVariantCommandOptions.java +++ b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/options/AnalysisVariantCommandOptions.java @@ -3113,25 +3113,22 @@ public class RunWalkerCommandOptions { @Parameter(names = 
{"--sort"}, description = "The body web service sort parameter", required = false, help = true, arity = 0) public boolean sort = false; - @Parameter(names = {"--outdir"}, description = "The body web service outdir parameter", required = false, arity = 1) - public String outdir; - - @Parameter(names = {"--output-file-name"}, description = "The body web service outputFileName parameter", required = false, arity = 1) + @Parameter(names = {"--output-file-name"}, description = "Output file name", required = false, arity = 1) public String outputFileName; - @Parameter(names = {"--file-format"}, description = "The body web service fileFormat parameter", required = false, arity = 1) - public String fileFormat; + @Parameter(names = {"--input-format"}, description = "Format that will be used as input for the variant walker", required = false, arity = 1) + public String inputFormat; - @Parameter(names = {"--docker-image"}, description = "The body web service dockerImage parameter", required = false, arity = 1) + @Parameter(names = {"--docker-image"}, description = "Docker image to use", required = false, arity = 1) public String dockerImage; - @Parameter(names = {"--command-line"}, description = "The body web service commandLine parameter", required = false, arity = 1) + @Parameter(names = {"--command-line"}, description = "Command line to execute from the walker", required = false, arity = 1) public String commandLine; - @Parameter(names = {"--body_include"}, description = "The body web service include parameter", required = false, arity = 1) + @Parameter(names = {"--body_include"}, description = "Fields included in the response, whole JSON path must be provided", required = false, arity = 1) public String bodyInclude; - @Parameter(names = {"--body_exclude"}, description = "The body web service exclude parameter", required = false, arity = 1) + @Parameter(names = {"--body_exclude"}, description = "Fields excluded in the response, whole JSON path must be provided", required = 
false, arity = 1) public String bodyExclude; } diff --git a/opencga-core/src/main/java/org/opencb/opencga/core/models/variant/VariantWalkerParams.java b/opencga-core/src/main/java/org/opencb/opencga/core/models/variant/VariantWalkerParams.java index ef541690fc..22a3d51f57 100644 --- a/opencga-core/src/main/java/org/opencb/opencga/core/models/variant/VariantWalkerParams.java +++ b/opencga-core/src/main/java/org/opencb/opencga/core/models/variant/VariantWalkerParams.java @@ -1,24 +1,24 @@ package org.opencb.opencga.core.models.variant; +import org.opencb.commons.annotations.DataField; +import org.opencb.opencga.core.api.ParamConstants; + public class VariantWalkerParams extends VariantQueryParams { public static final String DESCRIPTION = "Variant walker params"; - private String outdir; + + @DataField(description = "Output file name") private String outputFileName; - private String fileFormat; + @DataField(description = "Format that will be used as input for the variant walker") + private String inputFormat; + @DataField(description = "Docker image to use") private String dockerImage; + @DataField(description = "Command line to execute from the walker") private String commandLine; + @DataField(description = ParamConstants.INCLUDE_DESCRIPTION) private String include; + @DataField(description = ParamConstants.EXCLUDE_DESCRIPTION) private String exclude; - public String getOutdir() { - return outdir; - } - - public VariantWalkerParams setOutdir(String outdir) { - this.outdir = outdir; - return this; - } - public String getOutputFileName() { return outputFileName; } @@ -28,12 +28,12 @@ public VariantWalkerParams setOutputFileName(String outputFileName) { return this; } - public String getFileFormat() { - return fileFormat; + public String getInputFormat() { + return inputFormat; } - public VariantWalkerParams setFileFormat(String fileFormat) { - this.fileFormat = fileFormat; + public VariantWalkerParams setInputFormat(String inputFormat) { + this.inputFormat = 
inputFormat; return this; } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/walker/HadoopVariantWalkerTest.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/walker/HadoopVariantWalkerTest.java index 02606a01ff..0d9ab975ed 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/walker/HadoopVariantWalkerTest.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/walker/HadoopVariantWalkerTest.java @@ -1,10 +1,7 @@ package org.opencb.opencga.storage.hadoop.variant.walker; -import org.junit.Before; -import org.junit.BeforeClass; -import org.junit.ClassRule; -import org.junit.Test; +import org.junit.*; import org.junit.experimental.categories.Category; import org.opencb.commons.datastore.core.ObjectMap; import org.opencb.commons.datastore.core.Query; @@ -34,7 +31,7 @@ public class HadoopVariantWalkerTest extends VariantStorageBaseTest implements H @ClassRule public static HadoopExternalResource externalResource = new HadoopExternalResource(); - private static String _dockerImage; + private static String dockerImage; @Before public void before() throws Exception { @@ -58,6 +55,14 @@ public static void beforeClass() throws Exception { ); VariantHbaseTestUtils.printVariants(variantStorageManager.getDBAdaptor(), newOutputUri()); + + dockerImage = buildDocker(); + } + + @AfterClass + public static void afterClass() throws Exception { + pruneDocker(dockerImage); + dockerImage = null; } @Test @@ -102,7 +107,6 @@ public void exportCommand() throws Exception { @Test public void exportDocker() throws Exception { URI outdir = newOutputUri(); - String dockerImage = buildDocker(); String cmdPython1 = "python variant_walker.py walker_example Cut --length 30"; @@ 
-110,9 +114,6 @@ public void exportDocker() throws Exception { } private static String buildDocker() throws IOException { - if (HadoopVariantWalkerTest._dockerImage != null) { - return HadoopVariantWalkerTest._dockerImage; - } String dockerImage = "local/variant-walker-test:latest"; Path dockerFile = Paths.get(getResourceUri("variantWalker/Dockerfile").getPath()); // Path pythonDir = Paths.get("../../opencga-storage-core/src/main/python").toAbsolutePath(); @@ -120,9 +121,14 @@ private static String buildDocker() throws IOException { Command dockerBuild = new Command(new String[]{"docker", "build", "-t", dockerImage, "-f", dockerFile.toString(), pythonDir.toString()}, Collections.emptyMap()); dockerBuild.run(); assertEquals(0, dockerBuild.getExitValue()); - HadoopVariantWalkerTest._dockerImage = dockerImage; return dockerImage; } - + private static void pruneDocker(String dockerImage) throws IOException { + if (dockerImage != null) { + Command dockerPrune = new Command(new String[]{"docker", "rmi", dockerImage}, Collections.emptyMap()); + dockerPrune.run(); + assertEquals(0, dockerPrune.getExitValue()); + } + } } From 3c315a5004cdacfabddc397bb03c358bcd466758 Mon Sep 17 00:00:00 2001 From: mbleda Date: Thu, 28 Nov 2024 15:31:52 +0000 Subject: [PATCH 072/122] R client: updated to support XB 2.x organizations #TASK-6369 --- .gitignore | 2 +- opencga-client/src/main/R/DESCRIPTION | 12 +- opencga-client/src/main/R/NAMESPACE | 3 + .../src/main/R/R/OpencgaR-methods.R | 34 +- opencga-client/src/main/R/R/commons.R | 39 +- .../main/R/man/adminClient-OpencgaR-method.Rd | 48 +- .../R/man/alignmentClient-OpencgaR-method.Rd | 26 +- .../R/man/clinicalClient-OpencgaR-method.Rd | 155 ++-- .../R/man/cohortClient-OpencgaR-method.Rd | 40 +- .../R/man/familyClient-OpencgaR-method.Rd | 48 +- .../main/R/man/fileClient-OpencgaR-method.Rd | 90 +-- .../main/R/man/ga4ghClient-OpencgaR-method.Rd | 6 +- .../R/man/individualClient-OpencgaR-method.Rd | 72 +- 
.../main/R/man/jobClient-OpencgaR-method.Rd | 48 +- .../main/R/man/metaClient-OpencgaR-method.Rd | 8 + opencga-client/src/main/R/man/opencgaLogin.Rd | 9 +- .../R/man/operationClient-OpencgaR-method.Rd | 94 ++- .../man/organizationClient-OpencgaR-method.Rd | 124 ++++ .../main/R/man/panelClient-OpencgaR-method.Rd | 56 +- .../R/man/projectClient-OpencgaR-method.Rd | 34 +- .../R/man/sampleClient-OpencgaR-method.Rd | 50 +- .../main/R/man/studyClient-OpencgaR-method.Rd | 107 +-- .../main/R/man/userClient-OpencgaR-method.Rd | 36 +- .../R/man/variantClient-OpencgaR-method.Rd | 88 ++- .../src/main/R/vignettes/opencgaR.R | 423 +++++------ .../src/main/R/vignettes/opencgaR.Rmd | 110 +-- .../src/main/R/vignettes/opencgaR.html | 675 ++++++++++++++++++ 27 files changed, 1670 insertions(+), 767 deletions(-) create mode 100644 opencga-client/src/main/R/man/organizationClient-OpencgaR-method.Rd create mode 100644 opencga-client/src/main/R/vignettes/opencgaR.html diff --git a/.gitignore b/.gitignore index 876801ecbf..83aee9ed5e 100644 --- a/.gitignore +++ b/.gitignore @@ -86,7 +86,7 @@ git.properties *.Rcheck* opencga-client/src/main/R_packages opencga-client/src/main/opencgaR_*.tar.gz -opencga-client/src/main/R/vignettes/opencgaR.html +#opencga-client/src/main/R/vignettes/opencgaR.html opencga-client/src/main/R/vignettes/opencgaR.md /opencga-catalog/src/test/java/org/opencb/opencga/catalog/config/CatalogConfigurationTest.java diff --git a/opencga-client/src/main/R/DESCRIPTION b/opencga-client/src/main/R/DESCRIPTION index f41fc777fa..fd130957f4 100644 --- a/opencga-client/src/main/R/DESCRIPTION +++ b/opencga-client/src/main/R/DESCRIPTION @@ -1,9 +1,9 @@ Package: opencgaR Type: Package Title: Querying Opencga Data -Version: ${opencgar.version} -Author: Marta Bleda -Maintainer: Marta Bleda +Version: Version: ${opencgar.version} +Author: Zetta Genomics +Maintainer: Zetta Genomics Description: R client that contains classes and methods for working with Opencga data. 
License: GPL-3 @@ -17,7 +17,8 @@ Imports: configr, base64enc, methods, - rlang + rlang, + lubridate Suggests: BiocStyle, knitr, @@ -40,6 +41,7 @@ Collate: 'Meta-methods.R' 'OpencgaR-methods.R' 'Operation-methods.R' + 'Organization-methods.R' 'Panel-methods.R' 'Project-methods.R' 'RestResponse-methods.R' @@ -47,5 +49,5 @@ Collate: 'Study-methods.R' 'User-methods.R' 'Variant-methods.R' -RoxygenNote: 7.1.2 +RoxygenNote: 7.3.2 VignetteBuilder: knitr diff --git a/opencga-client/src/main/R/NAMESPACE b/opencga-client/src/main/R/NAMESPACE index ad9a79a488..cf8377f585 100644 --- a/opencga-client/src/main/R/NAMESPACE +++ b/opencga-client/src/main/R/NAMESPACE @@ -23,6 +23,8 @@ export(mergeResults) export(opencgaHelp) export(opencgaLogin) export(opencgaLogout) +export(opencgaR) +export(restResponse) export(results) exportClasses(OpencgaR) exportClasses(RestResponse) @@ -37,6 +39,7 @@ exportMethods(individualClient) exportMethods(jobClient) exportMethods(metaClient) exportMethods(operationClient) +exportMethods(organizationClient) exportMethods(panelClient) exportMethods(projectClient) exportMethods(sampleClient) diff --git a/opencga-client/src/main/R/R/OpencgaR-methods.R b/opencga-client/src/main/R/R/OpencgaR-methods.R index a53439dc68..a538b2eb54 100644 --- a/opencga-client/src/main/R/R/OpencgaR-methods.R +++ b/opencga-client/src/main/R/R/OpencgaR-methods.R @@ -147,22 +147,23 @@ readConfFile <- function(conf){ #' #' \dontrun{ #' con <- initOpencgaR(host = "http://bioinfo.hpc.cam.ac.uk/opencga-prod/", version = "v2") -#' con <- opencgaLogin(opencga = con, userid = "demouser", passwd = "demouser", showToken = TRUE) +#' con <- opencgaLogin(opencga = con, userid = "xxx", passwd = "xxx", showToken = TRUE) #' #' # Configuration in list format #' conf <- list(version="v2", rest=list(host="http://bioinfo.hpc.cam.ac.uk/opencga-prod/")) #' con <- initOpencgaR(opencgaConfig=conf) -#' con <- opencgaLogin(opencga = con, userid = "demouser", passwd = demouser") +#' con <- 
opencgaLogin(opencga = con, userid = "xxx", passwd = xxx") #' #' # Configuration in file format ("YAML" or "JSON") #' conf <- "/path/to/conf/client-configuration.yml" #' con <- initOpencgaR(opencgaConfig=conf) -#' con <- opencgaLogin(opencga = con, userid = "demouser", passwd = "demouser") +#' con <- opencgaLogin(opencga = con, userid = "xxx", passwd = "xxx") #' } #' @export opencgaLogin <- function(opencga, userid=NULL, passwd=NULL, interactive=FALSE, - autoRenew=FALSE, verbose=FALSE, showToken=FALSE){ + autoRenew=FALSE, verbose=FALSE, showToken=FALSE, + organization=NULL){ if (class(opencga) == "OpencgaR"){ host <- slot(object = opencga, name = "host") version <- slot(object = opencga, name = "version") @@ -183,16 +184,18 @@ opencgaLogin <- function(opencga, userid=NULL, passwd=NULL, interactive=FALSE, if(requireNamespace("miniUI", quietly = TRUE) & requireNamespace("shiny", quietly = TRUE)){ user_login <- function() { ui <- miniUI::miniPage( - miniUI::gadgetTitleBar("Please enter your username and password"), + miniUI::gadgetTitleBar("Please enter your username, password and organization (optional):"), miniUI::miniContentPanel( shiny::textInput("username", "Username"), - shiny::passwordInput("password", "Password"))) + shiny::passwordInput("password", "Password"), + shiny::passwordInput("organization", "Organization"))) server <- function(input, output) { shiny::observeEvent(input$done, { user <- input$username pass <- input$password - res <- list(user=user, pass=pass) + org <- input$organization + res <- list(user=user, pass=pass, org=org) shiny::stopApp(res) }) shiny::observeEvent(input$cancel, { @@ -205,6 +208,7 @@ opencgaLogin <- function(opencga, userid=NULL, passwd=NULL, interactive=FALSE, cred <- user_login() userid <- cred$user passwd <- cred$pass + organization <- cred$org }else{ print("The 'miniUI' and 'shiny' packages are required to run the interactive login, please install it and try again. 
@@ -215,7 +219,11 @@ opencgaLogin <- function(opencga, userid=NULL, passwd=NULL, interactive=FALSE, # end interactive login # Send request - query <- httr::POST(baseurl, body = list(user=userid, password=passwd), encode = "json") + body_req <- list(user=userid, password=passwd) + if (!is.null(organization) && organization != ""){ + body_req <- append(x=body_req, values=list(organization=organization)) + } + query <- httr::POST(baseurl, body = body_req, encode = "json") # check query status httr::warn_for_status(query) @@ -235,14 +243,14 @@ opencgaLogin <- function(opencga, userid=NULL, passwd=NULL, interactive=FALSE, # get expiration time loginInfo <- unlist(strsplit(x=token, split="\\."))[2] loginInfojson <- jsonlite::fromJSON(rawToChar(base64enc::base64decode(what=loginInfo))) - loginTime <- as.character(as.POSIXct(loginInfojson$iat, origin="1970-01-01"), format="%Y%m%d%H%M%S") - expirationTime <- as.character(as.POSIXct(loginInfojson$exp, origin="1970-01-01"), format="%Y%m%d%H%M%S") + loginTime <- lubridate::as_datetime(as.POSIXct(loginInfojson$iat, origin="1970-01-01")) + expirationTime <- lubridate::as_datetime(as.POSIXct(loginInfojson$exp, origin="1970-01-01")) # Create session JSON sessionDf <- data.frame(host=opencga@host, version=opencga@version, user=opencga@user, token=opencga@token, refreshToken=opencga@refreshToken, - login=loginTime, expirationTime=expirationTime) + login=as.character(loginTime), expirationTime=as.character(expirationTime)) sessionJson <- jsonlite::toJSON(sessionDf) # Get system to define session directory @@ -267,10 +275,10 @@ opencgaLogin <- function(opencga, userid=NULL, passwd=NULL, interactive=FALSE, sessionTable <- rbind(sessionTable, sessionDf) write(x = jsonlite::toJSON(sessionTable), file = sessionFile) }else if (length(sessionTableMatch) == 1){ - sessionTable[sessionTableMatch, "login"] <- loginTime + sessionTable[sessionTableMatch, "login"] <- as.character(loginTime) sessionTable[sessionTableMatch, "token"] <- token 
sessionTable[sessionTableMatch, "refreshToken"] <- refreshToken - sessionTable[sessionTableMatch, "expirationTime"] <- expirationTime + sessionTable[sessionTableMatch, "expirationTime"] <- as.character(expirationTime) write(x = jsonlite::toJSON(sessionTable), file = sessionFile) }else{ stop(paste("There is more than one connection to this host in your rsession file. Please, remove any duplicated entries in", diff --git a/opencga-client/src/main/R/R/commons.R b/opencga-client/src/main/R/R/commons.R index f37893a76a..b3be5415a5 100644 --- a/opencga-client/src/main/R/R/commons.R +++ b/opencga-client/src/main/R/R/commons.R @@ -119,8 +119,8 @@ fetchOpenCGA <- function(object=object, category=NULL, categoryId=NULL, stop(paste("There is more than one connection to this host in your rsession file. Please, remove any duplicated entries in", object@sessionFile)) } - timeNow <- Sys.time() - timeLeft <- as.numeric(difftime(as.POSIXct(expirationTime, format="%Y%m%d%H%M%S"), timeNow, units="mins")) + timeNow <- lubridate::as_datetime(Sys.time()) + timeLeft <- as.numeric(difftime(expirationTime, timeNow, units="mins")) if (timeLeft > 0 & timeLeft <= 5){ print("INFO: Your session will expire in less than 5 minutes.") urlNewToken <- paste0(host, version, "users/login") @@ -270,11 +270,28 @@ callREST <- function(pathUrl, params, httpMethod, skip, token, as.queryParam, return(list(resp=resp, content=content)) } + +# A function to print Events +printEvents <- function (row){ + print(paste(row$type, ":", row$name, row$message)) +} + + ## A function to parse the json data into R dataframes parseResponse <- function(resp, content, verbose){ js <- jsonlite::fromJSON(content) if (resp$status_code == 200){ - if (!("warning" %in% js[[1]]) || js[[1]]$warning == ""){ + # QUERY SUCCESSFUL + if ("events" %in% names(js) == 0){ + if (length(js$events)){ + if (isTRUE(verbose)){ + print("Query successful!") + } + }else if (nrow(js$events) > 0){ + print("Query successful with warnings.") + 
printEvents(js$events) + } + }else if (!("warning" %in% js[[1]]) || js[[1]]$warning == ""){ if (isTRUE(verbose)){ print("Query successful!") } @@ -283,15 +300,19 @@ parseResponse <- function(resp, content, verbose){ print(paste("WARNING:", js[[1]]$warning)) } }else{ + # QUERY UNSUCCESSFUL print("Query unsuccessful.") print(paste("Category:", httr::http_status(resp)$category)) print(paste("Reason:", httr::http_status(resp)$reason)) - if (js[[1]]$warning != ""){ - print(paste("WARNING:", js[[1]]$warning)) - print() - } - if (js[[1]]$error != ""){ - stop(paste("ERROR:", js[[1]]$error)) + if ("events" %in% names(js)){ + printEvents(js$events) + }else{ + if (js[[1]]$warning != ""){ + print(paste("WARNING:", js[[1]]$warning)) + } + if (js[[1]]$error != ""){ + stop(paste("ERROR:", js[[1]]$error)) + } } } diff --git a/opencga-client/src/main/R/man/adminClient-OpencgaR-method.Rd b/opencga-client/src/main/R/man/adminClient-OpencgaR-method.Rd index a0bbbc24d7..b7b39bb0c0 100644 --- a/opencga-client/src/main/R/man/adminClient-OpencgaR-method.Rd +++ b/opencga-client/src/main/R/man/adminClient-OpencgaR-method.Rd @@ -4,14 +4,14 @@ \alias{adminClient,OpencgaR-method} \title{AdminClient methods} \usage{ -\S4method{adminClient}{OpencgaR}(OpencgaR, endpointName, params = NULL, ...) +\S4method{adminClient}{OpencgaR}(OpencgaR, user, endpointName, params = NULL, ...) } \arguments{ -\item{fields}{Comma separated list of fields by which to group by.} +\item{user}{User ID.} -\item{entity}{Entity to be grouped by.} +\item{fields}{Comma separated list of fields by which to group by.} -\item{action}{Action performed.} +\item{entity}{Entity to be grouped by. 
Allowed values: \link{'AUDIT NOTE ORGANIZATION USER PROJECT STUDY FILE SAMPLE JOB INDIVIDUAL COHORT DISEASE_PANEL FAMILY CLINICAL_ANALYSIS INTERPRETATION VARIANT ALIGNMENT CLINICAL EXPRESSION RGA FUNCTIONAL'}} \item{before}{Object before update.} @@ -19,7 +19,13 @@ \item{date}{Date <,<=,>,>=(Format: yyyyMMddHHmmss) and yyyyMMddHHmmss-yyyyMMddHHmmss.} -\item{collection}{Collection to be indexed (file, sample, individual, family, cohort and/or job). If not provided, all of them will be indexed.} +\item{study}{Study [\link{organization@}project:]study where study and project can be either the ID or UUID.} + +\item{entryIds}{Comma separated list of entry ids.} + +\item{permissions}{Comma separated list of permissions to be retrieved.} + +\item{category}{Category corresponding to the id's provided.} \item{include}{Fields included in the response, whole JSON path must be provided.} @@ -31,11 +37,11 @@ \item{count}{Get the total number of results matching the query. Deactivated by default.} -\item{user}{User ID.} +\item{authenticationId}{Authentication origin ID.} -\item{account}{Account type \link{GUEST, FULL, ADMINISTRATOR}.} +\item{organization}{Organization id.} -\item{authenticationId}{Authentication origin ID.} +\item{action}{Action to be performed: ADD or REMOVE user to/from groups. Allowed values: \link{'ADD REMOVE'}} \item{data}{JSON containing the parameters.} } @@ -44,13 +50,14 @@ This function implements the OpenCGA calls for managing Admin. 
The following table summarises the available \emph{actions} for this client:\tabular{llr}{ endpointName \tab Endpoint WS \tab parameters accepted \cr groupByAudit \tab /{apiVersion}/admin/audit/groupBy \tab count, limit, fields\link{*}, entity\link{*}, action, before, after, date \cr - indexStatsCatalog \tab /{apiVersion}/admin/catalog/indexStats \tab collection \cr installCatalog \tab /{apiVersion}/admin/catalog/install \tab body\link{*} \cr - jwtCatalog \tab /{apiVersion}/admin/catalog/jwt \tab body\link{*} \cr + jwtCatalog \tab /{apiVersion}/admin/catalog/jwt \tab organization, body\link{*} \cr createUsers \tab /{apiVersion}/admin/users/create \tab body\link{*} \cr - importUsers \tab /{apiVersion}/admin/users/import \tab body\link{*} \cr - searchUsers \tab /{apiVersion}/admin/users/search \tab include, exclude, limit, skip, count, user, account, authenticationId \cr - syncUsers \tab /{apiVersion}/admin/users/sync \tab body\link{*} \cr + importUsers \tab /{apiVersion}/admin/users/import \tab organization, body\link{*} \cr + permissionsUsers \tab /{apiVersion}/admin/users/permissions \tab study, entryIds, permissions, category \cr + searchUsers \tab /{apiVersion}/admin/users/search \tab include, exclude, limit, skip, count, organization, user, authenticationId \cr + syncUsers \tab /{apiVersion}/admin/users/sync \tab organization, body\link{*} \cr + usersUpdateGroups \tab /{apiVersion}/admin/users/{user}/groups/update \tab organization, user\link{*}, action, body\link{*} \cr } } \section{Endpoint /{apiVersion}/admin/audit/groupBy}{ @@ -58,11 +65,6 @@ The following table summarises the available \emph{actions} for this client:\tab Group by operation. } -\section{Endpoint /{apiVersion}/admin/catalog/indexStats}{ - -Sync Catalog into the Solr. -} - \section{Endpoint /{apiVersion}/admin/catalog/install}{ Install OpenCGA database. @@ -83,6 +85,11 @@ Create a new user. Import users or a group of users from LDAP or AAD. 
} +\section{Endpoint /{apiVersion}/admin/users/permissions}{ + +User permissions. +} + \section{Endpoint /{apiVersion}/admin/users/search}{ User search method. @@ -93,6 +100,11 @@ User search method. Synchronise a group of users from an authentication origin with a group in a study from catalog. } +\section{Endpoint /{apiVersion}/admin/users/{user}/groups/update}{ + +Add or remove users from existing groups. +} + \seealso{ \url{http://docs.opencb.org/display/opencga/Using+OpenCGA} and the RESTful API documentation \url{http://bioinfo.hpc.cam.ac.uk/opencga-prod/webservices/} diff --git a/opencga-client/src/main/R/man/alignmentClient-OpencgaR-method.Rd b/opencga-client/src/main/R/man/alignmentClient-OpencgaR-method.Rd index 90628b933d..4be36d52af 100644 --- a/opencga-client/src/main/R/man/alignmentClient-OpencgaR-method.Rd +++ b/opencga-client/src/main/R/man/alignmentClient-OpencgaR-method.Rd @@ -67,25 +67,31 @@ \item{jobTags}{Job tags.} +\item{jobScheduledStartTime}{Time when the job is scheduled to start.} + +\item{jobPriority}{Priority of the job.} + +\item{jobDryRun}{Flag indicating that the job will be executed in dry-run mode. In this mode, OpenCGA will validate that all parameters and prerequisites are correctly set for successful execution, but the job will not actually run.} + \item{data}{Samtools parameters. Supported Samtools commands: sort, index, view, stats, flagstat, dict, faidx, depth, plot-bamstats.} } \description{ This function implements the OpenCGA calls for managing Analysis - Alignment. 
The following table summarises the available \emph{actions} for this client:\tabular{llr}{ endpointName \tab Endpoint WS \tab parameters accepted \cr - runBwa \tab /{apiVersion}/analysis/alignment/bwa/run \tab study, jobId, jobDependsOn, jobDescription, jobTags, body\link{*} \cr - runCoverageIndex \tab /{apiVersion}/analysis/alignment/coverage/index/run \tab study, jobId, jobDependsOn, jobDescription, jobTags, body\link{*} \cr - coverageQcGeneCoverageStatsRun \tab /{apiVersion}/analysis/alignment/coverage/qc/geneCoverageStats/run \tab study, jobId, jobDependsOn, jobDescription, jobTags, body\link{*} \cr + runBwa \tab /{apiVersion}/analysis/alignment/bwa/run \tab study, jobId, jobDependsOn, jobDescription, jobTags, jobScheduledStartTime, jobPriority, jobDryRun, body\link{*} \cr + runCoverageIndex \tab /{apiVersion}/analysis/alignment/coverage/index/run \tab study, jobId, jobDependsOn, jobDescription, jobTags, jobScheduledStartTime, jobPriority, jobDryRun, body\link{*} \cr + coverageQcGeneCoverageStatsRun \tab /{apiVersion}/analysis/alignment/coverage/qc/geneCoverageStats/run \tab study, jobId, jobDependsOn, jobDescription, jobTags, jobScheduledStartTime, jobPriority, jobDryRun, body\link{*} \cr queryCoverage \tab /{apiVersion}/analysis/alignment/coverage/query \tab file\link{*}, study, region, gene, offset, onlyExons, range, windowSize, splitResults \cr ratioCoverage \tab /{apiVersion}/analysis/alignment/coverage/ratio \tab file1\link{*}, file2\link{*}, study, skipLog2, region, gene, offset, onlyExons, windowSize, splitResults \cr statsCoverage \tab /{apiVersion}/analysis/alignment/coverage/stats \tab file\link{*}, gene\link{*}, study, threshold \cr - runDeeptools \tab /{apiVersion}/analysis/alignment/deeptools/run \tab study, jobId, jobDependsOn, jobDescription, jobTags, body\link{*} \cr - runFastqc \tab /{apiVersion}/analysis/alignment/fastqc/run \tab study, jobId, jobDependsOn, jobDescription, jobTags, body\link{*} \cr - runIndex \tab 
/{apiVersion}/analysis/alignment/index/run \tab study, jobId, jobDependsOn, jobDescription, jobTags, body\link{*} \cr - runPicard \tab /{apiVersion}/analysis/alignment/picard/run \tab study, jobId, jobDependsOn, jobDescription, jobTags, body\link{*} \cr - runQc \tab /{apiVersion}/analysis/alignment/qc/run \tab study, jobId, jobDependsOn, jobDescription, jobTags, body\link{*} \cr + runDeeptools \tab /{apiVersion}/analysis/alignment/deeptools/run \tab study, jobId, jobDependsOn, jobDescription, jobTags, jobScheduledStartTime, jobPriority, jobDryRun, body\link{*} \cr + runFastqc \tab /{apiVersion}/analysis/alignment/fastqc/run \tab study, jobId, jobDependsOn, jobDescription, jobTags, jobScheduledStartTime, jobPriority, jobDryRun, body\link{*} \cr + runIndex \tab /{apiVersion}/analysis/alignment/index/run \tab study, jobId, jobDependsOn, jobDescription, jobTags, jobScheduledStartTime, jobPriority, jobDryRun, body\link{*} \cr + runPicard \tab /{apiVersion}/analysis/alignment/picard/run \tab study, jobId, jobDependsOn, jobDescription, jobTags, jobScheduledStartTime, jobPriority, jobDryRun, body\link{*} \cr + runQc \tab /{apiVersion}/analysis/alignment/qc/run \tab study, jobId, jobDependsOn, jobDescription, jobTags, jobScheduledStartTime, jobPriority, jobDryRun, body\link{*} \cr query \tab /{apiVersion}/analysis/alignment/query \tab limit, skip, count, file\link{*}, study, region, gene, offset, onlyExons, minMappingQuality, maxNumMismatches, maxNumHits, properlyPaired, maxInsertSize, skipUnmapped, skipDuplicated, regionContained, forceMDField, binQualities, splitResults \cr - runSamtools \tab /{apiVersion}/analysis/alignment/samtools/run \tab study, jobId, jobDependsOn, jobDescription, jobTags, body\link{*} \cr + runSamtools \tab /{apiVersion}/analysis/alignment/samtools/run \tab study, jobId, jobDependsOn, jobDescription, jobTags, jobScheduledStartTime, jobPriority, jobDryRun, body\link{*} \cr } } \section{Endpoint /{apiVersion}/analysis/alignment/bwa/run}{ @@ -140,7 
+146,7 @@ Picard is a set of command line tools (in Java) for manipulating high-throughput \section{Endpoint /{apiVersion}/analysis/alignment/qc/run}{ -Compute quality control (QC) metrics for a given alignment file (including samtools stats, samtools flag stats, FastQC and HS metrics). +Compute quality control (QC) metrics for a given alignment file: samtools stats, samtools flag stats and FastQC metrics. } \section{Endpoint /{apiVersion}/analysis/alignment/query}{ diff --git a/opencga-client/src/main/R/man/clinicalClient-OpencgaR-method.Rd b/opencga-client/src/main/R/man/clinicalClient-OpencgaR-method.Rd index 7479b604d6..f2b25ce10a 100644 --- a/opencga-client/src/main/R/man/clinicalClient-OpencgaR-method.Rd +++ b/opencga-client/src/main/R/man/clinicalClient-OpencgaR-method.Rd @@ -6,38 +6,49 @@ \usage{ \S4method{clinicalClient}{OpencgaR}( OpencgaR, - members, - interpretations, - interpretation, + annotationSet, clinicalAnalyses, clinicalAnalysis, + interpretation, + interpretations, + members, endpointName, params = NULL, ... 
) } \arguments{ -\item{members}{Comma separated list of user or group IDs.} - -\item{interpretations}{Interpretation IDs of the Clinical Analysis.} - -\item{interpretation}{Interpretation ID.} +\item{annotationSet}{AnnotationSet ID to be updated.} \item{clinicalAnalyses}{Comma separated list of clinical analysis IDs.} \item{clinicalAnalysis}{Clinical analysis ID.} -\item{action}{Action to be performed \link{ADD, SET, REMOVE or RESET}.} +\item{interpretation}{Interpretation ID.} + +\item{interpretations}{Interpretation IDs of the Clinical Analysis.} + +\item{members}{Comma separated list of user or group IDs.} \item{propagate}{Propagate permissions to related families, individuals, samples and files.} +\item{variableSetId}{Variable set ID or name.} + +\item{path}{Path where the TSV file is located in OpenCGA or where it should be located.} + +\item{parents}{Flag indicating whether to create parent directories if they don't exist (only when TSV file was not previously associated).} + +\item{annotationSetId}{Annotation set id. If not provided, variableSetId will be used.} + \item{skipCreateDefaultInterpretation}{Flag to skip creating and initialise an empty default primary interpretation (Id will be '{clinicalAnalysisId}.1'). This flag is only considered if no Interpretation object is passed.} \item{sort}{Sort the results.} +\item{name}{Comma separated list of Interpretation names up to a maximum of 100.} + \item{clinicalAnalysisId}{Clinical Analysis id.} -\item{methodName}{Interpretation method name.} +\item{methodName}{Interpretation method name. Also admits basic regular expressions using the operator '~', i.e. '~{perl-regex}' e.g. 
'~value' for case sensitive, '~/value/i' for case insensitive search.} \item{primaryFindings}{Interpretation primary findings.} @@ -53,6 +64,12 @@ \item{jobTags}{Job tags.} +\item{jobScheduledStartTime}{Time when the job is scheduled to start.} + +\item{jobPriority}{Priority of the job.} + +\item{jobDryRun}{Flag indicating that the job will be executed in dry-run mode. In this mode, OpenCGA will validate that all parameters and prerequisites are correctly set for successful execution, but the job will not actually run.} + \item{auxiliarIndex}{Index auxiliar collection to improve performance assuming RGA is completely indexed.} \item{includeIndividual}{Include only the comma separated list of individuals to the response.} @@ -97,7 +114,7 @@ \item{uuid}{Comma separated list of Clinical Analysis UUIDs up to a maximum of 100.} -\item{disorder}{Clinical Analysis disorder.} +\item{disorder}{Clinical Analysis disorder. Also admits basic regular expressions using the operator '~', i.e. '~{perl-regex}' e.g. '~value' for case sensitive, '~/value/i' for case insensitive search.} \item{files}{Clinical Analysis files.} @@ -123,14 +140,20 @@ \item{modificationDate}{Clinical Analysis Modification date. Format: yyyyMMddHHmmss. Examples: >2018, 2017-2018, <201805.} +\item{dueDate}{Clinical Analysis due date. Format: yyyyMMddHHmmss. Examples: >2018, 2017-2018, <201805.} + \item{qualityControlSummary}{Clinical Analysis quality control summary.} \item{release}{Release when it was created.} +\item{snapshot}{Snapshot value (Latest version of the entry in the specified release).} + \item{status}{Filter by status.} \item{internalStatus}{Filter by internal status.} +\item{annotation}{Annotation filters. Example: age>30;gender=FEMALE. 
For more information, please visit http://docs.opencb.org/display/opencga/AnnotationSets+1.4.0.} + \item{limit}{Number of results to be returned.} \item{skip}{Number of results to skip.} @@ -143,7 +166,9 @@ \item{savedFilter}{Use a saved filter at User level.} -\item{id}{List of IDs, these can be rs IDs (dbSNP) or variants in the format chrom:start:ref:alt, e.g. rs116600158,19:7177679:C:T.} +\item{includeInterpretation}{Interpretation ID to include the fields related to this interpretation.} + +\item{id}{List of variant IDs in the format chrom:start:ref:alt, e.g. 19:7177679:C:T.} \item{region}{List of regions, these can be just a single chromosome name or regions in the format chr:start-end, e.g.: 2,3:100000-200000.} @@ -157,7 +182,7 @@ \item{fileData}{Filter by file data (i.e. FILTER, QUAL and INFO columns from VCF file). [{file}:]{key}{op}{value}\link{,;}* . If no file is specified, will use all files from "file" filter. e.g. AN>200 or file_1.vcf:AN>200;file_2.vcf:AN<10 . Many fields can be combined. e.g. file_1.vcf:AN>200;DB=true;file_2.vcf:AN<10,FILTER=PASS,LowDP.} -\item{sample}{Filter variants by sample genotype. This will automatically set 'includeSample' parameter when not provided. This filter accepts multiple 3 forms: 1) List of samples: Samples that contain the main variant. Accepts AND (;) and OR (,) operators. e.g. HG0097,HG0098 . 2) List of samples with genotypes: {sample}:{gt1},{gt2}. Accepts AND (;) and OR (,) operators. e.g. HG0097:0/0;HG0098:0/1,1/1 . Unphased genotypes (e.g. 0/1, 1/1) will also include phased genotypes (e.g. 0|1, 1|0, 1|1), but not vice versa. When filtering by multi-allelic genotypes, any secondary allele will match, regardless of its position e.g. 1/2 will match with genotypes 1/2, 1/3, 1/4, .... Genotype aliases accepted: HOM_REF, HOM_ALT, HET, HET_REF, HET_ALT, HET_MISS and MISS e.g. HG0097:HOM_REF;HG0098:HET_REF,HOM_ALT . 3) Sample with segregation mode: {sample}:{segregation}. 
Only one sample accepted.Accepted segregation modes: \link{ autosomalDominant, autosomalRecessive, XLinkedDominant, XLinkedRecessive, YLinked, mitochondrial, deNovo, mendelianError, compoundHeterozygous }. Value is case insensitive. e.g. HG0097:DeNovo Sample must have parents defined and indexed. .} +\item{sample}{Filter variants by sample genotype. This will automatically set 'includeSample' parameter when not provided. This filter accepts multiple 3 forms: 1) List of samples: Samples that contain the main variant. Accepts AND (;) and OR (,) operators. e.g. HG0097,HG0098 . 2) List of samples with genotypes: {sample}:{gt1},{gt2}. Accepts AND (;) and OR (,) operators. e.g. HG0097:0/0;HG0098:0/1,1/1 . Unphased genotypes (e.g. 0/1, 1/1) will also include phased genotypes (e.g. 0|1, 1|0, 1|1), but not vice versa. When filtering by multi-allelic genotypes, any secondary allele will match, regardless of its position e.g. 1/2 will match with genotypes 1/2, 1/3, 1/4, .... Genotype aliases accepted: HOM_REF, HOM_ALT, HET, HET_REF, HET_ALT, HET_MISS and MISS e.g. HG0097:HOM_REF;HG0098:HET_REF,HOM_ALT . 3) Sample with segregation mode: {sample}:{segregation}. Only one sample accepted.Accepted segregation modes: \link{ autosomalDominant, autosomalRecessive, XLinkedDominant, XLinkedRecessive, YLinked, mitochondrial, deNovo, deNovoStrict, mendelianError, compoundHeterozygous }. Value is case insensitive. e.g. HG0097:DeNovo Sample must have parents defined and indexed. .} \item{sampleData}{Filter by any SampleData field from samples. [{sample}:]{key}{op}{value}\link{,;}* . If no sample is specified, will use all samples from "sample" or "genotype" filter. e.g. DP>200 or HG0097:DP>200,HG0098:DP<10 . Many FORMAT fields can be combined. e.g. HG0097:DP>200;GT=1/1,0/1,HG0098:DP<10.} @@ -185,7 +210,7 @@ \item{familyDisorder}{Specify the disorder to use for the family segregation.} -\item{familySegregation}{Filter by segregation mode from a given family. 
Accepted values: \link{ autosomalDominant, autosomalRecessive, XLinkedDominant, XLinkedRecessive, YLinked, mitochondrial, deNovo, mendelianError, compoundHeterozygous }.} +\item{familySegregation}{Filter by segregation mode from a given family. Accepted values: \link{ autosomalDominant, autosomalRecessive, XLinkedDominant, XLinkedRecessive, YLinked, mitochondrial, deNovo, deNovoStrict, mendelianError, compoundHeterozygous }.} \item{familyMembers}{Sub set of the members of a given family.} @@ -195,7 +220,7 @@ \item{ct}{List of SO consequence types, e.g. missense_variant,stop_lost or SO:0001583,SO:0001578. Accepts aliases 'loss_of_function' and 'protein_altering'.} -\item{xref}{List of any external reference, these can be genes, proteins or variants. Accepted IDs include HGNC, Ensembl genes, dbSNP, ClinVar, HPO, Cosmic, ...} +\item{xref}{List of any external reference, these can be genes, proteins or variants. Accepted IDs include HGNC, Ensembl genes, dbSNP, ClinVar, HPO, Cosmic, HGVS ...} \item{biotype}{List of biotypes, e.g. protein_coding.} @@ -203,11 +228,11 @@ \item{conservation}{Filter by conservation score: {conservation_score}[<|>|<=|>=]{number} e.g. phastCons>0.5,phylop<0.1,gerp>0.1.} -\item{populationFrequencyAlt}{Alternate Population Frequency: {study}:{population}[<|>|<=|>=]{number}. e.g. 1kG_phase3:ALL<0.01.} +\item{populationFrequencyAlt}{Alternate Population Frequency: {study}:{population}[<|>|<=|>=]{number}. e.g. 1000G:ALL<0.01.} -\item{populationFrequencyRef}{Reference Population Frequency: {study}:{population}[<|>|<=|>=]{number}. e.g. 1kG_phase3:ALL<0.01.} +\item{populationFrequencyRef}{Reference Population Frequency: {study}:{population}[<|>|<=|>=]{number}. e.g. 1000G:ALL<0.01.} -\item{populationFrequencyMaf}{Population minor allele frequency: {study}:{population}[<|>|<=|>=]{number}. e.g. 1kG_phase3:ALL<0.01.} +\item{populationFrequencyMaf}{Population minor allele frequency: {study}:{population}[<|>|<=|>=]{number}. e.g. 
1000G:ALL<0.01.} \item{transcriptFlag}{List of transcript flags. e.g. canonical, CCDS, basic, LRG, MANE Select, MANE Plus Clinical, EGLH_HaemOnc, TSO500.} @@ -251,9 +276,15 @@ \item{force}{Force deletion if the ClinicalAnalysis contains interpretations or is locked.} -\item{flagsAction}{Action to be performed if the array of flags is being updated. Allowed values: \link{'ADD', 'SET', 'REMOVE'}} +\item{flagsAction}{Action to be performed if the array of flags is being updated. Allowed values: \link{'ADD SET REMOVE'}} -\item{filesAction}{Action to be performed if the array of files is being updated. Allowed values: \link{'ADD', 'SET', 'REMOVE'}} +\item{analystsAction}{Action to be performed if the array of analysts is being updated. Allowed values: \link{'ADD SET REMOVE'}} + +\item{annotationSetsAction}{Action to be performed if the array of annotationSets is being updated. Allowed values: \link{'ADD SET REMOVE'}} + +\item{action}{Action to be performed: ADD to add new annotations; REPLACE to replace the value of an already existing annotation; SET to set the new list of annotations removing any possible old annotations; REMOVE to remove some annotations; RESET to set some annotations to the default value configured in the corresponding variables of the VariableSet if any. Allowed values: \link{'ADD SET REMOVE RESET REPLACE'}} + +\item{flattenAnnotations}{Flatten the annotations?.} \item{deleted}{Boolean to retrieve deleted entries.} @@ -261,63 +292,71 @@ \item{version}{Version to revert to.} -\item{include}{Fields included in the response, whole JSON path must be provided.} +\item{primaryFindingsAction}{Action to be performed if the array of primary findings is being updated. Allowed values: \link{'ADD SET REMOVE REPLACE'}} -\item{exclude}{Fields excluded in the response, whole JSON path must be provided.} +\item{methodsAction}{Action to be performed if the array of methods is being updated. 
Allowed values: \link{'ADD SET REMOVE'}} + +\item{secondaryFindingsAction}{Action to be performed if the array of secondary findings is being updated. Allowed values: \link{'ADD SET REMOVE REPLACE'}} + +\item{panelsAction}{Action to be performed if the array of panels is being updated. Allowed values: \link{'ADD SET REMOVE'}} -\item{study}{[\link{user@}project:]study ID.} +\item{setAs}{Set interpretation as. Allowed values: \link{'PRIMARY SECONDARY'}} -\item{primaryFindingsAction}{Action to be performed if the array of primary findings is being updated. Allowed values: \link{'ADD', 'SET', 'REMOVE', 'REPLACE'}} +\item{include}{Fields included in the response, whole JSON path must be provided.} -\item{methodsAction}{Action to be performed if the array of methods is being updated. Allowed values: \link{'ADD', 'SET', 'REMOVE'}} +\item{exclude}{Fields excluded in the response, whole JSON path must be provided.} -\item{secondaryFindingsAction}{Action to be performed if the array of secondary findings is being updated. Allowed values: \link{'ADD', 'SET', 'REMOVE', 'REPLACE'}} +\item{study}{Study [\link{organization@}project:]study where study and project can be either the ID or UUID.} -\item{commentsAction}{Action to be performed if the array of comments is being updated. To REMOVE or REPLACE, the date will need to be provided to identify the comment. Allowed values: \link{'ADD', 'REMOVE', 'REPLACE'}} +\item{commentsAction}{Action to be performed if the array of comments is being updated. Allowed values: \link{'ADD REMOVE REPLACE'}} -\item{panelsAction}{Action to be performed if the array of panels is being updated. Allowed values: \link{'ADD', 'SET', 'REMOVE'}} +\item{supportingEvidencesAction}{Action to be performed if the array of supporting evidences is being updated. Allowed values: \link{'ADD SET REMOVE'}} -\item{setAs}{Set interpretation as. 
Allowed values: \link{'PRIMARY', 'SECONDARY'}} +\item{filesAction}{Action to be performed if the array of files is being updated. Allowed values: \link{'ADD SET REMOVE'}} \item{includeResult}{Flag indicating to include the created or updated document result in the response.} -\item{data}{JSON containing clinical interpretation information.} +\item{data}{JSON containing clinical report information.} } \description{ This function implements the OpenCGA calls for managing Analysis - Clinical. The following table summarises the available \emph{actions} for this client:\tabular{llr}{ endpointName \tab Endpoint WS \tab parameters accepted \cr updateAcl \tab /{apiVersion}/analysis/clinical/acl/{members}/update \tab study, members\link{*}, action\link{*}, propagate, body\link{*} \cr + loadAnnotationSets \tab /{apiVersion}/analysis/clinical/annotationSets/load \tab study, variableSetId\link{*}, path\link{*}, parents, annotationSetId, body \cr updateClinicalConfiguration \tab /{apiVersion}/analysis/clinical/clinical/configuration/update \tab study, body \cr create \tab /{apiVersion}/analysis/clinical/create \tab include, exclude, study, skipCreateDefaultInterpretation, includeResult, body\link{*} \cr - distinct \tab /{apiVersion}/analysis/clinical/distinct \tab study, id, uuid, type, disorder, files, sample, individual, proband, probandSamples, family, familyMembers, familyMemberSamples, panels, locked, analystId, priority, flags, creationDate, modificationDate, qualityControlSummary, release, status, internalStatus, deleted, field\link{*} \cr - distinctInterpretation \tab /{apiVersion}/analysis/clinical/interpretation/distinct \tab study, id, uuid, clinicalAnalysisId, analystId, methodName, panels, primaryFindings, secondaryFindings, creationDate, modificationDate, status, internalStatus, release, field\link{*} \cr - searchInterpretation \tab /{apiVersion}/analysis/clinical/interpretation/search \tab include, exclude, limit, skip, sort, study, id, uuid, clinicalAnalysisId, 
analystId, methodName, panels, primaryFindings, secondaryFindings, creationDate, modificationDate, status, internalStatus, release \cr + distinct \tab /{apiVersion}/analysis/clinical/distinct \tab study, id, uuid, type, disorder, files, sample, individual, proband, probandSamples, family, familyMembers, familyMemberSamples, panels, locked, analystId, priority, flags, creationDate, modificationDate, dueDate, qualityControlSummary, release, snapshot, status, internalStatus, annotation, deleted, field\link{*} \cr + distinctInterpretation \tab /{apiVersion}/analysis/clinical/interpretation/distinct \tab study, id, uuid, name, clinicalAnalysisId, analystId, methodName, panels, primaryFindings, secondaryFindings, creationDate, modificationDate, status, internalStatus, release, field\link{*} \cr + searchInterpretation \tab /{apiVersion}/analysis/clinical/interpretation/search \tab include, exclude, limit, skip, sort, study, id, uuid, name, clinicalAnalysisId, analystId, methodName, panels, primaryFindings, secondaryFindings, creationDate, modificationDate, status, internalStatus, release \cr infoInterpretation \tab /{apiVersion}/analysis/clinical/interpretation/{interpretations}/info \tab include, exclude, interpretations\link{*}, study, version, deleted \cr - runInterpreterCancerTiering \tab /{apiVersion}/analysis/clinical/interpreter/cancerTiering/run \tab study, jobId, jobDescription, jobDependsOn, jobTags, body\link{*} \cr - runInterpreterTeam \tab /{apiVersion}/analysis/clinical/interpreter/team/run \tab study, jobId, jobDescription, jobDependsOn, jobTags, body\link{*} \cr - runInterpreterTiering \tab /{apiVersion}/analysis/clinical/interpreter/tiering/run \tab study, jobId, jobDescription, jobDependsOn, jobTags, body\link{*} \cr - runInterpreterZetta \tab /{apiVersion}/analysis/clinical/interpreter/zetta/run \tab study, jobId, jobDescription, jobDependsOn, jobTags, body\link{*} \cr + runInterpreterCancerTiering \tab 
/{apiVersion}/analysis/clinical/interpreter/cancerTiering/run \tab study, jobId, jobDescription, jobDependsOn, jobTags, jobScheduledStartTime, jobPriority, jobDryRun, body\link{*} \cr + runInterpreterExomiser \tab /{apiVersion}/analysis/clinical/interpreter/exomiser/run \tab study, jobId, jobDescription, jobDependsOn, jobTags, jobScheduledStartTime, jobPriority, jobDryRun, body\link{*} \cr + runInterpreterTeam \tab /{apiVersion}/analysis/clinical/interpreter/team/run \tab study, jobId, jobDescription, jobDependsOn, jobTags, jobScheduledStartTime, jobPriority, jobDryRun, body\link{*} \cr + runInterpreterTiering \tab /{apiVersion}/analysis/clinical/interpreter/tiering/run \tab study, jobId, jobDescription, jobDependsOn, jobTags, jobScheduledStartTime, jobPriority, jobDryRun, body\link{*} \cr + runInterpreterZetta \tab /{apiVersion}/analysis/clinical/interpreter/zetta/run \tab study, jobId, jobDescription, jobDependsOn, jobTags, jobScheduledStartTime, jobPriority, jobDryRun, body\link{*} \cr + load \tab /{apiVersion}/analysis/clinical/load \tab study, jobId, jobDescription, jobDependsOn, jobTags, jobScheduledStartTime, jobPriority, jobDryRun, body\link{*} \cr aggregationStatsRga \tab /{apiVersion}/analysis/clinical/rga/aggregationStats \tab limit, skip, sampleId, individualId, sex, phenotypes, disorders, numParents, geneId, geneName, chromosome, start, end, transcriptId, variants, dbSnps, knockoutType, filter, type, clinicalSignificance, populationFrequency, consequenceType, study, field\link{*} \cr queryRgaGene \tab /{apiVersion}/analysis/clinical/rga/gene/query \tab include, exclude, limit, skip, count, includeIndividual, skipIndividual, limitIndividual, sampleId, individualId, sex, phenotypes, disorders, numParents, geneId, geneName, chromosome, start, end, transcriptId, variants, dbSnps, knockoutType, filter, type, clinicalSignificance, populationFrequency, consequenceType, study \cr summaryRgaGene \tab /{apiVersion}/analysis/clinical/rga/gene/summary \tab limit, 
skip, count, sampleId, individualId, sex, phenotypes, disorders, numParents, geneId, geneName, chromosome, start, end, transcriptId, variants, dbSnps, knockoutType, filter, type, clinicalSignificance, populationFrequency, consequenceType, study \cr - runRgaIndex \tab /{apiVersion}/analysis/clinical/rga/index/run \tab study, jobId, jobDescription, jobDependsOn, jobTags, auxiliarIndex, body\link{*} \cr + runRgaIndex \tab /{apiVersion}/analysis/clinical/rga/index/run \tab study, jobId, jobDescription, jobDependsOn, jobTags, jobScheduledStartTime, jobPriority, jobDryRun, auxiliarIndex, body\link{*} \cr queryRgaIndividual \tab /{apiVersion}/analysis/clinical/rga/individual/query \tab include, exclude, limit, skip, count, sampleId, individualId, sex, phenotypes, disorders, numParents, geneId, geneName, chromosome, start, end, transcriptId, variants, dbSnps, knockoutType, filter, type, clinicalSignificance, populationFrequency, consequenceType, study \cr summaryRgaIndividual \tab /{apiVersion}/analysis/clinical/rga/individual/summary \tab limit, skip, count, sampleId, individualId, sex, phenotypes, disorders, numParents, geneId, geneName, chromosome, start, end, transcriptId, variants, dbSnps, knockoutType, filter, type, clinicalSignificance, populationFrequency, consequenceType, study \cr queryRgaVariant \tab /{apiVersion}/analysis/clinical/rga/variant/query \tab include, exclude, limit, skip, count, includeIndividual, skipIndividual, limitIndividual, sampleId, individualId, sex, phenotypes, disorders, numParents, geneId, geneName, chromosome, start, end, transcriptId, variants, dbSnps, knockoutType, filter, type, clinicalSignificance, populationFrequency, consequenceType, study \cr summaryRgaVariant \tab /{apiVersion}/analysis/clinical/rga/variant/summary \tab limit, skip, count, sampleId, individualId, sex, phenotypes, disorders, numParents, geneId, geneName, chromosome, start, end, transcriptId, variants, dbSnps, knockoutType, filter, type, clinicalSignificance, 
populationFrequency, consequenceType, study \cr - search \tab /{apiVersion}/analysis/clinical/search \tab include, exclude, limit, skip, count, study, id, uuid, type, disorder, files, sample, individual, proband, probandSamples, family, familyMembers, familyMemberSamples, panels, locked, analystId, priority, flags, creationDate, modificationDate, qualityControlSummary, release, status, internalStatus, deleted \cr - actionableVariant \tab /{apiVersion}/analysis/clinical/variant/actionable \tab study, sample \cr - queryVariant \tab /{apiVersion}/analysis/clinical/variant/query \tab include, exclude, limit, skip, count, approximateCount, approximateCountSamplingSize, savedFilter, id, region, type, study, file, filter, qual, fileData, sample, sampleData, sampleAnnotation, cohort, cohortStatsRef, cohortStatsAlt, cohortStatsMaf, cohortStatsMgf, cohortStatsPass, missingAlleles, missingGenotypes, score, family, familyDisorder, familySegregation, familyMembers, familyProband, gene, ct, xref, biotype, proteinSubstitution, conservation, populationFrequencyAlt, populationFrequencyRef, populationFrequencyMaf, transcriptFlag, geneTraitId, go, expression, proteinKeyword, drug, functionalScore, clinical, clinicalSignificance, clinicalConfirmedStatus, customAnnotation, panel, panelModeOfInheritance, panelConfidence, panelRoleInCancer, panelFeatureType, panelIntersection, trait \cr + search \tab /{apiVersion}/analysis/clinical/search \tab include, exclude, limit, skip, count, flattenAnnotations, study, id, uuid, type, disorder, files, sample, individual, proband, probandSamples, family, familyMembers, familyMemberSamples, panels, locked, analystId, priority, flags, creationDate, modificationDate, dueDate, qualityControlSummary, release, snapshot, status, internalStatus, annotation, deleted \cr + queryVariant \tab /{apiVersion}/analysis/clinical/variant/query \tab include, exclude, limit, skip, count, approximateCount, approximateCountSamplingSize, savedFilter, includeInterpretation, 
id, region, type, study, file, filter, qual, fileData, sample, sampleData, sampleAnnotation, cohort, cohortStatsRef, cohortStatsAlt, cohortStatsMaf, cohortStatsMgf, cohortStatsPass, missingAlleles, missingGenotypes, score, family, familyDisorder, familySegregation, familyMembers, familyProband, gene, ct, xref, biotype, proteinSubstitution, conservation, populationFrequencyAlt, populationFrequencyRef, populationFrequencyMaf, transcriptFlag, geneTraitId, go, expression, proteinKeyword, drug, functionalScore, clinical, clinicalSignificance, clinicalConfirmedStatus, customAnnotation, panel, panelModeOfInheritance, panelConfidence, panelRoleInCancer, panelFeatureType, panelIntersection, trait \cr acl \tab /{apiVersion}/analysis/clinical/{clinicalAnalyses}/acl \tab clinicalAnalyses\link{*}, study, member, silent \cr delete \tab /{apiVersion}/analysis/clinical/{clinicalAnalyses}/delete \tab study, force, clinicalAnalyses\link{*} \cr - update \tab /{apiVersion}/analysis/clinical/{clinicalAnalyses}/update \tab include, exclude, clinicalAnalyses\link{*}, study, commentsAction, flagsAction, filesAction, panelsAction, includeResult, body\link{*} \cr - info \tab /{apiVersion}/analysis/clinical/{clinicalAnalysis}/info \tab include, exclude, clinicalAnalysis\link{*}, study, deleted \cr + update \tab /{apiVersion}/analysis/clinical/{clinicalAnalyses}/update \tab include, exclude, clinicalAnalyses\link{*}, study, commentsAction, flagsAction, analystsAction, filesAction, panelsAction, annotationSetsAction, includeResult, body\link{*} \cr + updateAnnotationSetsAnnotations \tab /{apiVersion}/analysis/clinical/{clinicalAnalysis}/annotationSets/{annotationSet}/annotations/update \tab clinicalAnalysis\link{*}, study, annotationSet\link{*}, action, body \cr + info \tab /{apiVersion}/analysis/clinical/{clinicalAnalysis}/info \tab include, exclude, flattenAnnotations, clinicalAnalysis\link{*}, study, version, deleted \cr createInterpretation \tab 
/{apiVersion}/analysis/clinical/{clinicalAnalysis}/interpretation/create \tab include, exclude, clinicalAnalysis\link{*}, study, setAs, includeResult, body\link{*} \cr clearInterpretation \tab /{apiVersion}/analysis/clinical/{clinicalAnalysis}/interpretation/{interpretations}/clear \tab study, interpretations\link{*}, clinicalAnalysis\link{*} \cr deleteInterpretation \tab /{apiVersion}/analysis/clinical/{clinicalAnalysis}/interpretation/{interpretations}/delete \tab study, clinicalAnalysis\link{*}, interpretations\link{*}, setAsPrimary \cr revertInterpretation \tab /{apiVersion}/analysis/clinical/{clinicalAnalysis}/interpretation/{interpretation}/revert \tab study, clinicalAnalysis\link{*}, interpretation\link{*}, version\link{*} \cr updateInterpretation \tab /{apiVersion}/analysis/clinical/{clinicalAnalysis}/interpretation/{interpretation}/update \tab include, exclude, study, primaryFindingsAction, methodsAction, secondaryFindingsAction, commentsAction, panelsAction, setAs, clinicalAnalysis\link{*}, interpretation\link{*}, includeResult, body\link{*} \cr + updateReport \tab /{apiVersion}/analysis/clinical/{clinicalAnalysis}/report/update \tab include, exclude, clinicalAnalysis\link{*}, study, commentsAction, supportingEvidencesAction, filesAction, includeResult, body\link{*} \cr } } \section{Endpoint /{apiVersion}/analysis/clinical/acl/{members}/update}{ @@ -325,6 +364,11 @@ The following table summarises the available \emph{actions} for this client:\tab Update the set of permissions granted for the member. } +\section{Endpoint /{apiVersion}/analysis/clinical/annotationSets/load}{ + +Load annotation sets from a TSV file. +} + \section{Endpoint /{apiVersion}/analysis/clinical/clinical/configuration/update}{ Update Clinical Analysis configuration. @@ -360,6 +404,11 @@ Clinical interpretation information. Run cancer tiering interpretation analysis. } +\section{Endpoint /{apiVersion}/analysis/clinical/interpreter/exomiser/run}{ + +Run exomiser interpretation analysis. 
+} + \section{Endpoint /{apiVersion}/analysis/clinical/interpreter/team/run}{ Run TEAM interpretation analysis. @@ -375,6 +424,11 @@ Run tiering interpretation analysis. Run Zetta interpretation analysis. } +\section{Endpoint /{apiVersion}/analysis/clinical/load}{ + +Load clinical analyses from a file. +} + \section{Endpoint /{apiVersion}/analysis/clinical/rga/aggregationStats}{ RGA aggregation stats. @@ -420,11 +474,6 @@ RGA variant summary stats. Clinical analysis search. } -\section{Endpoint /{apiVersion}/analysis/clinical/variant/actionable}{ - -Fetch actionable clinical variants. -} - \section{Endpoint /{apiVersion}/analysis/clinical/variant/query}{ Fetch clinical variants. @@ -445,6 +494,11 @@ Delete clinical analyses. Update clinical analysis attributes. } +\section{Endpoint /{apiVersion}/analysis/clinical/{clinicalAnalysis}/annotationSets/{annotationSet}/annotations/update}{ + +Update annotations from an annotationSet. +} + \section{Endpoint /{apiVersion}/analysis/clinical/{clinicalAnalysis}/info}{ Clinical analysis info. @@ -475,6 +529,11 @@ Revert to a previous interpretation version. Update interpretation fields. } +\section{Endpoint /{apiVersion}/analysis/clinical/{clinicalAnalysis}/report/update}{ + +Update clinical analysis report. +} + \seealso{ \url{http://docs.opencb.org/display/opencga/Using+OpenCGA} and the RESTful API documentation \url{http://bioinfo.hpc.cam.ac.uk/opencga-prod/webservices/} diff --git a/opencga-client/src/main/R/man/cohortClient-OpencgaR-method.Rd b/opencga-client/src/main/R/man/cohortClient-OpencgaR-method.Rd index 2ef531e788..dbcd842f02 100644 --- a/opencga-client/src/main/R/man/cohortClient-OpencgaR-method.Rd +++ b/opencga-client/src/main/R/man/cohortClient-OpencgaR-method.Rd @@ -6,33 +6,23 @@ \usage{ \S4method{cohortClient}{OpencgaR}( OpencgaR, - members, annotationSet, - cohorts, cohort, + cohorts, + members, endpointName, params = NULL, ... 
) } \arguments{ -\item{members}{Comma separated list of user or group ids.} - \item{annotationSet}{AnnotationSet ID to be updated.} -\item{cohorts}{Comma separated list of cohort ids.} - \item{cohort}{Cohort ID.} -\item{creationYear}{Creation year.} - -\item{creationMonth}{Creation month (JANUARY, FEBRUARY...).} - -\item{creationDay}{Creation day.} - -\item{creationDayOfWeek}{Creation day of week (MONDAY, TUESDAY...).} +\item{cohorts}{Comma separated list of cohort ids.} -\item{default}{Calculate default stats.} +\item{members}{Comma separated list of user or group ids.} \item{variableSetId}{Variable set ID or name.} @@ -46,7 +36,7 @@ \item{variable}{Deprecated: Use /generate web service and filter by annotation.} -\item{field}{Field for which to obtain the distinct values.} +\item{field}{Comma separated list of fields for which to obtain the distinct values.} \item{somatic}{Somatic sample.} @@ -54,7 +44,7 @@ \item{fileIds}{Comma separated list of file IDs, paths or UUIDs.} -\item{phenotypes}{Comma separated list of phenotype ids or names.} +\item{phenotypes}{Comma separated list of phenotype ids or names. Also admits basic regular expressions using the operator '~', i.e. '~{perl-regex}' e.g. '~value' for case sensitive, '~/value/i' for case insensitive search.} \item{snapshot}{Snapshot value (Latest version of the entry in the specified release).} @@ -64,9 +54,9 @@ \item{count}{Get the total number of results matching the query. Deactivated by default.} -\item{id}{Comma separated list of cohort IDs up to a maximum of 100.} +\item{id}{Comma separated list of cohort IDs up to a maximum of 100. Also admits basic regular expressions using the operator '~', i.e. '~{perl-regex}' e.g. '~value' for case sensitive, '~/value/i' for case insensitive search.} -\item{name}{Comma separated list of cohort names up to a maximum of 100.} +\item{name}{Comma separated list of cohort names up to a maximum of 100. Also admits basic regular expressions using the operator '~', i.e. 
'~{perl-regex}' e.g. '~value' for case sensitive, '~/value/i' for case insensitive search.} \item{uuid}{Comma separated list of cohort IDs up to a maximum of 100.} @@ -102,15 +92,15 @@ \item{exclude}{Fields excluded in the response, whole JSON path must be provided.} -\item{samplesAction}{Action to be performed if the array of samples is being updated. Allowed values: \link{'ADD', 'SET', 'REMOVE'}} +\item{samplesAction}{Action to be performed if the array of samples is being updated. Allowed values: \link{'ADD SET REMOVE'}} -\item{annotationSetsAction}{Action to be performed if the array of annotationSets is being updated. Allowed values: \link{'ADD', 'SET', 'REMOVE'}} +\item{annotationSetsAction}{Action to be performed if the array of annotationSets is being updated. Allowed values: \link{'ADD SET REMOVE'}} \item{includeResult}{Flag indicating to include the created or updated document result in the response.} -\item{study}{Study [\link{user@}project:]study where study and project can be either the ID or UUID.} +\item{study}{Study [\link{organization@}project:]study where study and project can be either the ID or UUID.} -\item{action}{Action to be performed: ADD to add new annotations; REPLACE to replace the value of an already existing annotation; SET to set the new list of annotations removing any possible old annotations; REMOVE to remove some annotations; RESET to set some annotations to the default value configured in the corresponding variables of the VariableSet if any. Allowed values: \link{'ADD', 'SET', 'REMOVE', 'RESET', 'REPLACE'}} +\item{action}{Action to be performed: ADD to add new annotations; REPLACE to replace the value of an already existing annotation; SET to set the new list of annotations removing any possible old annotations; REMOVE to remove some annotations; RESET to set some annotations to the default value configured in the corresponding variables of the VariableSet if any. 
Allowed values: \link{'ADD SET REMOVE RESET REPLACE'}} \item{data}{Json containing the map of annotations when the action is ADD, SET or REPLACE, a json with only the key 'remove' containing the comma separated variables to be removed as a value when the action is REMOVE or a json with only the key 'reset' containing the comma separated variables that will be set to the default value when the action is RESET.} } @@ -119,7 +109,6 @@ This function implements the OpenCGA calls for managing Cohorts. The following table summarises the available \emph{actions} for this client:\tabular{llr}{ endpointName \tab Endpoint WS \tab parameters accepted \cr updateAcl \tab /{apiVersion}/cohorts/acl/{members}/update \tab study, members\link{*}, action\link{*}, body\link{*} \cr - aggregationStats \tab /{apiVersion}/cohorts/aggregationStats \tab study, type, creationYear, creationMonth, creationDay, creationDayOfWeek, numSamples, status, release, annotation, default, field \cr loadAnnotationSets \tab /{apiVersion}/cohorts/annotationSets/load \tab study, variableSetId\link{*}, path\link{*}, parents, annotationSetId, body \cr create \tab /{apiVersion}/cohorts/create \tab include, exclude, study, variableSet, variable, includeResult, body\link{*} \cr distinct \tab /{apiVersion}/cohorts/distinct \tab study, id, name, uuid, type, creationDate, modificationDate, deleted, status, internalStatus, annotation, acl, samples, numSamples, release, field\link{*} \cr @@ -137,11 +126,6 @@ The following table summarises the available \emph{actions} for this client:\tab Update the set of permissions granted for the member. } -\section{Endpoint /{apiVersion}/cohorts/aggregationStats}{ - -Fetch catalog cohort stats. -} - \section{Endpoint /{apiVersion}/cohorts/annotationSets/load}{ Load annotation sets from a TSV file. 
diff --git a/opencga-client/src/main/R/man/familyClient-OpencgaR-method.Rd b/opencga-client/src/main/R/man/familyClient-OpencgaR-method.Rd index 7abe05ded3..9f2f796bc8 100644 --- a/opencga-client/src/main/R/man/familyClient-OpencgaR-method.Rd +++ b/opencga-client/src/main/R/man/familyClient-OpencgaR-method.Rd @@ -6,37 +6,25 @@ \usage{ \S4method{familyClient}{OpencgaR}( OpencgaR, - members, annotationSet, families, family, + members, endpointName, params = NULL, ... ) } \arguments{ -\item{members}{Comma separated list of family members.} - \item{annotationSet}{AnnotationSet ID to be updated.} \item{families}{Comma separated list of family ids.} \item{family}{Family id.} -\item{propagate}{Propagate family permissions to related individuals and samples.} - -\item{creationYear}{Creation year.} - -\item{creationMonth}{Creation month (JANUARY, FEBRUARY...).} - -\item{creationDay}{Creation day.} - -\item{creationDayOfWeek}{Creation day of week (MONDAY, TUESDAY...).} - -\item{numMembers}{Number of members.} +\item{members}{Comma separated list of family members.} -\item{default}{Calculate default stats.} +\item{propagate}{Propagate family permissions to related individuals and samples. Allowed values: \link{'NO YES YES_AND_VARIANT_VIEW'}} \item{variableSetId}{Variable set ID or name.} @@ -46,7 +34,7 @@ \item{annotationSetId}{Annotation set id. If not provided, variableSetId will be used.} -\item{field}{Field for which to obtain the distinct values.} +\item{field}{Comma separated list of fields for which to obtain the distinct values.} \item{limit}{Number of results to be returned.} @@ -54,9 +42,9 @@ \item{count}{Get the total number of results matching the query. Deactivated by default.} -\item{id}{Comma separated list family IDs up to a maximum of 100.} +\item{id}{Comma separated list family IDs up to a maximum of 100. Also admits basic regular expressions using the operator '~', i.e. '~{perl-regex}' e.g. 
'~value' for case sensitive, '~/value/i' for case insensitive search.} -\item{name}{Comma separated list family names up to a maximum of 100.} +\item{name}{Comma separated list family names up to a maximum of 100. Also admits basic regular expressions using the operator '~', i.e. '~{perl-regex}' e.g. '~value' for case sensitive, '~/value/i' for case insensitive search.} \item{uuid}{Comma separated list family UUIDs up to a maximum of 100.} @@ -64,9 +52,9 @@ \item{samples}{Comma separated list of member's samples.} -\item{phenotypes}{Comma separated list of phenotype ids or names.} +\item{phenotypes}{Comma separated list of phenotype ids or names. Also admits basic regular expressions using the operator '~', i.e. '~{perl-regex}' e.g. '~value' for case sensitive, '~/value/i' for case insensitive search.} -\item{disorders}{Comma separated list of disorder ids or names.} +\item{disorders}{Comma separated list of disorder ids or names. Also admits basic regular expressions using the operator '~', i.e. '~{perl-regex}' e.g. '~value' for case sensitive, '~/value/i' for case insensitive search.} \item{creationDate}{Creation date. Format: yyyyMMddHHmmss. Examples: >2018, 2017-2018, <201805.} @@ -100,15 +88,15 @@ \item{updateRoles}{Update the member roles within the family.} -\item{annotationSetsAction}{Action to be performed if the array of annotationSets is being updated. Allowed values: \link{'ADD', 'SET', 'REMOVE'}} +\item{updatePedigreeGraph}{Update the family pedigree graph.} -\item{includeResult}{Flag indicating to include the created or updated document result in the response.} +\item{annotationSetsAction}{Action to be performed if the array of annotationSets is being updated. 
Allowed values: \link{'ADD SET REMOVE'}} -\item{study}{Study [\link{user@}project:]study where study and project can be either the ID or UUID.} +\item{includeResult}{Flag indicating to include the created or updated document result in the response.} -\item{action}{Action to be performed: ADD to add new annotations; REPLACE to replace the value of an already existing annotation; SET to set the new list of annotations removing any possible old annotations; REMOVE to remove some annotations; RESET to set some annotations to the default value configured in the corresponding variables of the VariableSet if any. Allowed values: \link{'ADD', 'SET', 'REMOVE', 'RESET', 'REPLACE'}} +\item{study}{Study [\link{organization@}project:]study where study and project can be either the ID or UUID.} -\item{incVersion}{Create a new version of family.} +\item{action}{Action to be performed: ADD to add new annotations; REPLACE to replace the value of an already existing annotation; SET to set the new list of annotations removing any possible old annotations; REMOVE to remove some annotations; RESET to set some annotations to the default value configured in the corresponding variables of the VariableSet if any. Allowed values: \link{'ADD SET REMOVE RESET REPLACE'}} \item{data}{Json containing the map of annotations when the action is ADD, SET or REPLACE, a json with only the key 'remove' containing the comma separated variables to be removed as a value when the action is REMOVE or a json with only the key 'reset' containing the comma separated variables that will be set to the default value when the action is RESET.} } @@ -117,7 +105,6 @@ This function implements the OpenCGA calls for managing Families. 
The following table summarises the available \emph{actions} for this client:\tabular{llr}{ endpointName \tab Endpoint WS \tab parameters accepted \cr updateAcl \tab /{apiVersion}/families/acl/{members}/update \tab study, members\link{*}, action\link{*}, propagate, body\link{*} \cr - aggregationStats \tab /{apiVersion}/families/aggregationStats \tab study, creationYear, creationMonth, creationDay, creationDayOfWeek, status, phenotypes, release, version, numMembers, expectedSize, annotation, default, field \cr loadAnnotationSets \tab /{apiVersion}/families/annotationSets/load \tab study, variableSetId\link{*}, path\link{*}, parents, annotationSetId, body \cr create \tab /{apiVersion}/families/create \tab include, exclude, study, members, includeResult, body\link{*} \cr distinct \tab /{apiVersion}/families/distinct \tab study, id, name, uuid, members, expectedSize, samples, phenotypes, disorders, creationDate, modificationDate, deleted, internalStatus, status, annotation, acl, release, snapshot, field\link{*} \cr @@ -125,8 +112,8 @@ The following table summarises the available \emph{actions} for this client:\tab acl \tab /{apiVersion}/families/{families}/acl \tab families\link{*}, study, member, silent \cr delete \tab /{apiVersion}/families/{families}/delete \tab study, families\link{*} \cr info \tab /{apiVersion}/families/{families}/info \tab include, exclude, flattenAnnotations, families\link{*}, study, version, deleted \cr - update \tab /{apiVersion}/families/{families}/update \tab include, exclude, families\link{*}, study, incVersion, updateRoles, annotationSetsAction, includeResult, body \cr - updateAnnotationSetsAnnotations \tab /{apiVersion}/families/{family}/annotationSets/{annotationSet}/annotations/update \tab family\link{*}, study, annotationSet\link{*}, action, incVersion, body \cr + update \tab /{apiVersion}/families/{families}/update \tab include, exclude, families\link{*}, study, updateRoles, updatePedigreeGraph, annotationSetsAction, includeResult, 
body \cr + updateAnnotationSetsAnnotations \tab /{apiVersion}/families/{family}/annotationSets/{annotationSet}/annotations/update \tab family\link{*}, study, annotationSet\link{*}, action, body \cr } } \section{Endpoint /{apiVersion}/families/acl/{members}/update}{ @@ -134,11 +121,6 @@ The following table summarises the available \emph{actions} for this client:\tab Update the set of permissions granted for the member. } -\section{Endpoint /{apiVersion}/families/aggregationStats}{ - -Fetch catalog family stats. -} - \section{Endpoint /{apiVersion}/families/annotationSets/load}{ Load annotation sets from a TSV file. diff --git a/opencga-client/src/main/R/man/fileClient-OpencgaR-method.Rd b/opencga-client/src/main/R/man/fileClient-OpencgaR-method.Rd index b744d1f2b1..ebafdfcd8b 100644 --- a/opencga-client/src/main/R/man/fileClient-OpencgaR-method.Rd +++ b/opencga-client/src/main/R/man/fileClient-OpencgaR-method.Rd @@ -6,50 +6,32 @@ \usage{ \S4method{fileClient}{OpencgaR}( OpencgaR, - folder, - members, - file, annotationSet, + file, files, + folder, + members, endpointName, params = NULL, ... ) } \arguments{ -\item{folder}{Folder id or name. Paths must be separated by : instead of /.} - -\item{members}{Comma separated list of user or group ids.} +\item{annotationSet}{AnnotationSet ID to be updated.} \item{file}{File uuid, id, or name.} -\item{annotationSet}{AnnotationSet ID to be updated.} - \item{files}{Comma separated list of file ids, names or paths. Paths must be separated by : instead of /.} -\item{creationYear}{Creation year.} - -\item{creationMonth}{Creation month (JANUARY, FEBRUARY...).} - -\item{creationDay}{Creation day.} - -\item{creationDayOfWeek}{Creation day of week (MONDAY, TUESDAY...).} - -\item{software}{Software.} - -\item{experiment}{Experiment.} - -\item{numSamples}{Number of samples.} - -\item{numRelatedFiles}{Number of related files.} +\item{folder}{Folder id or name. 
Paths must be separated by : instead of /.} -\item{default}{Calculate default stats.} +\item{members}{Comma separated list of user or group ids.} \item{variableSetId}{Variable set ID or name.} \item{annotationSetId}{Annotation set id. If not provided, variableSetId will be used.} -\item{field}{Field for which to obtain the distinct values.} +\item{field}{Comma separated list of fields for which to obtain the distinct values.} \item{jobDependsOn}{Comma separated list of existing job IDs the job will depend on.} @@ -57,15 +39,21 @@ \item{jobTags}{Job tags.} -\item{id}{Comma separated list of file IDs up to a maximum of 100.} +\item{jobScheduledStartTime}{Time when the job is scheduled to start.} + +\item{jobPriority}{Priority of the job.} + +\item{jobDryRun}{Flag indicating that the job will be executed in dry-run mode. In this mode, OpenCGA will validate that all parameters and prerequisites are correctly set for successful execution, but the job will not actually run.} + +\item{id}{Comma separated list of file IDs up to a maximum of 100. Also admits basic regular expressions using the operator '~', i.e. '~{perl-regex}' e.g. '~value' for case sensitive, '~/value/i' for case insensitive search.} \item{uuid}{Comma separated list file UUIDs up to a maximum of 100.} -\item{name}{Comma separated list of file names.} +\item{name}{Comma separated list of file names. Also admits basic regular expressions using the operator '~', i.e. '~{perl-regex}' e.g. '~value' for case sensitive, '~/value/i' for case insensitive search.} -\item{path}{Comma separated list of paths.} +\item{path}{Comma separated list of paths. Also admits basic regular expressions using the operator '~', i.e. '~{perl-regex}' e.g. '~value' for case sensitive, '~/value/i' for case insensitive search.} -\item{uri}{Comma separated list of uris.} +\item{uri}{Comma separated list of uris. Also admits basic regular expressions using the operator '~', i.e. '~{perl-regex}' e.g. 
'~value' for case sensitive, '~/value/i' for case insensitive search.} \item{type}{File type, either FILE or DIRECTORY.} @@ -87,7 +75,7 @@ \item{modificationDate}{Modification date. Format: yyyyMMddHHmmss. Examples: >2018, 2017-2018, <201805.} -\item{tags}{Tags.} +\item{tags}{Tags. Also admits basic regular expressions using the operator '~', i.e. '~{perl-regex}' e.g. '~value' for case sensitive, '~/value/i' for case insensitive search.} \item{size}{File size.} @@ -101,11 +89,11 @@ \item{release}{Release when it was created.} -\item{filename}{File name to overwrite the input fileName.} +\item{fileName}{File name to overwrite the input fileName.} -\item{fileFormat}{File format.} +\item{fileFormat}{File format. Allowed values: \link{'VCF BCF GVCF TBI BIGWIG SAM BAM BAI CRAM CRAI FASTQ FASTA PED TAB_SEPARATED_VALUES COMMA_SEPARATED_VALUES XML PROTOCOL_BUFFER JSON AVRO PARQUET PDF IMAGE PLAIN BINARY NONE UNKNOWN'}} -\item{bioformat}{File bioformat.} +\item{bioformat}{File bioformat. Allowed values: \link{'MICROARRAY_EXPRESSION_ONECHANNEL_AGILENT MICROARRAY_EXPRESSION_ONECHANNEL_AFFYMETRIX MICROARRAY_EXPRESSION_ONECHANNEL_GENEPIX MICROARRAY_EXPRESSION_TWOCHANNELS_AGILENT MICROARRAY_EXPRESSION_TWOCHANNELS_GENEPIX DATAMATRIX_EXPRESSION IDLIST IDLIST_RANKED ANNOTATION_GENEVSANNOTATION OTHER_NEWICK OTHER_BLAST OTHER_INTERACTION OTHER_GENOTYPE OTHER_PLINK OTHER_VCF OTHER_PED VCF4 VARIANT ALIGNMENT COVERAGE SEQUENCE PEDIGREE REFERENCE_GENOME NONE UNKNOWN'}} \item{checksum}{Expected MD5 file checksum.} @@ -125,17 +113,15 @@ \item{deleted}{Boolean to retrieve deleted files.} -\item{sampleIdsAction}{Action to be performed if the array of samples is being updated. Allowed values: \link{'ADD', 'SET', 'REMOVE'}} +\item{sampleIdsAction}{Action to be performed if the array of samples is being updated. Allowed values: \link{'ADD SET REMOVE'}} -\item{annotationSetsAction}{Action to be performed if the array of annotationSets is being updated. 
Allowed values: \link{'ADD', 'SET', 'REMOVE'}} +\item{annotationSetsAction}{Action to be performed if the array of annotationSets is being updated. Allowed values: \link{'ADD SET REMOVE'}} -\item{relatedFilesAction}{Action to be performed if the array of relatedFiles is being updated. Allowed values: \link{'ADD', 'SET', 'REMOVE'}} +\item{relatedFilesAction}{Action to be performed if the array of relatedFiles is being updated. Allowed values: \link{'ADD SET REMOVE'}} -\item{tagsAction}{Action to be performed if the array of tags is being updated. Allowed values: \link{'ADD', 'SET', 'REMOVE'}} +\item{tagsAction}{Action to be performed if the array of tags is being updated. Allowed values: \link{'ADD SET REMOVE'}} -\item{action}{Action to be performed: ADD to add new annotations; REPLACE to replace the value of an already existing annotation; SET to set the new list of annotations removing any possible old annotations; REMOVE to remove some annotations; RESET to set some annotations to the default value configured in the corresponding variables of the VariableSet if any. Allowed values: \link{'ADD', 'SET', 'REMOVE'}} - -\item{data}{Json containing the map of annotations when the action is ADD, SET or REPLACE, a json with only the key 'remove' containing the comma separated variables to be removed as a value when the action is REMOVE or a json with only the key 'reset' containing the comma separated variables that will be set to the default value when the action is RESET.} +\item{action}{Action to be performed: ADD to add new annotations; REPLACE to replace the value of an already existing annotation; SET to set the new list of annotations removing any possible old annotations; REMOVE to remove some annotations; RESET to set some annotations to the default value configured in the corresponding variables of the VariableSet if any. 
Allowed values: \link{'ADD SET REMOVE RESET REPLACE'}} \item{pattern}{String pattern.} @@ -145,6 +131,8 @@ \item{offset}{Starting byte from which the file will be read.} +\item{data}{Parameters to modify.} + \item{lines}{Maximum number of lines to be returned up to a maximum of 1000.} \item{limit}{Number of results to be returned.} @@ -157,7 +145,7 @@ \item{exclude}{Fields excluded in the response, whole JSON path must be provided.} -\item{study}{Study [\link{user@}project:]study where study and project can be either the ID or UUID.} +\item{study}{Study [\link{organization@}project:]study where study and project can be either the ID or UUID.} \item{maxDepth}{Maximum depth to get files from.} } @@ -166,18 +154,17 @@ This function implements the OpenCGA calls for managing Files. The following table summarises the available \emph{actions} for this client:\tabular{llr}{ endpointName \tab Endpoint WS \tab parameters accepted \cr updateAcl \tab /{apiVersion}/files/acl/{members}/update \tab study, members\link{*}, action\link{*}, body\link{*} \cr - aggregationStats \tab /{apiVersion}/files/aggregationStats \tab study, name, type, format, bioformat, creationYear, creationMonth, creationDay, creationDayOfWeek, status, release, external, size, software, experiment, numSamples, numRelatedFiles, annotation, default, field \cr loadAnnotationSets \tab /{apiVersion}/files/annotationSets/load \tab study, variableSetId\link{*}, path\link{*}, parents, annotationSetId, body \cr bioformats \tab /{apiVersion}/files/bioformats \tab \cr create \tab /{apiVersion}/files/create \tab study, parents, body\link{*} \cr distinct \tab /{apiVersion}/files/distinct \tab study, id, uuid, name, path, uri, type, bioformat, format, external, status, internalStatus, internalVariantIndexStatus, softwareName, directory, creationDate, modificationDate, description, tags, size, sampleIds, jobId, annotation, acl, deleted, release, field\link{*} \cr - fetch \tab /{apiVersion}/files/fetch \tab jobId, 
jobDescription, jobDependsOn, jobTags, study, body\link{*} \cr + fetch \tab /{apiVersion}/files/fetch \tab jobId, jobDescription, jobDependsOn, jobTags, jobScheduledStartTime, jobPriority, jobDryRun, study, body\link{*} \cr formats \tab /{apiVersion}/files/formats \tab \cr link \tab /{apiVersion}/files/link \tab study, parents, body\link{*} \cr - runLink \tab /{apiVersion}/files/link/run \tab study, jobId, jobDependsOn, jobDescription, jobTags, body\link{*} \cr - runPostlink \tab /{apiVersion}/files/postlink/run \tab study, jobId, jobDependsOn, jobDescription, jobTags, body\link{*} \cr + runLink \tab /{apiVersion}/files/link/run \tab study, jobId, jobDependsOn, jobDescription, jobTags, jobScheduledStartTime, jobPriority, jobDryRun, body\link{*} \cr + runPostlink \tab /{apiVersion}/files/postlink/run \tab study, jobId, jobDependsOn, jobDescription, jobTags, jobScheduledStartTime, jobPriority, jobDryRun, body\link{*} \cr search \tab /{apiVersion}/files/search \tab include, exclude, limit, skip, count, flattenAnnotations, study, id, uuid, name, path, uri, type, bioformat, format, external, status, internalStatus, internalVariantIndexStatus, softwareName, directory, creationDate, modificationDate, description, tags, size, sampleIds, jobId, annotation, acl, deleted, release \cr - upload \tab /{apiVersion}/files/upload \tab file, filename, fileFormat, bioformat, checksum, study, relativeFilePath, description, parents \cr + upload \tab /{apiVersion}/files/upload \tab file, fileName, fileFormat, bioformat, checksum, study, relativeFilePath, description, parents \cr acl \tab /{apiVersion}/files/{files}/acl \tab files\link{*}, study, member, silent \cr delete \tab /{apiVersion}/files/{files}/delete \tab study, files\link{*}, skipTrash \cr info \tab /{apiVersion}/files/{files}/info \tab include, exclude, flattenAnnotations, files\link{*}, study, deleted \cr @@ -188,6 +175,7 @@ The following table summarises the available \emph{actions} for this client:\tab grep \tab 
/{apiVersion}/files/{file}/grep \tab file\link{*}, study, pattern, ignoreCase, maxCount \cr head \tab /{apiVersion}/files/{file}/head \tab file\link{*}, study, offset, lines \cr image \tab /{apiVersion}/files/{file}/image \tab file\link{*}, study \cr + move \tab /{apiVersion}/files/{file}/move \tab include, exclude, file\link{*}, study, body\link{*} \cr refresh \tab /{apiVersion}/files/{file}/refresh \tab file\link{*}, study \cr tail \tab /{apiVersion}/files/{file}/tail \tab file\link{*}, study, lines \cr list \tab /{apiVersion}/files/{folder}/list \tab include, exclude, limit, skip, count, folder\link{*}, study \cr @@ -199,11 +187,6 @@ The following table summarises the available \emph{actions} for this client:\tab Update the set of permissions granted for the member. } -\section{Endpoint /{apiVersion}/files/aggregationStats}{ - -Fetch catalog file stats. -} - \section{Endpoint /{apiVersion}/files/annotationSets/load}{ Load annotation sets from a TSV file. @@ -309,6 +292,11 @@ Show the first lines of a file (up to a limit). Obtain the base64 content of an image. } +\section{Endpoint /{apiVersion}/files/{file}/move}{ + +Move file to a different path. +} + \section{Endpoint /{apiVersion}/files/{file}/refresh}{ Refresh metadata from the selected file or folder. Return updated files. diff --git a/opencga-client/src/main/R/man/ga4ghClient-OpencgaR-method.Rd b/opencga-client/src/main/R/man/ga4ghClient-OpencgaR-method.Rd index 37639477ca..0d6d4d5d88 100644 --- a/opencga-client/src/main/R/man/ga4ghClient-OpencgaR-method.Rd +++ b/opencga-client/src/main/R/man/ga4ghClient-OpencgaR-method.Rd @@ -4,13 +4,13 @@ \alias{ga4ghClient,OpencgaR-method} \title{GA4GHClient methods} \usage{ -\S4method{ga4ghClient}{OpencgaR}(OpencgaR, study, file, endpointName, params = NULL, ...) +\S4method{ga4ghClient}{OpencgaR}(OpencgaR, file, study, endpointName, params = NULL, ...) 
} \arguments{ -\item{study}{Study [\link{user@}project:]study where study and project can be either the ID or UUID.} - \item{file}{File id, name or path.} +\item{study}{Study [\link{organization@}project:]study where study and project can be either the ID or UUID.} + \item{referenceName}{Reference sequence name (Example: 'chr1', '1' or 'chrX'.} \item{start}{The start position of the range on the reference, 0-based, inclusive.} diff --git a/opencga-client/src/main/R/man/individualClient-OpencgaR-method.Rd b/opencga-client/src/main/R/man/individualClient-OpencgaR-method.Rd index aebbe1780c..5064663807 100644 --- a/opencga-client/src/main/R/man/individualClient-OpencgaR-method.Rd +++ b/opencga-client/src/main/R/man/individualClient-OpencgaR-method.Rd @@ -6,45 +6,25 @@ \usage{ \S4method{individualClient}{OpencgaR}( OpencgaR, + annotationSet, + individual, individuals, members, - individual, - annotationSet, endpointName, params = NULL, ... ) } \arguments{ -\item{individuals}{Comma separated list of individual ids.} - -\item{members}{Comma separated list of user or group ids.} - -\item{individual}{Individual ID, name or UUID.} - \item{annotationSet}{AnnotationSet ID to be updated.} -\item{propagate}{Propagate individual permissions to related samples.} - -\item{hasFather}{Has father.} - -\item{hasMother}{Has mother.} - -\item{population}{Population.} - -\item{creationYear}{Creation year.} - -\item{creationMonth}{Creation month (JANUARY, FEBRUARY...).} - -\item{creationDay}{Creation day.} - -\item{creationDayOfWeek}{Creation day of week (MONDAY, TUESDAY...).} +\item{individual}{Individual ID, name or UUID.} -\item{numSamples}{Number of samples.} +\item{individuals}{Comma separated list of individual ids.} -\item{parentalConsanguinity}{Parental consanguinity.} +\item{members}{Comma separated list of user or group ids.} -\item{default}{Calculate default stats.} +\item{propagate}{Propagate individual permissions to related samples.} \item{variableSetId}{Variable set ID or 
name.} @@ -54,7 +34,7 @@ \item{annotationSetId}{Annotation set id. If not provided, variableSetId will be used.} -\item{field}{Field for which to obtain the distinct values.} +\item{field}{Comma separated list of fields for which to obtain the distinct values.} \item{limit}{Number of results to be returned.} @@ -62,11 +42,11 @@ \item{count}{Get the total number of results matching the query. Deactivated by default.} -\item{id}{Comma separated list individual IDs up to a maximum of 100.} +\item{id}{Comma separated list individual IDs up to a maximum of 100. Also admits basic regular expressions using the operator '~', i.e. '~{perl-regex}' e.g. '~value' for case sensitive, '~/value/i' for case insensitive search.} \item{uuid}{Comma separated list individual UUIDs up to a maximum of 100.} -\item{name}{Comma separated list individual names up to a maximum of 100.} +\item{name}{Comma separated list individual names up to a maximum of 100. Also admits basic regular expressions using the operator '~', i.e. '~{perl-regex}' e.g. '~value' for case sensitive, '~/value/i' for case insensitive search.} \item{father}{Father ID, name or UUID.} @@ -80,15 +60,15 @@ \item{dateOfBirth}{Individual date of birth.} -\item{ethnicity}{Individual ethnicity.} +\item{ethnicity}{Individual ethnicity. Also admits basic regular expressions using the operator '~', i.e. '~{perl-regex}' e.g. '~value' for case sensitive, '~/value/i' for case insensitive search.} -\item{disorders}{Comma separated list of disorder ids or names.} +\item{disorders}{Comma separated list of disorder ids or names. Also admits basic regular expressions using the operator '~', i.e. '~{perl-regex}' e.g. '~value' for case sensitive, '~/value/i' for case insensitive search.} -\item{phenotypes}{Comma separated list of phenotype ids or names.} +\item{phenotypes}{Comma separated list of phenotype ids or names. Also admits basic regular expressions using the operator '~', i.e. '~{perl-regex}' e.g. 
'~value' for case sensitive, '~/value/i' for case insensitive search.} -\item{populationName}{Population name.} +\item{populationName}{Population name. Also admits basic regular expressions using the operator '~', i.e. '~{perl-regex}' e.g. '~value' for case sensitive, '~/value/i' for case insensitive search.} -\item{populationSubpopulation}{Subpopulation name.} +\item{populationSubpopulation}{Subpopulation name. Also admits basic regular expressions using the operator '~', i.e. '~{perl-regex}' e.g. '~value' for case sensitive, '~/value/i' for case insensitive search.} \item{karyotypicSex}{Individual karyotypic sex.} @@ -120,19 +100,17 @@ \item{deleted}{Boolean to retrieve deleted individuals.} -\item{samplesAction}{Action to be performed if the array of samples is being updated. Allowed values: \link{'ADD', 'SET', 'REMOVE'}} +\item{samplesAction}{Action to be performed if the array of samples is being updated. Allowed values: \link{'ADD SET REMOVE'}} -\item{phenotypesAction}{Action to be performed if the array of phenotypes is being updated \link{SET, ADD, REMOVE}. Allowed values: \link{'ADD', 'SET', 'REMOVE'}} +\item{phenotypesAction}{Action to be performed if the array of phenotypes is being updated \link{SET, ADD, REMOVE}. Allowed values: \link{'ADD SET REMOVE'}} -\item{disordersAction}{Action to be performed if the array of disorders is being updated \link{SET, ADD, REMOVE}. Allowed values: \link{'ADD', 'SET', 'REMOVE'}} +\item{disordersAction}{Action to be performed if the array of disorders is being updated \link{SET, ADD, REMOVE}. Allowed values: \link{'ADD SET REMOVE'}} -\item{annotationSetsAction}{Action to be performed if the array of annotationSets is being updated. Allowed values: \link{'ADD', 'SET', 'REMOVE'}} +\item{annotationSetsAction}{Action to be performed if the array of annotationSets is being updated. 
Allowed values: \link{'ADD SET REMOVE'}} \item{includeResult}{Flag indicating to include the created or updated document result in the response.} -\item{action}{Action to be performed: ADD to add new annotations; REPLACE to replace the value of an already existing annotation; SET to set the new list of annotations removing any possible old annotations; REMOVE to remove some annotations; RESET to set some annotations to the default value configured in the corresponding variables of the VariableSet if any. Allowed values: \link{'ADD', 'SET', 'REMOVE', 'RESET', 'REPLACE'}} - -\item{incVersion}{Create a new version of individual.} +\item{action}{Action to be performed: ADD to add new annotations; REPLACE to replace the value of an already existing annotation; SET to set the new list of annotations removing any possible old annotations; REMOVE to remove some annotations; RESET to set some annotations to the default value configured in the corresponding variables of the VariableSet if any. Allowed values: \link{'ADD SET REMOVE RESET REPLACE'}} \item{data}{Json containing the map of annotations when the action is ADD, SET or REPLACE, a json with only the key 'remove' containing the comma separated variables to be removed as a value when the action is REMOVE or a json with only the key 'reset' containing the comma separated variables that will be set to the default value when the action is RESET.} @@ -142,7 +120,7 @@ \item{flattenAnnotations}{Flatten the annotations?.} -\item{study}{Study [\link{user@}project:]study where study and project can be either the ID or UUID.} +\item{study}{Study [\link{organization@}project:]study where study and project can be either the ID or UUID.} \item{degree}{Pedigree degree.} } @@ -151,7 +129,6 @@ This function implements the OpenCGA calls for managing Individuals. 
The following table summarises the available \emph{actions} for this client:\tabular{llr}{ endpointName \tab Endpoint WS \tab parameters accepted \cr updateAcl \tab /{apiVersion}/individuals/acl/{members}/update \tab study, members\link{*}, action\link{*}, propagate, body\link{*} \cr - aggregationStats \tab /{apiVersion}/individuals/aggregationStats \tab study, hasFather, hasMother, sex, karyotypicSex, ethnicity, population, creationYear, creationMonth, creationDay, creationDayOfWeek, status, lifeStatus, phenotypes, numSamples, parentalConsanguinity, release, version, annotation, default, field \cr loadAnnotationSets \tab /{apiVersion}/individuals/annotationSets/load \tab study, variableSetId\link{*}, path\link{*}, parents, annotationSetId, body \cr create \tab /{apiVersion}/individuals/create \tab include, exclude, study, samples, includeResult, body\link{*} \cr distinct \tab /{apiVersion}/individuals/distinct \tab study, id, uuid, name, familyIds, father, mother, samples, sex, ethnicity, dateOfBirth, disorders, phenotypes, populationName, populationSubpopulation, karyotypicSex, lifeStatus, internalStatus, status, deleted, creationDate, modificationDate, annotation, acl, release, snapshot, field\link{*} \cr @@ -159,8 +136,8 @@ The following table summarises the available \emph{actions} for this client:\tab acl \tab /{apiVersion}/individuals/{individuals}/acl \tab individuals\link{*}, study, member, silent \cr delete \tab /{apiVersion}/individuals/{individuals}/delete \tab force, study, individuals\link{*} \cr info \tab /{apiVersion}/individuals/{individuals}/info \tab include, exclude, flattenAnnotations, individuals\link{*}, study, version, deleted \cr - update \tab /{apiVersion}/individuals/{individuals}/update \tab include, exclude, individuals\link{*}, study, samplesAction, phenotypesAction, disordersAction, annotationSetsAction, incVersion, includeResult, body \cr - updateAnnotationSetsAnnotations \tab 
/{apiVersion}/individuals/{individual}/annotationSets/{annotationSet}/annotations/update \tab individual\link{*}, study, annotationSet\link{*}, action, incVersion, body \cr + update \tab /{apiVersion}/individuals/{individuals}/update \tab include, exclude, individuals\link{*}, study, samplesAction, phenotypesAction, disordersAction, annotationSetsAction, includeResult, body \cr + updateAnnotationSetsAnnotations \tab /{apiVersion}/individuals/{individual}/annotationSets/{annotationSet}/annotations/update \tab individual\link{*}, study, annotationSet\link{*}, action, body \cr relatives \tab /{apiVersion}/individuals/{individual}/relatives \tab include, exclude, flattenAnnotations, individual\link{*}, study, degree \cr } } @@ -169,11 +146,6 @@ The following table summarises the available \emph{actions} for this client:\tab Update the set of permissions granted for the member. } -\section{Endpoint /{apiVersion}/individuals/aggregationStats}{ - -Fetch catalog individual stats. -} - \section{Endpoint /{apiVersion}/individuals/annotationSets/load}{ Load annotation sets from a TSV file. diff --git a/opencga-client/src/main/R/man/jobClient-OpencgaR-method.Rd b/opencga-client/src/main/R/man/jobClient-OpencgaR-method.Rd index 1a6c95506d..8572e8d31d 100644 --- a/opencga-client/src/main/R/man/jobClient-OpencgaR-method.Rd +++ b/opencga-client/src/main/R/man/jobClient-OpencgaR-method.Rd @@ -4,36 +4,18 @@ \alias{jobClient,OpencgaR-method} \title{JobClient methods} \usage{ -\S4method{jobClient}{OpencgaR}(OpencgaR, members, job, jobs, endpointName, params = NULL, ...) +\S4method{jobClient}{OpencgaR}(OpencgaR, job, jobs, members, endpointName, params = NULL, ...) 
} \arguments{ -\item{members}{Comma separated list of user or group ids.} - \item{job}{Job ID or UUID.} \item{jobs}{Comma separated list of job IDs or UUIDs up to a maximum of 100.} -\item{action}{Action to be performed \link{ADD, SET, REMOVE or RESET}.} - -\item{toolScope}{Tool scope.} - -\item{toolResource}{Tool resource.} - -\item{executorId}{Executor id.} - -\item{executorFramework}{Executor framework.} - -\item{creationYear}{Creation year.} - -\item{creationMonth}{Creation month (JANUARY, FEBRUARY...).} - -\item{creationDay}{Creation day.} - -\item{creationDayOfWeek}{Creation day of week (MONDAY, TUESDAY...).} +\item{members}{Comma separated list of user or group ids.} -\item{default}{Calculate default stats.} +\item{action}{Action to be performed \link{ADD, SET, REMOVE or RESET}. Allowed values: \link{'SET ADD REMOVE RESET'}} -\item{field}{Field for which to obtain the distinct values.} +\item{field}{Comma separated list of fields for which to obtain the distinct values.} \item{jobId}{Job ID. It must be a unique string within the study. An ID will be autogenerated automatically if not provided.} @@ -43,13 +25,15 @@ \item{jobTags}{Job tags.} +\item{jobScheduledStartTime}{Time when the job is scheduled to start.} + \item{skip}{Number of results to skip.} \item{count}{Get the total number of results matching the query. Deactivated by default.} \item{otherStudies}{Flag indicating the entries being queried can belong to any related study, not just the primary one.} -\item{id}{Comma separated list of job IDs up to a maximum of 100.} +\item{id}{Comma separated list of job IDs up to a maximum of 100. Also admits basic regular expressions using the operator '~', i.e. '~{perl-regex}' e.g. 
'~value' for case sensitive, '~/value/i' for case insensitive search.} \item{uuid}{Comma separated list of job UUIDs up to a maximum of 100.} @@ -81,7 +65,7 @@ \item{userId}{User that created the job.} -\item{toolId}{Tool ID executed by the job.} +\item{toolId}{Tool ID executed by the job. Also admits basic regular expressions using the operator '~', i.e. '~{perl-regex}' e.g. '~value' for case sensitive, '~/value/i' for case insensitive search.} \item{member}{User or group id.} @@ -99,7 +83,7 @@ \item{offset}{Starting byte from which the file will be read.} -\item{study}{Study [\link{user@}project:]study where study and project can be either the ID or UUID.} +\item{study}{Study [\link{organization@}project:]study where study and project can be either the ID or UUID.} \item{lines}{Maximum number of lines to be returned up to a maximum of 1000.} @@ -110,16 +94,16 @@ This function implements the OpenCGA calls for managing Jobs. The following table summarises the available \emph{actions} for this client:\tabular{llr}{ endpointName \tab Endpoint WS \tab parameters accepted \cr updateAcl \tab /{apiVersion}/jobs/acl/{members}/update \tab members\link{*}, action\link{*}, body\link{*} \cr - aggregationStats \tab /{apiVersion}/jobs/aggregationStats \tab study, toolId, toolScope, toolType, toolResource, userId, priority, tags, executorId, executorFramework, creationYear, creationMonth, creationDay, creationDayOfWeek, status, release, default, field \cr create \tab /{apiVersion}/jobs/create \tab study, body\link{*} \cr distinct \tab /{apiVersion}/jobs/distinct \tab study, otherStudies, id, uuid, toolId, toolType, userId, priority, status, internalStatus, creationDate, modificationDate, visited, tags, input, output, acl, release, deleted, field\link{*} \cr - retry \tab /{apiVersion}/jobs/retry \tab jobId, jobDescription, jobDependsOn, jobTags, study, body\link{*} \cr + retry \tab /{apiVersion}/jobs/retry \tab jobId, jobDescription, jobDependsOn, jobTags, jobScheduledStartTime, 
study, body\link{*} \cr search \tab /{apiVersion}/jobs/search \tab include, exclude, limit, skip, count, study, otherStudies, id, uuid, toolId, toolType, userId, priority, status, internalStatus, creationDate, modificationDate, visited, tags, input, output, acl, release, deleted \cr top \tab /{apiVersion}/jobs/top \tab limit, study, internalStatus, priority, userId, toolId \cr acl \tab /{apiVersion}/jobs/{jobs}/acl \tab jobs\link{*}, member, silent \cr delete \tab /{apiVersion}/jobs/{jobs}/delete \tab study, jobs\link{*} \cr info \tab /{apiVersion}/jobs/{jobs}/info \tab include, exclude, jobs\link{*}, study, deleted \cr update \tab /{apiVersion}/jobs/{jobs}/update \tab include, exclude, jobs\link{*}, study, includeResult, body \cr + kill \tab /{apiVersion}/jobs/{job}/kill \tab job\link{*}, study \cr headLog \tab /{apiVersion}/jobs/{job}/log/head \tab job\link{*}, study, offset, lines, type \cr tailLog \tab /{apiVersion}/jobs/{job}/log/tail \tab job\link{*}, study, lines, type \cr } @@ -129,11 +113,6 @@ The following table summarises the available \emph{actions} for this client:\tab Update the set of permissions granted for the member. } -\section{Endpoint /{apiVersion}/jobs/aggregationStats}{ - -Fetch catalog job stats. -} - \section{Endpoint /{apiVersion}/jobs/create}{ Register an executed job with POST method. @@ -179,6 +158,11 @@ Get job information. Update some job attributes. } +\section{Endpoint /{apiVersion}/jobs/{job}/kill}{ + +Send a signal to kill a pending or running job. +} + \section{Endpoint /{apiVersion}/jobs/{job}/log/head}{ Show the first lines of a log file (up to a limit). 
diff --git a/opencga-client/src/main/R/man/metaClient-OpencgaR-method.Rd b/opencga-client/src/main/R/man/metaClient-OpencgaR-method.Rd index 459b37a8fd..7ff0b3f297 100644 --- a/opencga-client/src/main/R/man/metaClient-OpencgaR-method.Rd +++ b/opencga-client/src/main/R/man/metaClient-OpencgaR-method.Rd @@ -8,6 +8,8 @@ } \arguments{ \item{category}{List of categories to get API from.} + +\item{model}{Model description.} } \description{ This function implements the OpenCGA calls for managing Meta. @@ -16,6 +18,7 @@ The following table summarises the available \emph{actions} for this client:\tab about \tab /{apiVersion}/meta/about \tab \cr api \tab /{apiVersion}/meta/api \tab category \cr fail \tab /{apiVersion}/meta/fail \tab \cr + model \tab /{apiVersion}/meta/model \tab model \cr ping \tab /{apiVersion}/meta/ping \tab \cr status \tab /{apiVersion}/meta/status \tab \cr } @@ -35,6 +38,11 @@ API. Ping Opencga webservices. } +\section{Endpoint /{apiVersion}/meta/model}{ + +Opencga model webservices. +} + \section{Endpoint /{apiVersion}/meta/ping}{ Ping Opencga webservices. 
diff --git a/opencga-client/src/main/R/man/opencgaLogin.Rd b/opencga-client/src/main/R/man/opencgaLogin.Rd index 3d45560964..bf53a8152f 100644 --- a/opencga-client/src/main/R/man/opencgaLogin.Rd +++ b/opencga-client/src/main/R/man/opencgaLogin.Rd @@ -12,7 +12,8 @@ opencgaLogin( interactive = FALSE, autoRenew = FALSE, verbose = FALSE, - showToken = FALSE + showToken = FALSE, + organization = NULL ) } \arguments{ @@ -31,17 +32,17 @@ an Opencga class object \dontrun{ con <- initOpencgaR(host = "http://bioinfo.hpc.cam.ac.uk/opencga-prod/", version = "v2") -con <- opencgaLogin(opencga = con, userid = "demouser", passwd = "demouser", showToken = TRUE) +con <- opencgaLogin(opencga = con, userid = "xxx", passwd = "xxx", showToken = TRUE) # Configuration in list format conf <- list(version="v2", rest=list(host="http://bioinfo.hpc.cam.ac.uk/opencga-prod/")) con <- initOpencgaR(opencgaConfig=conf) -con <- opencgaLogin(opencga = con, userid = "demouser", passwd = demouser") +con <- opencgaLogin(opencga = con, userid = "xxx", passwd = "xxx") # Configuration in file format ("YAML" or "JSON") conf <- "/path/to/conf/client-configuration.yml" con <- initOpencgaR(opencgaConfig=conf) -con <- opencgaLogin(opencga = con, userid = "demouser", passwd = "demouser") +con <- opencgaLogin(opencga = con, userid = "xxx", passwd = "xxx") } } \description{ diff --git a/opencga-client/src/main/R/man/operationClient-OpencgaR-method.Rd b/opencga-client/src/main/R/man/operationClient-OpencgaR-method.Rd index e088d95ff8..5a87c79576 100644 --- a/opencga-client/src/main/R/man/operationClient-OpencgaR-method.Rd +++ b/opencga-client/src/main/R/man/operationClient-OpencgaR-method.Rd @@ -13,15 +13,15 @@ \item{annotationId}{Annotation identifier.} -\item{skipRebuild}{Skip sample index re-build.} - \item{name}{Unique name of the score within the study.} \item{resume}{Resume a previously failed remove.} \item{force}{Force remove of partially indexed scores.} -\item{project}{Project \link{user@}project where 
project can be either the ID or the alias.} +\item{skipRebuild}{Skip sample index re-build.} + +\item{project}{Project \link{organization@}project where project can be either the ID or the alias.} \item{samples}{Samples to remove. Needs to provide all the samples in the secondary index.} @@ -33,7 +33,13 @@ \item{jobTags}{Job tags.} -\item{study}{Study [\link{user@}project:]study where study and project can be either the ID or UUID.} +\item{jobScheduledStartTime}{Time when the job is scheduled to start.} + +\item{jobPriority}{Priority of the job.} + +\item{jobDryRun}{Flag indicating that the job will be executed in dry-run mode. In this mode, OpenCGA will validate that all parameters and prerequisites are correctly set for successful execution, but the job will not actually run.} + +\item{study}{Study [\link{organization@}project:]study where study and project can be either the ID or UUID.} \item{data}{Variant delete study params.} } @@ -42,29 +48,34 @@ This function implements the OpenCGA calls for managing Operations - Variant Sto The following table summarises the available \emph{actions} for this client:\tabular{llr}{ endpointName \tab Endpoint WS \tab parameters accepted \cr configureCellbase \tab /{apiVersion}/operation/cellbase/configure \tab project, annotationUpdate, annotationSaveId, body \cr - aggregateVariant \tab /{apiVersion}/operation/variant/aggregate \tab jobId, jobDescription, jobDependsOn, jobTags, study, body \cr - deleteVariantAnnotation \tab /{apiVersion}/operation/variant/annotation/delete \tab jobId, jobDescription, jobDependsOn, jobTags, project, annotationId \cr - indexVariantAnnotation \tab /{apiVersion}/operation/variant/annotation/index \tab jobId, jobDescription, jobDependsOn, jobTags, project, study, body \cr - saveVariantAnnotation \tab /{apiVersion}/operation/variant/annotation/save \tab jobId, jobDescription, jobDependsOn, jobTags, project, body \cr + aggregateVariant \tab /{apiVersion}/operation/variant/aggregate \tab jobId, 
jobDescription, jobDependsOn, jobTags, jobScheduledStartTime, jobPriority, jobDryRun, study, body \cr + deleteVariantAnnotation \tab /{apiVersion}/operation/variant/annotation/delete \tab jobId, jobDescription, jobDependsOn, jobTags, jobScheduledStartTime, jobPriority, jobDryRun, project, annotationId \cr + indexVariantAnnotation \tab /{apiVersion}/operation/variant/annotation/index \tab jobId, jobDescription, jobDependsOn, jobTags, jobScheduledStartTime, jobPriority, jobDryRun, project, study, body \cr + saveVariantAnnotation \tab /{apiVersion}/operation/variant/annotation/save \tab jobId, jobDescription, jobDependsOn, jobTags, jobScheduledStartTime, jobPriority, jobDryRun, project, body \cr configureVariant \tab /{apiVersion}/operation/variant/configure \tab project, study, body \cr - deleteVariant \tab /{apiVersion}/operation/variant/delete \tab jobId, jobDescription, jobDependsOn, jobTags, study, body \cr - aggregateVariantFamily \tab /{apiVersion}/operation/variant/family/aggregate \tab jobId, jobDescription, jobDependsOn, jobTags, study, body \cr - indexVariantFamily \tab /{apiVersion}/operation/variant/family/index \tab jobId, jobDescription, jobDependsOn, jobTags, study, body \cr - indexVariant \tab /{apiVersion}/operation/variant/index \tab jobId, jobDescription, jobDependsOn, jobTags, study, body \cr - launcherVariantIndex \tab /{apiVersion}/operation/variant/index/launcher \tab jobId, jobDescription, jobDependsOn, jobTags, study, body \cr - runVariantJulie \tab /{apiVersion}/operation/variant/julie/run \tab jobId, jobDescription, jobDependsOn, jobTags, project, body\link{*} \cr - repairVariantMetadata \tab /{apiVersion}/operation/variant/metadata/repair \tab jobId, jobDescription, jobDependsOn, jobTags, body \cr - synchronizeVariantMetadata \tab /{apiVersion}/operation/variant/metadata/synchronize \tab jobId, jobDescription, jobDependsOn, jobTags, study, body \cr - deleteVariantSample \tab /{apiVersion}/operation/variant/sample/delete \tab jobId, 
jobDescription, jobDependsOn, jobTags, study, body \cr - indexVariantSample \tab /{apiVersion}/operation/variant/sample/index \tab jobId, jobDescription, jobDependsOn, jobTags, study, body \cr + deleteVariant \tab /{apiVersion}/operation/variant/delete \tab jobId, jobDescription, jobDependsOn, jobTags, jobScheduledStartTime, jobPriority, jobDryRun, study, body \cr + aggregateVariantFamily \tab /{apiVersion}/operation/variant/family/aggregate \tab jobId, jobDescription, jobDependsOn, jobTags, jobScheduledStartTime, jobPriority, jobDryRun, study, body \cr + indexVariantFamily \tab /{apiVersion}/operation/variant/family/index \tab jobId, jobDescription, jobDependsOn, jobTags, jobScheduledStartTime, jobPriority, jobDryRun, study, body \cr + indexVariant \tab /{apiVersion}/operation/variant/index \tab jobId, jobDescription, jobDependsOn, jobTags, jobScheduledStartTime, jobPriority, jobDryRun, study, body \cr + launcherVariantIndex \tab /{apiVersion}/operation/variant/index/launcher \tab jobId, jobDescription, jobDependsOn, jobTags, jobScheduledStartTime, jobPriority, jobDryRun, study, body \cr + runVariantJulie \tab /{apiVersion}/operation/variant/julie/run \tab jobId, jobDescription, jobDependsOn, jobTags, jobScheduledStartTime, jobPriority, jobDryRun, project, body\link{*} \cr + repairVariantMetadata \tab /{apiVersion}/operation/variant/metadata/repair \tab jobId, jobDescription, jobDependsOn, jobTags, jobScheduledStartTime, jobPriority, jobDryRun, body \cr + synchronizeVariantMetadata \tab /{apiVersion}/operation/variant/metadata/synchronize \tab jobId, jobDescription, jobDependsOn, jobTags, jobScheduledStartTime, jobPriority, jobDryRun, study, body \cr + pruneVariant \tab /{apiVersion}/operation/variant/prune \tab jobId, jobDescription, jobDependsOn, jobTags, jobScheduledStartTime, jobPriority, jobDryRun, body \cr + deleteVariantSample \tab /{apiVersion}/operation/variant/sample/delete \tab jobId, jobDescription, jobDependsOn, jobTags, jobScheduledStartTime, 
jobPriority, jobDryRun, study, body \cr + indexVariantSample \tab /{apiVersion}/operation/variant/sample/index \tab jobId, jobDescription, jobDependsOn, jobTags, jobScheduledStartTime, jobPriority, jobDryRun, study, body \cr variantSampleIndexConfigure \tab /{apiVersion}/operation/variant/sample/index/configure \tab study, skipRebuild, body \cr - deleteVariantScore \tab /{apiVersion}/operation/variant/score/delete \tab jobId, jobDescription, jobDependsOn, jobTags, study, name, resume, force \cr - indexVariantScore \tab /{apiVersion}/operation/variant/score/index \tab jobId, jobDescription, jobDependsOn, jobTags, study, body \cr - secondaryIndexVariant \tab /{apiVersion}/operation/variant/secondaryIndex \tab jobId, jobDescription, jobDependsOn, jobTags, project, study, body \cr - deleteVariantSecondaryIndex \tab /{apiVersion}/operation/variant/secondaryIndex/delete \tab jobId, jobDescription, jobDependsOn, jobTags, study, samples \cr - deleteVariantStats \tab /{apiVersion}/operation/variant/stats/delete \tab study, jobId, jobDescription, jobDependsOn, jobTags, body\link{*} \cr - indexVariantStats \tab /{apiVersion}/operation/variant/stats/index \tab study, jobId, jobDescription, jobDependsOn, jobTags, body\link{*} \cr - deleteVariantStudy \tab /{apiVersion}/operation/variant/study/delete \tab jobId, jobDescription, jobDependsOn, jobTags, study, body \cr + deleteVariantScore \tab /{apiVersion}/operation/variant/score/delete \tab jobId, jobDescription, jobDependsOn, jobTags, jobScheduledStartTime, jobPriority, jobDryRun, study, name, resume, force \cr + indexVariantScore \tab /{apiVersion}/operation/variant/score/index \tab jobId, jobDescription, jobDependsOn, jobTags, jobScheduledStartTime, jobPriority, jobDryRun, study, body \cr + variantSecondaryAnnotationIndex \tab /{apiVersion}/operation/variant/secondary/annotation/index \tab jobId, jobDescription, jobDependsOn, jobTags, jobScheduledStartTime, jobPriority, jobDryRun, project, study, body \cr + 
variantSecondarySampleIndex \tab /{apiVersion}/operation/variant/secondary/sample/index \tab jobId, jobDescription, jobDependsOn, jobTags, jobScheduledStartTime, jobPriority, jobDryRun, study, body \cr + configureVariantSecondarySampleIndex \tab /{apiVersion}/operation/variant/secondary/sample/index/configure \tab study, skipRebuild, body \cr + secondaryIndexVariant \tab /{apiVersion}/operation/variant/secondaryIndex \tab jobId, jobDescription, jobDependsOn, jobTags, jobScheduledStartTime, jobPriority, jobDryRun, project, study, body \cr + deleteVariantSecondaryIndex \tab /{apiVersion}/operation/variant/secondaryIndex/delete \tab jobId, jobDescription, jobDependsOn, jobTags, jobScheduledStartTime, jobPriority, jobDryRun, study, samples \cr + setupVariant \tab /{apiVersion}/operation/variant/setup \tab study, body \cr + deleteVariantStats \tab /{apiVersion}/operation/variant/stats/delete \tab study, jobId, jobDescription, jobDependsOn, jobTags, jobScheduledStartTime, jobPriority, jobDryRun, body\link{*} \cr + indexVariantStats \tab /{apiVersion}/operation/variant/stats/index \tab study, jobId, jobDescription, jobDependsOn, jobTags, jobScheduledStartTime, jobPriority, jobDryRun, body\link{*} \cr + deleteVariantStudy \tab /{apiVersion}/operation/variant/study/delete \tab jobId, jobDescription, jobDependsOn, jobTags, jobScheduledStartTime, jobPriority, jobDryRun, study, body \cr } } \section{Endpoint /{apiVersion}/operation/cellbase/configure}{ @@ -109,7 +120,7 @@ Find variants where not all the samples are present, and fill the empty values. \section{Endpoint /{apiVersion}/operation/variant/family/index}{ -Build the family index. +DEPRECATED: integrated in index (DEPRECATED Build the family index). } \section{Endpoint /{apiVersion}/operation/variant/index}{ @@ -137,6 +148,11 @@ Execute some repairs on Variant Storage Metadata. Advanced users only. Synchronize catalog with variant storage metadata. 
} +\section{Endpoint /{apiVersion}/operation/variant/prune}{ + +Prune orphan variants from studies in a project. +} + \section{Endpoint /{apiVersion}/operation/variant/sample/delete}{ Remove variant samples from the variant storage. @@ -144,12 +160,12 @@ Remove variant samples from the variant storage. \section{Endpoint /{apiVersion}/operation/variant/sample/index}{ -Build and annotate the sample index. +DEPRECATED: You should use the new sample index method instead. } \section{Endpoint /{apiVersion}/operation/variant/sample/index/configure}{ -Update SampleIndex configuration. +DEPRECATED: You should use the new sample index configure method. } \section{Endpoint /{apiVersion}/operation/variant/score/delete}{ @@ -162,9 +178,24 @@ Remove a variant score in the database. Index a variant score in the database. } +\section{Endpoint /{apiVersion}/operation/variant/secondary/annotation/index}{ + +Creates a secondary index using a search engine. If samples are provided, sample data will be added to the secondary index (New!). +} + +\section{Endpoint /{apiVersion}/operation/variant/secondary/sample/index}{ + +Build and annotate the sample index (New!). +} + +\section{Endpoint /{apiVersion}/operation/variant/secondary/sample/index/configure}{ + +Update SampleIndex configuration (New!). +} + \section{Endpoint /{apiVersion}/operation/variant/secondaryIndex}{ -Creates a secondary index using a search engine. If samples are provided, sample data will be added to the secondary index. +DEPRECATED: You should use the new annotation index method instead. } \section{Endpoint /{apiVersion}/operation/variant/secondaryIndex/delete}{ @@ -172,6 +203,11 @@ Creates a secondary index using a search engine. If samples are provided, sample Remove a secondary index from the search engine for a specific set of samples. } +\section{Endpoint /{apiVersion}/operation/variant/setup}{ + +Execute Variant Setup to allow using the variant engine. 
This setup is necessary before starting any variant operation. +} + \section{Endpoint /{apiVersion}/operation/variant/stats/delete}{ Deletes the VariantStats of a cohort/s from the database. diff --git a/opencga-client/src/main/R/man/organizationClient-OpencgaR-method.Rd b/opencga-client/src/main/R/man/organizationClient-OpencgaR-method.Rd new file mode 100644 index 0000000000..955a65d24e --- /dev/null +++ b/opencga-client/src/main/R/man/organizationClient-OpencgaR-method.Rd @@ -0,0 +1,124 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Organization-methods.R +\name{organizationClient,OpencgaR-method} +\alias{organizationClient,OpencgaR-method} +\title{OrganizationClient methods} +\usage{ +\S4method{organizationClient}{OpencgaR}( + OpencgaR, + id, + organization, + user, + endpointName, + params = NULL, + ... +) +} +\arguments{ +\item{id}{Note unique identifier.} + +\item{organization}{Organization id.} + +\item{user}{User ID.} + +\item{creationDate}{Creation date. Format: yyyyMMddHHmmss. Examples: >2018, 2017-2018, <201805.} + +\item{modificationDate}{Modification date. Format: yyyyMMddHHmmss. Examples: >2018, 2017-2018, <201805.} + +\item{scope}{Scope of the Note.} + +\item{visibility}{Visibility of the Note.} + +\item{uuid}{Unique 32-character identifier assigned automatically by OpenCGA.} + +\item{userId}{User that wrote that Note.} + +\item{tags}{Note tags.} + +\item{version}{Autoincremental version assigned to the registered entry. By default, updates does not create new versions. To enable versioning, users must set the \code{incVersion} flag from the /update web service when updating the document.} + +\item{tagsAction}{Action to be performed if the array of tags is being updated. Allowed values: \link{'ADD SET REMOVE'}} + +\item{authenticationOriginsAction}{Action to be performed if the array of authenticationOrigins is being updated. 
Allowed values: \link{'ADD SET REMOVE REPLACE'}} + +\item{include}{Fields included in the response, whole JSON path must be provided.} + +\item{exclude}{Fields excluded in the response, whole JSON path must be provided.} + +\item{includeResult}{Flag indicating to include the created or updated document result in the response.} + +\item{adminsAction}{Action to be performed if the array of admins is being updated. Allowed values: \link{'ADD REMOVE'}} + +\item{data}{JSON containing the params to be updated.} +} +\description{ +This function implements the OpenCGA calls for managing Organizations. +The following table summarises the available \emph{actions} for this client:\tabular{llr}{ + endpointName \tab Endpoint WS \tab parameters accepted \cr + create \tab /{apiVersion}/organizations/create \tab include, exclude, includeResult, body\link{*} \cr + createNotes \tab /{apiVersion}/organizations/notes/create \tab include, exclude, includeResult, body\link{*} \cr + searchNotes \tab /{apiVersion}/organizations/notes/search \tab include, exclude, creationDate, modificationDate, id, scope, visibility, uuid, userId, tags, version \cr + deleteNotes \tab /{apiVersion}/organizations/notes/{id}/delete \tab id\link{*}, includeResult \cr + updateNotes \tab /{apiVersion}/organizations/notes/{id}/update \tab include, exclude, id\link{*}, tagsAction, includeResult, body\link{*} \cr + userUpdateStatus \tab /{apiVersion}/organizations/user/{user}/status/update \tab include, exclude, user\link{*}, organization, includeResult, body\link{*} \cr + updateUser \tab /{apiVersion}/organizations/user/{user}/update \tab include, exclude, user\link{*}, organization, includeResult, body\link{*} \cr + updateConfiguration \tab /{apiVersion}/organizations/{organization}/configuration/update \tab include, exclude, organization\link{*}, includeResult, authenticationOriginsAction, body\link{*} \cr + info \tab /{apiVersion}/organizations/{organization}/info \tab include, exclude, organization\link{*} 
\cr + update \tab /{apiVersion}/organizations/{organization}/update \tab include, exclude, organization\link{*}, includeResult, adminsAction, body\link{*} \cr +} +} +\section{Endpoint /{apiVersion}/organizations/create}{ + +Create a new organization. +} + +\section{Endpoint /{apiVersion}/organizations/notes/create}{ + +Create a new note. +} + +\section{Endpoint /{apiVersion}/organizations/notes/search}{ + +Search for notes of scope ORGANIZATION. +} + +\section{Endpoint /{apiVersion}/organizations/notes/{id}/delete}{ + +Delete note. +} + +\section{Endpoint /{apiVersion}/organizations/notes/{id}/update}{ + +Update a note. +} + +\section{Endpoint /{apiVersion}/organizations/user/{user}/status/update}{ + +Update the user status. +} + +\section{Endpoint /{apiVersion}/organizations/user/{user}/update}{ + +Update the user information. +} + +\section{Endpoint /{apiVersion}/organizations/{organization}/configuration/update}{ + +Update the Organization configuration attributes. +} + +\section{Endpoint /{apiVersion}/organizations/{organization}/info}{ + +Return the organization information. +} + +\section{Endpoint /{apiVersion}/organizations/{organization}/update}{ + +Update some organization attributes. +} + +\seealso{ +\url{http://docs.opencb.org/display/opencga/Using+OpenCGA} and the RESTful API documentation +\url{http://bioinfo.hpc.cam.ac.uk/opencga-prod/webservices/} +\link{*}: Required parameter +} diff --git a/opencga-client/src/main/R/man/panelClient-OpencgaR-method.Rd b/opencga-client/src/main/R/man/panelClient-OpencgaR-method.Rd index 29c09434e1..250423df12 100644 --- a/opencga-client/src/main/R/man/panelClient-OpencgaR-method.Rd +++ b/opencga-client/src/main/R/man/panelClient-OpencgaR-method.Rd @@ -11,11 +11,23 @@ \item{panels}{Comma separated list of panel ids.} -\item{action}{Action to be performed \link{ADD, SET, REMOVE or RESET}.} +\item{action}{Action to be performed \link{ADD, SET, REMOVE or RESET}. 
Allowed values: \link{'SET ADD REMOVE RESET'}} -\item{source}{Comma separated list of sources to import panels from. Current supported sources are 'panelapp' and 'cancer-gene-census'.} +\item{field}{Comma separated list of fields for which to obtain the distinct values.} -\item{field}{Field for which to obtain the distinct values.} +\item{jobId}{Job ID. It must be a unique string within the study. An ID will be autogenerated automatically if not provided.} + +\item{jobDependsOn}{Comma separated list of existing job IDs the job will depend on.} + +\item{jobDescription}{Job description.} + +\item{jobTags}{Job tags.} + +\item{jobScheduledStartTime}{Time when the job is scheduled to start.} + +\item{jobPriority}{Priority of the job.} + +\item{jobDryRun}{Flag indicating that the job will be executed in dry-run mode. In this mode, OpenCGA will validate that all parameters and prerequisites are correctly set for successful execution, but the job will not actually run.} \item{limit}{Number of results to be returned.} @@ -23,23 +35,27 @@ \item{count}{Get the total number of results matching the query. Deactivated by default.} -\item{id}{Comma separated list of panel IDs up to a maximum of 100.} +\item{id}{Comma separated list of panel IDs up to a maximum of 100. Also admits basic regular expressions using the operator '~', i.e. '~{perl-regex}' e.g. '~value' for case sensitive, '~/value/i' for case insensitive search.} \item{uuid}{Comma separated list of panel UUIDs up to a maximum of 100.} -\item{name}{Comma separated list of panel names up to a maximum of 100.} +\item{name}{Comma separated list of panel names up to a maximum of 100. Also admits basic regular expressions using the operator '~', i.e. '~{perl-regex}' e.g. 
'~value' for case sensitive, '~/value/i' for case insensitive search.} -\item{disorders}{Comma separated list of disorder ids or names.} +\item{internalStatus}{Filter by internal status.} -\item{variants}{Comma separated list of variant ids.} +\item{disorders}{Comma separated list of disorder ids or names. Also admits basic regular expressions using the operator '~', i.e. '~{perl-regex}' e.g. '~value' for case sensitive, '~/value/i' for case insensitive search.} -\item{genes}{Comma separated list of gene ids.} +\item{variants}{Comma separated list of variant ids. Also admits basic regular expressions using the operator '~', i.e. '~{perl-regex}' e.g. '~value' for case sensitive, '~/value/i' for case insensitive search.} -\item{regions}{Comma separated list of regions.} +\item{genes}{Comma separated list of gene ids. Also admits basic regular expressions using the operator '~', i.e. '~{perl-regex}' e.g. '~value' for case sensitive, '~/value/i' for case insensitive search.} -\item{categories}{Comma separated list of category names.} +\item{source}{Comma separated list of source ids or names.} -\item{tags}{Panel tags.} +\item{regions}{Comma separated list of regions. Also admits basic regular expressions using the operator '~', i.e. '~{perl-regex}' e.g. '~value' for case sensitive, '~/value/i' for case insensitive search.} + +\item{categories}{Comma separated list of category names. Also admits basic regular expressions using the operator '~', i.e. '~{perl-regex}' e.g. '~value' for case sensitive, '~/value/i' for case insensitive search.} + +\item{tags}{Panel tags. Also admits basic regular expressions using the operator '~', i.e. '~{perl-regex}' e.g. 
'~value' for case sensitive, '~/value/i' for case insensitive search.} \item{status}{Filter by status.} @@ -65,9 +81,7 @@ \item{exclude}{Fields excluded in the response, whole JSON path must be provided.} -\item{study}{Study [\link{user@}project:]study where study and project can be either the ID or UUID.} - -\item{incVersion}{Create a new version of panel.} +\item{study}{Study [\link{organization@}project:]study where study and project can be either the ID or UUID.} \item{includeResult}{Flag indicating to include the created or updated document result in the response.} @@ -78,13 +92,14 @@ This function implements the OpenCGA calls for managing Disease Panels. The following table summarises the available \emph{actions} for this client:\tabular{llr}{ endpointName \tab Endpoint WS \tab parameters accepted \cr updateAcl \tab /{apiVersion}/panels/acl/{members}/update \tab study, members\link{*}, action\link{*}, body\link{*} \cr - create \tab /{apiVersion}/panels/create \tab include, exclude, study, source, id, includeResult, body \cr - distinct \tab /{apiVersion}/panels/distinct \tab study, id, uuid, name, disorders, variants, genes, regions, categories, tags, deleted, status, creationDate, modificationDate, acl, release, snapshot, field\link{*} \cr - search \tab /{apiVersion}/panels/search \tab include, exclude, limit, skip, count, study, id, uuid, name, disorders, variants, genes, regions, categories, tags, deleted, status, creationDate, modificationDate, acl, release, snapshot \cr + create \tab /{apiVersion}/panels/create \tab include, exclude, study, includeResult, body \cr + distinct \tab /{apiVersion}/panels/distinct \tab study, id, uuid, name, internalStatus, disorders, variants, genes, source, regions, categories, tags, deleted, status, creationDate, modificationDate, acl, release, snapshot, field\link{*} \cr + importPanels \tab /{apiVersion}/panels/import \tab study, jobId, jobDependsOn, jobDescription, jobTags, jobScheduledStartTime, jobPriority, jobDryRun, 
body \cr + search \tab /{apiVersion}/panels/search \tab include, exclude, limit, skip, count, study, id, uuid, name, internalStatus, disorders, variants, genes, source, regions, categories, tags, deleted, status, creationDate, modificationDate, acl, release, snapshot \cr acl \tab /{apiVersion}/panels/{panels}/acl \tab panels\link{*}, study, member, silent \cr delete \tab /{apiVersion}/panels/{panels}/delete \tab study, panels\link{*} \cr info \tab /{apiVersion}/panels/{panels}/info \tab include, exclude, panels\link{*}, study, version, deleted \cr - update \tab /{apiVersion}/panels/{panels}/update \tab include, exclude, study, panels\link{*}, incVersion, includeResult, body \cr + update \tab /{apiVersion}/panels/{panels}/update \tab include, exclude, study, panels\link{*}, includeResult, body \cr } } \section{Endpoint /{apiVersion}/panels/acl/{members}/update}{ @@ -102,6 +117,11 @@ Create a panel. Panel distinct method. } +\section{Endpoint /{apiVersion}/panels/import}{ + +Import panels. +} + \section{Endpoint /{apiVersion}/panels/search}{ Panel search. diff --git a/opencga-client/src/main/R/man/projectClient-OpencgaR-method.Rd b/opencga-client/src/main/R/man/projectClient-OpencgaR-method.Rd index 430336cf00..217ad69a75 100644 --- a/opencga-client/src/main/R/man/projectClient-OpencgaR-method.Rd +++ b/opencga-client/src/main/R/man/projectClient-OpencgaR-method.Rd @@ -4,23 +4,21 @@ \alias{projectClient,OpencgaR-method} \title{ProjectClient methods} \usage{ -\S4method{projectClient}{OpencgaR}(OpencgaR, projects, project, endpointName, params = NULL, ...) +\S4method{projectClient}{OpencgaR}(OpencgaR, project, projects, endpointName, params = NULL, ...) 
} \arguments{ -\item{projects}{Comma separated list of projects \link{user@}project up to a maximum of 100.} +\item{project}{Project \link{organization@}project where project can be either the ID or the alias.} -\item{project}{Project \link{user@}project where project can be either the ID or the alias.} +\item{projects}{Comma separated list of projects \link{organization@}project up to a maximum of 100.} -\item{owner}{Owner of the project.} +\item{organization}{Project organization.} -\item{id}{Project \link{user@}project where project can be either the ID or the alias.} +\item{id}{Project \link{organization@}project where project can be either the ID or the alias.} \item{name}{Project name.} \item{fqn}{Project fqn.} -\item{organization}{Project organization.} - \item{description}{Project description.} \item{study}{Study id.} @@ -33,20 +31,6 @@ \item{attributes}{Attributes.} -\item{default}{Calculate default stats.} - -\item{fileFields}{List of file fields separated by semicolons, e.g.: studies;type. For nested fields use >>, e.g.: studies>>biotype;type.} - -\item{individualFields}{List of individual fields separated by semicolons, e.g.: studies;type. For nested fields use >>, e.g.: studies>>biotype;type.} - -\item{familyFields}{List of family fields separated by semicolons, e.g.: studies;type. For nested fields use >>, e.g.: studies>>biotype;type.} - -\item{sampleFields}{List of sample fields separated by semicolons, e.g.: studies;type. For nested fields use >>, e.g.: studies>>biotype;type.} - -\item{cohortFields}{List of cohort fields separated by semicolons, e.g.: studies;type. For nested fields use >>, e.g.: studies>>biotype;type.} - -\item{jobFields}{List of job fields separated by semicolons, e.g.: studies;type. For nested fields use >>, e.g.: studies>>biotype;type.} - \item{limit}{Number of results to be returned.} \item{skip}{Number of results to skip.} @@ -64,8 +48,7 @@ This function implements the OpenCGA calls for managing Projects. 
The following table summarises the available \emph{actions} for this client:\tabular{llr}{ endpointName \tab Endpoint WS \tab parameters accepted \cr create \tab /{apiVersion}/projects/create \tab include, exclude, includeResult, body\link{*} \cr - search \tab /{apiVersion}/projects/search \tab include, exclude, limit, skip, owner, id, name, fqn, organization, description, study, creationDate, modificationDate, internalStatus, attributes \cr - aggregationStats \tab /{apiVersion}/projects/{projects}/aggregationStats \tab projects\link{*}, default, fileFields, individualFields, familyFields, sampleFields, cohortFields, jobFields \cr + search \tab /{apiVersion}/projects/search \tab include, exclude, limit, skip, organization, id, name, fqn, description, study, creationDate, modificationDate, internalStatus, attributes \cr info \tab /{apiVersion}/projects/{projects}/info \tab include, exclude, projects\link{*} \cr incRelease \tab /{apiVersion}/projects/{project}/incRelease \tab project\link{*} \cr studies \tab /{apiVersion}/projects/{project}/studies \tab include, exclude, limit, skip, project\link{*} \cr @@ -82,11 +65,6 @@ Create a new project. Search projects. } -\section{Endpoint /{apiVersion}/projects/{projects}/aggregationStats}{ - -Fetch catalog project stats. -} - \section{Endpoint /{apiVersion}/projects/{projects}/info}{ Fetch project information. diff --git a/opencga-client/src/main/R/man/sampleClient-OpencgaR-method.Rd b/opencga-client/src/main/R/man/sampleClient-OpencgaR-method.Rd index 8c8302651a..5c4f8b5ead 100644 --- a/opencga-client/src/main/R/man/sampleClient-OpencgaR-method.Rd +++ b/opencga-client/src/main/R/man/sampleClient-OpencgaR-method.Rd @@ -6,37 +6,23 @@ \usage{ \S4method{sampleClient}{OpencgaR}( OpencgaR, - samples, - members, annotationSet, + members, sample, + samples, endpointName, params = NULL, ... 
) } \arguments{ -\item{samples}{Comma separated list sample IDs or UUIDs up to a maximum of 100.} +\item{annotationSet}{AnnotationSet ID to be updated.} \item{members}{Comma separated list of user or group ids.} -\item{annotationSet}{AnnotationSet ID to be updated.} - \item{sample}{Sample ID.} -\item{source}{Source.} - -\item{creationYear}{Creation year.} - -\item{creationMonth}{Creation month (JANUARY, FEBRUARY...).} - -\item{creationDay}{Creation day.} - -\item{creationDayOfWeek}{Creation day of week (MONDAY, TUESDAY...).} - -\item{type}{Type.} - -\item{default}{Calculate default stats.} +\item{samples}{Comma separated list sample IDs or UUIDs up to a maximum of 100.} \item{variableSetId}{Variable set ID or name.} @@ -46,7 +32,7 @@ \item{annotationSetId}{Annotation set id. If not provided, variableSetId will be used.} -\item{field}{Field for which to obtain the distinct values.} +\item{field}{Comma separated list of fields for which to obtain the distinct values.} \item{file}{file.} @@ -58,7 +44,7 @@ \item{count}{Get the total number of results matching the query. Deactivated by default.} -\item{id}{Comma separated list sample IDs up to a maximum of 100.} +\item{id}{Comma separated list sample IDs up to a maximum of 100. Also admits basic regular expressions using the operator '~', i.e. '~{perl-regex}' e.g. '~value' for case sensitive, '~/value/i' for case insensitive search.} \item{uuid}{Comma separated list sample UUIDs up to a maximum of 100.} @@ -92,13 +78,13 @@ \item{collectionMethod}{Collection method.} -\item{phenotypes}{Comma separated list of phenotype ids or names.} +\item{phenotypes}{Comma separated list of phenotype ids or names. Also admits basic regular expressions using the operator '~', i.e. '~{perl-regex}' e.g. '~value' for case sensitive, '~/value/i' for case insensitive search.} \item{annotation}{Annotation filters. Example: age>30;gender=FEMALE. 
For more information, please visit http://docs.opencb.org/display/opencga/AnnotationSets+1.4.0.} \item{acl}{Filter entries for which a user has the provided permissions. Format: acl={user}:{permissions}. Example: acl=john:WRITE,WRITE_ANNOTATIONS will return all entries for which user john has both WRITE and WRITE_ANNOTATIONS permissions. Only study owners or administrators can query by this field. .} -\item{internalRgaStatus}{Index status of the sample for the Recessive Gene Analysis.} +\item{internalRgaStatus}{Index status of the sample for the Recessive Gene Analysis. Allowed values: \link{'NOT_INDEXED INDEXED INVALID_PERMISSIONS INVALID_METADATA INVALID'}} \item{release}{Release when it was created.} @@ -152,17 +138,15 @@ \item{exclude}{Fields excluded in the response, whole JSON path must be provided.} -\item{annotationSetsAction}{Action to be performed if the array of annotationSets is being updated. Allowed values: \link{'ADD', 'SET', 'REMOVE'}} +\item{annotationSetsAction}{Action to be performed if the array of annotationSets is being updated. Allowed values: \link{'ADD SET REMOVE'}} -\item{phenotypesAction}{Action to be performed if the array of phenotypes is being updated \link{SET, ADD, REMOVE}. Allowed values: \link{'ADD', 'SET', 'REMOVE'}} +\item{phenotypesAction}{Action to be performed if the array of phenotypes is being updated \link{SET, ADD, REMOVE}. 
Allowed values: \link{'ADD SET REMOVE'}} \item{includeResult}{Flag indicating to include the created or updated document result in the response.} -\item{study}{Study [\link{user@}project:]study where study and project can be either the ID or UUID.} +\item{study}{Study [\link{organization@}project:]study where study and project can be either the ID or UUID.} -\item{action}{Action to be performed: ADD to add new annotations; REPLACE to replace the value of an already existing annotation; SET to set the new list of annotations removing any possible old annotations; REMOVE to remove some annotations; RESET to set some annotations to the default value configured in the corresponding variables of the VariableSet if any. Allowed values: \link{'ADD', 'SET', 'REMOVE', 'RESET', 'REPLACE'}} - -\item{incVersion}{Create a new version of sample.} +\item{action}{Action to be performed: ADD to add new annotations; REPLACE to replace the value of an already existing annotation; SET to set the new list of annotations removing any possible old annotations; REMOVE to remove some annotations; RESET to set some annotations to the default value configured in the corresponding variables of the VariableSet if any. Allowed values: \link{'ADD SET REMOVE RESET REPLACE'}} \item{data}{Json containing the map of annotations when the action is ADD, SET or REPLACE, a json with only the key 'remove' containing the comma separated variables to be removed as a value when the action is REMOVE or a json with only the key 'reset' containing the comma separated variables that will be set to the default value when the action is RESET.} } @@ -171,7 +155,6 @@ This function implements the OpenCGA calls for managing Samples. 
The following table summarises the available \emph{actions} for this client:\tabular{llr}{ endpointName \tab Endpoint WS \tab parameters accepted \cr updateAcl \tab /{apiVersion}/samples/acl/{members}/update \tab study, members\link{*}, action\link{*}, body\link{*} \cr - aggregationStats \tab /{apiVersion}/samples/aggregationStats \tab study, source, creationYear, creationMonth, creationDay, creationDayOfWeek, status, type, phenotypes, release, version, somatic, annotation, default, field \cr loadAnnotationSets \tab /{apiVersion}/samples/annotationSets/load \tab study, variableSetId\link{*}, path\link{*}, parents, annotationSetId, body \cr create \tab /{apiVersion}/samples/create \tab include, exclude, study, includeResult, body\link{*} \cr distinct \tab /{apiVersion}/samples/distinct \tab study, id, uuid, somatic, individualId, fileIds, cohortIds, creationDate, modificationDate, internalStatus, status, processingProduct, processingPreparationMethod, processingExtractionMethod, processingLabSampleId, collectionFrom, collectionType, collectionMethod, phenotypes, annotation, acl, internalRgaStatus, release, snapshot, deleted, statsId, statsVariantCount, statsChromosomeCount, statsTypeCount, statsGenotypeCount, statsTiTvRatio, statsQualityAvg, statsQualityStdDev, statsHeterozygosityRate, statsDepthCount, statsBiotypeCount, statsClinicalSignificanceCount, statsConsequenceTypeCount, field\link{*} \cr @@ -180,8 +163,8 @@ The following table summarises the available \emph{actions} for this client:\tab acl \tab /{apiVersion}/samples/{samples}/acl \tab samples\link{*}, study, member, silent \cr delete \tab /{apiVersion}/samples/{samples}/delete \tab force, emptyFilesAction, deleteEmptyCohorts, study, samples\link{*} \cr info \tab /{apiVersion}/samples/{samples}/info \tab include, exclude, includeIndividual, flattenAnnotations, samples\link{*}, study, version, deleted \cr - update \tab /{apiVersion}/samples/{samples}/update \tab include, exclude, samples\link{*}, study, 
incVersion, annotationSetsAction, phenotypesAction, includeResult, body \cr - updateAnnotationSetsAnnotations \tab /{apiVersion}/samples/{sample}/annotationSets/{annotationSet}/annotations/update \tab sample\link{*}, study, annotationSet\link{*}, action, incVersion, body \cr + update \tab /{apiVersion}/samples/{samples}/update \tab include, exclude, samples\link{*}, study, annotationSetsAction, phenotypesAction, includeResult, body \cr + updateAnnotationSetsAnnotations \tab /{apiVersion}/samples/{sample}/annotationSets/{annotationSet}/annotations/update \tab sample\link{*}, study, annotationSet\link{*}, action, body \cr } } \section{Endpoint /{apiVersion}/samples/acl/{members}/update}{ @@ -189,11 +172,6 @@ The following table summarises the available \emph{actions} for this client:\tab Update the set of permissions granted for the member. } -\section{Endpoint /{apiVersion}/samples/aggregationStats}{ - -Fetch catalog sample stats. -} - \section{Endpoint /{apiVersion}/samples/annotationSets/load}{ Load annotation sets from a TSV file. diff --git a/opencga-client/src/main/R/man/studyClient-OpencgaR-method.Rd b/opencga-client/src/main/R/man/studyClient-OpencgaR-method.Rd index ff2a333445..e64e72ed5a 100644 --- a/opencga-client/src/main/R/man/studyClient-OpencgaR-method.Rd +++ b/opencga-client/src/main/R/man/studyClient-OpencgaR-method.Rd @@ -6,31 +6,34 @@ \usage{ \S4method{studyClient}{OpencgaR}( OpencgaR, - members, - study, - variableSet, group, + id, + members, studies, + study, templateId, + variableSet, endpointName, params = NULL, ... ) } \arguments{ -\item{members}{Comma separated list of user or group ids.} +\item{group}{Group name.} -\item{study}{Study [\link{user@}project:]study where study and project can be either the ID or UUID.} +\item{id}{Id of the variableSet to be retrieved. 
If no id is passed, it will show all the variableSets of the study.} -\item{variableSet}{VariableSet id of the VariableSet to be updated.} +\item{members}{Comma separated list of user or group ids.} -\item{group}{Group name.} +\item{studies}{Comma separated list of Studies [\link{organization@}project:]study where study and project can be either the ID or UUID up to a maximum of 100.} -\item{studies}{Comma separated list of Studies [\link{user@}project:]study where study and project can be either the ID or UUID up to a maximum of 100.} +\item{study}{Study [\link{organization@}project:]study where study and project can be either the ID or UUID.} \item{templateId}{Template id.} -\item{project}{Project \link{user@}project where project can be either the ID or the alias.} +\item{variableSet}{VariableSet id of the VariableSet to be updated.} + +\item{project}{Project \link{organization@}project where project can be either the ID or the alias.} \item{name}{Study name.} @@ -38,10 +41,6 @@ \item{fqn}{Study full qualified name.} -\item{creationDate}{Creation date. Format: yyyyMMddHHmmss. Examples: >2018, 2017-2018, <201805.} - -\item{modificationDate}{Modification date. Format: yyyyMMddHHmmss. Examples: >2018, 2017-2018, <201805.} - \item{internalStatus}{Filter by internal status.} \item{attributes}{Attributes.} @@ -50,20 +49,6 @@ \item{member}{User or group id.} -\item{default}{Calculate default stats.} - -\item{fileFields}{List of file fields separated by semicolons, e.g.: studies;type. For nested fields use >>, e.g.: studies>>biotype;type.} - -\item{individualFields}{List of individual fields separated by semicolons, e.g.: studies;type. For nested fields use >>, e.g.: studies>>biotype;type.} - -\item{familyFields}{List of family fields separated by semicolons, e.g.: studies;type. For nested fields use >>, e.g.: studies>>biotype;type.} - -\item{sampleFields}{List of sample fields separated by semicolons, e.g.: studies;type. 
For nested fields use >>, e.g.: studies>>biotype;type.} - -\item{cohortFields}{List of cohort fields separated by semicolons, e.g.: studies;type. For nested fields use >>, e.g.: studies>>biotype;type.} - -\item{jobFields}{List of job fields separated by semicolons, e.g.: studies;type. For nested fields use >>, e.g.: studies>>biotype;type.} - \item{limit}{Number of results to be returned.} \item{skip}{Number of results to skip.} @@ -72,21 +57,35 @@ \item{operationId}{Audit operation UUID.} -\item{userId}{User ID.} - -\item{resource}{Resource involved.} +\item{resource}{Resource involved. Allowed values: \link{'AUDIT NOTE ORGANIZATION USER PROJECT STUDY FILE SAMPLE JOB INDIVIDUAL COHORT DISEASE_PANEL FAMILY CLINICAL_ANALYSIS INTERPRETATION VARIANT ALIGNMENT CLINICAL EXPRESSION RGA FUNCTIONAL'}} \item{resourceId}{Resource ID.} \item{resourceUuid}{resource UUID.} -\item{status}{Filter by status.} +\item{status}{Filter by status. Allowed values: \link{'SUCCESS ERROR'}} \item{date}{Date. Format: yyyyMMddHHmmss. Examples: >2018, 2017-2018, <201805.} \item{silent}{Boolean to retrieve all possible entries that are queried for, false to raise an exception whenever one of the entries looked for cannot be shown for whichever reason.} -\item{entity}{Entity where the permission rules should be applied to.} +\item{creationDate}{Creation date. Format: yyyyMMddHHmmss. Examples: >2018, 2017-2018, <201805.} + +\item{modificationDate}{Modification date. Format: yyyyMMddHHmmss. Examples: >2018, 2017-2018, <201805.} + +\item{uuid}{Unique 32-character identifier assigned automatically by OpenCGA.} + +\item{userId}{User that wrote that Note.} + +\item{tags}{Note tags.} + +\item{visibility}{Visibility of the Note.} + +\item{version}{Autoincremental version assigned to the registered entry. By default, updates does not create new versions. 
To enable versioning, users must set the \code{incVersion} flag from the /update web service when updating the document.} + +\item{tagsAction}{Action to be performed if the array of tags is being updated. Allowed values: \link{'ADD SET REMOVE'}} + +\item{entity}{Entity where the permission rules should be applied to. Allowed values: \link{'SAMPLES FILES COHORTS INDIVIDUALS FAMILIES JOBS CLINICAL_ANALYSES DISEASE_PANELS'}} \item{jobId}{Job ID. It must be a unique string within the study. An ID will be autogenerated automatically if not provided.} @@ -96,6 +95,12 @@ \item{jobTags}{Job tags.} +\item{jobScheduledStartTime}{Time when the job is scheduled to start.} + +\item{jobPriority}{Priority of the job.} + +\item{jobDryRun}{Flag indicating that the job will be executed in dry-run mode. In this mode, OpenCGA will validate that all parameters and prerequisites are correctly set for successful execution, but the job will not actually run.} + \item{file}{File to upload.} \item{include}{Fields included in the response, whole JSON path must be provided.} @@ -104,9 +109,7 @@ \item{includeResult}{Flag indicating to include the created or updated document result in the response.} -\item{id}{Id of the variableSet to be retrieved. If no id is passed, it will show all the variableSets of the study.} - -\item{action}{Action to be performed: ADD or REMOVE a variable. Allowed values: \link{'ADD', 'REMOVE'}} +\item{action}{Action to be performed: ADD or REMOVE a variable. Allowed values: \link{'ADD REMOVE'}} \item{data}{JSON containing the variable to be added or removed. 
For removing, only the variable id will be needed.} } @@ -118,17 +121,20 @@ The following table summarises the available \emph{actions} for this client:\tab create \tab /{apiVersion}/studies/create \tab include, exclude, project, includeResult, body\link{*} \cr search \tab /{apiVersion}/studies/search \tab include, exclude, limit, skip, count, project\link{*}, name, id, alias, fqn, creationDate, modificationDate, internalStatus, status, attributes, release \cr acl \tab /{apiVersion}/studies/{studies}/acl \tab studies\link{*}, member, silent \cr - aggregationStats \tab /{apiVersion}/studies/{studies}/aggregationStats \tab studies\link{*}, default, fileFields, individualFields, familyFields, sampleFields, cohortFields, jobFields \cr info \tab /{apiVersion}/studies/{studies}/info \tab include, exclude, studies\link{*} \cr searchAudit \tab /{apiVersion}/studies/{study}/audit/search \tab include, exclude, limit, skip, count, study\link{*}, operationId, userId, action, resource, resourceId, resourceUuid, status, date \cr groups \tab /{apiVersion}/studies/{study}/groups \tab study\link{*}, id, silent \cr updateGroups \tab /{apiVersion}/studies/{study}/groups/update \tab study\link{*}, action, body\link{*} \cr updateGroupsUsers \tab /{apiVersion}/studies/{study}/groups/{group}/users/update \tab study\link{*}, group\link{*}, action, body\link{*} \cr + createNotes \tab /{apiVersion}/studies/{study}/notes/create \tab include, exclude, study\link{*}, includeResult, body\link{*} \cr + searchNotes \tab /{apiVersion}/studies/{study}/notes/search \tab include, exclude, study\link{*}, creationDate, modificationDate, id, uuid, userId, tags, visibility, version \cr + deleteNotes \tab /{apiVersion}/studies/{study}/notes/{id}/delete \tab study\link{*}, id\link{*}, includeResult \cr + updateNotes \tab /{apiVersion}/studies/{study}/notes/{id}/update \tab include, exclude, study\link{*}, id\link{*}, tagsAction, includeResult, body\link{*} \cr permissionRules \tab 
/{apiVersion}/studies/{study}/permissionRules \tab study\link{*}, entity\link{*} \cr updatePermissionRules \tab /{apiVersion}/studies/{study}/permissionRules/update \tab study\link{*}, entity\link{*}, action, body\link{*} \cr - runTemplates \tab /{apiVersion}/studies/{study}/templates/run \tab study\link{*}, jobId, jobDependsOn, jobDescription, jobTags, body\link{*} \cr + runTemplates \tab /{apiVersion}/studies/{study}/templates/run \tab study\link{*}, jobId, jobDependsOn, jobDescription, jobTags, jobScheduledStartTime, jobPriority, jobDryRun, body\link{*} \cr uploadTemplates \tab /{apiVersion}/studies/{study}/templates/upload \tab file, study\link{*} \cr - deleteTemplates \tab /{apiVersion}/studies/{study}/templates/{templateId}/delete \tab study, templateId\link{*} \cr + deleteTemplates \tab /{apiVersion}/studies/{study}/templates/{templateId}/delete \tab study\link{*}, templateId\link{*} \cr update \tab /{apiVersion}/studies/{study}/update \tab include, exclude, study\link{*}, includeResult, body\link{*} \cr variableSets \tab /{apiVersion}/studies/{study}/variableSets \tab study\link{*}, id \cr updateVariableSets \tab /{apiVersion}/studies/{study}/variableSets/update \tab study\link{*}, action, body\link{*} \cr @@ -155,11 +161,6 @@ Search studies. Return the acl of the study. If member is provided, it will only return the acl for the member. } -\section{Endpoint /{apiVersion}/studies/{studies}/aggregationStats}{ - -Fetch catalog study stats. -} - \section{Endpoint /{apiVersion}/studies/{studies}/info}{ Fetch study information. @@ -185,6 +186,26 @@ Add or remove a group. Add, set or remove users from an existing group. } +\section{Endpoint /{apiVersion}/studies/{study}/notes/create}{ + +Create a new note. +} + +\section{Endpoint /{apiVersion}/studies/{study}/notes/search}{ + +Search for notes of scope STUDY. +} + +\section{Endpoint /{apiVersion}/studies/{study}/notes/{id}/delete}{ + +Delete note. 
+} + +\section{Endpoint /{apiVersion}/studies/{study}/notes/{id}/update}{ + +Update a note. +} + \section{Endpoint /{apiVersion}/studies/{study}/permissionRules}{ Fetch permission rules. diff --git a/opencga-client/src/main/R/man/userClient-OpencgaR-method.Rd b/opencga-client/src/main/R/man/userClient-OpencgaR-method.Rd index dc0af0ba76..119962a14a 100644 --- a/opencga-client/src/main/R/man/userClient-OpencgaR-method.Rd +++ b/opencga-client/src/main/R/man/userClient-OpencgaR-method.Rd @@ -13,15 +13,21 @@ \item{users}{Comma separated list of user IDs.} -\item{name}{Unique name (typically the name of the application).} +\item{limit}{Number of results to be returned.} -\item{id}{Filter id. If provided, it will only fetch the specified filter.} +\item{skip}{Number of results to skip.} -\item{action}{Action to be performed: ADD or REMOVE a group. Allowed values: \link{'ADD', 'REMOVE'}} +\item{count}{Get the total number of results matching the query. Deactivated by default.} -\item{limit}{Number of results to be returned.} +\item{authenticationId}{Authentication origin ID.} -\item{skip}{Number of results to skip.} +\item{organization}{Organization id.} + +\item{name}{Unique name (typically the name of the application).} + +\item{id}{Filter id. If provided, it will only fetch the specified filter.} + +\item{action}{Action to be performed: ADD or REMOVE a group. Allowed values: \link{'ADD REMOVE'}} \item{include}{Fields included in the response, whole JSON path must be provided.} @@ -35,20 +41,26 @@ This function implements the OpenCGA calls for managing Users. 
The following table summarises the available \emph{actions} for this client:\tabular{llr}{ endpointName \tab Endpoint WS \tab parameters accepted \cr + anonymous \tab /{apiVersion}/users/anonymous \tab organization\link{*} \cr create \tab /{apiVersion}/users/create \tab body\link{*} \cr login \tab /{apiVersion}/users/login \tab body \cr password \tab /{apiVersion}/users/password \tab body\link{*} \cr - info \tab /{apiVersion}/users/{users}/info \tab include, exclude, users\link{*} \cr + search \tab /{apiVersion}/users/search \tab include, exclude, limit, skip, count, organization, id, authenticationId \cr + info \tab /{apiVersion}/users/{users}/info \tab include, exclude, organization, users\link{*} \cr configs \tab /{apiVersion}/users/{user}/configs \tab user\link{*}, name \cr updateConfigs \tab /{apiVersion}/users/{user}/configs/update \tab user\link{*}, action, body\link{*} \cr filters \tab /{apiVersion}/users/{user}/filters \tab user\link{*}, id \cr updateFilters \tab /{apiVersion}/users/{user}/filters/update \tab user\link{*}, action, body\link{*} \cr updateFilter \tab /{apiVersion}/users/{user}/filters/{filterId}/update \tab user\link{*}, filterId\link{*}, body\link{*} \cr resetPassword \tab /{apiVersion}/users/{user}/password/reset \tab user\link{*} \cr - projects \tab /{apiVersion}/users/{user}/projects \tab include, exclude, limit, skip, user\link{*} \cr update \tab /{apiVersion}/users/{user}/update \tab include, exclude, user\link{*}, includeResult, body\link{*} \cr } } +\section{Endpoint /{apiVersion}/users/anonymous}{ + +Get an anonymous token to gain access to the system. +} + \section{Endpoint /{apiVersion}/users/create}{ Create a new user. @@ -64,6 +76,11 @@ Get identified and gain access to the system. Change the password of a user. } +\section{Endpoint /{apiVersion}/users/search}{ + +User search method. +} + \section{Endpoint /{apiVersion}/users/{users}/info}{ Return the user information including its projects and studies. 
@@ -99,11 +116,6 @@ Update a custom filter. Reset password. } -\section{Endpoint /{apiVersion}/users/{user}/projects}{ - -Retrieve the projects of the user. -} - \section{Endpoint /{apiVersion}/users/{user}/update}{ Update some user attributes. diff --git a/opencga-client/src/main/R/man/variantClient-OpencgaR-method.Rd b/opencga-client/src/main/R/man/variantClient-OpencgaR-method.Rd index 2dbe583e73..d9eb7d4dfb 100644 --- a/opencga-client/src/main/R/man/variantClient-OpencgaR-method.Rd +++ b/opencga-client/src/main/R/man/variantClient-OpencgaR-method.Rd @@ -11,9 +11,9 @@ \item{clinicalAnalysis}{Clinical analysis id.} -\item{modeOfInheritance}{Mode of inheritance.} +\item{modeOfInheritance}{Mode of inheritance. Allowed values: \link{'AUTOSOMAL_DOMINANT AUTOSOMAL_RECESSIVE X_LINKED_DOMINANT X_LINKED_RECESSIVE Y_LINKED MITOCHONDRIAL DE_NOVO MENDELIAN_ERROR COMPOUND_HETEROZYGOUS UNKNOWN'}} -\item{penetrance}{Penetrance.} +\item{penetrance}{Penetrance. Allowed values: \link{'COMPLETE INCOMPLETE UNKNOWN'}} \item{disorder}{Disorder id.} @@ -21,7 +21,9 @@ \item{job}{Job ID or UUID.} -\item{fitting}{Compute the relative proportions of the different mutational signatures demonstrated by the tumour.} +\item{msId}{Signature ID.} + +\item{msDescription}{Signature description.} \item{include}{Fields included in the response, whole JSON path must be provided.} @@ -37,7 +39,7 @@ \item{approximateCountSamplingSize}{Sampling size to get the approximate count. Larger values increase accuracy but also increase execution time.} -\item{id}{List of IDs, these can be rs IDs (dbSNP) or variants in the format chrom:start:ref:alt, e.g. rs116600158,19:7177679:C:T.} +\item{id}{List of variant IDs in the format chrom:start:ref:alt, e.g. 19:7177679:C:T.} \item{reference}{Reference allele.} @@ -89,15 +91,15 @@ \item{gene}{List of genes, most gene IDs are accepted (HGNC, Ensembl gene, ...). 
This is an alias to 'xref' parameter.} -\item{xref}{List of any external reference, these can be genes, proteins or variants. Accepted IDs include HGNC, Ensembl genes, dbSNP, ClinVar, HPO, Cosmic, ...} +\item{xref}{List of any external reference, these can be genes, proteins or variants. Accepted IDs include HGNC, Ensembl genes, dbSNP, ClinVar, HPO, Cosmic, HGVS ...} \item{proteinSubstitution}{Protein substitution scores include SIFT and PolyPhen. You can query using the score {protein_score}[<|>|<=|>=]{number} or the description {protein_score}[~=|=]{description} e.g. polyphen>0.1,sift=tolerant.} \item{conservation}{Filter by conservation score: {conservation_score}[<|>|<=|>=]{number} e.g. phastCons>0.5,phylop<0.1,gerp>0.1.} -\item{populationFrequencyRef}{Reference Population Frequency: {study}:{population}[<|>|<=|>=]{number}. e.g. 1kG_phase3:ALL<0.01.} +\item{populationFrequencyRef}{Reference Population Frequency: {study}:{population}[<|>|<=|>=]{number}. e.g. 1000G:ALL<0.01.} -\item{populationFrequencyMaf}{Population minor allele frequency: {study}:{population}[<|>|<=|>=]{number}. e.g. 1kG_phase3:ALL<0.01.} +\item{populationFrequencyMaf}{Population minor allele frequency: {study}:{population}[<|>|<=|>=]{number}. e.g. 1000G:ALL<0.01.} \item{geneTraitId}{List of gene trait association id. e.g. "umls:C0007222" , "OMIM:269600".} @@ -135,7 +137,7 @@ \item{familyDisorder}{Specify the disorder to use for the family segregation.} -\item{familySegregation}{Filter by segregation mode from a given family. Accepted values: \link{ autosomalDominant, autosomalRecessive, XLinkedDominant, XLinkedRecessive, YLinked, mitochondrial, deNovo, mendelianError, compoundHeterozygous }.} +\item{familySegregation}{Filter by segregation mode from a given family. 
Accepted values: \link{ autosomalDominant, autosomalRecessive, XLinkedDominant, XLinkedRecessive, YLinked, mitochondrial, deNovo, deNovoStrict, mendelianError, compoundHeterozygous }.} \item{familyMembers}{Sub set of the members of a given family.} @@ -167,7 +169,7 @@ \item{transcriptFlag}{List of transcript flags. e.g. canonical, CCDS, basic, LRG, MANE Select, MANE Plus Clinical, EGLH_HaemOnc, TSO500.} -\item{populationFrequencyAlt}{Alternate Population Frequency: {study}:{population}[<|>|<=|>=]{number}. e.g. 1kG_phase3:ALL<0.01.} +\item{populationFrequencyAlt}{Alternate Population Frequency: {study}:{population}[<|>|<=|>=]{number}. e.g. 1000G:ALL<0.01.} \item{clinical}{Clinical source: clinvar, cosmic.} @@ -179,9 +181,9 @@ \item{sample}{Sample ID.} -\item{project}{Project \link{user@}project where project can be either the ID or the alias.} +\item{project}{Project \link{organization@}project where project can be either the ID or the alias.} -\item{study}{Study [\link{user@}project:]study where study and project can be either the ID or UUID.} +\item{study}{Study [\link{organization@}project:]study where study and project can be either the ID or UUID.} \item{jobId}{Job ID. It must be a unique string within the study. An ID will be autogenerated automatically if not provided.} @@ -191,6 +193,12 @@ \item{jobTags}{Job tags.} +\item{jobScheduledStartTime}{Time when the job is scheduled to start.} + +\item{jobPriority}{Priority of the job.} + +\item{jobDryRun}{Flag indicating that the job will be executed in dry-run mode. 
In this mode, OpenCGA will validate that all parameters and prerequisites are correctly set for successful execution, but the job will not actually run.} + \item{data}{Variant stats params.} } \description{ @@ -203,37 +211,39 @@ The following table summarises the available \emph{actions} for this client:\tab runCircos \tab /{apiVersion}/analysis/variant/circos/run \tab study, body\link{*} \cr deleteCohortStats \tab /{apiVersion}/analysis/variant/cohort/stats/delete \tab study, cohort \cr infoCohortStats \tab /{apiVersion}/analysis/variant/cohort/stats/info \tab study, cohort\link{*} \cr - runCohortStats \tab /{apiVersion}/analysis/variant/cohort/stats/run \tab study, jobId, jobDescription, jobDependsOn, jobTags, body\link{*} \cr - runExport \tab /{apiVersion}/analysis/variant/export/run \tab include, exclude, project, study, jobId, jobDescription, jobDependsOn, jobTags, body\link{*} \cr + runCohortStats \tab /{apiVersion}/analysis/variant/cohort/stats/run \tab study, jobId, jobDescription, jobDependsOn, jobTags, jobScheduledStartTime, jobPriority, jobDryRun, body\link{*} \cr + runExomiser \tab /{apiVersion}/analysis/variant/exomiser/run \tab study, jobId, jobDependsOn, jobDescription, jobTags, jobScheduledStartTime, jobPriority, jobDryRun, body\link{*} \cr + runExport \tab /{apiVersion}/analysis/variant/export/run \tab include, exclude, project, study, jobId, jobDescription, jobDependsOn, jobTags, jobScheduledStartTime, jobPriority, jobDryRun, body\link{*} \cr genotypesFamily \tab /{apiVersion}/analysis/variant/family/genotypes \tab study, family, clinicalAnalysis, modeOfInheritance\link{*}, penetrance, disorder \cr - runFamilyQc \tab /{apiVersion}/analysis/variant/family/qc/run \tab study, jobId, jobDescription, jobDependsOn, jobTags, body\link{*} \cr - deleteFile \tab /{apiVersion}/analysis/variant/file/delete \tab jobId, jobDescription, jobDependsOn, jobTags, study, file, resume \cr - runGatk \tab /{apiVersion}/analysis/variant/gatk/run \tab study, jobId, 
jobDescription, jobDependsOn, jobTags, body\link{*} \cr - runGenomePlot \tab /{apiVersion}/analysis/variant/genomePlot/run \tab study, jobId, jobDescription, jobDependsOn, jobTags, body\link{*} \cr - runGwas \tab /{apiVersion}/analysis/variant/gwas/run \tab study, jobId, jobDescription, jobDependsOn, jobTags, body\link{*} \cr - runIndex \tab /{apiVersion}/analysis/variant/index/run \tab study, jobId, jobDependsOn, jobDescription, jobTags, body\link{*} \cr - runIndividualQc \tab /{apiVersion}/analysis/variant/individual/qc/run \tab study, jobId, jobDescription, jobDependsOn, jobTags, body\link{*} \cr - runInferredSex \tab /{apiVersion}/analysis/variant/inferredSex/run \tab study, jobId, jobDescription, jobDependsOn, jobTags, body\link{*} \cr + runFamilyQc \tab /{apiVersion}/analysis/variant/family/qc/run \tab study, jobId, jobDescription, jobDependsOn, jobTags, jobScheduledStartTime, jobPriority, jobDryRun, body\link{*} \cr + deleteFile \tab /{apiVersion}/analysis/variant/file/delete \tab jobId, jobDescription, jobDependsOn, jobTags, jobScheduledStartTime, jobPriority, jobDryRun, study, file, resume \cr + runGatk \tab /{apiVersion}/analysis/variant/gatk/run \tab study, jobId, jobDescription, jobDependsOn, jobTags, jobScheduledStartTime, jobPriority, jobDryRun, body\link{*} \cr + runGenomePlot \tab /{apiVersion}/analysis/variant/genomePlot/run \tab study, jobId, jobDescription, jobDependsOn, jobTags, jobScheduledStartTime, jobPriority, jobDryRun, body\link{*} \cr + runGwas \tab /{apiVersion}/analysis/variant/gwas/run \tab study, jobId, jobDescription, jobDependsOn, jobTags, jobScheduledStartTime, jobPriority, jobDryRun, body\link{*} \cr + runHrDetect \tab /{apiVersion}/analysis/variant/hrDetect/run \tab study, jobId, jobDescription, jobDependsOn, jobTags, jobScheduledStartTime, jobPriority, jobDryRun, body\link{*} \cr + runIndex \tab /{apiVersion}/analysis/variant/index/run \tab study, jobId, jobDependsOn, jobDescription, jobTags, jobScheduledStartTime, jobPriority, 
jobDryRun, body\link{*} \cr + runIndividualQc \tab /{apiVersion}/analysis/variant/individual/qc/run \tab study, jobId, jobDescription, jobDependsOn, jobTags, jobScheduledStartTime, jobPriority, jobDryRun, body\link{*} \cr + runInferredSex \tab /{apiVersion}/analysis/variant/inferredSex/run \tab study, jobId, jobDescription, jobDependsOn, jobTags, jobScheduledStartTime, jobPriority, jobDryRun, body\link{*} \cr queryKnockoutGene \tab /{apiVersion}/analysis/variant/knockout/gene/query \tab limit, skip, study, job \cr queryKnockoutIndividual \tab /{apiVersion}/analysis/variant/knockout/individual/query \tab limit, skip, study, job \cr - runKnockout \tab /{apiVersion}/analysis/variant/knockout/run \tab study, jobId, jobDescription, jobDependsOn, jobTags, body\link{*} \cr - runMendelianError \tab /{apiVersion}/analysis/variant/mendelianError/run \tab study, jobId, jobDescription, jobDependsOn, jobTags, body\link{*} \cr + runKnockout \tab /{apiVersion}/analysis/variant/knockout/run \tab study, jobId, jobDescription, jobDependsOn, jobTags, jobScheduledStartTime, jobPriority, jobDryRun, body\link{*} \cr + runMendelianError \tab /{apiVersion}/analysis/variant/mendelianError/run \tab study, jobId, jobDescription, jobDependsOn, jobTags, jobScheduledStartTime, jobPriority, jobDryRun, body\link{*} \cr metadata \tab /{apiVersion}/analysis/variant/metadata \tab project, study, file, sample, includeStudy, includeFile, includeSample, include, exclude \cr - queryMutationalSignature \tab /{apiVersion}/analysis/variant/mutationalSignature/query \tab study, sample, ct, biotype, fileData, filter, qual, region, gene, panel, panelModeOfInheritance, panelConfidence, panelFeatureType, panelRoleInCancer, panelIntersection, fitting \cr - runMutationalSignature \tab /{apiVersion}/analysis/variant/mutationalSignature/run \tab study, jobId, jobDescription, jobDependsOn, jobTags, body\link{*} \cr - runPlink \tab /{apiVersion}/analysis/variant/plink/run \tab study, jobId, jobDescription, 
jobDependsOn, jobTags, body\link{*} \cr + queryMutationalSignature \tab /{apiVersion}/analysis/variant/mutationalSignature/query \tab study, sample, type, ct, biotype, fileData, filter, qual, region, gene, panel, panelModeOfInheritance, panelConfidence, panelFeatureType, panelRoleInCancer, panelIntersection, msId, msDescription \cr + runMutationalSignature \tab /{apiVersion}/analysis/variant/mutationalSignature/run \tab study, jobId, jobDescription, jobDependsOn, jobTags, jobScheduledStartTime, jobPriority, jobDryRun, body\link{*} \cr + runPlink \tab /{apiVersion}/analysis/variant/plink/run \tab study, jobId, jobDescription, jobDependsOn, jobTags, jobScheduledStartTime, jobPriority, jobDryRun, body\link{*} \cr query \tab /{apiVersion}/analysis/variant/query \tab include, exclude, limit, skip, count, sort, summary, approximateCount, approximateCountSamplingSize, savedFilter, id, region, type, reference, alternate, project, study, file, filter, qual, fileData, sample, genotype, sampleData, sampleAnnotation, sampleMetadata, unknownGenotype, sampleLimit, sampleSkip, cohort, cohortStatsRef, cohortStatsAlt, cohortStatsMaf, cohortStatsMgf, cohortStatsPass, missingAlleles, missingGenotypes, score, family, familyDisorder, familySegregation, familyMembers, familyProband, includeStudy, includeFile, includeSample, includeSampleData, includeGenotype, includeSampleId, annotationExists, gene, ct, xref, biotype, proteinSubstitution, conservation, populationFrequencyAlt, populationFrequencyRef, populationFrequencyMaf, transcriptFlag, geneTraitId, go, expression, proteinKeyword, drug, functionalScore, clinical, clinicalSignificance, clinicalConfirmedStatus, customAnnotation, panel, panelModeOfInheritance, panelConfidence, panelRoleInCancer, panelFeatureType, panelIntersection, trait \cr - runRelatedness \tab /{apiVersion}/analysis/variant/relatedness/run \tab study, jobId, jobDescription, jobDependsOn, jobTags, body\link{*} \cr - runRvtests \tab 
/{apiVersion}/analysis/variant/rvtests/run \tab study, jobId, jobDescription, jobDependsOn, jobTags, body\link{*} \cr + runRelatedness \tab /{apiVersion}/analysis/variant/relatedness/run \tab study, jobId, jobDescription, jobDependsOn, jobTags, jobScheduledStartTime, jobPriority, jobDryRun, body\link{*} \cr + runRvtests \tab /{apiVersion}/analysis/variant/rvtests/run \tab study, jobId, jobDescription, jobDependsOn, jobTags, jobScheduledStartTime, jobPriority, jobDryRun, body\link{*} \cr aggregationStatsSample \tab /{apiVersion}/analysis/variant/sample/aggregationStats \tab savedFilter, region, type, project, study, file, filter, sample, genotype, sampleAnnotation, family, familyDisorder, familySegregation, familyMembers, familyProband, ct, biotype, populationFrequencyAlt, clinical, clinicalSignificance, clinicalConfirmedStatus, field \cr - runSampleEligibility \tab /{apiVersion}/analysis/variant/sample/eligibility/run \tab study, jobId, jobDescription, jobDependsOn, jobTags, body\link{*} \cr - runSampleQc \tab /{apiVersion}/analysis/variant/sample/qc/run \tab study, jobId, jobDescription, jobDependsOn, jobTags, body\link{*} \cr + runSampleEligibility \tab /{apiVersion}/analysis/variant/sample/eligibility/run \tab study, jobId, jobDescription, jobDependsOn, jobTags, jobScheduledStartTime, jobPriority, jobDryRun, body\link{*} \cr + runSampleQc \tab /{apiVersion}/analysis/variant/sample/qc/run \tab study, jobId, jobDescription, jobDependsOn, jobTags, jobScheduledStartTime, jobPriority, jobDryRun, body\link{*} \cr querySample \tab /{apiVersion}/analysis/variant/sample/query \tab limit, skip, variant, study, genotype \cr - runSample \tab /{apiVersion}/analysis/variant/sample/run \tab study, jobId, jobDescription, jobDependsOn, jobTags, body\link{*} \cr + runSample \tab /{apiVersion}/analysis/variant/sample/run \tab study, jobId, jobDescription, jobDependsOn, jobTags, jobScheduledStartTime, jobPriority, jobDryRun, body\link{*} \cr querySampleStats \tab 
/{apiVersion}/analysis/variant/sample/stats/query \tab region, type, study, file, filter, sampleData, ct, biotype, transcriptFlag, populationFrequencyAlt, clinical, clinicalSignificance, clinicalConfirmedStatus, study, filterTranscript, sample\link{*} \cr - runSampleStats \tab /{apiVersion}/analysis/variant/sample/stats/run \tab study, jobId, jobDescription, jobDependsOn, jobTags, body\link{*} \cr - runStatsExport \tab /{apiVersion}/analysis/variant/stats/export/run \tab project, study, jobId, jobDescription, jobDependsOn, jobTags, body\link{*} \cr - runStats \tab /{apiVersion}/analysis/variant/stats/run \tab study, jobId, jobDescription, jobDependsOn, jobTags, body\link{*} \cr + runSampleStats \tab /{apiVersion}/analysis/variant/sample/stats/run \tab study, jobId, jobDescription, jobDependsOn, jobTags, jobScheduledStartTime, jobPriority, jobDryRun, body\link{*} \cr + runStatsExport \tab /{apiVersion}/analysis/variant/stats/export/run \tab project, study, jobId, jobDescription, jobDependsOn, jobTags, jobScheduledStartTime, jobPriority, jobDryRun, body\link{*} \cr + runStats \tab /{apiVersion}/analysis/variant/stats/run \tab study, jobId, jobDescription, jobDependsOn, jobTags, jobScheduledStartTime, jobPriority, jobDryRun, body\link{*} \cr } } \section{Endpoint /{apiVersion}/analysis/variant/aggregationStats}{ @@ -271,6 +281,11 @@ Read cohort variant stats from list of cohorts. Compute cohort variant stats for the selected list of samples. } +\section{Endpoint /{apiVersion}/analysis/variant/exomiser/run}{ + +The Exomiser is a Java program that finds potential disease-causing variants from whole-exome or whole-genome sequencing data. +} + \section{Endpoint /{apiVersion}/analysis/variant/export/run}{ Filter and export variants from the variant storage to a file. @@ -306,6 +321,11 @@ Generate a genome plot for a given sample. Run a Genome Wide Association Study between two cohorts. 
} +\section{Endpoint /{apiVersion}/analysis/variant/hrDetect/run}{ + +Run HRDetect analysis for a given somatic sample. +} + \section{Endpoint /{apiVersion}/analysis/variant/index/run}{ \link{DEPRECATED} Use operation/variant/index. diff --git a/opencga-client/src/main/R/vignettes/opencgaR.R b/opencga-client/src/main/R/vignettes/opencgaR.R index 82500484c5..8ae99ebfe8 100644 --- a/opencga-client/src/main/R/vignettes/opencgaR.R +++ b/opencga-client/src/main/R/vignettes/opencgaR.R @@ -1,225 +1,230 @@ ## ----install, eval=FALSE, echo=TRUE------------------------------------------- -# ## Install opencgaR -# # install.packages("opencgaR_2.2.0.tar.gz", repos=NULL, type="source") - -## ----message=FALSE, warning=FALSE, include=FALSE------------------------------ -library(opencgaR) -library(glue) -library(dplyr) -library(tidyr) - -## ----eval=TRUE, results='hide'------------------------------------------------ -## Initialise connection using a configuration in R list -conf <- list(version="v2", rest=list(host="https://ws.opencb.org/opencga-prod")) -con <- initOpencgaR(opencgaConfig=conf) - -## ----eval=FALSE, results='hide'----------------------------------------------- -# ## Initialise connection using a configuration file (in YAML or JSON format) -# # conf <- "/path/to/conf/client-configuration.yml" -# # con <- initOpencgaR(opencgaConfig=conf) - -## ---- results='hide'---------------------------------------------------------- -# Log in -con <- opencgaLogin(opencga = con, userid = "demouser", passwd = "demouser", - autoRenew = TRUE, verbose = FALSE, showToken = FALSE) - -## ---- results='hide'---------------------------------------------------------- -projects <- projectClient(OpencgaR = con, endpointName = "search") -getResponseResults(projects)[[1]][,c("id","name", "description")] - -## ---- results='hide'---------------------------------------------------------- -projects <- projectClient(OpencgaR=con, project="population", endpointName="studies") 
-getResponseResults(projects)[[1]][,c("id","name", "description")] - -## ---- results='hide'---------------------------------------------------------- -study <- "population:1000g" - -# Study name -study_result <- studyClient(OpencgaR = con, studies = study, endpointName = "info") -study_name <- getResponseResults(study_result)[[1]][,"name"] -# Number of samples in this study -count_samples <- sampleClient(OpencgaR = con, endpointName = "search", - params=list(study=study, count=TRUE, limit=0)) -num_samples <- getResponseNumMatches(count_samples) -# Number of individuals -count_individuals <- individualClient(OpencgaR = con, endpointName = "search", - params=list(study=study, count=TRUE, limit=0)) -num_individuals <- getResponseNumMatches(count_individuals) -# Number of clinical cases -count_cases <- clinicalClient(OpencgaR = con, endpointName = "search", - params=list(study=study, count=TRUE, limit=0)) -num_cases <- getResponseNumMatches(count_cases) -# Number of variants -count_variants <- variantClient(OpencgaR = con, endpointName = "query", - params=list(study=study, count=TRUE, limit=0)) -num_variants <- getResponseNumMatches(count_variants) - -to_print <- "Study name: {study_name} - - Number of samples: {num_samples} - - Number of individuals: {num_individuals} - - Number of cases: {num_cases} - - Number of variants: {num_variants}" -glue(to_print) - -## ---- results='hide'---------------------------------------------------------- -# Retrieve the first variant -variant_example <- variantClient(OpencgaR = con, endpointName = "query", - params=list(study=study, limit=1, type="SNV")) -variant_id <- getResponseResults(variant_example)[[1]][,"id"] -glue("Variant example: {variant_id}") - -## ----message=FALSE, warning=FALSE, results='hide'----------------------------- -# Retrieve the samples that have this variant -variant_result <- variantClient(OpencgaR = con, endpointName = "querySample", - params=list(study=study, variant=variant_id)) - -glue("Number of samples 
with this variant: - {getResponseAttributes(variant_result)$numTotalSamples}") -data_keys <- unlist(getResponseResults(variant_result)[[1]][, "studies"][[1]][, "sampleDataKeys"]) - -df <- getResponseResults(variant_result)[[1]][, "studies"][[1]][, "samples"][[1]] %>% - select(-fileIndex) %>% unnest_wider(data) -colnames(df) <- c("samples", data_keys) -df - -## ---- results='hide', message=FALSE, warning=FALSE, include=FALSE------------- -# Fetch the samples with a specific genotype -genotype <- "0/1,1/1" -variant_result <- variantClient(OpencgaR = con, endpointName = "querySample", - params=list(study=study, variant=variant_id, - genotype=genotype)) -glue("Number of samples with genotype {genotype} in this variant: - {getResponseAttributes(variant_result)$numTotalSamples}") - -## ---- results='hide'---------------------------------------------------------- -# Search for homozygous alternate samples -genotype <- "1/1" -variant_result <- variantClient(OpencgaR = con, endpointName = "querySample", - params=list(study=study, variant=variant_id, - genotype=genotype)) -glue("Number of samples with genotype {genotype} in this variant: - {getResponseAttributes(variant_result)$numTotalSamples}") - -## ---- results='hide'---------------------------------------------------------- -# Search for homozygous alternate samples -region <- "1:62273600-62273700" -variant_result <- variantClient(OpencgaR = con, endpointName = "query", - params=list(study=study, region=region, limit=100)) - -glue("Number of variants in region {region}: {getResponseNumResults(variant_result)}") -getResponseResults(variant_result)[[1]] %>% select(-names, -studies, -annotation) - -## ---- results='hide'---------------------------------------------------------- -## Filter by gene -genes <- "HCN3,MTOR" -variant_result <- variantClient(OpencgaR = con, endpointName = "query", - params=list(study=study, gene=genes, limit=10)) -getResponseResults(variant_result)[[1]] %>% select(-names, -studies, -annotation) - 
-## ---- results='hide'---------------------------------------------------------- -## Filter by xref -snp <- "rs147394986" -variant_result <- variantClient(OpencgaR = con, endpointName = "query", - params=list(study=study, xref=snp, limit=5)) -getResponseResults(variant_result)[[1]] %>% select(-names, -studies, -annotation) - -## ---- results='hide'---------------------------------------------------------- -## Filter by missense variants and stop_gained -ct <- "missense_variant,stop_gained" -variant_result <- variantClient(OpencgaR = con, endpointName = "query", - params=list(study=study, ct=ct, limit=5)) -glue("Number of missense and stop gained variants: {getResponseNumMatches(variant_result)}") -getResponseResults(variant_result)[[1]] %>% select(-names, -studies, -annotation) - -## ---- results='hide'---------------------------------------------------------- -## Filter by LoF (alias created containing 9 different CTs) -variant_result <- variantClient(OpencgaR = con, endpointName = "query", - params=list(study=study, ct="lof", limit=5)) -glue("Number of LoF variants: {getResponseNumMatches(variant_result)}") -getResponseResults(variant_result)[[1]] %>% select(-names, -studies, -annotation) - -## ---- results='hide'---------------------------------------------------------- -## Filter by population alternate frequency -population_frequency_alt <- '1kG_phase3:ALL<0.001' -variant_result <- variantClient(OpencgaR = con, endpointName = "query", - params=list(study=study, - populationFrequencyAlt=population_frequency_alt, - limit=5)) -glue("Number of variants matching the filter: {getResponseNumMatches(variant_result)}") +# ## Install opencgaR +# # install.packages("opencgaR_4.0.0.tar.gz", repos=NULL, type="source") + +## ----message=FALSE, eval=FALSE, warning=FALSE, include=FALSE------------------ +# library(opencgaR) +# library(glue) +# library(dplyr) +# library(tidyr) + +## ----eval=FALSE, results='hide'----------------------------------------------- +# ## Initialise 
connection using a configuration in R list +# conf <- list(version="v2", rest=list(host="https://test.app.zettagenomics.com/myenv/opencga/")) +# con <- initOpencgaR(opencgaConfig=conf) + +## ----eval=FALSE, results='hide'----------------------------------------------- +# ## Initialise connection using a configuration file (in YAML or JSON format) +# # conf <- "/path/to/conf/client-configuration.yml" +# # con <- initOpencgaR(opencgaConfig=conf) + +## ----eval=FALSE, results='hide'----------------------------------------------- +# # Log in +# con <- opencgaLogin(opencga = con, userid = "userID", passwd = "1234567890", +# organization = "myorg", autoRenew = TRUE, verbose = FALSE, +# showToken = FALSE) + +## ----eval=FALSE, results='hide'----------------------------------------------- +# projects <- projectClient(OpencgaR = con, endpointName = "search") +# getResponseResults(projects)[[1]][,c("id","name", "description")] + +## ----eval=FALSE, results='hide'----------------------------------------------- +# projects <- projectClient(OpencgaR=con, project="population", endpointName="studies") +# getResponseResults(projects)[[1]][,c("id","name", "description")] + +## ----eval=FALSE, results='hide'----------------------------------------------- +# # study <- "test@population:1000G" +# study <- "test@germline:platinum" +# +# # Study name +# study_result <- studyClient(OpencgaR = con, studies = study, endpointName = "info") +# study_name <- getResponseResults(study_result)[[1]][,"name"] +# # Number of samples in this study +# count_samples <- sampleClient(OpencgaR = con, endpointName = "search", +# params=list(study=study, count=TRUE, limit=0)) +# num_samples <- getResponseNumMatches(count_samples) +# # Number of individuals +# count_individuals <- individualClient(OpencgaR = con, endpointName = "search", +# params=list(study=study, count=TRUE, limit=0)) +# num_individuals <- getResponseNumMatches(count_individuals) +# # Number of clinical cases +# count_cases <- 
clinicalClient(OpencgaR = con, endpointName = "search", +# params=list(study=study, count=TRUE, limit=0)) +# num_cases <- getResponseNumMatches(count_cases) +# # Number of variants +# count_variants <- variantClient(OpencgaR = con, endpointName = "query", +# params=list(study=study, count=TRUE, limit=0)) +# num_variants <- getResponseNumMatches(count_variants) +# +# to_print <- "Study name: {study_name} +# - Number of samples: {num_samples} +# - Number of individuals: {num_individuals} +# - Number of cases: {num_cases} +# - Number of variants: {num_variants}" +# glue(to_print) + +## ----eval=FALSE, results='hide'----------------------------------------------- +# # Retrieve the first variant +# variant_example <- variantClient(OpencgaR = con, endpointName = "query", +# params=list(study=study, limit=1, type="SNV")) +# variant_id <- getResponseResults(variant_example)[[1]][,"id"] +# glue("Variant example: {variant_id}") + +## ----eval=FALSE, message=FALSE, warning=FALSE, results='hide'----------------- +# # Retrieve the samples that have this variant +# variant_result <- variantClient(OpencgaR = con, endpointName = "querySample", +# params=list(study=study, variant=variant_id)) +# +# glue("Number of samples with this variant: +# {getResponseAttributes(variant_result)$numTotalSamples}") +# data_keys <- unlist(getResponseResults(variant_result)[[1]][, "studies"][[1]][, "sampleDataKeys"]) +# +# df <- getResponseResults(variant_result)[[1]][, "studies"][[1]][, "samples"][[1]] %>% +# select(-fileIndex) %>% unnest_wider(data, names_sep = "") +# colnames(df) <- c("samples", data_keys) +# df + +## ----eval=FALSE, results='hide', message=FALSE, warning=FALSE, include=FALSE---- +# # Fetch the samples with a specific genotype +# genotype <- "0/1,1/1" +# variant_result <- variantClient(OpencgaR = con, endpointName = "querySample", +# params=list(study=study, variant=variant_id, +# genotype=genotype)) +# glue("Number of samples with genotype {genotype} in this variant: +# 
{getResponseAttributes(variant_result)$numTotalSamples}") + +## ----eval=FALSE, results='hide'----------------------------------------------- +# # Search for homozygous alternate samples +# genotype <- "1/1" +# variant_result <- variantClient(OpencgaR = con, endpointName = "querySample", +# params=list(study=study, variant=variant_id, +# genotype=genotype)) +# glue("Number of samples with genotype {genotype} in this variant: +# {getResponseAttributes(variant_result)$numTotalSamples}") + +## ----eval=FALSE, results='hide'----------------------------------------------- +# # Search for homozygous alternate samples +# region <- "1:62273600-62273700" +# region <- "11:119345500-119345600" +# variant_result <- variantClient(OpencgaR = con, endpointName = "query", +# params=list(study=study, region=region, limit=100)) +# +# glue("Number of variants in region {region}: {getResponseNumResults(variant_result)}") # getResponseResults(variant_result)[[1]] %>% select(-names, -studies, -annotation) -## ---- results='hide'---------------------------------------------------------- -## Filter by two population alternate frequency -## Remember to use commas for OR and semi-colon for AND -population_frequency_alt <- '1kG_phase3:ALL<0.001;GNOMAD_GENOMES:ALL<0.001' -variant_result <- variantClient(OpencgaR = con, endpointName = "query", - params=list(study=study, - populationFrequencyAlt=population_frequency_alt, - limit=5)) -glue("Number of variants matching the filter: {getResponseNumMatches(variant_result)}") +## ----eval=FALSE, results='hide'----------------------------------------------- +# ## Filter by gene +# genes <- "HCN3,MTOR" +# variant_result <- variantClient(OpencgaR = con, endpointName = "query", +# params=list(study=study, gene=genes, limit=10)) +# getResponseResults(variant_result)[[1]] %>% select(-names, -studies, -annotation) +## ----eval=FALSE, results='hide'----------------------------------------------- +# ## Filter by xref +# #snp <- "rs147394986" +# hgvs <- 
"NP_002747.2:p.Arg67Trp" +# variant_result <- variantClient(OpencgaR = con, endpointName = "query", +# params=list(study=study, xref=hgvs, limit=5)) # getResponseResults(variant_result)[[1]] %>% select(-names, -studies, -annotation) -## ---- results='hide'---------------------------------------------------------- -## Filter by population alternate frequency using a range of values -population_frequency_alt <- '1kG_phase3:ALL>0.001;1kG_phase3:ALL<0.005' -variant_result <- variantClient(OpencgaR = con, endpointName = "query", - params=list(study=study, - populationFrequencyAlt=population_frequency_alt, - limit=5)) -glue("Number of variants matching the filter: {getResponseNumMatches(variant_result)}") +## ----eval=FALSE, results='hide'----------------------------------------------- +# ## Filter by missense variants and stop_gained +# ct <- "missense_variant,stop_gained" +# variant_result <- variantClient(OpencgaR = con, endpointName = "query", +# params=list(study=study, ct=ct, limit=5)) +# glue("Number of missense and stop gained variants: {getResponseNumMatches(variant_result)}") +# getResponseResults(variant_result)[[1]] %>% select(-names, -studies, -annotation) +## ----eval=FALSE, results='hide'----------------------------------------------- +# ## Filter by LoF (alias created containing 9 different CTs) +# variant_result <- variantClient(OpencgaR = con, endpointName = "query", +# params=list(study=study, ct="lof", limit=5)) +# glue("Number of LoF variants: {getResponseNumMatches(variant_result)}") # getResponseResults(variant_result)[[1]] %>% select(-names, -studies, -annotation) -## ---- results='hide'---------------------------------------------------------- -## Filter by Consequence Type, Clinical Significance and population frequency -ct <- 'lof,missense_variant' -clinicalSignificance <- 'likely_pathogenic,pathogenic' -populationFrequencyAlt <- '1kG_phase3:ALL<0.01;GNOMAD_GENOMES:ALL<0.01' -variant_result <- variantClient(OpencgaR = con, endpointName = 
"query", - params=list(study=study, limit=5, ct=ct, - clinicalSignificance=clinicalSignificance, - populationFrequencyAlt=population_frequency_alt)) -glue("Number of variants matching the filter: {getResponseNumMatches(variant_result)}") +## ----eval=FALSE, results='hide'----------------------------------------------- +# ## Filter by population alternate frequency +# population_frequency_alt <- '1000G:ALL<0.001' +# variant_result <- variantClient(OpencgaR = con, endpointName = "query", +# params=list(study=study, +# populationFrequencyAlt=population_frequency_alt, +# limit=5)) +# glue("Number of variants matching the filter: {getResponseNumMatches(variant_result)}") +# # getResponseResults(variant_result)[[1]] %>% select(-names, -studies, -annotation) +## ----eval=FALSE, results='hide'----------------------------------------------- +# ## Filter by two population alternate frequency +# ## Remember to use commas for OR and semi-colon for AND +# population_frequency_alt <- '1000G:ALL<0.001;GNOMAD_GENOMES:ALL<0.001' +# variant_result <- variantClient(OpencgaR = con, endpointName = "query", +# params=list(study=study, +# populationFrequencyAlt=population_frequency_alt, +# limit=5)) +# glue("Number of variants matching the filter: {getResponseNumMatches(variant_result)}") +# # getResponseResults(variant_result)[[1]] %>% select(-names, -studies, -annotation) -## ---- results='hide'---------------------------------------------------------- -## Filter by cohort ALL frequency -cohort_stats <- paste0(study, ':ALL<0.001') -variant_result <- variantClient(OpencgaR = con, endpointName = "query", - params=list(study=study, cohortStatsAlt=cohort_stats, - limit=5)) -glue("Number of variants matching the filter: {getResponseNumMatches(variant_result)}") +## ----eval=FALSE, results='hide'----------------------------------------------- +# ## Filter by population alternate frequency using a range of values +# population_frequency_alt <- '1000G:ALL>0.001;1000G:ALL<0.005' +# 
variant_result <- variantClient(OpencgaR = con, endpointName = "query", +# params=list(study=study, +# populationFrequencyAlt=population_frequency_alt, +# limit=5)) +# glue("Number of variants matching the filter: {getResponseNumMatches(variant_result)}") +# +# # getResponseResults(variant_result)[[1]] %>% select(-names, -studies, -annotation) +## ----eval=FALSE, results='hide'----------------------------------------------- +# ## Filter by Consequence Type, Clinical Significance and population frequency +# ct <- 'lof,missense_variant' +# clinicalSignificance <- 'likely_pathogenic,pathogenic' +# populationFrequencyAlt <- '1000G:ALL<0.01;GNOMAD_GENOMES:ALL<0.01' +# variant_result <- variantClient(OpencgaR = con, endpointName = "query", +# params=list(study=study, limit=5, ct=ct, +# clinicalSignificance=clinicalSignificance, +# populationFrequencyAlt=population_frequency_alt)) +# glue("Number of variants matching the filter: {getResponseNumMatches(variant_result)}") +# # getResponseResults(variant_result)[[1]] %>% select(-names, -studies, -annotation) -## ---- eval=FALSE-------------------------------------------------------------- -# ## Filter by custom cohorts' frequency -# cohort_stats = study + ':MY_COHORT_A<0.001' -# variant_result <- variantClient(OpencgaR = con, endpointName = "query", -# params=list(study=study, cohortStatsAlt=cohort_stats, -# limit=5)) -# glue("Number of variants matching the filter: {getResponseNumMatches(variant_result)}") -# -# ## Filter by more than one cohort frequency and consequence type -# cohort_stats = study + ':MY_COHORT_A<0.001' + ';' + study + ':MY_COHORT_B<0.001' -# variant_result <- variantClient(OpencgaR = con, endpointName = "query", -# params=list(study=study, cohortStatsAlt=cohort_stats, -# limit=5)) -# glue("Number of variants matching the filter: {getResponseNumMatches(variant_result)}") - -## ---- results='hide'---------------------------------------------------------- -variant_result <- variantClient(OpencgaR = con, 
endpointName = "query", - params=list(study=study, id=variant_id)) -getResponseResults(variant_result)[[1]]$studies[[1]]$stats[[1]][,c('cohortId', 'alleleCount', - 'altAlleleCount', 'altAlleleFreq')] -glue("Variant example: {variant_id}") - -## ---- results='hide', fig.width=7--------------------------------------------- -## Aggregate by number of LoF variants per gene -variants_agg <- variantClient(OpencgaR = con, endpointName = "aggregationStats", - params=list(study=study, ct='lof', fields="genes")) -df <- getResponseResults(variants_agg)[[1]]$buckets[[1]] -barplot(height = df$count, names.arg = df$value, las=2, cex.names = 0.6) +## ----eval=FALSE, results='hide'----------------------------------------------- +# ## Filter by cohort ALL frequency +# cohort_stats <- paste0(study, ':ALL<0.001') +# variant_result <- variantClient(OpencgaR = con, endpointName = "query", +# params=list(study=study, cohortStatsAlt=cohort_stats, +# limit=5)) +# glue("Number of variants matching the filter: {getResponseNumMatches(variant_result)}") +# +# # getResponseResults(variant_result)[[1]] %>% select(-names, -studies, -annotation) + +## ----eval=FALSE, eval=FALSE--------------------------------------------------- +# ## Filter by custom cohorts' frequency +# cohort_stats = study + ':MY_COHORT_A<0.001' +# variant_result <- variantClient(OpencgaR = con, endpointName = "query", +# params=list(study=study, cohortStatsAlt=cohort_stats, +# limit=5)) +# glue("Number of variants matching the filter: {getResponseNumMatches(variant_result)}") +# +# ## Filter by more than one cohort frequency and consequence type +# cohort_stats = study + ':MY_COHORT_A<0.001' + ';' + study + ':MY_COHORT_B<0.001' +# variant_result <- variantClient(OpencgaR = con, endpointName = "query", +# params=list(study=study, cohortStatsAlt=cohort_stats, +# limit=5)) +# glue("Number of variants matching the filter: {getResponseNumMatches(variant_result)}") + +## ----eval=FALSE, 
results='hide'----------------------------------------------- +# variant_result <- variantClient(OpencgaR = con, endpointName = "query", +# params=list(study=study, id=variant_id)) +# glue("Variant example: {variant_id}") +# getResponseResults(variant_result)[[1]]$studies[[1]]$stats[[1]][,c('cohortId', 'alleleCount', +# 'altAlleleCount', 'altAlleleFreq')] +# + +## ----eval=FALSE, results='hide', fig.width=7---------------------------------- +# ## Aggregate by number of LoF variants per gene +# variants_agg <- variantClient(OpencgaR = con, endpointName = "aggregationStats", +# params=list(study=study, ct='lof', fields="genes")) +# df <- getResponseResults(variants_agg)[[1]]$buckets[[1]] +# barplot(height = df$count, names.arg = df$value, las=2, cex.names = 0.6) diff --git a/opencga-client/src/main/R/vignettes/opencgaR.Rmd b/opencga-client/src/main/R/vignettes/opencgaR.Rmd index d5bf9f48c3..b9dbaf8583 100644 --- a/opencga-client/src/main/R/vignettes/opencgaR.Rmd +++ b/opencga-client/src/main/R/vignettes/opencgaR.Rmd @@ -15,18 +15,18 @@ output: rmarkdown::html_vignette OpenCGA is an open-source project that aims to provide a **Big Data storage engine and analysis framework for genomic scale data analysis** of hundreds of terabytes or even petabytes. For users, its main features include uploading and downloading files to a repository, storing their information in a generic way (non-dependant of the original file-format) and retrieving this information efficiently. For developers, it will be a platform for supporting the most used bioinformatics file formats and accelerating the development of visualization and analysis applications. -The OpencgaR client provides a user-friendly interface to work with OpenCGA REST Web Services through R and has been implemented following the Bioconductor guidelines for package development which promote high-quality, well documented and inter-operable software. 
The source code of the R package can be found in [GitHub](https://github.com/opencb/opencga/tree/develop/opencga-client/src/main/R). +The OpencgaR client provides a user-friendly interface to work with OpenCGA REST Web Services through R and has been implemented following the Bioconductor guidelines for package development which promote high-quality, well documented and inter-operable software. The source code of the R package can be found in [GitHub](https://github.com/opencb/opencga/tree/develop/opencga-client/src/main/R). ## Installation and configuration -The R client requires at least R version 3.4, although most of the code is fully compatible with earlier versions. The pre-build R package of the R client can be downloaded from the OpenCGA GitHub Release at https://github.com/opencb/opencga/releases and installed using the `install.packages` function in R. `install.packages` can also install a source package from a remote `.tar.gz` file by providing the URL to such file. +The R client requires at least R version 3.4, although most of the code is fully compatible with earlier versions. The pre-built R package of the R client can be downloaded from the OpenCGA GitHub Release at <https://github.com/opencb/opencga/releases> and installed using the `install.packages` function in R. `install.packages` can also install a source package from a remote `.tar.gz` file by providing the URL to such file. ```{r install, eval=FALSE, echo=TRUE} ## Install opencgaR -# install.packages("opencgaR_2.2.0.tar.gz", repos=NULL, type="source") +# install.packages("opencgaR_4.0.0.tar.gz", repos=NULL, type="source") ``` -```{r message=FALSE, warning=FALSE, include=FALSE} +```{r message=FALSE, eval=FALSE, warning=FALSE, include=FALSE} library(opencgaR) library(glue) library(dplyr) @@ -41,9 +41,9 @@ The ***initOpencgaR*** function accepts either host and version information or a The code below shows different ways to initialise the OpenCGA connection with the REST server.
-```{r eval=TRUE, results='hide'} +```{r eval=FALSE, results='hide'} ## Initialise connection using a configuration in R list -conf <- list(version="v2", rest=list(host="https://ws.opencb.org/opencga-prod")) +conf <- list(version="v2", rest=list(host="https://test.app.zettagenomics.com/myenv/opencga/")) con <- initOpencgaR(opencgaConfig=conf) ``` @@ -55,30 +55,32 @@ con <- initOpencgaR(opencgaConfig=conf) Once the connection has been initialised users can login specifying their OpenCGA user ID and password. -```{r, results='hide'} +```{r eval=FALSE, results='hide'} # Log in -con <- opencgaLogin(opencga = con, userid = "demouser", passwd = "demouser", - autoRenew = TRUE, verbose = FALSE, showToken = FALSE) +con <- opencgaLogin(opencga = con, userid = "userID", passwd = "1234567890", + organization = "myorg", autoRenew = TRUE, verbose = FALSE, + showToken = FALSE) ``` # Retrieving basic information about projects, users and studies Retrieving the list of projects accessible to the user: -```{r, results='hide'} +```{r, eval=FALSE, results='hide'} projects <- projectClient(OpencgaR = con, endpointName = "search") getResponseResults(projects)[[1]][,c("id","name", "description")] ``` List the studies accessible in a project -```{r, results='hide'} +```{r, eval=FALSE, results='hide'} projects <- projectClient(OpencgaR=con, project="population", endpointName="studies") getResponseResults(projects)[[1]][,c("id","name", "description")] ``` -```{r, results='hide'} -study <- "population:1000g" +```{r, eval=FALSE, results='hide'} +# study <- "test@population:1000G" +study <- "test@germline:platinum" # Study name study_result <- studyClient(OpencgaR = con, studies = study, endpointName = "info") @@ -114,7 +116,7 @@ glue(to_print) As an example we are going to retrieve a random variant from the database -```{r, results='hide'} +```{r, eval=FALSE, results='hide'} # Retrieve the first variant variant_example <- variantClient(OpencgaR = con, endpointName = "query", 
params=list(study=study, limit=1, type="SNV")) @@ -124,7 +126,7 @@ glue("Variant example: {variant_id}") ## Fetch the samples containing a specific variant -```{r message=FALSE, warning=FALSE, results='hide'} +```{r eval=FALSE, message=FALSE, warning=FALSE, results='hide'} # Retrieve the samples that have this variant variant_result <- variantClient(OpencgaR = con, endpointName = "querySample", params=list(study=study, variant=variant_id)) @@ -134,14 +136,14 @@ glue("Number of samples with this variant: data_keys <- unlist(getResponseResults(variant_result)[[1]][, "studies"][[1]][, "sampleDataKeys"]) df <- getResponseResults(variant_result)[[1]][, "studies"][[1]][, "samples"][[1]] %>% - select(-fileIndex) %>% unnest_wider(data) + select(-fileIndex) %>% unnest_wider(data, names_sep = "") colnames(df) <- c("samples", data_keys) df ``` ## Fetch samples by genotype -```{r, results='hide', message=FALSE, warning=FALSE, include=FALSE} +```{r, eval=FALSE, results='hide', message=FALSE, warning=FALSE, include=FALSE} # Fetch the samples with a specific genotype genotype <- "0/1,1/1" variant_result <- variantClient(OpencgaR = con, endpointName = "querySample", @@ -151,7 +153,7 @@ glue("Number of samples with genotype {genotype} in this variant: {getResponseAttributes(variant_result)$numTotalSamples}") ``` -```{r, results='hide'} +```{r, eval=FALSE, results='hide'} # Search for homozygous alternate samples genotype <- "1/1" variant_result <- variantClient(OpencgaR = con, endpointName = "querySample", @@ -163,9 +165,10 @@ glue("Number of samples with genotype {genotype} in this variant: ## Retrieve variants by region -```{r, results='hide'} +```{r, eval=FALSE, results='hide'} # Search for homozygous alternate samples region <- "1:62273600-62273700" +region <- "11:119345500-119345600" variant_result <- variantClient(OpencgaR = con, endpointName = "query", params=list(study=study, region=region, limit=100)) @@ -177,11 +180,10 @@ getResponseResults(variant_result)[[1]] %>% 
select(-names, -studies, -annotation Variants can be filtered by gene using the parameters gene or xref: -- `gene`: only accepts gene IDs -- `xref`: accepts different IDs including gene, transcript, dbSNP, ... -Remember you can pass different IDs using comma as separator. +- `gene`: only accepts gene IDs +- `xref`: accepts different IDs including gene, transcript, dbSNP, ... Remember you can pass different IDs using comma as separator. -```{r, results='hide'} +```{r, eval=FALSE, results='hide'} ## Filter by gene genes <- "HCN3,MTOR" variant_result <- variantClient(OpencgaR = con, endpointName = "query", @@ -189,20 +191,20 @@ variant_result <- variantClient(OpencgaR = con, endpointName = "query", getResponseResults(variant_result)[[1]] %>% select(-names, -studies, -annotation) ``` -```{r, results='hide'} +```{r, eval=FALSE, results='hide'} ## Filter by xref -snp <- "rs147394986" +#snp <- "rs147394986" +hgvs <- "NP_002747.2:p.Arg67Trp" variant_result <- variantClient(OpencgaR = con, endpointName = "query", - params=list(study=study, xref=snp, limit=5)) + params=list(study=study, xref=hgvs, limit=5)) getResponseResults(variant_result)[[1]] %>% select(-names, -studies, -annotation) ``` - ## Retrieve variants by consequence type -OpenCGA provides a rich variant annotation that includes Ensembl consequence types (https://m.ensembl.org/info/genome/variation/prediction/predicted_data.html). You can filter by consequence type by using parameter `ct`. You can provide a list of consequence type names separated by comma. Also, an alias called `lof` filter by a combination of loss-of-function terms. +OpenCGA provides a rich variant annotation that includes Ensembl consequence types (). You can filter by consequence type by using parameter `ct`. You can provide a list of consequence type names separated by comma. Also, an alias called `lof` filter by a combination of loss-of-function terms. 
-```{r, results='hide'} +```{r, eval=FALSE, results='hide'} ## Filter by missense variants and stop_gained ct <- "missense_variant,stop_gained" variant_result <- variantClient(OpencgaR = con, endpointName = "query", @@ -211,7 +213,7 @@ glue("Number of missense and stop gained variants: {getResponseNumMatches(varian getResponseResults(variant_result)[[1]] %>% select(-names, -studies, -annotation) ``` -```{r, results='hide'} +```{r, eval=FALSE, results='hide'} ## Filter by LoF (alias created containing 9 different CTs) variant_result <- variantClient(OpencgaR = con, endpointName = "query", params=list(study=study, ct="lof", limit=5)) @@ -223,17 +225,17 @@ getResponseResults(variant_result)[[1]] %>% select(-names, -studies, -annotation OpenCGA allows to filter variants by population frequencies, including: -- Minor Allele frequency (MAF) with the parameter `populationFrequencyMaf` -- Alternate Allele frequency (ALT) with the parameter `populationFrequencyAlt` -- Reference Allele frequency with the parameter `populationFrequencyRef` +- Minor Allele frequency (MAF) with the parameter `populationFrequencyMaf` +- Alternate Allele frequency (ALT) with the parameter `populationFrequencyAlt` +- Reference Allele frequency with the parameter `populationFrequencyRef` The population frequency studies indexed in OpenCGA include different sources such as **genomAD** or **1000 Genomes**. The syntax for the query parameter is: `{study}:{population}:{cohort}[<|>|<=|>=]{proportion}`. Note that you can specify several populations separated by comma (OR) or by semi-colon (AND), e.g. 
for all variants less than 1% in the two studies we should use `1kG_phase3:ALL<0.01;GNOMAD_GENOMES:ALL<0.01` -```{r, results='hide'} +```{r, eval=FALSE, results='hide'} ## Filter by population alternate frequency -population_frequency_alt <- '1kG_phase3:ALL<0.001' +population_frequency_alt <- '1000G:ALL<0.001' variant_result <- variantClient(OpencgaR = con, endpointName = "query", params=list(study=study, populationFrequencyAlt=population_frequency_alt, @@ -242,22 +244,22 @@ glue("Number of variants matching the filter: {getResponseNumMatches(variant_res # getResponseResults(variant_result)[[1]] %>% select(-names, -studies, -annotation) ``` -```{r, results='hide'} +```{r, eval=FALSE, results='hide'} ## Filter by two population alternate frequency ## Remember to use commas for OR and semi-colon for AND -population_frequency_alt <- '1kG_phase3:ALL<0.001;GNOMAD_GENOMES:ALL<0.001' +population_frequency_alt <- '1000G:ALL<0.001;GNOMAD_GENOMES:ALL<0.001' variant_result <- variantClient(OpencgaR = con, endpointName = "query", params=list(study=study, populationFrequencyAlt=population_frequency_alt, limit=5)) glue("Number of variants matching the filter: {getResponseNumMatches(variant_result)}") -# getResponseResults(variant_result)[[1]] %>% select(-names, -studies, -annotation) +getResponseResults(variant_result)[[1]] %>% select(-names, -studies, -annotation) ``` -```{r, results='hide'} +```{r, eval=FALSE, results='hide'} ## Filter by population alternate frequency using a range of values -population_frequency_alt <- '1kG_phase3:ALL>0.001;1kG_phase3:ALL<0.005' +population_frequency_alt <- '1000G:ALL>0.001;1000G:ALL<0.005' variant_result <- variantClient(OpencgaR = con, endpointName = "query", params=list(study=study, populationFrequencyAlt=population_frequency_alt, @@ -268,28 +270,28 @@ glue("Number of variants matching the filter: {getResponseNumMatches(variant_res ``` ## Complex variant queries + OpenCGA implements a very advanced variant query engine that allows to 
combine many filters to build very complex and useful queries. In this section you will find some examples. -```{r, results='hide'} +```{r, eval=FALSE, results='hide'} ## Filter by Consequence Type, Clinical Significance and population frequency ct <- 'lof,missense_variant' clinicalSignificance <- 'likely_pathogenic,pathogenic' -populationFrequencyAlt <- '1kG_phase3:ALL<0.01;GNOMAD_GENOMES:ALL<0.01' +populationFrequencyAlt <- '1000G:ALL<0.01;GNOMAD_GENOMES:ALL<0.01' variant_result <- variantClient(OpencgaR = con, endpointName = "query", params=list(study=study, limit=5, ct=ct, clinicalSignificance=clinicalSignificance, populationFrequencyAlt=population_frequency_alt)) glue("Number of variants matching the filter: {getResponseNumMatches(variant_result)}") -# getResponseResults(variant_result)[[1]] %>% select(-names, -studies, -annotation) +getResponseResults(variant_result)[[1]] %>% select(-names, -studies, -annotation) ``` - ## Filter variants using cohort information -OpenCGA allows users to define cohorts of samples and calculate and index the allele and genotype frequencies among other stats. By default, a cohort called "ALL" containing all samples is defined and the variant stats are calculated. You can filter by the internal cohort stats using the parameter `cohortStatsAlt` and pass the study and the cohort you would like to filter by. Format: `cohortStatsAlt={study}:{cohort}[<|>|<=|>=]{proportion}` -Note that any cohorts created will be shared among the studies belonging to the same project. -```{r, results='hide'} +OpenCGA allows users to define cohorts of samples and calculate and index the allele and genotype frequencies among other stats. By default, a cohort called "ALL" containing all samples is defined and the variant stats are calculated. You can filter by the internal cohort stats using the parameter `cohortStatsAlt` and pass the study and the cohort you would like to filter by. 
Format: `cohortStatsAlt={study}:{cohort}[<|>|<=|>=]{proportion}` Note that any cohorts created will be shared among the studies belonging to the same project. + +```{r, eval=FALSE, results='hide'} ## Filter by cohort ALL frequency cohort_stats <- paste0(study, ':ALL<0.001') variant_result <- variantClient(OpencgaR = con, endpointName = "query", @@ -301,7 +303,8 @@ glue("Number of variants matching the filter: {getResponseNumMatches(variant_res ``` The following examples are just examples of how queries should be made, they are not intended to be executed since the custom cohorts have not been created. -```{r, eval=FALSE} + +```{r, eval=FALSE, eval=FALSE} ## Filter by custom cohorts' frequency cohort_stats = study + ':MY_COHORT_A<0.001' variant_result <- variantClient(OpencgaR = con, endpointName = "query", @@ -318,23 +321,24 @@ glue("Number of variants matching the filter: {getResponseNumMatches(variant_res ``` ## Retrieve cohort frequencies for a specific variant -```{r, results='hide'} + +```{r, eval=FALSE, results='hide'} variant_result <- variantClient(OpencgaR = con, endpointName = "query", params=list(study=study, id=variant_id)) +glue("Variant example: {variant_id}") getResponseResults(variant_result)[[1]]$studies[[1]]$stats[[1]][,c('cohortId', 'alleleCount', 'altAlleleCount', 'altAlleleFreq')] -glue("Variant example: {variant_id}") + ``` # Aggregated stats + ## Number of LoF variants per gene -```{r, results='hide', fig.width=7} +```{r, eval=FALSE, results='hide', fig.width=7} ## Aggregate by number of LoF variants per gene variants_agg <- variantClient(OpencgaR = con, endpointName = "aggregationStats", params=list(study=study, ct='lof', fields="genes")) df <- getResponseResults(variants_agg)[[1]]$buckets[[1]] barplot(height = df$count, names.arg = df$value, las=2, cex.names = 0.6) ``` - - diff --git a/opencga-client/src/main/R/vignettes/opencgaR.html b/opencga-client/src/main/R/vignettes/opencgaR.html new file mode 100644 index 0000000000..94440b5dd2 --- 
/dev/null +++ b/opencga-client/src/main/R/vignettes/opencgaR.html @@ -0,0 +1,675 @@ + + + + + + + + + + + + + + + + +Introduction to opencgaR + + + + + + + + + + + + + + + + + + + + + + + + + + +

Introduction to opencgaR

+

Zetta Genomics

+

28 November 2024

+ + + +
+

Introduction

+

OpenCGA is an open-source project that aims to provide a Big +Data storage engine and analysis framework for genomic scale data +analysis of hundreds of terabytes or even petabytes. For users, +its main features include uploading and downloading files to a +repository, storing their information in a generic way (independent of +the original file format) and retrieving this information efficiently. +For developers, it will be a platform for supporting the most used +bioinformatics file formats and accelerating the development of +visualization and analysis applications.

+

The OpencgaR client provides a user-friendly interface to work with +OpenCGA REST Web Services through R and has been implemented following +the Bioconductor guidelines for package development which promote +high-quality, well documented and inter-operable software. The source +code of the R package can be found in GitHub.

+
+

Installation and configuration

+

The R client requires at least R version 3.4, although most of the +code is fully compatible with earlier versions. The pre-built R package +of the R client can be downloaded from the OpenCGA GitHub Releases page at https://github.com/opencb/opencga/releases and installed +using the install.packages function in R. +install.packages can also install a source package from a +remote .tar.gz file by providing the URL of that file.

+
## Install opencgaR
+# install.packages("opencgaR_4.0.0.tar.gz", repos=NULL, type="source")
+
+
+
+

Connection and authentication into an OpenCGA instance

+

A set of methods have been implemented to deal with the connectivity +and log into the REST host. Connection to the host is done in two steps +using the functions initOpencgaR and +opencgaLogin for defining the connection +details and login, respectively.

+

The initOpencgaR function accepts either +host and version information or a configuration file (as a +list() or in YAML +or JSON format). The opencgaLogin function +establishes the connection with the host. It requires an opencgaR object +(created using the initOpencgaR function) and the login +details: user and password. User and password can be introduced +interactively through a popup window using +interactive=TRUE, to avoid typing user credentials within +the R script or a config file.

+

The code below shows different ways to initialise the OpenCGA +connection with the REST server.

+
## Initialise connection using a configuration in R list
+conf <- list(version="v2", rest=list(host="https://test.app.zettagenomics.com/myenv/opencga/"))
+con <- initOpencgaR(opencgaConfig=conf)
+
## Initialise connection using a configuration file (in YAML or JSON format)
+# conf <- "/path/to/conf/client-configuration.yml"
+# con <- initOpencgaR(opencgaConfig=conf)
+

Once the connection has been initialised users can login specifying +their OpenCGA user ID and password.

+
# Log in
+con <- opencgaLogin(opencga = con, userid = "userID", passwd = "1234567890", 
+                    organization = "myorg", autoRenew = TRUE, verbose = FALSE, 
+                    showToken = FALSE)
+
+
+

Retrieving basic information about projects, users and studies

+

Retrieving the list of projects accessible to the user:

+
projects <- projectClient(OpencgaR = con, endpointName = "search")
+getResponseResults(projects)[[1]][,c("id","name", "description")]
+

List the studies accessible in a project

+
projects <- projectClient(OpencgaR=con, project="population", endpointName="studies")
+getResponseResults(projects)[[1]][,c("id","name", "description")]
+
# study <- "test@population:1000G"
+study <- "test@germline:platinum"
+
+# Study name
+study_result <- studyClient(OpencgaR = con, studies = study, endpointName = "info")
+study_name <- getResponseResults(study_result)[[1]][,"name"]
+# Number of samples in this study
+count_samples <- sampleClient(OpencgaR = con, endpointName = "search", 
+                              params=list(study=study, count=TRUE, limit=0))
+num_samples <- getResponseNumMatches(count_samples)
+# Number of individuals
+count_individuals <- individualClient(OpencgaR = con, endpointName = "search", 
+                                      params=list(study=study, count=TRUE, limit=0))
+num_individuals <- getResponseNumMatches(count_individuals)
+# Number of clinical cases
+count_cases <- clinicalClient(OpencgaR = con, endpointName = "search", 
+                              params=list(study=study, count=TRUE, limit=0))
+num_cases <- getResponseNumMatches(count_cases)
+# Number of variants
+count_variants <- variantClient(OpencgaR = con, endpointName = "query", 
+                                params=list(study=study, count=TRUE, limit=0))
+num_variants <- getResponseNumMatches(count_variants)
+
+to_print <- "Study name: {study_name}
+            - Number of samples: {num_samples}
+            - Number of individuals: {num_individuals}
+            - Number of cases: {num_cases}
+            - Number of variants: {num_variants}"
+glue(to_print)
+
+
+

Extracting information about variants and biomarkers of +interest

+
+

Retrieve an example variant

+

As an example, we are going to retrieve the first SNV returned by the +database

+
# Retrieve the first variant
+variant_example <- variantClient(OpencgaR = con, endpointName = "query", 
+                                 params=list(study=study, limit=1, type="SNV"))
+variant_id <- getResponseResults(variant_example)[[1]][,"id"]
+glue("Variant example: {variant_id}")
+
+
+

Fetch the samples containing a specific variant

+
# Retrieve the samples that have this variant
+variant_result <- variantClient(OpencgaR = con, endpointName = "querySample", 
+                                params=list(study=study, variant=variant_id))
+
+glue("Number of samples with this variant: 
+     {getResponseAttributes(variant_result)$numTotalSamples}")
+data_keys <- unlist(getResponseResults(variant_result)[[1]][, "studies"][[1]][, "sampleDataKeys"])
+
+df <- getResponseResults(variant_result)[[1]][, "studies"][[1]][, "samples"][[1]] %>% 
+        select(-fileIndex) %>% unnest_wider(data, names_sep = "")
+colnames(df) <- c("samples", data_keys)
+df
+
+
+

Fetch samples by genotype

+
# Search for homozygous alternate samples
+genotype <- "1/1"
+variant_result <- variantClient(OpencgaR = con, endpointName = "querySample", 
+                                params=list(study=study, variant=variant_id, 
+                                            genotype=genotype))
+glue("Number of samples with genotype {genotype} in this variant: 
+     {getResponseAttributes(variant_result)$numTotalSamples}")
+
+
+

Retrieve variants by region

+
# Retrieve the variants located in a genomic region (format: chromosome:start-end)
+# region <- "1:62273600-62273700"
+region <- "11:119345500-119345600"
+variant_result <- variantClient(OpencgaR = con, endpointName = "query", 
+                                params=list(study=study, region=region, limit=100))
+
+glue("Number of variants in region {region}: {getResponseNumResults(variant_result)}")
+getResponseResults(variant_result)[[1]] %>% select(-names, -studies, -annotation)
+
+
+

Retrieve variants by feature

+

Variants can be filtered by gene using the parameters gene or +xref:

+
    +
  • gene: only accepts gene IDs
  • +
  • xref: accepts different IDs including gene, transcript, +dbSNP, … Remember you can pass different IDs using comma as +separator.
  • +
+
## Filter by gene
+genes <- "HCN3,MTOR"
+variant_result <- variantClient(OpencgaR = con, endpointName = "query", 
+                                params=list(study=study, gene=genes, limit=10))
+getResponseResults(variant_result)[[1]] %>% select(-names, -studies, -annotation)
+
## Filter by xref
+#snp <- "rs147394986"
+hgvs <- "NP_002747.2:p.Arg67Trp"
+variant_result <- variantClient(OpencgaR = con, endpointName = "query", 
+                                params=list(study=study, xref=hgvs, limit=5))
+getResponseResults(variant_result)[[1]] %>% select(-names, -studies, -annotation)
+
+
+

Retrieve variants by consequence type

+

OpenCGA provides a rich variant annotation that includes Ensembl +consequence types (https://m.ensembl.org/info/genome/variation/prediction/predicted_data.html). +You can filter by consequence type by using the parameter ct. +You can provide a list of consequence type names separated by commas. +Also, an alias called lof filters by a combination of +loss-of-function terms.

+
## Filter by missense variants and stop_gained
+ct <- "missense_variant,stop_gained"
+variant_result <- variantClient(OpencgaR = con, endpointName = "query", 
+                                params=list(study=study, ct=ct, limit=5))
+glue("Number of missense and stop gained variants: {getResponseNumMatches(variant_result)}")
+getResponseResults(variant_result)[[1]] %>% select(-names, -studies, -annotation)
+
## Filter by LoF (alias created containing 9 different CTs)
+variant_result <- variantClient(OpencgaR = con, endpointName = "query", 
+                                params=list(study=study, ct="lof", limit=5))
+glue("Number of LoF variants: {getResponseNumMatches(variant_result)}")
+getResponseResults(variant_result)[[1]] %>% select(-names, -studies, -annotation)
+
+
+

Filter variants by population frequencies

+

OpenCGA allows you to filter variants by population frequencies, +including:

+
    +
  • Minor Allele frequency (MAF) with the parameter +populationFrequencyMaf
  • +
  • Alternate Allele frequency (ALT) with the parameter +populationFrequencyAlt
  • +
  • Reference Allele frequency with the parameter +populationFrequencyRef
  • +
+

The population frequency studies indexed in OpenCGA include different +sources such as gnomAD or 1000 +Genomes.

+

The syntax for the query parameter is: +{study}:{population}:{cohort}[<|>|<=|>=]{proportion}. +Note that you can specify several populations separated by comma (OR) or +by semi-colon (AND), e.g. for all variants with a frequency below 1% in the two +studies we should use +1000G:ALL<0.01;GNOMAD_GENOMES:ALL<0.01

+
## Filter by population alternate frequency
+population_frequency_alt <- '1000G:ALL<0.001'
+variant_result <- variantClient(OpencgaR = con, endpointName = "query", 
+                                params=list(study=study, 
+                                            populationFrequencyAlt=population_frequency_alt, 
+                                            limit=5))
+glue("Number of variants matching the filter: {getResponseNumMatches(variant_result)}")
+# getResponseResults(variant_result)[[1]] %>% select(-names, -studies, -annotation)
+
## Filter by two population alternate frequency
+## Remember to use commas for OR and semi-colon for AND
+population_frequency_alt <- '1000G:ALL<0.001;GNOMAD_GENOMES:ALL<0.001'
+variant_result <- variantClient(OpencgaR = con, endpointName = "query", 
+                                params=list(study=study, 
+                                            populationFrequencyAlt=population_frequency_alt, 
+                                            limit=5))
+glue("Number of variants matching the filter: {getResponseNumMatches(variant_result)}")
+
+getResponseResults(variant_result)[[1]] %>% select(-names, -studies, -annotation)
+
## Filter by population alternate frequency using a range of values
+population_frequency_alt <- '1000G:ALL>0.001;1000G:ALL<0.005'
+variant_result <- variantClient(OpencgaR = con, endpointName = "query", 
+                                params=list(study=study, 
+                                            populationFrequencyAlt=population_frequency_alt, 
+                                            limit=5))
+glue("Number of variants matching the filter: {getResponseNumMatches(variant_result)}")
+
+# getResponseResults(variant_result)[[1]] %>% select(-names, -studies, -annotation)
+
+
+

Complex variant queries

+

OpenCGA implements a very advanced variant query engine that allows you +to combine many filters to build very complex and useful queries. In +this section you will find some examples.

+
## Filter by Consequence Type, Clinical Significance and population frequency
+ct <- 'lof,missense_variant'
+clinicalSignificance <- 'likely_pathogenic,pathogenic'
+populationFrequencyAlt <- '1000G:ALL<0.01;GNOMAD_GENOMES:ALL<0.01'
+variant_result <- variantClient(OpencgaR = con, endpointName = "query", 
+                                params=list(study=study, limit=5, ct=ct, 
+                                            clinicalSignificance=clinicalSignificance,
+                                            populationFrequencyAlt=populationFrequencyAlt))
+glue("Number of variants matching the filter: {getResponseNumMatches(variant_result)}")
+
+getResponseResults(variant_result)[[1]] %>% select(-names, -studies, -annotation)
+
+
+

Filter variants using cohort information

+

OpenCGA allows users to define cohorts of samples and calculate and +index the allele and genotype frequencies among other stats. By default, +a cohort called “ALL” containing all samples is defined and the variant +stats are calculated. You can filter by the internal cohort stats using +the parameter cohortStatsAlt and pass the study and the +cohort you would like to filter by. Format: +cohortStatsAlt={study}:{cohort}[<|>|<=|>=]{proportion} +Note that any cohorts created will be shared among the studies belonging +to the same project.

+
## Filter by cohort ALL frequency
+cohort_stats <- paste0(study, ':ALL<0.001')
+variant_result <- variantClient(OpencgaR = con, endpointName = "query", 
+                                params=list(study=study, cohortStatsAlt=cohort_stats, 
+                                            limit=5))
+glue("Number of variants matching the filter: {getResponseNumMatches(variant_result)}")
+
+# getResponseResults(variant_result)[[1]] %>% select(-names, -studies, -annotation)
+

The following snippets only illustrate how queries should be +made; they are not intended to be executed since the custom cohorts have +not been created.

+
## Filter by custom cohorts' frequency
+## (strings are concatenated with paste0(); `+` is not defined for character values in R)
+cohort_stats <- paste0(study, ':MY_COHORT_A<0.001')
+variant_result <- variantClient(OpencgaR = con, endpointName = "query", 
+                                params=list(study=study, cohortStatsAlt=cohort_stats, 
+                                            limit=5))
+glue("Number of variants matching the filter: {getResponseNumMatches(variant_result)}")
+
+## Filter by more than one cohort frequency (semi-colon means AND)
+cohort_stats <- paste0(study, ':MY_COHORT_A<0.001', ';', study, ':MY_COHORT_B<0.001')
+variant_result <- variantClient(OpencgaR = con, endpointName = "query", 
+                                params=list(study=study, cohortStatsAlt=cohort_stats, 
+                                            limit=5))
+glue("Number of variants matching the filter: {getResponseNumMatches(variant_result)}")
+
+
+

Retrieve cohort frequencies for a specific variant

+
variant_result <- variantClient(OpencgaR = con, endpointName = "query", 
+                                params=list(study=study, id=variant_id))
+glue("Variant example: {variant_id}")
+getResponseResults(variant_result)[[1]]$studies[[1]]$stats[[1]][,c('cohortId', 'alleleCount',
+                                                              'altAlleleCount', 'altAlleleFreq')]
+
+
+
+

Aggregated stats

+
+

Number of LoF variants per gene

+
## Aggregate by number of LoF variants per gene
+variants_agg <- variantClient(OpencgaR = con, endpointName = "aggregationStats", 
+                              params=list(study=study, ct='lof', fields="genes"))
+df <- getResponseResults(variants_agg)[[1]]$buckets[[1]]
+barplot(height = df$count, names.arg = df$value, las=2, cex.names = 0.6)
+
+
+ + + + + + + + + + + From 48e15923b918fa11f3094d6720da1e2e524a9e0c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Thu, 28 Nov 2024 17:11:09 +0000 Subject: [PATCH 073/122] storage: Fix NPE running SampleVariantStats #TASK-6722 --- .../variant/stats/SampleVariantStatsAnalysis.java | 12 ++++++++++-- .../analysis/variant/VariantAnalysisTest.java | 5 +++++ ...leVariantStatsHBaseMapReduceAnalysisExecutor.java | 5 ++++- .../hadoop/variant/mr/VariantMapReduceUtil.java | 4 +++- 4 files changed, 22 insertions(+), 4 deletions(-) diff --git a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/stats/SampleVariantStatsAnalysis.java b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/stats/SampleVariantStatsAnalysis.java index 3d91b877bd..a12072e3a1 100644 --- a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/stats/SampleVariantStatsAnalysis.java +++ b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/stats/SampleVariantStatsAnalysis.java @@ -26,6 +26,7 @@ import org.opencb.commons.datastore.core.Query; import org.opencb.commons.datastore.core.QueryOptions; import org.opencb.opencga.analysis.tools.OpenCgaToolScopeStudy; +import org.opencb.opencga.analysis.variant.manager.VariantCatalogQueryUtils; import org.opencb.opencga.catalog.db.api.SampleDBAdaptor; import org.opencb.opencga.core.api.ParamConstants; import org.opencb.opencga.core.common.BatchUtils; @@ -62,6 +63,7 @@ public class SampleVariantStatsAnalysis extends OpenCgaToolScopeStudy { @ToolParams protected SampleVariantStatsAnalysisParams toolParams; private ArrayList checkedSamplesList; + private Query variantQuery; private SampleVariantStatsAnalysisExecutor toolExecutor; private List> batches; private int numBatches; @@ -165,6 +167,12 @@ protected void check() throws Exception { } } + + variantQuery = toolParams.getVariantQuery() == null ? 
new Query() : toolParams.getVariantQuery().toQuery(); + variantQuery.put(VariantQueryParam.STUDY.key(), study); + variantQuery = new VariantCatalogQueryUtils(catalogManager) + .parseQuery(variantQuery, new QueryOptions(), variantStorageManager.getCellBaseUtils(study, token), token); + checkedSamplesList = new ArrayList<>(allSamples); checkedSamplesList.sort(String::compareTo); if (checkedSamplesList.isEmpty()) { @@ -173,7 +181,7 @@ protected void check() throws Exception { } else { // check read permission variantStorageManager.checkQueryPermissions( - new Query() + new Query(variantQuery) .append(VariantQueryParam.STUDY.key(), study) .append(VariantQueryParam.INCLUDE_SAMPLE.key(), checkedSamplesList), new QueryOptions(), @@ -246,7 +254,7 @@ protected void run() throws ToolException { .setOutputFile(tmpOutputFile) .setStudy(study) .setSampleNames(batchSamples) - .setVariantQuery(toolParams.getVariantQuery() == null ? new Query() : toolParams.getVariantQuery().toQuery()) + .setVariantQuery(variantQuery) .execute(); if (tmpOutputFile != outputFile) { diff --git a/opencga-analysis/src/test/java/org/opencb/opencga/analysis/variant/VariantAnalysisTest.java b/opencga-analysis/src/test/java/org/opencb/opencga/analysis/variant/VariantAnalysisTest.java index f9e9392be8..8f3f9695a3 100644 --- a/opencga-analysis/src/test/java/org/opencb/opencga/analysis/variant/VariantAnalysisTest.java +++ b/opencga-analysis/src/test/java/org/opencb/opencga/analysis/variant/VariantAnalysisTest.java @@ -429,6 +429,11 @@ public void testSampleStatsSampleFilter() throws Exception { new Query(VariantQueryParam.SAMPLE_DATA.key(), "DS>1;GT!=1|1")); } + @Test + public void testSampleStatsWithGeneFilter() throws Exception { + sampleVariantStats(null, "stats_BRCA1", false, 1, file.getSampleIds().subList(0, 2), false, new VariantQuery().gene("BRCA1")); + } + @Test public void testSampleStats() throws Exception { sampleVariantStats("1,2", "stats_1", false, 1, file.getSampleIds().subList(0, 2)); diff 
--git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/analysis/stats/SampleVariantStatsHBaseMapReduceAnalysisExecutor.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/analysis/stats/SampleVariantStatsHBaseMapReduceAnalysisExecutor.java index 41b8ea016e..8b4b017535 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/analysis/stats/SampleVariantStatsHBaseMapReduceAnalysisExecutor.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/analysis/stats/SampleVariantStatsHBaseMapReduceAnalysisExecutor.java @@ -1,11 +1,13 @@ package org.opencb.opencga.storage.hadoop.variant.analysis.stats; import org.opencb.commons.datastore.core.ObjectMap; +import org.opencb.commons.datastore.core.QueryOptions; import org.opencb.opencga.core.exceptions.ToolException; import org.opencb.opencga.core.exceptions.ToolExecutorException; import org.opencb.opencga.core.tools.annotations.ToolExecutor; import org.opencb.opencga.core.tools.variant.SampleVariantStatsAnalysisExecutor; import org.opencb.opencga.storage.core.variant.adaptors.VariantQueryException; +import org.opencb.opencga.storage.core.variant.query.ParsedVariantQuery; import org.opencb.opencga.storage.hadoop.variant.HadoopVariantStorageEngine; import org.opencb.opencga.storage.hadoop.variant.adaptors.VariantHadoopDBAdaptor; import org.opencb.opencga.storage.hadoop.variant.analysis.HadoopVariantStorageToolExecutor; @@ -43,8 +45,9 @@ public void run() throws ToolException { } } + ParsedVariantQuery variantQuery = engine.parseQuery(getVariantQuery(), new QueryOptions()); ObjectMap params = new ObjectMap(engine.getOptions()) - .appendAll(getVariantQuery()) + .appendAll(variantQuery.getQuery()) 
.append(SampleVariantStatsDriver.SAMPLES, sampleNames) .append(SampleVariantStatsDriver.OUTPUT, getOutputFile().toAbsolutePath().toUri()); engine.getMRExecutor().run(SampleVariantStatsDriver.class, SampleVariantStatsDriver.buildArgs( diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/VariantMapReduceUtil.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/VariantMapReduceUtil.java index 46b059e05c..56ecaf7c87 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/VariantMapReduceUtil.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/VariantMapReduceUtil.java @@ -346,7 +346,9 @@ public static void initVariantRowMapperJobFromHBase(Job job, String variantTable job.setInputFormatClass(HBaseVariantRowTableInputFormat.class); job.getConfiguration().setBoolean(HBaseVariantRowTableInputFormat.MULTI_SCANS, scans.size() > 1); job.getConfiguration().setBoolean(HBaseVariantRowTableInputFormat.USE_SAMPLE_INDEX_TABLE_INPUT_FORMAT, useSampleIndex); - job.getConfiguration().set(HBaseVariantRowTableInputFormat.SAMPLE_INDEX_TABLE, sampleIndexTable); + if (sampleIndexTable != null) { + job.getConfiguration().set(HBaseVariantRowTableInputFormat.SAMPLE_INDEX_TABLE, sampleIndexTable); + } } public static void initVariantRowMapperJobFromPhoenix(Job job, VariantHadoopDBAdaptor dbAdaptor, From 1d86756f547bd4dc78ad7007006d4915ca16f696 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Fri, 29 Nov 2024 09:39:45 +0000 Subject: [PATCH 074/122] storage: Fix CustomPhoenixInputFormat generateSplit for first and last splits. 
#TASK-6722 --- .../variant/mr/CustomPhoenixInputFormat.java | 45 ++++++++++++++----- .../stats/SampleVariantStatsDriver.java | 9 ++-- 2 files changed, 40 insertions(+), 14 deletions(-) diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/CustomPhoenixInputFormat.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/CustomPhoenixInputFormat.java index b8e34933c9..30a1b0c6bc 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/CustomPhoenixInputFormat.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/CustomPhoenixInputFormat.java @@ -8,6 +8,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.client.Scan; import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.Pair; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.mapreduce.*; import org.apache.hadoop.mapreduce.lib.db.DBWritable; @@ -23,6 +24,7 @@ import org.apache.phoenix.util.PhoenixRuntime; import org.opencb.opencga.storage.hadoop.HBaseCompat; import org.opencb.opencga.storage.hadoop.variant.HadoopVariantStorageOptions; +import org.opencb.opencga.storage.hadoop.variant.adaptors.phoenix.VariantPhoenixKeyFactory; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -70,7 +72,22 @@ public void initialize(InputSplit split, TaskAttemptContext context) throws IOEx super.initialize(split, context); if (split instanceof PhoenixInputSplit) { PhoenixInputSplit phoenixInputSplit = (PhoenixInputSplit) split; - logger.info("Key range : " + phoenixInputSplit.getKeyRange()); + KeyRange keyRange = phoenixInputSplit.getKeyRange(); + logger.info("Key range : " + keyRange); + + try { + Pair chrPosStart = 
VariantPhoenixKeyFactory.extractChrPosFromVariantRowKey(keyRange.getLowerRange()); + Pair chrPosEnd = VariantPhoenixKeyFactory.extractChrPosFromVariantRowKey(keyRange.getUpperRange()); + logger.info("Variants key range : " + + (keyRange.isLowerInclusive() ? "[" : "(") + + chrPosStart.getFirst() + ":" + chrPosStart.getSecond() + + " - " + + chrPosEnd.getFirst() + ":" + chrPosEnd.getSecond() + + (keyRange.isUpperInclusive() ? "]" : ")")); + } catch (Exception e) { + logger.error("Error parsing key range: {}", e.getMessage()); + } + logger.info("Split: " + phoenixInputSplit.getScans().size() + " scans"); int i = 0; for (Scan scan : phoenixInputSplit.getScans()) { @@ -116,17 +133,23 @@ private List generateSplits(final QueryPlan qplan, final List splitScans = new ArrayList<>(numScans); Scan scan = scans.get(0); byte[] startRow = scan.getStartRow(); + if (startRow == null || startRow.length == 0) { + startRow = Bytes.toBytesBinary("1\\x00\\x00\\x00\\x00\\x00"); + logger.info("Scan with empty startRow. Set default start. " + + "[" + Bytes.toStringBinary(startRow) + "-" + Bytes.toStringBinary(scan.getStopRow()) + ")"); + } byte[] stopRow = scan.getStopRow(); - if (startRow != null && startRow.length != 0 && stopRow != null && stopRow.length != 0) { - byte[][] ranges = Bytes.split(startRow, stopRow, numScans - 1); - for (int i = 1; i < ranges.length; i++) { - Scan splitScan = new Scan(scan); - splitScan.withStartRow(ranges[i - 1]); - splitScan.withStopRow(ranges[i], false); - splitScans.add(splitScan); - } - } else { - splitScans.add(scan); + if (stopRow == null || stopRow.length == 0) { + stopRow = Bytes.toBytesBinary("Z\\x00\\x00\\x00\\x00\\x00"); + logger.info("Scan with empty stopRow. Set default stop. 
" + + "[" + Bytes.toStringBinary(startRow) + "-" + Bytes.toStringBinary(stopRow) + ")"); + } + byte[][] ranges = Bytes.split(startRow, stopRow, numScans - 1); + for (int i = 1; i < ranges.length; i++) { + Scan splitScan = new Scan(scan); + splitScan.withStartRow(ranges[i - 1]); + splitScan.withStopRow(ranges[i], false); + splitScans.add(splitScan); } for (Scan splitScan : splitScans) { psplits.add(new PhoenixInputSplit(Collections.singletonList(splitScan))); diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/stats/SampleVariantStatsDriver.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/stats/SampleVariantStatsDriver.java index 7ff4049afc..1c55569947 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/stats/SampleVariantStatsDriver.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/stats/SampleVariantStatsDriver.java @@ -67,6 +67,7 @@ public class SampleVariantStatsDriver extends VariantTableAggregationDriver { private String trios; private String fileData; private String sampleData; + private Set includeSample; @Override protected Map getParams() { @@ -91,7 +92,7 @@ protected void parseAndValidateParameters() throws IOException { List samples = Arrays.asList(samplesStr.split(",")); StringBuilder trios = new StringBuilder(); - Set includeSample = new LinkedHashSet<>(); + includeSample = new LinkedHashSet<>(); if (samples.size() == 1 && (samples.get(0).equals("auto") || samples.get(0).equals("all"))) { boolean all = samples.get(0).equals("all"); metadataManager.sampleMetadataIterator(studyId).forEachRemaining(sampleMetadata -> { @@ -101,16 +102,18 @@ protected void parseAndValidateParameters() throws IOException { } } }); + sampleIds = new 
ArrayList<>(includeSample); } else { + sampleIds = new ArrayList<>(samples.size()); for (String sample : samples) { Integer sampleId = metadataManager.getSampleId(studyId, sample); if (sampleId == null) { throw VariantQueryException.sampleNotFound(sample, metadataManager.getStudyName(studyId)); } + sampleIds.add(sampleId); addTrio(trios, includeSample, metadataManager.getSampleMetadata(studyId, sampleId)); } } - sampleIds = new ArrayList<>(includeSample); if (sampleIds.isEmpty()) { throw new IllegalArgumentException("Nothing to do!"); } @@ -172,7 +175,7 @@ private static Pedigree readPedigree(Configuration conf) { protected Query getQuery() { Query query = super.getQuery() .append(VariantQueryParam.STUDY.key(), getStudyId()) - .append(VariantQueryParam.INCLUDE_SAMPLE.key(), sampleIds); + .append(VariantQueryParam.INCLUDE_SAMPLE.key(), includeSample); query.remove(VariantQueryParam.SAMPLE_DATA.key()); query.remove(VariantQueryParam.FILE_DATA.key()); return query; From 5141031b098e61a5aa7d9b67d43faa941517c995 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Fri, 29 Nov 2024 09:40:13 +0000 Subject: [PATCH 075/122] analysis: Fix NPE at relatedness tool. 
#TASK-6722 --- .../opencga/analysis/family/qc/IBDComputation.java | 9 +++++---- .../wrappers/plink/PlinkWrapperAnalysisExecutor.java | 11 +++++++++++ 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/family/qc/IBDComputation.java b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/family/qc/IBDComputation.java index 7277c3d242..7488510def 100644 --- a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/family/qc/IBDComputation.java +++ b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/family/qc/IBDComputation.java @@ -32,6 +32,7 @@ import org.opencb.opencga.analysis.variant.relatedness.RelatednessAnalysis; import org.opencb.opencga.analysis.wrappers.plink.PlinkWrapperAnalysisExecutor; import org.opencb.opencga.catalog.exceptions.CatalogException; +import org.opencb.opencga.core.config.Analysis; import org.opencb.opencga.core.exceptions.ToolException; import org.opencb.opencga.core.models.family.Family; import org.opencb.opencga.core.models.individual.Individual; @@ -117,7 +118,7 @@ public static RelatednessReport compute(String study, Family family, List> inputBindings = new ArrayList<>(); inputBindings.add(new AbstractMap.SimpleEntry<>(freqPath.getParent().toString(), "/input")); @@ -311,8 +312,8 @@ private static File runIBD(String basename, Path freqPath, Path outDir) throws T String plinkParams = "plink1.9 --tfile /output/" + basename + " --genome rel-check --read-freq /input/" + FREQ_FILENAME + " --out /output/" + basename; try { - PlinkWrapperAnalysisExecutor plinkExecutor = new PlinkWrapperAnalysisExecutor(); - DockerUtils.run(plinkExecutor.getDockerImageName(), inputBindings, outputBinding, plinkParams, null); + String dockerImageName = PlinkWrapperAnalysisExecutor.getDockerImageName(analysisConf); + DockerUtils.run(dockerImageName, inputBindings, outputBinding, plinkParams, null); } catch (IOException e) { throw new ToolException(e); } diff --git 
a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/wrappers/plink/PlinkWrapperAnalysisExecutor.java b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/wrappers/plink/PlinkWrapperAnalysisExecutor.java index 890475b40f..ffe17a02a7 100644 --- a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/wrappers/plink/PlinkWrapperAnalysisExecutor.java +++ b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/wrappers/plink/PlinkWrapperAnalysisExecutor.java @@ -3,6 +3,8 @@ import org.apache.commons.lang3.tuple.ImmutablePair; import org.apache.commons.lang3.tuple.Pair; import org.opencb.opencga.analysis.wrappers.executors.DockerWrapperAnalysisExecutor; +import org.opencb.opencga.core.config.Analysis; +import org.opencb.opencga.core.exceptions.ToolException; import org.opencb.opencga.core.tools.annotations.ToolExecutor; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -21,6 +23,15 @@ public class PlinkWrapperAnalysisExecutor extends DockerWrapperAnalysisExecutor private Logger logger = LoggerFactory.getLogger(this.getClass()); + public static String getDockerImageName(Analysis analysisConf) throws ToolException { + return analysisConf.getOpencgaExtTools().split(":")[0]; + } + + @Override + public String getDockerImageName() throws ToolException { + return getDockerImageName(getConfiguration().getAnalysis()); + } + @Override protected void run() throws Exception { StringBuilder sb = initCommandLine(); From f2bc782de721f08a0c45c40b2c72c5fd9972e08f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Fri, 29 Nov 2024 10:14:40 +0000 Subject: [PATCH 076/122] cicd: Upload tests logs as artifacts. Reduce action log size. 
#TASK-6722 --- .github/workflows/test-analysis.yml | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test-analysis.yml b/.github/workflows/test-analysis.yml index 554e1f7252..76437255b5 100644 --- a/.github/workflows/test-analysis.yml +++ b/.github/workflows/test-analysis.yml @@ -87,10 +87,24 @@ jobs: with: mongodb-version: 6.0 mongodb-replica-set: rs-test - - name: Maven build + - name: Maven build (skip tests) run: mvn -B clean install -DskipTests -P ${{ inputs.hadoop }} -Dcheckstyle.skip ${{ inputs.mvn_opts }} + - name: Build Junit log file name + id: BuildJunitLogFileName + run: | + MODULE=$(basename ${{ inputs.module }}) + if [[ -z "$MODULE" ]]; then + MODULE="opencga" + fi + TAGS=$(echo ${{ inputs.test_profile }} | sed -e 's/run\([^,]*\)Tests/\1/g' | tr ',' '_' | tr '[:upper:]' '[:lower:]' ) + echo "TESTS_LOG_FILE_NAME=junit_${{ inputs.hadoop }}_${TAGS}_${MODULE}.log" >> $GITHUB_OUTPUT - name: Run Junit tests - run: mvn -B verify surefire-report:report --fail-never -Dsurefire.testFailureIgnore=true -f ${{ (inputs.module == '' || inputs.module == 'all') && '.' || inputs.module }} -P ${{ inputs.hadoop }},${{ inputs.test_profile }} -Dcheckstyle.skip ${{ inputs.mvn_opts }} + run: mvn -B verify surefire-report:report --fail-never -Dsurefire.testFailureIgnore=true -f ${{ (inputs.module == '' || inputs.module == 'all') && '.' 
|| inputs.module }} -P ${{ inputs.hadoop }},${{ inputs.test_profile }} -Dcheckstyle.skip ${{ inputs.mvn_opts }} |& tee ${{ steps.BuildJunitLogFileName.outputs.TESTS_LOG_FILE_NAME }} |& grep -P '^\[[^\]]*(INFO|WARNING|ERROR)' --colour=never --line-buffered + - name: Upload Junit test logs + uses: actions/upload-artifact@v4 + with: + name: ${{ steps.BuildJunitLogFileName.outputs.TESTS_LOG_FILE_NAME }} + path: ${{ steps.BuildJunitLogFileName.outputs.TESTS_LOG_FILE_NAME }} - name: Publish Test Report on GitHub uses: scacap/action-surefire-report@v1 env: From dd684aafdb433acb000367be8451f1b12bea9054 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Fri, 29 Nov 2024 10:19:45 +0000 Subject: [PATCH 077/122] storage: Fix NPE at CohortVariantStatsDriver. #TASK-6722 --- .../hadoop/variant/stats/CohortVariantStatsDriver.java | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/stats/CohortVariantStatsDriver.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/stats/CohortVariantStatsDriver.java index b633508455..b18e7cfa5e 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/stats/CohortVariantStatsDriver.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/stats/CohortVariantStatsDriver.java @@ -316,8 +316,14 @@ protected void map(Object key, VariantRow row, Context context) throws IOExcepti if (fileIds.contains(fileColumn.getFileId())) { if (fileColumn.getOverlappingStatus().equals(VariantOverlappingStatus.NONE)) { HashMap attributes = new HashMap<>(2); - attributes.put(StudyEntry.QUAL, fileColumn.getQualString()); - attributes.put(StudyEntry.FILTER, 
fileColumn.getFilter()); + String qualString = fileColumn.getQualString(); + if (qualString != null) { + attributes.put(StudyEntry.QUAL, qualString); + } + String filter = fileColumn.getFilter(); + if (filter != null) { + attributes.put(StudyEntry.FILTER, filter); + } entries.add(new FileEntry(String.valueOf(fileColumn.getFileId()), fileColumn.getCall(), attributes)); } } From 9795c6a67406614012049b44c5de0712124d6d1d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Fri, 29 Nov 2024 10:47:44 +0000 Subject: [PATCH 078/122] cicd: Fix NPE. #TASK-6722 --- .github/workflows/test-analysis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test-analysis.yml b/.github/workflows/test-analysis.yml index 76437255b5..1962c89ddc 100644 --- a/.github/workflows/test-analysis.yml +++ b/.github/workflows/test-analysis.yml @@ -92,7 +92,7 @@ jobs: - name: Build Junit log file name id: BuildJunitLogFileName run: | - MODULE=$(basename ${{ inputs.module }}) + MODULE=$(basename ${{ (inputs.module == '' || inputs.module == 'all') && 'opencga' || inputs.module }} ) if [[ -z "$MODULE" ]]; then MODULE="opencga" fi From 923651cb3e6b91266e51c036276f81c469734649 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Fri, 29 Nov 2024 10:52:57 +0000 Subject: [PATCH 079/122] storage: Fix AIOOBE SampleVariantStatsDriver #TASK-6722 --- .../stats/SampleVariantStatsDriver.java | 20 +++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/stats/SampleVariantStatsDriver.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/stats/SampleVariantStatsDriver.java index 1c55569947..bdf04ff741 100644 --- 
a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/stats/SampleVariantStatsDriver.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/stats/SampleVariantStatsDriver.java @@ -63,11 +63,15 @@ public class SampleVariantStatsDriver extends VariantTableAggregationDriver { private static final String STATS_OPERATION_NAME = "sample_stats"; private static final String FIXED_FORMAT = "FIXED_FORMAT"; private static final String FIXED_FILE_ATTRIBUTES = "FIXED_FILE_ATTRIBUTES"; + // List of sampleIds to calculate stats private List sampleIds; + // List of sampleIds to include in the query needed to calculate stats. Might include parents + private Set includeSample; private String trios; private String fileData; private String sampleData; - private Set includeSample; + public static final String SAMPLE_IDS = "SampleVariantStatsDriver.sample_ids"; + public static final String INCLUDE_SAMPLE_IDS = "SampleVariantStatsDriver.include_sample_ids"; @Override protected Map getParams() { @@ -244,7 +248,8 @@ protected Job setupJob(Job job, String archiveTable, String variantTable) throws List fixedFormat = HBaseToVariantConverter.getFixedFormat(studyMetadata); List fileAttributes = HBaseToVariantConverter.getFixedAttributes(studyMetadata); - job.getConfiguration().set(SAMPLES, sampleIds.stream().map(Objects::toString).collect(Collectors.joining(","))); + job.getConfiguration().set(SAMPLE_IDS, sampleIds.stream().map(Objects::toString).collect(Collectors.joining(","))); + job.getConfiguration().set(INCLUDE_SAMPLE_IDS, includeSample.stream().map(Objects::toString).collect(Collectors.joining(","))); job.getConfiguration().setStrings(FIXED_FORMAT, fixedFormat.toArray(new String[0])); job.getConfiguration().setStrings(FIXED_FILE_ATTRIBUTES, fileAttributes.toArray(new String[0])); if (StringUtils.isNotEmpty(fileData)) { @@ -364,10 +369,12 @@ 
public List getWritables() { } } + public static class SampleVariantStatsMapper extends VariantRowMapper { private int studyId; private int[] samples; + private int[] includeSamples; protected final Logger logger = LoggerFactory.getLogger(SampleVariantStatsMapper.class); private VariantStorageMetadataManager vsm; @@ -384,8 +391,9 @@ public static class SampleVariantStatsMapper extends VariantRowMapper Date: Fri, 29 Nov 2024 11:00:57 +0000 Subject: [PATCH 080/122] storage: Do not produce a .crc checksum file copying from hdfs. #TASK-6722 --- .../opencga/storage/hadoop/utils/MapReduceOutputFile.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/utils/MapReduceOutputFile.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/utils/MapReduceOutputFile.java index e80a2d07da..b1a7e19397 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/utils/MapReduceOutputFile.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/utils/MapReduceOutputFile.java @@ -360,7 +360,9 @@ protected List concatMrOutputToLocal(Path mrOutdir, Path localOutput, bool LOGGER.info(" Target {}: {}", getCompression(localOutput.getName()), localOutput.toUri()); LOGGER.info(" ---- "); - try (OutputStream os = getOutputStreamPlain(localOutput.getName(), localOutput.getFileSystem(getConf()).create(localOutput))) { + FileSystem localFfileSystem = localOutput.getFileSystem(getConf()); + localFfileSystem.setWriteChecksum(false); + try (OutputStream os = getOutputStreamPlain(localOutput.getName(), localFfileSystem.create(localOutput))) { for (int i = 0; i < paths.size(); i++) { Path partFile = paths.get(i); long partFileSize = 
fileSystem.getFileStatus(partFile).getLen(); From 9f326d90c012140016e5e3f712b387cacd06732e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Fri, 29 Nov 2024 11:34:19 +0000 Subject: [PATCH 081/122] storage: Improve docker process failure. Do not close the stdin twice. #TASK-6722 --- .../variant/mr/StreamVariantMapper.java | 34 ++++++++++++++++--- 1 file changed, 29 insertions(+), 5 deletions(-) diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantMapper.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantMapper.java index 95d0e0fb8c..081f512a6d 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantMapper.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantMapper.java @@ -308,13 +308,32 @@ private void closeProcess(Context context, boolean closeOutputs) throws IOExcept variantDataWriter.post(); variantDataWriter.close(); } + } catch (Throwable th) { + addException(th); + } finally { + variantDataWriter = null; + } - // Close stdin to the process. This will cause the process to finish. 
- if (stdin != null) { + try { + // Close the stream to the process + // This will cause the process to finish + // (if the process is reading from stdin, it will receive EOF) + // If the process has already finished, the stdin.close() will throw an exception + if (stdin != null && process.isAlive()) { stdin.close(); - stdin = null; } + } catch (Throwable th) { + if (th instanceof IOException && "Stream closed".equals(th.getMessage())) { + // Ignore "Stream closed" exception + } else { + addException(th); + } + } finally { + // Clear stdin even if it fails to avoid closing it twice + stdin = null; + } + try { if (process != null) { // Wait for the process to finish int exitVal = process.waitFor(); @@ -333,19 +352,24 @@ private void closeProcess(Context context, boolean closeOutputs) throws IOExcept if (stdout != null) { stdoutThread.join(); stdout.close(); - stdout = null; } } catch (Throwable th) { addException(th); + } finally { + // Clear stdout even if it fails to avoid closing it twice + stdout = null; } + try { if (stderr != null) { stderrThread.join(); stderr.close(); - stderr = null; } } catch (Throwable th) { addException(th); + } finally { + // Clear stderr even if it fails to avoid closing it twice + stderr = null; } try { From 627e56a9e1c8f4f4d9eeffca670cdbc4ff2c0886 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Fri, 29 Nov 2024 11:54:59 +0000 Subject: [PATCH 082/122] storage: Fix AIOOBE SampleVariantStatsDriver #TASK-6722 --- .../variant/mr/CustomPhoenixInputFormat.java | 19 ++++++--- .../stats/SampleVariantStatsDriver.java | 39 ++++++++++++------- 2 files changed, 38 insertions(+), 20 deletions(-) diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/CustomPhoenixInputFormat.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/CustomPhoenixInputFormat.java 
index 30a1b0c6bc..b1b37eba8f 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/CustomPhoenixInputFormat.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/CustomPhoenixInputFormat.java @@ -125,26 +125,27 @@ private List generateSplits(final QueryPlan qplan, final List psplits = Lists.newArrayListWithExpectedSize(splits.size()); + int undividedSplits = 0; + int numScanSplit = configuration.getInt(HadoopVariantStorageOptions.MR_HBASE_PHOENIX_SCAN_SPLIT.key(), + HadoopVariantStorageOptions.MR_HBASE_PHOENIX_SCAN_SPLIT.defaultValue()); for (List scans : qplan.getScans()) { if (scans.size() == 1) { // Split scans into multiple smaller scans - int numScans = configuration.getInt(HadoopVariantStorageOptions.MR_HBASE_PHOENIX_SCAN_SPLIT.key(), - HadoopVariantStorageOptions.MR_HBASE_PHOENIX_SCAN_SPLIT.defaultValue()); - List splitScans = new ArrayList<>(numScans); + List splitScans = new ArrayList<>(numScanSplit); Scan scan = scans.get(0); byte[] startRow = scan.getStartRow(); if (startRow == null || startRow.length == 0) { startRow = Bytes.toBytesBinary("1\\x00\\x00\\x00\\x00\\x00"); logger.info("Scan with empty startRow. Set default start. " - + "[" + Bytes.toStringBinary(startRow) + "-" + Bytes.toStringBinary(scan.getStopRow()) + ")"); + + "[" + Bytes.toStringBinary(startRow) + " - " + Bytes.toStringBinary(scan.getStopRow()) + ")"); } byte[] stopRow = scan.getStopRow(); if (stopRow == null || stopRow.length == 0) { stopRow = Bytes.toBytesBinary("Z\\x00\\x00\\x00\\x00\\x00"); logger.info("Scan with empty stopRow. Set default stop. 
" - + "[" + Bytes.toStringBinary(startRow) + "-" + Bytes.toStringBinary(stopRow) + ")"); + + "[" + Bytes.toStringBinary(startRow) + " - " + Bytes.toStringBinary(stopRow) + ")"); } - byte[][] ranges = Bytes.split(startRow, stopRow, numScans - 1); + byte[][] ranges = Bytes.split(startRow, stopRow, numScanSplit - 1); for (int i = 1; i < ranges.length; i++) { Scan splitScan = new Scan(scan); splitScan.withStartRow(ranges[i - 1]); @@ -156,8 +157,14 @@ private List generateSplits(final QueryPlan qplan, final List 0) { + logger.info("There are " + undividedSplits + " splits that were not subdivided."); + } return psplits; } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/stats/SampleVariantStatsDriver.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/stats/SampleVariantStatsDriver.java index bdf04ff741..4c68b6f9b6 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/stats/SampleVariantStatsDriver.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/stats/SampleVariantStatsDriver.java @@ -381,7 +381,7 @@ public static class SampleVariantStatsMapper extends VariantRowMapper> fileToSampleIds = new HashMap<>(); private DistributedSampleVariantStatsCalculator calculator; private final HBaseToVariantAnnotationConverter annotationConverter = new HBaseToVariantAnnotationConverter(); - private int[] sampleIdsPosition; + private Map sampleIdsPosition; private int sampleDataDpIdx; private int fileDataDpIdx; private Predicate fileDataFilter; @@ -393,7 +393,6 @@ protected void setup(Context context) throws IOException, InterruptedException { studyId = context.getConfiguration().getInt(STUDY_ID, -1); samples = context.getConfiguration().getInts(SAMPLE_IDS); 
includeSamples = context.getConfiguration().getInts(INCLUDE_SAMPLE_IDS); - sampleIdsPosition = new int[IntStream.of(includeSamples).max().orElse(0) + 1]; String fileDataQuery = context.getConfiguration().get(VariantQueryParam.FILE_DATA.key()); String sampleDataQuery = context.getConfiguration().get(VariantQueryParam.SAMPLE_DATA.key()); @@ -404,9 +403,9 @@ protected void setup(Context context) throws IOException, InterruptedException { sampleDataDpIdx = fixedFormat.indexOf(VCFConstants.DEPTH_KEY); fileDataDpIdx = fileAttributes.indexOf(VCFConstants.DEPTH_KEY); - Arrays.fill(sampleIdsPosition, -1); + sampleIdsPosition = new HashMap<>(includeSamples.length); for (int i = 0; i < includeSamples.length; i++) { - sampleIdsPosition[includeSamples[i]] = i; + sampleIdsPosition.put(includeSamples[i], i); } Pedigree pedigree = readPedigree(context.getConfiguration()); @@ -424,11 +423,20 @@ private List getSamplesFromFileId(int fileId) { id -> { ArrayList sampleIds = new ArrayList<>(vsm.getFileMetadata(studyId, id).getSamples()); // Discard unused samples - sampleIds.removeIf(s -> sampleIdsPosition.length <= s || sampleIdsPosition[s] < 0); + sampleIds.removeIf(s -> !sampleIdsPosition.containsKey(s)); return sampleIds; }); } + private int getSamplePosition(Integer sampleId) { + Integer samplePosition = sampleIdsPosition.get(sampleId); + if (samplePosition == null) { + throw new IllegalStateException("Sample " + sampleId + " not found in includeSamples " + + Arrays.toString(includeSamples)); + } + return samplePosition; + } + @Override protected void map(Object key, VariantRow row, Context context) throws IOException, InterruptedException { VariantAnnotation[] annotation = new VariantAnnotation[1]; @@ -443,9 +451,10 @@ protected void map(Object key, VariantRow row, Context context) throws IOExcepti Variant variant = row.walker().onSample(sampleCell -> { int sampleId = sampleCell.getSampleId(); + int samplePosition = getSamplePosition(sampleId); if 
(!sampleDataFilter.test(sampleCell)) { // Invalidate sample - invalidSamples[sampleIdsPosition[sampleId]] = true; + invalidSamples[samplePosition] = true; return; } @@ -454,19 +463,19 @@ protected void map(Object key, VariantRow row, Context context) throws IOExcepti if (gt == null || gt.isEmpty()) { // This is a really weird situation, most likely due to errors in the input files logger.error("Empty genotype at sample " + sampleId + " in variant " + row.getVariant()); - gts.set(sampleIdsPosition[sampleId], GenotypeClass.NA_GT_VALUE); + gts.set(samplePosition, GenotypeClass.NA_GT_VALUE); } else if (gt.equals(GenotypeClass.UNKNOWN_GENOTYPE)) { // skip unknown genotypes context.getCounter(COUNTER_GROUP_NAME, "unknownGt").increment(1); } else { - gts.set(sampleIdsPosition[sampleId], gt); + gts.set(samplePosition, gt); } if (sampleDataDpIdx > 0) { String dp = sampleCell.getSampleData(sampleDataDpIdx); // Do not set invalid values if (StringUtils.isNumeric(dp)) { - dps.set(sampleIdsPosition[sampleId], dp); + dps.set(samplePosition, dp); } } }).onFile(fileCell -> { @@ -474,15 +483,16 @@ protected void map(Object key, VariantRow row, Context context) throws IOExcepti if (fileDataFilter.test(fileCell)) { for (Integer sampleId : getSamplesFromFileId(fileId)) { - filters.set(sampleIdsPosition[sampleId], fileCell.getFilter()); - quals.set(sampleIdsPosition[sampleId], fileCell.getQualString()); + int samplePosition = getSamplePosition(sampleId); + filters.set(samplePosition, fileCell.getFilter()); + quals.set(samplePosition, fileCell.getQualString()); if (fileDataDpIdx > 0) { String dp = fileCell.getFileData(fileDataDpIdx); // Do not set invalid values if (StringUtils.isNumeric(dp)) { // Prioritize DP value from FORMAT. Do not overwrite if present. 
- if (StringUtils.isEmpty(dps.get(sampleIdsPosition[sampleId]))) { - dps.set(sampleIdsPosition[sampleId], dp); + if (StringUtils.isEmpty(dps.get(samplePosition))) { + dps.set(samplePosition, dp); } } } @@ -490,7 +500,8 @@ protected void map(Object key, VariantRow row, Context context) throws IOExcepti } else { // Invalidate samples from this file for (Integer sampleId : getSamplesFromFileId(fileId)) { - invalidSamples[sampleIdsPosition[sampleId]] = true; + int samplePosition = getSamplePosition(sampleId); + invalidSamples[samplePosition] = true; } } }).onVariantAnnotation(variantAnnotationColumn -> { From 98ce6f8a333744ee2da22bdb7b3297a0ab91e704 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Fri, 29 Nov 2024 12:07:00 +0000 Subject: [PATCH 083/122] storage: Do not produce a .crc checksum file copying from hdfs. #TASK-6722 --- .../opencga/storage/hadoop/utils/MapReduceOutputFile.java | 7 ++++--- .../storage/hadoop/variant/mr/StreamVariantMapper.java | 4 ++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/utils/MapReduceOutputFile.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/utils/MapReduceOutputFile.java index b1a7e19397..64b7223807 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/utils/MapReduceOutputFile.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/utils/MapReduceOutputFile.java @@ -344,6 +344,9 @@ protected List concatMrOutputToLocal(Path mrOutdir, Path localOutput, bool } } } + FileSystem localFileSystem = localOutput.getFileSystem(getConf()); + localFileSystem.setWriteChecksum(false); + StopWatch stopWatch = new StopWatch(); stopWatch.start(); if (paths.isEmpty()) { @@ 
-360,9 +363,7 @@ protected List concatMrOutputToLocal(Path mrOutdir, Path localOutput, bool LOGGER.info(" Target {}: {}", getCompression(localOutput.getName()), localOutput.toUri()); LOGGER.info(" ---- "); - FileSystem localFfileSystem = localOutput.getFileSystem(getConf()); - localFfileSystem.setWriteChecksum(false); - try (OutputStream os = getOutputStreamPlain(localOutput.getName(), localFfileSystem.create(localOutput))) { + try (OutputStream os = getOutputStreamPlain(localOutput.getName(), localFileSystem.create(localOutput))) { for (int i = 0; i < paths.size(); i++) { Path partFile = paths.get(i); long partFileSize = fileSystem.getFileStatus(partFile).getLen(); diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantMapper.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantMapper.java index 081f512a6d..89758279e0 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantMapper.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantMapper.java @@ -248,8 +248,8 @@ private void throwExceptionIfAny() throws IOException { if (hasExceptions()) { String message = "StreamVariantMapper failed:"; if (stderrThread != null) { - String stderr = String.join("\n", stderrThread.stderrBuffer); - message += "\nSTDERR: " + stderr; + String stderr = String.join("\n[STDERR] - ", stderrThread.stderrBuffer); + message += "\n[STDERR] - " + stderr; } if (throwables.size() == 1) { Throwable cause = throwables.get(0); From 14c07d90c63da3a324a06673fbdecec27ee6fa20 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Fri, 29 Nov 2024 12:27:55 +0000 Subject: [PATCH 084/122] analysis: Do not 
use the scratchDir as intermediate folder for export and walk. #TASK-6722 --- .../analysis/variant/VariantExportTool.java | 23 +++-------------- .../analysis/variant/VariantWalkerTool.java | 25 +++---------------- .../VariantInternalCommandExecutor.java | 2 +- .../models/variant/VariantExportParams.java | 15 ++++++----- 4 files changed, 16 insertions(+), 49 deletions(-) diff --git a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/VariantExportTool.java b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/VariantExportTool.java index b0a2005ac1..b1c49aeafa 100644 --- a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/VariantExportTool.java +++ b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/VariantExportTool.java @@ -20,8 +20,6 @@ import org.opencb.commons.datastore.core.Query; import org.opencb.commons.datastore.core.QueryOptions; import org.opencb.opencga.analysis.tools.OpenCgaTool; -import org.opencb.opencga.catalog.io.IOManager; -import org.opencb.opencga.core.common.UriUtils; import org.opencb.opencga.core.models.common.Enums; import org.opencb.opencga.core.models.variant.VariantExportParams; import org.opencb.opencga.core.tools.annotations.Tool; @@ -29,9 +27,7 @@ import org.opencb.opencga.storage.core.variant.adaptors.VariantQueryParam; import org.opencb.opencga.storage.core.variant.io.VariantWriterFactory; -import java.net.URI; import java.nio.file.Path; -import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -70,12 +66,8 @@ protected List getSteps() { @Override protected void run() throws Exception { - List uris = new ArrayList<>(2); step(ID, () -> { - // Use scratch directory to store intermediate files. 
Move files to final directory at the end - // The scratch directory is expected to be faster than the final directory - // This also avoids moving files to final directory if the tool fails - Path outDir = getScratchDir(); + Path outDir = getOutDir(); String outputFile = StringUtils.isEmpty(toolParams.getOutputFileName()) ? outDir.toString() : outDir.resolve(toolParams.getOutputFileName()).toString(); @@ -84,18 +76,9 @@ protected void run() throws Exception { for (VariantQueryParam param : VariantQueryParam.values()) { queryOptions.remove(param.key()); } - uris.addAll(variantStorageManager.exportData(outputFile, + variantStorageManager.exportData(outputFile, outputFormat, - toolParams.getVariantsFile(), query, queryOptions, token)); - }); - step("move-files", () -> { - // Move files to final directory - IOManager ioManager = catalogManager.getIoManagerFactory().get(uris.get(0)); - for (URI uri : uris) { - String fileName = UriUtils.fileName(uri); - logger.info("Moving file -- " + fileName); - ioManager.move(uri, getOutDir().resolve(fileName).toUri()); - } + toolParams.getVariantsFile(), query, queryOptions, token); }); } } diff --git a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/VariantWalkerTool.java b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/VariantWalkerTool.java index 5ab80d4f57..109a9d4b5a 100644 --- a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/VariantWalkerTool.java +++ b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/VariantWalkerTool.java @@ -20,17 +20,13 @@ import org.opencb.commons.datastore.core.Query; import org.opencb.commons.datastore.core.QueryOptions; import org.opencb.opencga.analysis.tools.OpenCgaTool; -import org.opencb.opencga.catalog.io.IOManager; -import org.opencb.opencga.core.common.UriUtils; import org.opencb.opencga.core.models.common.Enums; import org.opencb.opencga.core.models.variant.VariantWalkerParams; import 
org.opencb.opencga.core.tools.annotations.Tool; import org.opencb.opencga.core.tools.annotations.ToolParams; import org.opencb.opencga.storage.core.variant.io.VariantWriterFactory; -import java.net.URI; import java.nio.file.Path; -import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -75,29 +71,14 @@ protected List getSteps() { @Override protected void run() throws Exception { - List uris = new ArrayList<>(2); step(ID, () -> { - // Use scratch directory to store intermediate files. Move files to final directory at the end - // The scratch directory is expected to be faster than the final directory - // This also avoids moving files to final directory if the tool fails - Path outDir = getScratchDir(); + Path outDir = getOutDir(); String outputFile = outDir.resolve(toolParams.getOutputFileName()).toString(); Query query = toolParams.toQuery(); QueryOptions queryOptions = new QueryOptions().append(QueryOptions.INCLUDE, toolParams.getInclude()) .append(QueryOptions.EXCLUDE, toolParams.getExclude()); - uris.addAll(variantStorageManager.walkData(outputFile, - format, query, queryOptions, toolParams.getDockerImage(), toolParams.getCommandLine(), token)); - }); - step("move-files", () -> { - // Move files to final directory - if (!uris.isEmpty()) { - IOManager ioManager = catalogManager.getIoManagerFactory().get(uris.get(0)); - for (URI uri : uris) { - String fileName = UriUtils.fileName(uri); - logger.info("Moving file -- " + fileName); - ioManager.move(uri, getOutDir().resolve(fileName).toUri()); - } - } + variantStorageManager.walkData(outputFile, + format, query, queryOptions, toolParams.getDockerImage(), toolParams.getCommandLine(), token); }); } } diff --git a/opencga-app/src/main/java/org/opencb/opencga/app/cli/internal/executors/VariantInternalCommandExecutor.java b/opencga-app/src/main/java/org/opencb/opencga/app/cli/internal/executors/VariantInternalCommandExecutor.java index 7d20deccb0..f59e9178e9 100644 --- 
a/opencga-app/src/main/java/org/opencb/opencga/app/cli/internal/executors/VariantInternalCommandExecutor.java +++ b/opencga-app/src/main/java/org/opencb/opencga/app/cli/internal/executors/VariantInternalCommandExecutor.java @@ -341,7 +341,7 @@ private void query(VariantCommandOptions.AbstractVariantQueryCommandOptions cliO queryOptions.putIfNotEmpty("annotations", cliOptions.genericVariantQueryOptions.annotations); VariantExportParams toolParams = new VariantExportParams( - query, outdir, + query, cliOptions.outputFileName, cliOptions.outputFileFormat, cliOptions.variantsFile); diff --git a/opencga-core/src/main/java/org/opencb/opencga/core/models/variant/VariantExportParams.java b/opencga-core/src/main/java/org/opencb/opencga/core/models/variant/VariantExportParams.java index 7e5c1870e8..3f00adc7be 100644 --- a/opencga-core/src/main/java/org/opencb/opencga/core/models/variant/VariantExportParams.java +++ b/opencga-core/src/main/java/org/opencb/opencga/core/models/variant/VariantExportParams.java @@ -16,12 +16,13 @@ package org.opencb.opencga.core.models.variant; +import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonInclude; import org.opencb.commons.datastore.core.Query; public class VariantExportParams extends VariantQueryParams { public static final String DESCRIPTION = "Variant export params"; - private String outdir; + private String outputFileName; private String outputFileFormat; private String variantsFile; @@ -35,21 +36,23 @@ public class VariantExportParams extends VariantQueryParams { public VariantExportParams() { } - public VariantExportParams(Query query, String outdir, String outputFileName, String outputFileFormat, + public VariantExportParams(Query query, String outputFileName, String outputFileFormat, String variantsFile) { super(query); - this.outdir = outdir; this.outputFileName = outputFileName; this.outputFileFormat = outputFileFormat; this.variantsFile = variantsFile; } + @Deprecated + @JsonIgnore 
public String getOutdir() { - return outdir; + return null; } - public VariantExportParams setOutdir(String outdir) { - this.outdir = outdir; + @Deprecated + @JsonIgnore + public VariantExportParams setOutdir(String unused) { return this; } From 050c1ee08c74625bb6ef3158fb3dffa5b41e873c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Fri, 29 Nov 2024 12:42:21 +0000 Subject: [PATCH 085/122] storage: Improve collections usage in SampleVariantStatsDriver. #TASK-6722 --- .../stats/SampleVariantStatsDriver.java | 51 +++++++++++-------- 1 file changed, 29 insertions(+), 22 deletions(-) diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/stats/SampleVariantStatsDriver.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/stats/SampleVariantStatsDriver.java index 4c68b6f9b6..2a94116882 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/stats/SampleVariantStatsDriver.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/stats/SampleVariantStatsDriver.java @@ -48,7 +48,6 @@ import java.util.*; import java.util.function.Predicate; import java.util.stream.Collectors; -import java.util.stream.IntStream; import static org.opencb.opencga.storage.hadoop.variant.HadoopVariantStorageEngine.STUDY_ID; @@ -341,10 +340,10 @@ public void merge(SampleVariantStatsWritable other) { public static class DistributedSampleVariantStatsCalculator extends SampleVariantStatsCalculator { - private int[] sampleIds; + private List sampleIds; - public DistributedSampleVariantStatsCalculator(Pedigree pedigree, int[] samples) { - super(pedigree, IntStream.of(samples).mapToObj(String::valueOf).collect(Collectors.toList())); + public 
DistributedSampleVariantStatsCalculator(Pedigree pedigree, List samples) { + super(pedigree, samples.stream().map(String::valueOf).collect(Collectors.toList())); sampleIds = samples; } @@ -363,7 +362,7 @@ public List getWritables() { List writables = new ArrayList<>(statsList.size()); for (int i = 0; i < statsList.size(); i++) { writables.add(new SampleVariantStatsWritable( - sampleIds[i], ti[i], tv[i], qualCount[i], qualSum[i], qualSumSq[i], statsList.get(i))); + sampleIds.get(i), ti[i], tv[i], qualCount[i], qualSum[i], qualSumSq[i], statsList.get(i))); } return writables; } @@ -373,8 +372,8 @@ public List getWritables() { public static class SampleVariantStatsMapper extends VariantRowMapper { private int studyId; - private int[] samples; - private int[] includeSamples; + private LinkedHashSet samples; + private LinkedHashSet includeSamples; protected final Logger logger = LoggerFactory.getLogger(SampleVariantStatsMapper.class); private VariantStorageMetadataManager vsm; @@ -391,8 +390,12 @@ public static class SampleVariantStatsMapper extends VariantRowMapper(samplesArray.length); + Arrays.stream(samplesArray).forEach(samples::add); + int[] includeSamplesArray = context.getConfiguration().getInts(INCLUDE_SAMPLE_IDS); + includeSamples = new LinkedHashSet<>(includeSamplesArray.length); + Arrays.stream(includeSamplesArray).forEach(includeSamples::add); String fileDataQuery = context.getConfiguration().get(VariantQueryParam.FILE_DATA.key()); String sampleDataQuery = context.getConfiguration().get(VariantQueryParam.SAMPLE_DATA.key()); @@ -403,13 +406,13 @@ protected void setup(Context context) throws IOException, InterruptedException { sampleDataDpIdx = fixedFormat.indexOf(VCFConstants.DEPTH_KEY); fileDataDpIdx = fileAttributes.indexOf(VCFConstants.DEPTH_KEY); - sampleIdsPosition = new HashMap<>(includeSamples.length); - for (int i = 0; i < includeSamples.length; i++) { - sampleIdsPosition.put(includeSamples[i], i); + sampleIdsPosition = new 
HashMap<>(includeSamples.size()); + for (Integer sampleId : includeSamples) { + sampleIdsPosition.put(sampleId, sampleIdsPosition.size()); } Pedigree pedigree = readPedigree(context.getConfiguration()); - calculator = new DistributedSampleVariantStatsCalculator(pedigree, samples); + calculator = new DistributedSampleVariantStatsCalculator(pedigree, new ArrayList<>(samples)); calculator.pre(); fileDataFilter = filterFactory.buildFileDataFilter(fileDataQuery); @@ -431,8 +434,7 @@ private List getSamplesFromFileId(int fileId) { private int getSamplePosition(Integer sampleId) { Integer samplePosition = sampleIdsPosition.get(sampleId); if (samplePosition == null) { - throw new IllegalStateException("Sample " + sampleId + " not found in includeSamples " - + Arrays.toString(includeSamples)); + throw new IllegalStateException("Sample " + sampleId + " not found in includeSamples " + includeSamples); } return samplePosition; } @@ -441,19 +443,24 @@ private int getSamplePosition(Integer sampleId) { protected void map(Object key, VariantRow row, Context context) throws IOException, InterruptedException { VariantAnnotation[] annotation = new VariantAnnotation[1]; - List gts = Arrays.asList(new String[samples.length]); - List dps = Arrays.asList(new String[samples.length]); - List quals = Arrays.asList(new String[samples.length]); - List filters = Arrays.asList(new String[samples.length]); + List gts = Arrays.asList(new String[includeSamples.size()]); + List dps = Arrays.asList(new String[includeSamples.size()]); + List quals = Arrays.asList(new String[includeSamples.size()]); + List filters = Arrays.asList(new String[includeSamples.size()]); // All samples valid by default. // If any filter (either sample-data or file-data) fails, then the sample would become invalid. 
- boolean[] invalidSamples = new boolean[samples.length]; + boolean[] invalidSamples = new boolean[includeSamples.size()]; Variant variant = row.walker().onSample(sampleCell -> { int sampleId = sampleCell.getSampleId(); int samplePosition = getSamplePosition(sampleId); - if (!sampleDataFilter.test(sampleCell)) { + if (sampleCell.getStudyId() != studyId || !includeSamples.contains(sampleId)) { + context.getCounter(COUNTER_GROUP_NAME, "unexpected_sample_discarded").increment(1); + return; + } + if (samples.contains(sampleId) && !sampleDataFilter.test(sampleCell)) { // Invalidate sample + // Do not invalidate extra samples, as might be used for calculating other stats invalidSamples[samplePosition] = true; return; } @@ -519,7 +526,7 @@ protected void map(Object key, VariantRow row, Context context) throws IOExcepti } } context.getCounter(COUNTER_GROUP_NAME, "variants_total").increment(1); - if (invalidSamplesCount == samples.length) { + if (invalidSamplesCount == samples.size()) { context.getCounter(COUNTER_GROUP_NAME, "variants_discarded").increment(1); } else { context.getCounter(COUNTER_GROUP_NAME, "variants_used").increment(1); From a0c2a5f4a1d2fe13d93bfc0dc36f04da13841d46 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Mon, 2 Dec 2024 14:43:45 +0000 Subject: [PATCH 086/122] analysis: Fix VariantAnalysisTest. 
#TASK-6722 --- .../manager/VariantStorageManager.java | 4 +-- .../stats/SampleVariantStatsAnalysis.java | 3 +- .../analysis/variant/VariantAnalysisTest.java | 26 +++++++++++--- .../adaptors/phoenix/PhoenixHelper.java | 32 ++--------------- ...ntStatsHBaseMapReduceAnalysisExecutor.java | 11 ++++-- .../filters/VariantRowFilterFactory.java | 3 ++ .../stats/SampleVariantStatsDriver.java | 35 +++++++++++++------ 7 files changed, 62 insertions(+), 52 deletions(-) diff --git a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/manager/VariantStorageManager.java b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/manager/VariantStorageManager.java index a5d02ab020..9c718fa82a 100644 --- a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/manager/VariantStorageManager.java +++ b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/manager/VariantStorageManager.java @@ -88,7 +88,6 @@ import org.opencb.opencga.storage.core.variant.VariantStorageOptions; import org.opencb.opencga.storage.core.variant.adaptors.*; import org.opencb.opencga.storage.core.variant.adaptors.iterators.VariantDBIterator; -import org.opencb.opencga.storage.core.variant.io.VariantWriterFactory; import org.opencb.opencga.storage.core.variant.io.VariantWriterFactory.VariantOutputFormat; import org.opencb.opencga.storage.core.variant.query.ParsedQuery; import org.opencb.opencga.storage.core.variant.query.VariantQueryResult; @@ -469,8 +468,9 @@ private CatalogStorageMetadataSynchronizer getSynchronizer(VariantStorageEngine return synchronizer; } - public DataResult familyIndexBySamples(String study, Collection samples, ObjectMap params, String token) + public DataResult familyIndexBySamples(String inputStudy, Collection samples, ObjectMap params, String token) throws CatalogException, StorageEngineException { + String study = getStudyFqn(inputStudy, token); return secureOperation(VariantFamilyIndexOperationTool.ID, study, params, token, engine 
-> { Collection thisSamples = samples; boolean allSamples; diff --git a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/stats/SampleVariantStatsAnalysis.java b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/stats/SampleVariantStatsAnalysis.java index a12072e3a1..540051b4a9 100644 --- a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/stats/SampleVariantStatsAnalysis.java +++ b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/stats/SampleVariantStatsAnalysis.java @@ -43,6 +43,7 @@ import org.opencb.opencga.core.tools.annotations.Tool; import org.opencb.opencga.core.tools.annotations.ToolParams; import org.opencb.opencga.core.tools.variant.SampleVariantStatsAnalysisExecutor; +import org.opencb.opencga.storage.core.variant.adaptors.VariantQuery; import org.opencb.opencga.storage.core.variant.adaptors.VariantQueryParam; import java.io.OutputStream; @@ -254,7 +255,7 @@ protected void run() throws ToolException { .setOutputFile(tmpOutputFile) .setStudy(study) .setSampleNames(batchSamples) - .setVariantQuery(variantQuery) + .setVariantQuery(new VariantQuery(variantQuery).includeSample(batchSamples)) .execute(); if (tmpOutputFile != outputFile) { diff --git a/opencga-analysis/src/test/java/org/opencb/opencga/analysis/variant/VariantAnalysisTest.java b/opencga-analysis/src/test/java/org/opencb/opencga/analysis/variant/VariantAnalysisTest.java index 8f3f9695a3..0a622c3982 100644 --- a/opencga-analysis/src/test/java/org/opencb/opencga/analysis/variant/VariantAnalysisTest.java +++ b/opencga-analysis/src/test/java/org/opencb/opencga/analysis/variant/VariantAnalysisTest.java @@ -80,10 +80,7 @@ import org.opencb.opencga.core.models.organizations.OrganizationUpdateParams; import org.opencb.opencga.core.models.project.ProjectCreateParams; import org.opencb.opencga.core.models.project.ProjectOrganism; -import org.opencb.opencga.core.models.sample.Sample; -import 
org.opencb.opencga.core.models.sample.SampleQualityControl; -import org.opencb.opencga.core.models.sample.SampleReferenceParam; -import org.opencb.opencga.core.models.sample.SampleUpdateParams; +import org.opencb.opencga.core.models.sample.*; import org.opencb.opencga.core.models.variant.*; import org.opencb.opencga.core.response.OpenCGAResult; import org.opencb.opencga.core.testclassification.duration.LongTests; @@ -184,7 +181,6 @@ public void setUp() throws Throwable { VariantOperationsTest.dummyVariantSetup(variantStorageManager, CANCER_STUDY, token); file = opencga.createFile(STUDY, "variant-test-file.vcf.gz", token); - variantStorageManager.index(STUDY, file.getId(), opencga.createTmpOutdir("_index"), new ObjectMap(VariantStorageOptions.ANNOTATE.key(), true), token); for (int i = 0; i < file.getSampleIds().size(); i++) { String id = file.getSampleIds().get(i); @@ -233,6 +229,9 @@ public void setUp() throws Throwable { individuals.stream().map(Individual::getId).collect(Collectors.toList()), new QueryOptions(), token); + variantStorageManager.index(STUDY, file.getId(), opencga.createTmpOutdir("_index"), new ObjectMap(VariantStorageOptions.ANNOTATE.key(), true), token); + variantStorageManager.familyIndexBySamples(STUDY, file.getSampleIds(), new ObjectMap(), token); + // Cancer (SV) ObjectMap config = new ObjectMap(); // config.put(VariantStorageOptions.ANNOTATE.key(), true); @@ -411,6 +410,7 @@ private java.io.File getOutputFile(Path outDir) { @Test public void testSampleStatsSampleFilter() throws Exception { + clearSampleVariantStats(); Assume.assumeThat(storageEngine, CoreMatchers.is(HadoopVariantStorageEngine.STORAGE_ENGINE_ID)); // Reset quality control stats for (Sample sample : catalogManager.getSampleManager().search(STUDY, new Query(), new QueryOptions(), token).getResults()) { @@ -431,11 +431,27 @@ public void testSampleStatsSampleFilter() throws Exception { @Test public void testSampleStatsWithGeneFilter() throws Exception { + 
clearSampleVariantStats(); sampleVariantStats(null, "stats_BRCA1", false, 1, file.getSampleIds().subList(0, 2), false, new VariantQuery().gene("BRCA1")); } + @Test + public void testSampleStatsFromOffspringFilter() throws Exception { + clearSampleVariantStats(); + sampleVariantStats(null, "stats_offspring", false, 1, Collections.singletonList(daughter)); + } + + private void clearSampleVariantStats() throws CatalogException { + for (String sampleId : file.getSampleIds()) { + SampleQualityControl qualityControl = catalogManager.getSampleManager().get(STUDY, sampleId, new QueryOptions(), token).first().getQualityControl(); + qualityControl.getVariant().getVariantStats().clear(); + catalogManager.getSampleManager().update(STUDY, sampleId, new SampleUpdateParams().setQualityControl(qualityControl), new QueryOptions(), token); + } + } + @Test public void testSampleStats() throws Exception { + clearSampleVariantStats(); sampleVariantStats("1,2", "stats_1", false, 1, file.getSampleIds().subList(0, 2)); sampleVariantStats("1,2", "stats_1", false, 1, file.getSampleIds().subList(2, 4)); sampleVariantStats("1,2", "stats_2", false, 2, Collections.singletonList(ParamConstants.ALL)); diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/adaptors/phoenix/PhoenixHelper.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/adaptors/phoenix/PhoenixHelper.java index a7a87cad4e..b702ba1a89 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/adaptors/phoenix/PhoenixHelper.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/adaptors/phoenix/PhoenixHelper.java @@ -36,10 +36,7 @@ import org.apache.phoenix.schema.PTable; import org.apache.phoenix.schema.PTableType; import 
org.apache.phoenix.schema.TableNotFoundException; -import org.apache.phoenix.schema.types.PArrayDataType; -import org.apache.phoenix.schema.types.PDataType; -import org.apache.phoenix.schema.types.PInteger; -import org.apache.phoenix.schema.types.PhoenixArray; +import org.apache.phoenix.schema.types.*; import org.apache.phoenix.util.*; import org.opencb.opencga.core.common.BatchUtils; import org.opencb.opencga.core.common.ExceptionUtils; @@ -51,8 +48,6 @@ import org.slf4j.LoggerFactory; import java.io.IOException; -import java.lang.reflect.InvocationTargetException; -import java.lang.reflect.Method; import java.sql.Connection; import java.sql.ResultSet; import java.sql.SQLException; @@ -75,37 +70,14 @@ public class PhoenixHelper { private final Configuration conf; private static Logger logger = LoggerFactory.getLogger(PhoenixHelper.class); - private static Method positionAtArrayElement; private TableName systemCatalog; public PhoenixHelper(Configuration conf) { this.conf = conf; } - static { - Class decoder; - try { - decoder = Class.forName("org.apache.phoenix.schema.types.PArrayDataTypeDecoder"); - } catch (ClassNotFoundException e) { - decoder = PArrayDataType.class; - } - try { - positionAtArrayElement = decoder.getMethod("positionAtArrayElement", - ImmutableBytesWritable.class, Integer.TYPE, PDataType.class, Integer.class); - } catch (NoSuchMethodException e) { - // This should never happen! 
- throw new RuntimeException(e); - } - } - public static boolean positionAtArrayElement(ImmutableBytesWritable ptr, int arrayIndex, PDataType pDataType, Integer byteSize) { -// return PArrayDataTypeDecoder.positionAtArrayElement(ptr, arrayIndex, instance, byteSize); - try { - Object o = positionAtArrayElement.invoke(null, ptr, arrayIndex, pDataType, byteSize); - return o == null || (boolean) o; - } catch (IllegalAccessException | InvocationTargetException e) { - throw new RuntimeException(e); - } + return PArrayDataTypeDecoder.positionAtArrayElement(ptr, arrayIndex, pDataType, byteSize); } public boolean execute(Connection con, String sql) throws SQLException { diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/analysis/stats/SampleVariantStatsHBaseMapReduceAnalysisExecutor.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/analysis/stats/SampleVariantStatsHBaseMapReduceAnalysisExecutor.java index 8b4b017535..bba1d23873 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/analysis/stats/SampleVariantStatsHBaseMapReduceAnalysisExecutor.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/analysis/stats/SampleVariantStatsHBaseMapReduceAnalysisExecutor.java @@ -6,8 +6,9 @@ import org.opencb.opencga.core.exceptions.ToolExecutorException; import org.opencb.opencga.core.tools.annotations.ToolExecutor; import org.opencb.opencga.core.tools.variant.SampleVariantStatsAnalysisExecutor; +import org.opencb.opencga.storage.core.variant.adaptors.VariantQuery; import org.opencb.opencga.storage.core.variant.adaptors.VariantQueryException; -import org.opencb.opencga.storage.core.variant.query.ParsedVariantQuery; +import 
org.opencb.opencga.storage.core.variant.adaptors.VariantQueryParam; import org.opencb.opencga.storage.hadoop.variant.HadoopVariantStorageEngine; import org.opencb.opencga.storage.hadoop.variant.adaptors.VariantHadoopDBAdaptor; import org.opencb.opencga.storage.hadoop.variant.analysis.HadoopVariantStorageToolExecutor; @@ -45,9 +46,13 @@ public void run() throws ToolException { } } - ParsedVariantQuery variantQuery = engine.parseQuery(getVariantQuery(), new QueryOptions()); + VariantQuery query = engine.parseQuery(getVariantQuery(), new QueryOptions()).getQuery(); + // SampleData and FileData filters should not include the sample or file names. + // The parser would add them. Restore the original query values (if any) + query.putIfNotNull(VariantQueryParam.SAMPLE_DATA.key(), getVariantQuery().get(VariantQueryParam.SAMPLE_DATA.key())); + query.putIfNotNull(VariantQueryParam.FILE_DATA.key(), getVariantQuery().get(VariantQueryParam.FILE_DATA.key())); ObjectMap params = new ObjectMap(engine.getOptions()) - .appendAll(variantQuery.getQuery()) + .appendAll(query) .append(SampleVariantStatsDriver.SAMPLES, sampleNames) .append(SampleVariantStatsDriver.OUTPUT, getOutputFile().toAbsolutePath().toUri()); engine.getMRExecutor().run(SampleVariantStatsDriver.class, SampleVariantStatsDriver.buildArgs( diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/filters/VariantRowFilterFactory.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/filters/VariantRowFilterFactory.java index d7d3a09a07..d300776465 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/filters/VariantRowFilterFactory.java +++ 
b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/filters/VariantRowFilterFactory.java @@ -145,6 +145,9 @@ public Predicate buildSampleDataFilter(String sampleDat final Predicate predicate; int idx = fixedFormat.indexOf(filter.getKey()); + if (idx < 0) { + throw new IllegalArgumentException("Unknown key '" + filter.getKey() + "'. Supported keys are: " + fixedFormat); + } String filterValue = filter.getValue(); if (StringUtils.isNumeric(filterValue)) { // Numeric value diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/stats/SampleVariantStatsDriver.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/stats/SampleVariantStatsDriver.java index 2a94116882..abc4346532 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/stats/SampleVariantStatsDriver.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/stats/SampleVariantStatsDriver.java @@ -95,16 +95,17 @@ protected void parseAndValidateParameters() throws IOException { List samples = Arrays.asList(samplesStr.split(",")); StringBuilder trios = new StringBuilder(); + int triosCount = 0; includeSample = new LinkedHashSet<>(); if (samples.size() == 1 && (samples.get(0).equals("auto") || samples.get(0).equals("all"))) { boolean all = samples.get(0).equals("all"); - metadataManager.sampleMetadataIterator(studyId).forEachRemaining(sampleMetadata -> { + for (SampleMetadata sampleMetadata : metadataManager.sampleMetadataIterable(studyId)) { if (sampleMetadata.isIndexed()) { if (all || sampleMetadata.getStats() == null || MapUtils.isEmpty(sampleMetadata.getStats().getBiotypeCount())) { - addTrio(trios, includeSample, sampleMetadata); 
+ triosCount += addTrio(trios, includeSample, sampleMetadata); } } - }); + } sampleIds = new ArrayList<>(includeSample); } else { sampleIds = new ArrayList<>(samples.size()); @@ -114,12 +115,15 @@ protected void parseAndValidateParameters() throws IOException { throw VariantQueryException.sampleNotFound(sample, metadataManager.getStudyName(studyId)); } sampleIds.add(sampleId); - addTrio(trios, includeSample, metadataManager.getSampleMetadata(studyId, sampleId)); + triosCount += addTrio(trios, includeSample, metadataManager.getSampleMetadata(studyId, sampleId)); } } if (sampleIds.isEmpty()) { throw new IllegalArgumentException("Nothing to do!"); } + LOGGER.info(" * samples : " + (samples.size() > 10 ? (samples.subList(0, 10) + "...") : samples) + " (" + samples.size() + ")"); + LOGGER.info(" * includeSamples : " + includeSample.size()); + LOGGER.info(" * familyTrios : " + triosCount); fileData = getParam(VariantQueryParam.FILE_DATA.key()); if (StringUtils.isNotEmpty(fileData)) { LOGGER.info(" * fileData : " + fileData); @@ -133,7 +137,7 @@ protected void parseAndValidateParameters() throws IOException { } - private void addTrio(StringBuilder trios, Set includeSample, SampleMetadata sampleMetadata) { + private int addTrio(StringBuilder trios, Set includeSample, SampleMetadata sampleMetadata) { includeSample.add(sampleMetadata.getId()); if (sampleMetadata.getFather() != null || sampleMetadata.getMother() != null) { // Make sure parents are included in the query @@ -149,7 +153,9 @@ private void addTrio(StringBuilder trios, Set includeSample, SampleMeta .append(",") .append(sampleMetadata.getMother() == null ? 
"0" : sampleMetadata.getMother()) .append(";"); + return 1; } + return 0; } private static Pedigree readPedigree(Configuration conf) { @@ -340,11 +346,13 @@ public void merge(SampleVariantStatsWritable other) { public static class DistributedSampleVariantStatsCalculator extends SampleVariantStatsCalculator { - private List sampleIds; + private Set sampleIds; + private List includeSampleIds; - public DistributedSampleVariantStatsCalculator(Pedigree pedigree, List samples) { - super(pedigree, samples.stream().map(String::valueOf).collect(Collectors.toList())); + public DistributedSampleVariantStatsCalculator(Pedigree pedigree, Set samples, List includeSamples) { + super(pedigree, includeSamples.stream().map(String::valueOf).collect(Collectors.toList())); sampleIds = samples; + includeSampleIds = includeSamples; } public DistributedSampleVariantStatsCalculator(SampleVariantStatsWritable statsWritable) { @@ -361,8 +369,13 @@ public DistributedSampleVariantStatsCalculator(SampleVariantStatsWritable statsW public List getWritables() { List writables = new ArrayList<>(statsList.size()); for (int i = 0; i < statsList.size(); i++) { - writables.add(new SampleVariantStatsWritable( - sampleIds.get(i), ti[i], tv[i], qualCount[i], qualSum[i], qualSumSq[i], statsList.get(i))); + Integer sampleId = includeSampleIds.get(i); + if (sampleIds.contains(sampleId)) { + // Only write samples that were requested + // Skip samples that were included but not requested, as they are used for calculating other stats + writables.add(new SampleVariantStatsWritable( + sampleId, ti[i], tv[i], qualCount[i], qualSum[i], qualSumSq[i], statsList.get(i))); + } } return writables; } @@ -412,7 +425,7 @@ protected void setup(Context context) throws IOException, InterruptedException { } Pedigree pedigree = readPedigree(context.getConfiguration()); - calculator = new DistributedSampleVariantStatsCalculator(pedigree, new ArrayList<>(samples)); + calculator = new 
DistributedSampleVariantStatsCalculator(pedigree, samples, new ArrayList<>(includeSamples)); calculator.pre(); fileDataFilter = filterFactory.buildFileDataFilter(fileDataQuery); From 3853c638c96dfc5e5ca5628cd1af595cfaae558e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Tue, 3 Dec 2024 09:14:19 +0000 Subject: [PATCH 087/122] app: Regenerate cli. #TASK-6722 --- .../AnalysisVariantCommandExecutor.java | 1 - .../AnalysisVariantCommandOptions.java | 3 - .../core/metadata/models/ProjectMetadata.java | 61 +++++++++++++++++++ .../hadoop/app/VariantMetadataMain.java | 58 +++++++++++++++++- 4 files changed, 118 insertions(+), 5 deletions(-) diff --git a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/AnalysisVariantCommandExecutor.java b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/AnalysisVariantCommandExecutor.java index 1410934a49..95d1bcf7b4 100644 --- a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/AnalysisVariantCommandExecutor.java +++ b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/AnalysisVariantCommandExecutor.java @@ -559,7 +559,6 @@ private RestResponse runExport() throws Exception { putNestedIfNotEmpty(beanParams, "unknownGenotype", commandOptions.unknownGenotype, true); putNestedIfNotNull(beanParams, "sampleMetadata", commandOptions.sampleMetadata, true); putNestedIfNotNull(beanParams, "sort", commandOptions.sort, true); - putNestedIfNotEmpty(beanParams, "outdir", commandOptions.outdir, true); putNestedIfNotEmpty(beanParams, "outputFileName", commandOptions.outputFileName, true); putNestedIfNotEmpty(beanParams, "outputFileFormat", commandOptions.outputFileFormat, true); putNestedIfNotEmpty(beanParams, "variantsFile", commandOptions.variantsFile, true); diff --git a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/options/AnalysisVariantCommandOptions.java 
b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/options/AnalysisVariantCommandOptions.java index 24a37e0422..491495faa8 100644 --- a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/options/AnalysisVariantCommandOptions.java +++ b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/options/AnalysisVariantCommandOptions.java @@ -723,9 +723,6 @@ public class RunExportCommandOptions { @Parameter(names = {"--sort"}, description = "The body web service sort parameter", required = false, help = true, arity = 0) public boolean sort = false; - @Parameter(names = {"--outdir"}, description = "The body web service outdir parameter", required = false, arity = 1) - public String outdir; - @Parameter(names = {"--output-file-name"}, description = "The body web service outputFileName parameter", required = false, arity = 1) public String outputFileName; diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/metadata/models/ProjectMetadata.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/metadata/models/ProjectMetadata.java index e64deedff6..fc8744528a 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/metadata/models/ProjectMetadata.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/metadata/models/ProjectMetadata.java @@ -70,6 +70,24 @@ public VariantAnnotationSets setSaved(List saved) { this.saved = saved; return this; } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + VariantAnnotationSets that = (VariantAnnotationSets) o; + return Objects.equals(current, that.current) + && Objects.equals(saved, that.saved); + } + + @Override + public int hashCode() { + return Objects.hash(current, saved); + } } public static class VariantAnnotationMetadata { @@ -158,6 +176,28 @@ public 
VariantAnnotationMetadata setPrivateSources(List privateSources) this.privateSources = privateSources; return this; } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + VariantAnnotationMetadata that = (VariantAnnotationMetadata) o; + return id == that.id && Objects.equals(name, that.name) + && Objects.equals(creationDate, that.creationDate) + && Objects.equals(annotator, that.annotator) + && Objects.equals(sourceVersion, that.sourceVersion) + && Objects.equals(dataRelease, that.dataRelease) + && Objects.equals(privateSources, that.privateSources); + } + + @Override + public int hashCode() { + return Objects.hash(id, name, creationDate, annotator, sourceVersion, dataRelease, privateSources); + } } public static class VariantAnnotatorProgram { @@ -321,4 +361,25 @@ public ProjectMetadata setAttributes(ObjectMap attributes) { return this; } + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + ProjectMetadata that = (ProjectMetadata) o; + return release == that.release && Objects.equals(species, that.species) + && Objects.equals(assembly, that.assembly) + && Objects.equals(dataRelease, that.dataRelease) + && Objects.equals(annotation, that.annotation) + && Objects.equals(counters, that.counters) + && Objects.equals(attributes, that.attributes); + } + + @Override + public int hashCode() { + return Objects.hash(species, assembly, dataRelease, release, annotation, counters, attributes); + } } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/app/VariantMetadataMain.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/app/VariantMetadataMain.java index 202ab39f0b..34fa789026 100644 --- 
a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/app/VariantMetadataMain.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/app/VariantMetadataMain.java @@ -56,7 +56,10 @@ public VariantMetadataCommandExecutor() { public VariantMetadataCommandExecutor(String argsContext) { super(argsContext); addSubCommand(Arrays.asList("tables", "table"), "[help|list]", new HBaseTablesCommandExecutor()); - addSubCommand(Arrays.asList("study-metadata", "sm", "study", "studies"), "[help|list|id|read|write|rename] ..", + addSubCommand(Arrays.asList("project-metadata", "p", "pm", "project"), "[help|read|write|replace] ..", + new ProjectCommandExecutor()); + addSubCommand(Arrays.asList("study-metadata", "sm", "study", "studies"), "[help|list|id|read|write|replace|rename] " + + " ..", new StudyCommandExecutor()); addSubCommand(Arrays.asList("file-metadata", "fm", "file", "files"), "[help|list|id|read|write] ...", new FileCommandExecutor()); @@ -115,6 +118,37 @@ protected void cleanup(String command, String[] args) throws Exception { } } + private static class ProjectCommandExecutor extends VariantStorageMetadataManagerCommandExecutor { + ProjectCommandExecutor() { + addSubCommand(Arrays.asList("read", "info"), + "", + args -> { + print(mm.getProjectMetadata()); + } + ); + addSubCommand(Arrays.asList("write", "update"), + " ", + args -> { + ProjectMetadata projectMetadata = readFile(getArg(args, 1), ProjectMetadata.class); + mm.updateProjectMetadata(pm -> projectMetadata); + } + ); + addSubCommand(Arrays.asList("replace"), + " ", + args -> { + ProjectMetadata origProjectMetadata = readFile(getArg(args, 1), ProjectMetadata.class); + ProjectMetadata newProjectMetadata = readFile(getArg(args, 2), ProjectMetadata.class); + mm.updateProjectMetadata(pm -> { + if (!pm.equals(origProjectMetadata)) { + throw new IllegalStateException("Original 
ProjectMetadata does not match!"); + } + return newProjectMetadata; + }); + } + ); + } + } + private static class StudyCommandExecutor extends VariantStorageMetadataManagerCommandExecutor { StudyCommandExecutor() { addSubCommand(Arrays.asList("list", "search"), @@ -129,12 +163,34 @@ private static class StudyCommandExecutor extends VariantStorageMetadataManagerC print(mm.getStudyMetadata(getArg(args, 1))); } ); + addSubCommand(Arrays.asList("id"), + " ", + args -> { + print(mm.getStudyId(getArg(args, 1))); + } + ); addSubCommand(Arrays.asList("write", "update"), " ", args -> { mm.unsecureUpdateStudyMetadata(readFile(getArg(args, 1), StudyMetadata.class)); } ); + addSubCommand(Arrays.asList("replace"), + " ", + args -> { + StudyMetadata origStudyMetadata = readFile(getArg(args, 1), StudyMetadata.class); + StudyMetadata newStudyMetadata = readFile(getArg(args, 2), StudyMetadata.class); + if (origStudyMetadata.getId() != newStudyMetadata.getId()) { + throw new IllegalStateException("StudyMetadata IDs do not match!"); + } + mm.updateStudyMetadata(origStudyMetadata.getId(), sm -> { + if (!sm.equals(origStudyMetadata)) { + throw new IllegalStateException("Original StudyMetadata does not match!"); + } + return newStudyMetadata; + }); + }); + addSubCommand(Arrays.asList("rename"), " ", args -> { From eb61609e5e2829123e70104f49428186da7c2b88 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Tue, 3 Dec 2024 22:21:40 +0000 Subject: [PATCH 088/122] storage: Fix junit tests. 
#TASK-6722 --- .../variant/mr/StreamVariantMapper.java | 11 ++++++--- .../analysis/gwas/FisherTestDriverTest.java | 24 +++++++------------ .../variant/stats/SampleVariantStatsTest.java | 14 ++++------- .../walker/HadoopVariantWalkerTest.java | 9 ++++++- .../test/resources/variantWalker/Dockerfile | 2 +- 5 files changed, 30 insertions(+), 30 deletions(-) diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantMapper.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantMapper.java index 89758279e0..f4b389bd69 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantMapper.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantMapper.java @@ -44,6 +44,7 @@ public class StreamVariantMapper extends VariantMapper { public static final String COMMANDLINE_BASE64 = "opencga.variant.stream.commandline_base64"; public static final String ADDENVIRONMENT_PARAM = "opencga.variant.stream.addenvironment"; public static final String HAS_REDUCE = "opencga.variant.stream.hasReduce"; + public static final String DOCKER_PRUNE_OPTS = "opencga.variant.stream.docker.prune.opts"; private final boolean verboseStdout = false; private static final long REPORTER_OUT_DELAY = 10 * 1000L; @@ -269,16 +270,20 @@ private void throwExceptionIfAny() throws IOException { @Override protected void cleanup(Mapper.Context context) throws IOException, InterruptedException { closeProcess(context, true); - dockerPruneImages(); + dockerPruneImages(context.getConfiguration()); super.cleanup(context); } - private void dockerPruneImages() { + private void dockerPruneImages(Configuration conf) { try { LOG.info("Pruning docker images"); int 
maxImages = 5; + + + String dockerPruneOpts = conf.get(DOCKER_PRUNE_OPTS, ""); + Command command = new Command(new String[]{"bash", "-c", "[ $(docker image ls --format json | wc -l) -gt " + maxImages + " ] " - + "&& echo 'Run docker image prune' && docker image prune -f --all --filter label!=storage='do_not_delete'" + + "&& echo 'Run docker image prune' && docker image prune -f --all " + dockerPruneOpts + "|| echo 'Skipping docker image prune. Less than " + maxImages + " images.'"}, Collections.emptyMap()); command.run(); int ecode = command.getExitValue(); diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/analysis/gwas/FisherTestDriverTest.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/analysis/gwas/FisherTestDriverTest.java index 0052e80579..2a352502b9 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/analysis/gwas/FisherTestDriverTest.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/analysis/gwas/FisherTestDriverTest.java @@ -20,15 +20,16 @@ import org.opencb.opencga.storage.hadoop.variant.VariantHbaseTestUtils; import org.opencb.opencga.storage.hadoop.variant.adaptors.VariantHadoopDBAdaptor; -import java.io.*; +import java.io.BufferedReader; +import java.io.DataInputStream; +import java.io.FileInputStream; +import java.io.InputStreamReader; import java.net.URI; import java.util.Collections; import java.util.HashSet; import java.util.List; import java.util.Set; import java.util.stream.Collectors; -import java.util.stream.IntStream; -import java.util.stream.Stream; @Category(LongTests.class) public class FisherTestDriverTest extends VariantStorageBaseTest implements HadoopVariantStorageTest { @@ -78,18 +79,17 @@ public void 
testFisher() throws Exception { .map(s -> metadataManager.getSampleName(studyMetadata.getId(), s)) .collect(Collectors.toList()); + URI local1 = localOut.resolve("fisher_result.tsv"); ObjectMap objectMap = new ObjectMap() .append(FisherTestDriver.CONTROL_COHORT, controlCohort) .append(FisherTestDriver.CASE_COHORT, caseCohort) - .append(FisherTestDriver.OUTPUT, "fisher_result"); + .append(FisherTestDriver.OUTPUT, local1); getMrExecutor().run(FisherTestDriver.class, FisherTestDriver.buildArgs( dbAdaptor.getArchiveTableName(1), dbAdaptor.getVariantTable(), 1, Collections.emptySet(), objectMap), ""); - URI local1 = copyToLocal("fisher_result"); - URI local2 = localOut.resolve("fisher_result2.tsv"); objectMap.append(FisherTestDriver.OUTPUT, local2) .append(VariantQueryParam.ANNOT_CONSEQUENCE_TYPE.key(), "lof,missense_variant") @@ -100,15 +100,14 @@ public void testFisher() throws Exception { 1, Collections.emptySet(), objectMap), ""); -// URI local2 = copyToLocal("fisher_result2"); variantStorageEngine.loadVariantScore(local1, studyMetadata.getName(), "fisher1", "ALL", null, new VariantScoreFormatDescriptor(1, 16, 15), new ObjectMap()); variantStorageEngine.loadVariantScore(local2, studyMetadata.getName(), "fisher2", "ALL", null, new VariantScoreFormatDescriptor(1, 16, 15), new ObjectMap()); - FileSystem fs = FileSystem.get(configuration.get()); + FileSystem fs = FileSystem.get(local1, configuration.get()); Set lines1 = new HashSet<>(); int lines2 = 0; - try (BufferedReader is = new BufferedReader(new InputStreamReader(fs.open(new Path("fisher_result/part-r-00000"))))) { + try (BufferedReader is = new BufferedReader(new InputStreamReader(fs.open(new Path(local1))))) { String x = is.readLine(); while (StringUtils.isNotEmpty(x)) { // System.out.println(x); @@ -134,11 +133,4 @@ public void testFisher() throws Exception { Assert.assertThat(lines2, VariantMatchers.gt(0)); } - private URI copyToLocal(String s) throws IOException { - FileSystem fs = 
FileSystem.get(configuration.get()); - URI local = localOut.resolve(s + ".tsv"); - fs.copyToLocalFile(new Path(s + "/part-r-00000"), new Path(local)); - return local; - } - } \ No newline at end of file diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/stats/SampleVariantStatsTest.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/stats/SampleVariantStatsTest.java index 5a3a0a1ea5..e5f9417c30 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/stats/SampleVariantStatsTest.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/stats/SampleVariantStatsTest.java @@ -1,6 +1,5 @@ package org.opencb.opencga.storage.hadoop.variant.stats; -import org.apache.commons.io.FileUtils; import org.junit.*; import org.junit.experimental.categories.Category; import org.junit.rules.ExternalResource; @@ -23,8 +22,8 @@ import org.opencb.opencga.storage.hadoop.variant.HadoopVariantStorageTest; import org.opencb.opencga.storage.hadoop.variant.VariantHbaseTestUtils; -import java.io.File; import java.net.URI; +import java.nio.file.Paths; import java.util.*; import java.util.stream.Collectors; @@ -105,7 +104,7 @@ public void testAuto() throws Exception { Integer childId = engine.getMetadataManager().getSampleId(studyId, child); engine.getMetadataManager().updateSampleMetadata(studyId, childId, sampleMetadata -> sampleMetadata.setStats(stats.get(2))); - URI localOutputUri = newOutputUri(); + URI localOutputUri = newOutputUri().resolve("stats.json"); ObjectMap params = new ObjectMap().append(SampleVariantStatsDriver.SAMPLES, "auto") .append(SampleVariantStatsDriver.OUTPUT, localOutputUri); getMrExecutor().run(SampleVariantStatsDriver.class, 
SampleVariantStatsDriver.buildArgs(null, engine.getVariantTableName(), 1, null, params), ""); @@ -115,9 +114,7 @@ public void testAuto() throws Exception { Assert.assertEquals(stats, actualStats); - List files = new ArrayList<>(FileUtils.listFiles(new File(localOutputUri), null, true)); - Assert.assertEquals(1, files.size()); - List statsFromFile = JacksonUtils.getDefaultObjectMapper().readerFor(SampleVariantStats.class).readValues(files.get(0)).readAll(); + List statsFromFile = JacksonUtils.getDefaultObjectMapper().readerFor(SampleVariantStats.class).readValues(Paths.get(localOutputUri).toFile()).readAll(); Map statsFromFileMap = statsFromFile.stream().collect(Collectors.toMap(SampleVariantStats::getId, i -> i)); Assert.assertEquals(stats.get(0), statsFromFileMap.get(father)); Assert.assertEquals(stats.get(1), statsFromFileMap.get(mother)); @@ -151,9 +148,8 @@ public void testChild() throws Exception { List actualStats = readStatsFromMeta(); - // When processing a child, its parents must be processed as well - Assert.assertEquals(3, actualStats.size()); - Assert.assertEquals(stats, actualStats); + Assert.assertEquals(1, actualStats.size()); + Assert.assertEquals(stats.get(2), actualStats.get(0)); } public List readStatsFromMeta() throws StorageEngineException { diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/walker/HadoopVariantWalkerTest.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/walker/HadoopVariantWalkerTest.java index 0d9ab975ed..b3938e6175 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/walker/HadoopVariantWalkerTest.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/walker/HadoopVariantWalkerTest.java @@ -15,6 +15,7 @@ 
import org.opencb.opencga.storage.hadoop.variant.HadoopVariantStorageEngine; import org.opencb.opencga.storage.hadoop.variant.HadoopVariantStorageTest; import org.opencb.opencga.storage.hadoop.variant.VariantHbaseTestUtils; +import org.opencb.opencga.storage.hadoop.variant.mr.StreamVariantMapper; import java.io.IOException; import java.net.URI; @@ -109,8 +110,14 @@ public void exportDocker() throws Exception { URI outdir = newOutputUri(); String cmdPython1 = "python variant_walker.py walker_example Cut --length 30"; - + variantStorageEngine.getOptions().put(StreamVariantMapper.DOCKER_PRUNE_OPTS, " --filter label!=opencga_scope='test'"); variantStorageEngine.walkData(outdir.resolve("variant4.txt.gz"), VariantWriterFactory.VariantOutputFormat.JSON, new Query(), new QueryOptions(), dockerImage, cmdPython1); + + // Ensure that the docker image is not pruned + Command dockerImages = new Command(new String[]{"docker", "images", "--filter", "label=opencga_scope=test"}, Collections.emptyMap()); + dockerImages.run(); + assertEquals(0, dockerImages.getExitValue()); + assertEquals(2, dockerImages.getOutput().split("\n").length); } private static String buildDocker() throws IOException { diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/resources/variantWalker/Dockerfile b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/resources/variantWalker/Dockerfile index bd9f5511ad..ca17155b91 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/resources/variantWalker/Dockerfile +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/resources/variantWalker/Dockerfile @@ -6,7 +6,7 @@ WORKDIR /app ARG PYTHON_PATH="." 
-LABEL storage="do_not_delete" +LABEL opencga_scope="test" RUN echo ${PYTHON_PATH} # Copy the python directory contents into the container at /app From 54acc28d9a8fe0164d03fc116e27b0cc37ddc2ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Wed, 4 Dec 2024 09:58:56 +0000 Subject: [PATCH 089/122] cicd: Increase "Publish Test Report on GitHub" memory #TASK-6722 --- .github/workflows/test-analysis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test-analysis.yml b/.github/workflows/test-analysis.yml index 1962c89ddc..e8dec69c79 100644 --- a/.github/workflows/test-analysis.yml +++ b/.github/workflows/test-analysis.yml @@ -108,7 +108,7 @@ jobs: - name: Publish Test Report on GitHub uses: scacap/action-surefire-report@v1 env: - NODE_OPTIONS: '--max_old_space_size=4096' + NODE_OPTIONS: '--max_old_space_size=6144' ## Skip cancelled() ## https://docs.github.com/en/actions/learn-github-actions/expressions#cancelled if: success() || failure() From 4e96492c592c70fca06823908537071c6f05b4e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Wed, 4 Dec 2024 09:59:10 +0000 Subject: [PATCH 090/122] core: Fix NumberFormatException from IOUtils. 
#TASK-6722 --- .../main/java/org/opencb/opencga/core/common/IOUtils.java | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/opencga-core/src/main/java/org/opencb/opencga/core/common/IOUtils.java b/opencga-core/src/main/java/org/opencb/opencga/core/common/IOUtils.java index bfc9f3524b..2446cc8956 100644 --- a/opencga-core/src/main/java/org/opencb/opencga/core/common/IOUtils.java +++ b/opencga-core/src/main/java/org/opencb/opencga/core/common/IOUtils.java @@ -28,7 +28,6 @@ import java.util.LinkedList; import java.util.List; import java.util.concurrent.ArrayBlockingQueue; -import java.util.concurrent.BlockingQueue; import java.util.concurrent.ExecutionException; import java.util.concurrent.atomic.AtomicReference; import java.util.regex.Pattern; @@ -399,12 +398,12 @@ public static long fromHumanReadableToByte(String value, boolean assumeBinary) { value = value.substring(0, value.length() - 1); } final boolean si; - if (assumeBinary) { + if (value.endsWith("i")) { si = false; + value = value.substring(0, value.length() - 1); } else { - if (value.endsWith("i")) { + if (assumeBinary) { si = false; - value = value.substring(0, value.length() - 1); } else { si = true; } From 3f8ee12390d8e9920243144cf7e93f1fb735be12 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Thu, 12 Dec 2024 09:07:05 +0000 Subject: [PATCH 091/122] storage: Tune outdated cohort stats warning event. 
#TASK-7225 --- .../variant/query/projection/VariantQueryProjectionParser.java | 2 +- .../core/variant/stats/VariantStatisticsManagerTest.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/projection/VariantQueryProjectionParser.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/projection/VariantQueryProjectionParser.java index 95add22ec8..d04c94c7f7 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/projection/VariantQueryProjectionParser.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/projection/VariantQueryProjectionParser.java @@ -154,7 +154,7 @@ public VariantQueryProjection parseVariantQueryProjection(Query query, QueryOpti message += " The statistics have been calculated with " + invalidStatsNumSamples + " samples, " + "while the total number of samples in the cohort is " + numSampmles + "."; } - message += " To display updated statistics, please execute variant-stats-index."; + message += " To display updated statistics, please contact your Organization / Study Administrator."; events.add(new Event(Event.Type.WARNING, message)); } else if (status == TaskMetadata.Status.RUNNING) { String message = "Please note that the Cohort Stats for " diff --git a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/stats/VariantStatisticsManagerTest.java b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/stats/VariantStatisticsManagerTest.java index e90b49cff4..646c212635 100644 --- a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/stats/VariantStatisticsManagerTest.java +++ 
b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/stats/VariantStatisticsManagerTest.java @@ -169,7 +169,7 @@ public void queryInvalidStats() throws Exception { assertEquals(1, result.getEvents().size()); assertEquals("Please note that the Cohort Stats for '1000g:cohort2' are currently outdated." + " The statistics have been calculated with 2 samples, while the total number of samples in the cohort is 4." + - " To display updated statistics, please execute variant-stats-index.", result.getEvents().get(0).getMessage()); + " To display updated statistics, please contact your Organization / Study Administrator.", result.getEvents().get(0).getMessage()); VariantStorageEngine engineMock = Mockito.spy(variantStorageEngine); VariantStatisticsManager statsManagerMock = Mockito.spy(variantStorageEngine.newVariantStatisticsManager()); From 852ffcae7fe8ce913590a2ad278d3a77d29b7a5c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Thu, 19 Dec 2024 10:58:10 +0000 Subject: [PATCH 092/122] core: Remove unused method. 
#TASK-6722 --- .../opencb/opencga/core/common/IOUtils.java | 47 ------------------- 1 file changed, 47 deletions(-) diff --git a/opencga-core/src/main/java/org/opencb/opencga/core/common/IOUtils.java b/opencga-core/src/main/java/org/opencb/opencga/core/common/IOUtils.java index 2446cc8956..e5cf6fbf4b 100644 --- a/opencga-core/src/main/java/org/opencb/opencga/core/common/IOUtils.java +++ b/opencga-core/src/main/java/org/opencb/opencga/core/common/IOUtils.java @@ -16,8 +16,6 @@ package org.opencb.opencga.core.common; -import org.opencb.commons.run.ParallelTaskRunner; - import java.io.*; import java.nio.ByteBuffer; import java.nio.charset.Charset; @@ -28,7 +26,6 @@ import java.util.LinkedList; import java.util.List; import java.util.concurrent.ArrayBlockingQueue; -import java.util.concurrent.ExecutionException; import java.util.concurrent.atomic.AtomicReference; import java.util.regex.Pattern; import java.util.zip.ZipEntry; @@ -477,48 +474,4 @@ public static void copyBytesParallel(InputStream is, OutputStream os, int buffer throw new IOException(exception.get()); } } - - public static void copyBytesParallel2(InputStream is, OutputStream os, int bufferSize) throws IOException { - - List buffersPool = Collections.synchronizedList(new LinkedList<>()); - ParallelTaskRunner.Config config = ParallelTaskRunner.Config.builder() - .setNumTasks(1) - .setCapacity(5) - .setSorted(true) - .build(); - ParallelTaskRunner runner = new ParallelTaskRunner<>(batchSize -> { - try { - ByteBuffer buf = buffersPool.isEmpty() ? 
ByteBuffer.allocate(bufferSize) : buffersPool.remove(0); - int bytesRead = is.read(buf.array()); - if (bytesRead > 0) { - if (bytesRead != buf.array().length) { - buf.limit(bytesRead); - buf.rewind(); - } - return Collections.singletonList(buf); - } else { - return Collections.emptyList(); - } - } catch (IOException e) { - throw new UncheckedIOException(e); - } - }, t -> t, batch -> { - try { - for (ByteBuffer buf : batch) { - os.write(buf.array(), 0, buf.limit()); - // Return the buffer to the pool - buf.clear(); - buffersPool.add(buf); - } - } catch (IOException e1) { - throw new UncheckedIOException(e1); - } - return true; - }, config); - try { - runner.run(); - } catch (ExecutionException e) { - throw new IOException(e); - } - } } From 005855f4306c1213481ec52ebed90c7651ee1304 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Thu, 19 Dec 2024 17:23:07 +0000 Subject: [PATCH 093/122] storage: Do not add new abstract methods to VariantStorageEngine. #TASK-6722 --- .../opencga/storage/core/variant/VariantStorageEngine.java | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStorageEngine.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStorageEngine.java index 81ddc4c0e3..e559592aa2 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStorageEngine.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStorageEngine.java @@ -326,9 +326,11 @@ public List walkData(URI outputFile, VariantWriterFactory.VariantOutputForm } - public abstract List walkData(URI outputFile, VariantOutputFormat format, Query query, QueryOptions queryOptions, + public List walkData(URI outputFile, VariantOutputFormat format, Query query, QueryOptions queryOptions, String commandLine) - 
throws StorageEngineException; + throws StorageEngineException { + throw new UnsupportedOperationException(); + } /** * Creates a new {@link VariantExporter} for the current backend. From 38070512e802d235c1e5ddbb51b27c43ce3ac424 Mon Sep 17 00:00:00 2001 From: JuanfeSanahuja Date: Wed, 8 Jan 2025 16:06:22 +0100 Subject: [PATCH 094/122] cicd: added if github.event.review.state approved #TASK-7301 --- .github/workflows/pull-request-approved.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/pull-request-approved.yml b/.github/workflows/pull-request-approved.yml index eb0e45fadc..15b35b77ad 100644 --- a/.github/workflows/pull-request-approved.yml +++ b/.github/workflows/pull-request-approved.yml @@ -7,6 +7,7 @@ on: jobs: calculate-xetabase-branch: + if: github.event.review.state == 'approved' name: Calculate Xetabase branch runs-on: ubuntu-22.04 outputs: @@ -31,6 +32,7 @@ jobs: ZETTA_REPO_ACCESS_TOKEN: ${{ secrets.ZETTA_REPO_ACCESS_TOKEN }} test: + if: github.event.review.state == 'approved' name: "Run all tests before merging" needs: calculate-xetabase-branch uses: opencb/java-common-libs/.github/workflows/test-xetabase-workflow.yml@develop From b7c7042d1799e991bf06ea56a96d01e53cdf23f6 Mon Sep 17 00:00:00 2001 From: pfurio Date: Fri, 10 Jan 2025 11:05:27 +0100 Subject: [PATCH 095/122] models: prepare for federations, #TASK-7192 --- .../app/misc/clients/java_client_generator.py | 7 +- .../app/misc/clients/r_client_generator.py | 1 + .../app/misc/clients/rest_client_generator.py | 1 + .../opencga/app/cli/CommandExecutor.java | 4 +- .../VariantInternalCommandExecutor.java | 2 +- .../main/custom/CustomCommandExecutor.java | 2 +- .../custom/CustomFilesCommandExecutor.java | 2 +- .../custom/CustomJobsCommandExecutor.java | 2 +- .../custom/CustomStudiesCommandExecutor.java | 2 +- .../custom/CustomUsersCommandExecutor.java | 2 +- .../main/executors/AdminCommandExecutor.java | 2 +- .../AnalysisAlignmentCommandExecutor.java | 2 +- 
.../AnalysisClinicalCommandExecutor.java | 2 +- .../AnalysisVariantCommandExecutor.java | 2 +- .../executors/CohortsCommandExecutor.java | 2 +- .../DiseasePanelsCommandExecutor.java | 2 +- .../executors/FamiliesCommandExecutor.java | 2 +- .../main/executors/FilesCommandExecutor.java | 2 +- .../executors/IndividualsCommandExecutor.java | 2 +- .../main/executors/JobsCommandExecutor.java | 2 +- .../main/executors/MetaCommandExecutor.java | 2 +- .../executors/OpencgaCommandExecutor.java | 2 +- ...erationsVariantStorageCommandExecutor.java | 2 +- .../OrganizationsCommandExecutor.java | 2 +- .../executors/ProjectsCommandExecutor.java | 2 +- .../executors/SamplesCommandExecutor.java | 2 +- .../executors/StudiesCommandExecutor.java | 2 +- .../main/executors/UsersCommandExecutor.java | 2 +- .../cli/main/processors/CommandProcessor.java | 4 +- .../opencga/app/cli/main/shell/Shell.java | 3 +- .../opencga/app/cli/main/utils/JobsLog.java | 2 +- .../app/cli/main/utils/JobsTopManager.java | 2 +- .../app/cli/session/SessionManager.java | 6 +- opencga-catalog/pom.xml | 4 - .../auth/authentication/JwtManager.java | 5 +- .../catalog/db/api/OrganizationDBAdaptor.java | 3 + .../db/mongodb/MetaMongoDBAdaptor.java | 2 +- .../mongodb/OrganizationMongoDBAdaptor.java | 72 ++++++- .../catalog/managers/CatalogManager.java | 2 +- .../catalog/managers/OrganizationManager.java | 4 +- .../opencga/catalog/managers/UserManager.java | 7 +- .../CatalogManagerExternalResource.java | 4 +- .../opencga/catalog/utils/JwtUtilsTest.java | 1 + .../src/test/resources/pedigree-graph/ped.R | 202 ++++++++++++++++++ opencga-client/pom.xml | 75 ++----- .../opencga/client/rest/OpenCGAClient.java | 9 +- .../client/rest/clients/AdminClient.java | 7 +- .../client/rest/clients/AlignmentClient.java | 7 +- .../rest/clients/ClinicalAnalysisClient.java | 7 +- .../client/rest/clients/CohortClient.java | 7 +- .../rest/clients/DiseasePanelClient.java | 7 +- .../client/rest/clients/FamilyClient.java | 7 +- 
.../client/rest/clients/FileClient.java | 7 +- .../client/rest/clients/GA4GHClient.java | 7 +- .../client/rest/clients/IndividualClient.java | 7 +- .../client/rest/clients/JobClient.java | 7 +- .../client/rest/clients/MetaClient.java | 7 +- .../rest/clients/OrganizationClient.java | 7 +- .../client/rest/clients/ProjectClient.java | 7 +- .../client/rest/clients/SampleClient.java | 7 +- .../client/rest/clients/StudyClient.java | 7 +- .../client/rest/clients/UserClient.java | 7 +- .../client/rest/clients/VariantClient.java | 7 +- .../rest/clients/VariantOperationClient.java | 7 +- .../config/ClientConfigurationTest.java | 59 ----- .../client/rest/OpenCGAClientTest.java | 36 ---- .../opencga/client/rest/UserClientTest.java | 91 -------- .../opencga/client/rest/WSTestServer.java | 99 --------- .../client/rest/WorkEnvironmentTest.java | 114 ---------- opencga-core/pom.xml | 17 ++ .../opencga/core/api/FieldConstants.java | 22 ++ .../opencga/core/client/GenericClient.java | 86 ++++++++ .../opencga/core/client/ParentClient.java | 38 ++-- .../opencga/core/common/JacksonUtils.java | 8 + .../opencb/opencga/core/common}/JwtUtils.java | 28 ++- .../config/client}/ClientConfiguration.java | 11 +- .../core/config/client}/GrpcConfig.java | 2 +- .../core/config/client}/HostConfig.java | 2 +- .../core/config/client}/QueryRestConfig.java | 2 +- .../core/config/client}/RestConfig.java | 2 +- .../core}/exceptions/ClientException.java | 2 +- .../opencga/core/models/JwtPayload.java | 73 ++++++- .../opencga/core/models/common/Enums.java | 3 + .../core/models/federation/Federation.java | 44 ++++ .../models/federation/FederationClient.java | 145 +++++++++++++ .../federation/FederationClientMixin.java | 7 + .../models/federation/FederationServer.java | 120 +++++++++++ .../FederationServerCreateParams.java | 77 +++++++ .../federation/FederationServerMixin.java | 7 + .../models/organizations/Organization.java | 14 ++ .../opencga/core/models/user/Account.java | 15 ++ 
.../cli/ExecutorsCliRestApiWriter.java | 2 +- pom.xml | 21 +- 93 files changed, 1138 insertions(+), 603 deletions(-) create mode 100644 opencga-catalog/src/test/resources/pedigree-graph/ped.R delete mode 100644 opencga-client/src/test/java/org/opencb/opencga/client/config/ClientConfigurationTest.java delete mode 100644 opencga-client/src/test/java/org/opencb/opencga/client/rest/OpenCGAClientTest.java delete mode 100644 opencga-client/src/test/java/org/opencb/opencga/client/rest/UserClientTest.java delete mode 100644 opencga-client/src/test/java/org/opencb/opencga/client/rest/WSTestServer.java delete mode 100644 opencga-client/src/test/java/org/opencb/opencga/client/rest/WorkEnvironmentTest.java create mode 100644 opencga-core/src/main/java/org/opencb/opencga/core/client/GenericClient.java rename opencga-client/src/main/java/org/opencb/opencga/client/rest/AbstractParentClient.java => opencga-core/src/main/java/org/opencb/opencga/core/client/ParentClient.java (96%) rename {opencga-catalog/src/main/java/org/opencb/opencga/catalog/utils => opencga-core/src/main/java/org/opencb/opencga/core/common}/JwtUtils.java (62%) rename {opencga-client/src/main/java/org/opencb/opencga/client/config => opencga-core/src/main/java/org/opencb/opencga/core/config/client}/ClientConfiguration.java (95%) rename {opencga-client/src/main/java/org/opencb/opencga/client/config => opencga-core/src/main/java/org/opencb/opencga/core/config/client}/GrpcConfig.java (95%) rename {opencga-client/src/main/java/org/opencb/opencga/client/config => opencga-core/src/main/java/org/opencb/opencga/core/config/client}/HostConfig.java (94%) rename {opencga-client/src/main/java/org/opencb/opencga/client/config => opencga-core/src/main/java/org/opencb/opencga/core/config/client}/QueryRestConfig.java (96%) rename {opencga-client/src/main/java/org/opencb/opencga/client/config => opencga-core/src/main/java/org/opencb/opencga/core/config/client}/RestConfig.java (98%) rename 
{opencga-client/src/main/java/org/opencb/opencga/client => opencga-core/src/main/java/org/opencb/opencga/core}/exceptions/ClientException.java (95%) create mode 100644 opencga-core/src/main/java/org/opencb/opencga/core/models/federation/Federation.java create mode 100644 opencga-core/src/main/java/org/opencb/opencga/core/models/federation/FederationClient.java create mode 100644 opencga-core/src/main/java/org/opencb/opencga/core/models/federation/FederationClientMixin.java create mode 100644 opencga-core/src/main/java/org/opencb/opencga/core/models/federation/FederationServer.java create mode 100644 opencga-core/src/main/java/org/opencb/opencga/core/models/federation/FederationServerCreateParams.java create mode 100644 opencga-core/src/main/java/org/opencb/opencga/core/models/federation/FederationServerMixin.java diff --git a/opencga-app/app/misc/clients/java_client_generator.py b/opencga-app/app/misc/clients/java_client_generator.py index f65a89983e..36a288b774 100755 --- a/opencga-app/app/misc/clients/java_client_generator.py +++ b/opencga-app/app/misc/clients/java_client_generator.py @@ -44,8 +44,9 @@ def get_imports(self): headers.append('') imports = set() - imports.add('org.opencb.opencga.client.exceptions.ClientException;') - imports.add('org.opencb.opencga.client.config.ClientConfiguration;') + imports.add('org.opencb.opencga.core.client.ParentClient;') + imports.add('org.opencb.opencga.core.exceptions.ClientException;') + imports.add('org.opencb.opencga.core.config.client.ClientConfiguration;') # imports.add(parentPackage + '.*;') imports.add('org.opencb.opencga.client.rest.*;') imports.add('org.opencb.opencga.core.response.RestResponse;') @@ -79,7 +80,7 @@ def get_class_definition(self, category): parentClientClass = parentClientClass[0] parentClientClass = parentClientClass.rsplit('/', 1)[1].split('.')[0] else: - parentClientClass = "AbstractParentClient" + parentClientClass = "ParentClient" text = [] text.append('') diff --git 
a/opencga-app/app/misc/clients/r_client_generator.py b/opencga-app/app/misc/clients/r_client_generator.py index 2d2a4840a9..8693de5f75 100644 --- a/opencga-app/app/misc/clients/r_client_generator.py +++ b/opencga-app/app/misc/clients/r_client_generator.py @@ -11,6 +11,7 @@ def __init__(self, server_url, output_dir): super().__init__(server_url, output_dir) self.categories = { + 'Federations': 'Federation', 'Organizations': 'Organization', 'Users': 'User', 'Projects': 'Project', diff --git a/opencga-app/app/misc/clients/rest_client_generator.py b/opencga-app/app/misc/clients/rest_client_generator.py index 53ed2bdfd8..06c533f0ed 100644 --- a/opencga-app/app/misc/clients/rest_client_generator.py +++ b/opencga-app/app/misc/clients/rest_client_generator.py @@ -35,6 +35,7 @@ def __init__(self, rest_api_file, output_dir): 'panels/import': {'method_name': 'import_panels'} } self.categories = { + 'Federations': 'Federation', 'Organizations': 'Organization', 'Users': 'User', 'Projects': 'Project', diff --git a/opencga-app/src/main/java/org/opencb/opencga/app/cli/CommandExecutor.java b/opencga-app/src/main/java/org/opencb/opencga/app/cli/CommandExecutor.java index ee5c57c3cb..7195674bf4 100644 --- a/opencga-app/src/main/java/org/opencb/opencga/app/cli/CommandExecutor.java +++ b/opencga-app/src/main/java/org/opencb/opencga/app/cli/CommandExecutor.java @@ -27,8 +27,8 @@ import org.opencb.commons.utils.PrintUtils; import org.opencb.opencga.app.cli.main.utils.CommandLineUtils; import org.opencb.opencga.app.cli.session.SessionManager; -import org.opencb.opencga.client.config.ClientConfiguration; -import org.opencb.opencga.client.exceptions.ClientException; +import org.opencb.opencga.core.config.client.ClientConfiguration; +import org.opencb.opencga.core.exceptions.ClientException; import org.opencb.opencga.client.rest.OpenCGAClient; import org.opencb.opencga.core.config.Configuration; import org.opencb.opencga.core.config.storage.StorageConfiguration; diff --git 
a/opencga-app/src/main/java/org/opencb/opencga/app/cli/internal/executors/VariantInternalCommandExecutor.java b/opencga-app/src/main/java/org/opencb/opencga/app/cli/internal/executors/VariantInternalCommandExecutor.java index 7d20deccb0..1156182cca 100644 --- a/opencga-app/src/main/java/org/opencb/opencga/app/cli/internal/executors/VariantInternalCommandExecutor.java +++ b/opencga-app/src/main/java/org/opencb/opencga/app/cli/internal/executors/VariantInternalCommandExecutor.java @@ -56,7 +56,7 @@ import org.opencb.opencga.app.cli.internal.options.VariantCommandOptions; import org.opencb.opencga.catalog.db.api.SampleDBAdaptor; import org.opencb.opencga.catalog.exceptions.CatalogException; -import org.opencb.opencga.client.exceptions.ClientException; +import org.opencb.opencga.core.exceptions.ClientException; import org.opencb.opencga.core.api.ParamConstants; import org.opencb.opencga.core.common.UriUtils; import org.opencb.opencga.core.common.YesNoAuto; diff --git a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/custom/CustomCommandExecutor.java b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/custom/CustomCommandExecutor.java index 3ded501553..885e6abdba 100644 --- a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/custom/CustomCommandExecutor.java +++ b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/custom/CustomCommandExecutor.java @@ -18,7 +18,7 @@ import org.opencb.commons.datastore.core.ObjectMap; import org.opencb.opencga.app.cli.session.SessionManager; -import org.opencb.opencga.client.config.ClientConfiguration; +import org.opencb.opencga.core.config.client.ClientConfiguration; import org.opencb.opencga.client.rest.OpenCGAClient; import org.slf4j.Logger; diff --git a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/custom/CustomFilesCommandExecutor.java b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/custom/CustomFilesCommandExecutor.java index 39baa4b29e..e5114ca617 100644 --- 
a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/custom/CustomFilesCommandExecutor.java +++ b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/custom/CustomFilesCommandExecutor.java @@ -17,7 +17,7 @@ import org.opencb.commons.datastore.core.ObjectMap; import org.opencb.opencga.app.cli.session.SessionManager; -import org.opencb.opencga.client.config.ClientConfiguration; +import org.opencb.opencga.core.config.client.ClientConfiguration; import org.opencb.opencga.client.rest.OpenCGAClient; import org.opencb.opencga.core.models.file.File; import org.opencb.opencga.core.response.RestResponse; diff --git a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/custom/CustomJobsCommandExecutor.java b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/custom/CustomJobsCommandExecutor.java index dffedddcab..687414aa67 100644 --- a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/custom/CustomJobsCommandExecutor.java +++ b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/custom/CustomJobsCommandExecutor.java @@ -22,7 +22,7 @@ import org.opencb.opencga.app.cli.main.utils.JobsTopManager; import org.opencb.opencga.app.cli.session.SessionManager; import org.opencb.opencga.catalog.db.api.JobDBAdaptor; -import org.opencb.opencga.client.config.ClientConfiguration; +import org.opencb.opencga.core.config.client.ClientConfiguration; import org.opencb.opencga.client.rest.OpenCGAClient; import org.opencb.opencga.core.api.ParamConstants; import org.opencb.opencga.core.models.job.JobTop; diff --git a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/custom/CustomStudiesCommandExecutor.java b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/custom/CustomStudiesCommandExecutor.java index a7ffdc3633..ddb8772097 100644 --- a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/custom/CustomStudiesCommandExecutor.java +++ b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/custom/CustomStudiesCommandExecutor.java @@ 
-22,7 +22,7 @@ import org.opencb.opencga.catalog.exceptions.CatalogException; import org.opencb.opencga.catalog.io.IOManager; import org.opencb.opencga.catalog.io.IOManagerFactory; -import org.opencb.opencga.client.config.ClientConfiguration; +import org.opencb.opencga.core.config.client.ClientConfiguration; import org.opencb.opencga.client.rest.OpenCGAClient; import org.opencb.opencga.core.models.job.Job; import org.opencb.opencga.core.models.study.TemplateParams; diff --git a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/custom/CustomUsersCommandExecutor.java b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/custom/CustomUsersCommandExecutor.java index c608f5e2c1..a47aa3955e 100644 --- a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/custom/CustomUsersCommandExecutor.java +++ b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/custom/CustomUsersCommandExecutor.java @@ -20,7 +20,7 @@ import org.opencb.commons.datastore.core.ObjectMap; import org.opencb.opencga.app.cli.main.utils.CommandLineUtils; import org.opencb.opencga.app.cli.session.SessionManager; -import org.opencb.opencga.client.config.ClientConfiguration; +import org.opencb.opencga.core.config.client.ClientConfiguration; import org.opencb.opencga.client.rest.OpenCGAClient; import org.opencb.opencga.core.models.user.AuthenticationResponse; import org.opencb.opencga.core.response.QueryType; diff --git a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/AdminCommandExecutor.java b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/AdminCommandExecutor.java index 99135ede6b..6722731a11 100644 --- a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/AdminCommandExecutor.java +++ b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/AdminCommandExecutor.java @@ -12,8 +12,8 @@ import org.opencb.opencga.app.cli.main.options.AdminCommandOptions; import 
org.opencb.opencga.catalog.exceptions.CatalogAuthenticationException; import org.opencb.opencga.catalog.utils.ParamUtils.AddRemoveAction; -import org.opencb.opencga.client.exceptions.ClientException; import org.opencb.opencga.core.common.JacksonUtils; +import org.opencb.opencga.core.exceptions.ClientException; import org.opencb.opencga.core.models.Acl; import org.opencb.opencga.core.models.admin.GroupSyncParams; import org.opencb.opencga.core.models.admin.InstallationParams; diff --git a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/AnalysisAlignmentCommandExecutor.java b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/AnalysisAlignmentCommandExecutor.java index 493fdedecb..5f6286aa7a 100644 --- a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/AnalysisAlignmentCommandExecutor.java +++ b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/AnalysisAlignmentCommandExecutor.java @@ -13,8 +13,8 @@ import org.opencb.opencga.app.cli.main.executors.OpencgaCommandExecutor; import org.opencb.opencga.app.cli.main.options.AnalysisAlignmentCommandOptions; import org.opencb.opencga.catalog.exceptions.CatalogAuthenticationException; -import org.opencb.opencga.client.exceptions.ClientException; import org.opencb.opencga.core.common.JacksonUtils; +import org.opencb.opencga.core.exceptions.ClientException; import org.opencb.opencga.core.models.alignment.AlignmentGeneCoverageStatsParams; import org.opencb.opencga.core.models.alignment.AlignmentIndexParams; import org.opencb.opencga.core.models.alignment.AlignmentQcParams; diff --git a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/AnalysisClinicalCommandExecutor.java b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/AnalysisClinicalCommandExecutor.java index 07dbf9230f..2368f5c7f8 100644 --- a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/AnalysisClinicalCommandExecutor.java +++ 
b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/AnalysisClinicalCommandExecutor.java @@ -22,8 +22,8 @@ import org.opencb.opencga.catalog.utils.ParamUtils.CompleteUpdateAction; import org.opencb.opencga.catalog.utils.ParamUtils.SaveInterpretationAs; import org.opencb.opencga.catalog.utils.ParamUtils.UpdateAction; -import org.opencb.opencga.client.exceptions.ClientException; import org.opencb.opencga.core.common.JacksonUtils; +import org.opencb.opencga.core.exceptions.ClientException; import org.opencb.opencga.core.models.analysis.knockout.KnockoutByGeneSummary; import org.opencb.opencga.core.models.analysis.knockout.KnockoutByIndividual; import org.opencb.opencga.core.models.analysis.knockout.KnockoutByIndividualSummary; diff --git a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/AnalysisVariantCommandExecutor.java b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/AnalysisVariantCommandExecutor.java index 57edea737f..2e3a0a190b 100644 --- a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/AnalysisVariantCommandExecutor.java +++ b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/AnalysisVariantCommandExecutor.java @@ -22,8 +22,8 @@ import org.opencb.opencga.app.cli.main.executors.OpencgaCommandExecutor; import org.opencb.opencga.app.cli.main.options.AnalysisVariantCommandOptions; import org.opencb.opencga.catalog.exceptions.CatalogAuthenticationException; -import org.opencb.opencga.client.exceptions.ClientException; import org.opencb.opencga.core.common.JacksonUtils; +import org.opencb.opencga.core.exceptions.ClientException; import org.opencb.opencga.core.models.analysis.knockout.KnockoutByGene; import org.opencb.opencga.core.models.analysis.knockout.KnockoutByIndividual; import org.opencb.opencga.core.models.clinical.ExomiserWrapperParams; diff --git a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/CohortsCommandExecutor.java 
b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/CohortsCommandExecutor.java index 287c2223a0..55023483d9 100644 --- a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/CohortsCommandExecutor.java +++ b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/CohortsCommandExecutor.java @@ -14,8 +14,8 @@ import org.opencb.opencga.catalog.utils.ParamUtils.AclAction; import org.opencb.opencga.catalog.utils.ParamUtils.BasicUpdateAction; import org.opencb.opencga.catalog.utils.ParamUtils.CompleteUpdateAction; -import org.opencb.opencga.client.exceptions.ClientException; import org.opencb.opencga.core.common.JacksonUtils; +import org.opencb.opencga.core.exceptions.ClientException; import org.opencb.opencga.core.models.cohort.Cohort; import org.opencb.opencga.core.models.cohort.CohortAclEntryList; import org.opencb.opencga.core.models.cohort.CohortAclUpdateParams; diff --git a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/DiseasePanelsCommandExecutor.java b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/DiseasePanelsCommandExecutor.java index d2fde239e7..be88cbdb35 100644 --- a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/DiseasePanelsCommandExecutor.java +++ b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/DiseasePanelsCommandExecutor.java @@ -12,8 +12,8 @@ import org.opencb.opencga.app.cli.main.options.DiseasePanelsCommandOptions; import org.opencb.opencga.catalog.exceptions.CatalogAuthenticationException; import org.opencb.opencga.catalog.utils.ParamUtils.AclAction; -import org.opencb.opencga.client.exceptions.ClientException; import org.opencb.opencga.core.common.JacksonUtils; +import org.opencb.opencga.core.exceptions.ClientException; import org.opencb.opencga.core.models.job.Job; import org.opencb.opencga.core.models.panel.Panel; import org.opencb.opencga.core.models.panel.PanelAclEntryList; diff --git 
a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/FamiliesCommandExecutor.java b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/FamiliesCommandExecutor.java index dc5a1878e4..d14515c990 100644 --- a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/FamiliesCommandExecutor.java +++ b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/FamiliesCommandExecutor.java @@ -14,8 +14,8 @@ import org.opencb.opencga.catalog.utils.ParamUtils.AclAction; import org.opencb.opencga.catalog.utils.ParamUtils.BasicUpdateAction; import org.opencb.opencga.catalog.utils.ParamUtils.CompleteUpdateAction; -import org.opencb.opencga.client.exceptions.ClientException; import org.opencb.opencga.core.common.JacksonUtils; +import org.opencb.opencga.core.exceptions.ClientException; import org.opencb.opencga.core.models.common.StatusParams; import org.opencb.opencga.core.models.common.TsvAnnotationParams; import org.opencb.opencga.core.models.family.Family; diff --git a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/FilesCommandExecutor.java b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/FilesCommandExecutor.java index 873a16baac..a9407bc05d 100644 --- a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/FilesCommandExecutor.java +++ b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/FilesCommandExecutor.java @@ -19,8 +19,8 @@ import org.opencb.opencga.catalog.utils.ParamUtils.AclAction; import org.opencb.opencga.catalog.utils.ParamUtils.BasicUpdateAction; import org.opencb.opencga.catalog.utils.ParamUtils.CompleteUpdateAction; -import org.opencb.opencga.client.exceptions.ClientException; import org.opencb.opencga.core.common.JacksonUtils; +import org.opencb.opencga.core.exceptions.ClientException; import org.opencb.opencga.core.models.alignment.AlignmentFileQualityControl; import 
org.opencb.opencga.core.models.alignment.CoverageFileQualityControl; import org.opencb.opencga.core.models.common.StatusParams; diff --git a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/IndividualsCommandExecutor.java b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/IndividualsCommandExecutor.java index dcf24216d8..27babc7b80 100644 --- a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/IndividualsCommandExecutor.java +++ b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/IndividualsCommandExecutor.java @@ -18,8 +18,8 @@ import org.opencb.opencga.catalog.utils.ParamUtils.AclAction; import org.opencb.opencga.catalog.utils.ParamUtils.BasicUpdateAction; import org.opencb.opencga.catalog.utils.ParamUtils.CompleteUpdateAction; -import org.opencb.opencga.client.exceptions.ClientException; import org.opencb.opencga.core.common.JacksonUtils; +import org.opencb.opencga.core.exceptions.ClientException; import org.opencb.opencga.core.models.common.StatusParams; import org.opencb.opencga.core.models.common.TsvAnnotationParams; import org.opencb.opencga.core.models.individual.Individual; diff --git a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/JobsCommandExecutor.java b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/JobsCommandExecutor.java index ffc4c7a61a..8f6d8f61b8 100644 --- a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/JobsCommandExecutor.java +++ b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/JobsCommandExecutor.java @@ -14,8 +14,8 @@ import org.opencb.opencga.app.cli.main.options.JobsCommandOptions; import org.opencb.opencga.catalog.exceptions.CatalogAuthenticationException; import org.opencb.opencga.catalog.utils.ParamUtils.AclAction; -import org.opencb.opencga.client.exceptions.ClientException; import org.opencb.opencga.core.common.JacksonUtils; +import 
org.opencb.opencga.core.exceptions.ClientException; import org.opencb.opencga.core.models.common.Enums; import org.opencb.opencga.core.models.file.FileContent; import org.opencb.opencga.core.models.job.Job; diff --git a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/MetaCommandExecutor.java b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/MetaCommandExecutor.java index 3a674a32c6..d81dc101ba 100644 --- a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/MetaCommandExecutor.java +++ b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/MetaCommandExecutor.java @@ -11,8 +11,8 @@ import org.opencb.opencga.app.cli.main.executors.OpencgaCommandExecutor; import org.opencb.opencga.app.cli.main.options.MetaCommandOptions; import org.opencb.opencga.catalog.exceptions.CatalogAuthenticationException; -import org.opencb.opencga.client.exceptions.ClientException; import org.opencb.opencga.core.common.JacksonUtils; +import org.opencb.opencga.core.exceptions.ClientException; import org.opencb.opencga.core.response.QueryType; import org.opencb.opencga.core.response.RestResponse; diff --git a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/OpencgaCommandExecutor.java b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/OpencgaCommandExecutor.java index f1e0c47ad7..9ede8e60a5 100644 --- a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/OpencgaCommandExecutor.java +++ b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/OpencgaCommandExecutor.java @@ -29,7 +29,7 @@ import org.opencb.opencga.app.cli.session.SessionManager; import org.opencb.opencga.catalog.db.api.ProjectDBAdaptor; import org.opencb.opencga.catalog.exceptions.CatalogAuthenticationException; -import org.opencb.opencga.client.exceptions.ClientException; +import org.opencb.opencga.core.exceptions.ClientException; import org.opencb.opencga.client.rest.OpenCGAClient; import 
org.opencb.opencga.core.models.project.Project; import org.opencb.opencga.core.models.study.Study; diff --git a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/OperationsVariantStorageCommandExecutor.java b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/OperationsVariantStorageCommandExecutor.java index 35f0c49c31..7c22985198 100644 --- a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/OperationsVariantStorageCommandExecutor.java +++ b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/OperationsVariantStorageCommandExecutor.java @@ -11,11 +11,11 @@ import org.opencb.opencga.app.cli.main.executors.OpencgaCommandExecutor; import org.opencb.opencga.app.cli.main.options.OperationsVariantStorageCommandOptions; import org.opencb.opencga.catalog.exceptions.CatalogAuthenticationException; -import org.opencb.opencga.client.exceptions.ClientException; import org.opencb.opencga.core.common.JacksonUtils; import org.opencb.opencga.core.common.YesNoAuto; import org.opencb.opencga.core.config.storage.CellBaseConfiguration; import org.opencb.opencga.core.config.storage.SampleIndexConfiguration; +import org.opencb.opencga.core.exceptions.ClientException; import org.opencb.opencga.core.models.job.Job; import org.opencb.opencga.core.models.operations.variant.JulieParams; import org.opencb.opencga.core.models.operations.variant.VariantAggregateFamilyParams; diff --git a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/OrganizationsCommandExecutor.java b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/OrganizationsCommandExecutor.java index caefda1d32..3021e70548 100644 --- a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/OrganizationsCommandExecutor.java +++ b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/OrganizationsCommandExecutor.java @@ -14,9 +14,9 @@ import org.opencb.opencga.catalog.utils.ParamUtils.AddRemoveAction; 
import org.opencb.opencga.catalog.utils.ParamUtils.BasicUpdateAction; import org.opencb.opencga.catalog.utils.ParamUtils.UpdateAction; -import org.opencb.opencga.client.exceptions.ClientException; import org.opencb.opencga.core.common.JacksonUtils; import org.opencb.opencga.core.config.Optimizations; +import org.opencb.opencga.core.exceptions.ClientException; import org.opencb.opencga.core.models.notes.Note; import org.opencb.opencga.core.models.notes.NoteCreateParams; import org.opencb.opencga.core.models.notes.NoteType; diff --git a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/ProjectsCommandExecutor.java b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/ProjectsCommandExecutor.java index 929d89e1e4..ad71c1b483 100644 --- a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/ProjectsCommandExecutor.java +++ b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/ProjectsCommandExecutor.java @@ -10,9 +10,9 @@ import org.opencb.opencga.app.cli.main.executors.OpencgaCommandExecutor; import org.opencb.opencga.app.cli.main.options.ProjectsCommandOptions; import org.opencb.opencga.catalog.exceptions.CatalogAuthenticationException; -import org.opencb.opencga.client.exceptions.ClientException; import org.opencb.opencga.core.common.JacksonUtils; import org.opencb.opencga.core.config.storage.CellBaseConfiguration; +import org.opencb.opencga.core.exceptions.ClientException; import org.opencb.opencga.core.models.project.Project; import org.opencb.opencga.core.models.project.ProjectCreateParams; import org.opencb.opencga.core.models.project.ProjectOrganism; diff --git a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/SamplesCommandExecutor.java b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/SamplesCommandExecutor.java index c8e02456a4..c8eb481480 100644 --- a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/SamplesCommandExecutor.java +++ 
b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/SamplesCommandExecutor.java @@ -15,8 +15,8 @@ import org.opencb.opencga.catalog.utils.ParamUtils.AclAction; import org.opencb.opencga.catalog.utils.ParamUtils.BasicUpdateAction; import org.opencb.opencga.catalog.utils.ParamUtils.CompleteUpdateAction; -import org.opencb.opencga.client.exceptions.ClientException; import org.opencb.opencga.core.common.JacksonUtils; +import org.opencb.opencga.core.exceptions.ClientException; import org.opencb.opencga.core.models.common.ExternalSource; import org.opencb.opencga.core.models.common.RgaIndex.Status; import org.opencb.opencga.core.models.common.StatusParams; diff --git a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/StudiesCommandExecutor.java b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/StudiesCommandExecutor.java index 009be9f6b7..7bda32829c 100644 --- a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/StudiesCommandExecutor.java +++ b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/StudiesCommandExecutor.java @@ -21,8 +21,8 @@ import org.opencb.opencga.catalog.utils.ParamUtils.AddRemoveAction; import org.opencb.opencga.catalog.utils.ParamUtils.AddRemoveForceRemoveAction; import org.opencb.opencga.catalog.utils.ParamUtils.BasicUpdateAction; -import org.opencb.opencga.client.exceptions.ClientException; import org.opencb.opencga.core.common.JacksonUtils; +import org.opencb.opencga.core.exceptions.ClientException; import org.opencb.opencga.core.models.AclEntryList; import org.opencb.opencga.core.models.audit.AuditRecord.Status.Result; import org.opencb.opencga.core.models.audit.AuditRecord; diff --git a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/UsersCommandExecutor.java b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/UsersCommandExecutor.java index 5f8dfce148..57ece8aed9 100644 --- 
a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/UsersCommandExecutor.java +++ b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/UsersCommandExecutor.java @@ -16,8 +16,8 @@ import org.opencb.opencga.app.cli.main.options.UsersCommandOptions; import org.opencb.opencga.catalog.exceptions.CatalogAuthenticationException; import org.opencb.opencga.catalog.utils.ParamUtils.AddRemoveAction; -import org.opencb.opencga.client.exceptions.ClientException; import org.opencb.opencga.core.common.JacksonUtils; +import org.opencb.opencga.core.exceptions.ClientException; import org.opencb.opencga.core.models.common.Enums; import org.opencb.opencga.core.models.user.AuthenticationResponse; import org.opencb.opencga.core.models.user.ConfigUpdateParams; diff --git a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/processors/CommandProcessor.java b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/processors/CommandProcessor.java index 8c43c340ab..7a49c1da46 100644 --- a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/processors/CommandProcessor.java +++ b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/processors/CommandProcessor.java @@ -12,8 +12,8 @@ import org.opencb.opencga.app.cli.main.utils.CommandLineUtils; import org.opencb.opencga.app.cli.session.Session; import org.opencb.opencga.catalog.exceptions.CatalogAuthenticationException; -import org.opencb.opencga.catalog.utils.JwtUtils; -import org.opencb.opencga.client.exceptions.ClientException; +import org.opencb.opencga.core.common.JwtUtils; +import org.opencb.opencga.core.exceptions.ClientException; import org.opencb.opencga.client.rest.OpenCGAClient; import org.opencb.opencga.core.models.project.Project; import org.opencb.opencga.core.models.study.Study; diff --git a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/shell/Shell.java b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/shell/Shell.java index 00f8de6d68..b0abac9d46 
100644 --- a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/shell/Shell.java +++ b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/shell/Shell.java @@ -10,7 +10,6 @@ import org.opencb.commons.datastore.core.ObjectMap; import org.opencb.commons.utils.PrintUtils; import org.opencb.opencga.app.cli.GeneralCliOptions; -import org.opencb.opencga.app.cli.main.OpenCgaCompleterImpl; import org.opencb.opencga.app.cli.main.OpencgaCliOptionsParser; import org.opencb.opencga.app.cli.main.OpencgaMain; import org.opencb.opencga.app.cli.main.executors.OpencgaCommandExecutor; @@ -18,7 +17,7 @@ import org.opencb.opencga.app.cli.main.utils.CommandLineUtils; import org.opencb.opencga.app.cli.session.Session; import org.opencb.opencga.catalog.exceptions.CatalogAuthenticationException; -import org.opencb.opencga.client.exceptions.ClientException; +import org.opencb.opencga.core.exceptions.ClientException; import org.opencb.opencga.client.rest.OpenCGAClient; import org.opencb.opencga.core.models.study.Study; import org.opencb.opencga.core.models.user.AuthenticationResponse; diff --git a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/utils/JobsLog.java b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/utils/JobsLog.java index 645d77c489..8e0e35cf01 100644 --- a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/utils/JobsLog.java +++ b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/utils/JobsLog.java @@ -21,7 +21,7 @@ import org.opencb.commons.datastore.core.QueryOptions; import org.opencb.opencga.app.cli.main.options.JobsCommandOptions; import org.opencb.opencga.catalog.db.api.JobDBAdaptor; -import org.opencb.opencga.client.exceptions.ClientException; +import org.opencb.opencga.core.exceptions.ClientException; import org.opencb.opencga.client.rest.OpenCGAClient; import org.opencb.opencga.core.api.ParamConstants; import org.opencb.opencga.core.models.file.FileContent; diff --git 
a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/utils/JobsTopManager.java b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/utils/JobsTopManager.java index 15d7b63e6e..a53e2848bb 100644 --- a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/utils/JobsTopManager.java +++ b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/utils/JobsTopManager.java @@ -23,7 +23,7 @@ import org.opencb.opencga.app.cli.main.io.Table; import org.opencb.opencga.app.cli.main.io.TextOutputWriter; import org.opencb.opencga.catalog.db.api.JobDBAdaptor; -import org.opencb.opencga.client.exceptions.ClientException; +import org.opencb.opencga.core.exceptions.ClientException; import org.opencb.opencga.client.rest.OpenCGAClient; import org.opencb.opencga.core.common.GitRepositoryState; import org.opencb.opencga.core.models.common.Enums; diff --git a/opencga-app/src/main/java/org/opencb/opencga/app/cli/session/SessionManager.java b/opencga-app/src/main/java/org/opencb/opencga/app/cli/session/SessionManager.java index f822716041..4c131b6f2a 100644 --- a/opencga-app/src/main/java/org/opencb/opencga/app/cli/session/SessionManager.java +++ b/opencga-app/src/main/java/org/opencb/opencga/app/cli/session/SessionManager.java @@ -24,9 +24,9 @@ import org.opencb.commons.datastore.core.ObjectMap; import org.opencb.opencga.app.cli.main.utils.CommandLineUtils; import org.opencb.opencga.catalog.db.api.ProjectDBAdaptor; -import org.opencb.opencga.client.config.ClientConfiguration; -import org.opencb.opencga.client.config.HostConfig; -import org.opencb.opencga.client.exceptions.ClientException; +import org.opencb.opencga.core.config.client.ClientConfiguration; +import org.opencb.opencga.core.config.client.HostConfig; +import org.opencb.opencga.core.exceptions.ClientException; import org.opencb.opencga.client.rest.OpenCGAClient; import org.opencb.opencga.core.common.GitRepositoryState; import org.opencb.opencga.core.models.project.Project; diff --git a/opencga-catalog/pom.xml 
b/opencga-catalog/pom.xml index df6a3d06d7..f65e86b3e1 100644 --- a/opencga-catalog/pom.xml +++ b/opencga-catalog/pom.xml @@ -133,10 +133,6 @@ com.google.code.findbugs annotations - - net.minidev - json-smart - com.google.guava guava diff --git a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/auth/authentication/JwtManager.java b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/auth/authentication/JwtManager.java index fd0462e3a9..81fdc1c2eb 100644 --- a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/auth/authentication/JwtManager.java +++ b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/auth/authentication/JwtManager.java @@ -18,6 +18,7 @@ import io.jsonwebtoken.*; import org.opencb.opencga.catalog.exceptions.CatalogAuthenticationException; +import org.opencb.opencga.core.common.JwtUtils; import org.opencb.opencga.core.config.AuthenticationOrigin; import org.opencb.opencga.core.models.JwtPayload; import org.slf4j.Logger; @@ -124,13 +125,13 @@ public void validateToken(String token, Key publicKey) throws CatalogAuthenticat public JwtPayload getPayload(String token) throws CatalogAuthenticationException { Claims body = parseClaims(token, publicKey).getBody(); return new JwtPayload(body.getSubject(), body.getAudience(), getAuthOrigin(body), body.getIssuer(), body.getIssuedAt(), - body.getExpiration(), token); + body.getExpiration(), JwtUtils.getFederations(body), token); } public JwtPayload getPayload(String token, Key publicKey) throws CatalogAuthenticationException { Claims body = parseClaims(token, publicKey).getBody(); return new JwtPayload(body.getSubject(), body.getAudience(), getAuthOrigin(body), body.getIssuer(), body.getIssuedAt(), - body.getExpiration(), token); + body.getExpiration(), JwtUtils.getFederations(body), token); } private AuthenticationOrigin.AuthenticationType getAuthOrigin(Claims claims) { diff --git a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/api/OrganizationDBAdaptor.java 
b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/api/OrganizationDBAdaptor.java index 93e65c6312..2c30ab281b 100644 --- a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/api/OrganizationDBAdaptor.java +++ b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/api/OrganizationDBAdaptor.java @@ -29,6 +29,9 @@ enum QueryParams implements QueryParam { ADMINS("admins", TEXT_ARRAY, ""), INTERNAL("internal", OBJECT, ""), INTERNAL_MIGRATION_EXECUTIONS("internal.migrationExecutions", OBJECT, ""), + FEDERATION("federation", OBJECT, ""), + FEDERATION_CLIENTS("federation.clients", OBJECT, ""), + FEDERATION_SERVERS("federation.servers", OBJECT, ""), CONFIGURATION("configuration", OBJECT, ""), CONFIGURATION_OPTIMIZATIONS("configuration.optimizations", OBJECT, ""), CONFIGURATION_AUTHENTICATION_ORIGINS("configuration." + AUTH_ORIGINS_FIELD, OBJECT, ""), diff --git a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/mongodb/MetaMongoDBAdaptor.java b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/mongodb/MetaMongoDBAdaptor.java index b2aba25019..e785b54907 100644 --- a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/mongodb/MetaMongoDBAdaptor.java +++ b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/mongodb/MetaMongoDBAdaptor.java @@ -30,7 +30,7 @@ import org.opencb.opencga.catalog.db.api.MetaDBAdaptor; import org.opencb.opencga.catalog.exceptions.CatalogDBException; import org.opencb.opencga.catalog.exceptions.CatalogException; -import org.opencb.opencga.catalog.utils.JwtUtils; +import org.opencb.opencga.core.common.JwtUtils; import org.opencb.opencga.core.common.GitRepositoryState; import org.opencb.opencga.core.config.Admin; import org.opencb.opencga.core.config.Configuration; diff --git a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/mongodb/OrganizationMongoDBAdaptor.java b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/mongodb/OrganizationMongoDBAdaptor.java index 
b971f5a574..2df45729a5 100644 --- a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/mongodb/OrganizationMongoDBAdaptor.java +++ b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/mongodb/OrganizationMongoDBAdaptor.java @@ -27,6 +27,8 @@ import org.opencb.opencga.core.common.TimeUtils; import org.opencb.opencga.core.config.AuthenticationOrigin; import org.opencb.opencga.core.config.Configuration; +import org.opencb.opencga.core.models.federation.FederationServer; +import org.opencb.opencga.core.models.federation.FederationClient; import org.opencb.opencga.core.models.organizations.Organization; import org.opencb.opencga.core.response.OpenCGAResult; import org.slf4j.LoggerFactory; @@ -257,7 +259,9 @@ private UpdateDocument getValidatedUpdateParams(ClientSession clientSession, Obj throws CatalogParameterException, CatalogDBException { checkUpdatedParams(parameters, Arrays.asList(QueryParams.NAME.key(), QueryParams.OWNER.key(), QueryParams.CREATION_DATE.key(), QueryParams.MODIFICATION_DATE.key(), QueryParams.ADMINS.key(), - QueryParams.CONFIGURATION.key(), QueryParams.ATTRIBUTES.key())); + QueryParams.CONFIGURATION_OPTIMIZATIONS.key(), QueryParams.CONFIGURATION_TOKEN.key(), + QueryParams.CONFIGURATION_AUTHENTICATION_ORIGINS.key(), QueryParams.FEDERATION_CLIENTS.key(), + QueryParams.FEDERATION_SERVERS.key(), QueryParams.CONFIGURATION.key(), QueryParams.ATTRIBUTES.key())); UpdateDocument document = new UpdateDocument(); @@ -292,6 +296,42 @@ private UpdateDocument getValidatedUpdateParams(ClientSession clientSession, Obj } } + // FederationClient action + if (parameters.containsKey(QueryParams.FEDERATION_CLIENTS.key())) { + Map actionMap = queryOptions.getMap(Constants.ACTIONS, new HashMap<>()); + ParamUtils.AddRemoveAction operation = ParamUtils.AddRemoveAction.from(actionMap, QueryParams.FEDERATION_CLIENTS.key()); + String[] fedClients = {QueryParams.FEDERATION_CLIENTS.key()}; + switch (operation) { + case REMOVE: + 
fixFederationClientForRemoval(parameters); + filterObjectParams(parameters, document.getPull(), fedClients); + break; + case ADD: + filterObjectParams(parameters, document.getAddToSet(), fedClients); + break; + default: + throw new IllegalStateException("Unknown operation " + operation); + } + } + + // FederationServer action + if (parameters.containsKey(QueryParams.FEDERATION_SERVERS.key())) { + Map actionMap = queryOptions.getMap(Constants.ACTIONS, new HashMap<>()); + ParamUtils.AddRemoveAction operation = ParamUtils.AddRemoveAction.from(actionMap, QueryParams.FEDERATION_SERVERS.key()); + String[] fedClients = {QueryParams.FEDERATION_SERVERS.key()}; + switch (operation) { + case REMOVE: + fixFederationServerForRemoval(parameters); + filterObjectParams(parameters, document.getPull(), fedClients); + break; + case ADD: + filterObjectParams(parameters, document.getAddToSet(), fedClients); + break; + default: + throw new IllegalStateException("Unknown operation " + operation); + } + } + String owner = parameters.getString(QueryParams.OWNER.key(), null); if (StringUtils.isNotEmpty(owner)) { // Check user exists @@ -366,6 +406,36 @@ private UpdateDocument getValidatedUpdateParams(ClientSession clientSession, Obj return document; } + private void fixFederationClientForRemoval(ObjectMap parameters) { + if (parameters.get(QueryParams.FEDERATION_CLIENTS.key()) == null) { + return; + } + List federationParamList = new LinkedList<>(); + for (Object federationClient : parameters.getAsList(QueryParams.FEDERATION_CLIENTS.key())) { + if (federationClient instanceof FederationServer) { + federationParamList.add(new Document("id", ((FederationServer) federationClient).getId())); + } else { + federationParamList.add(new Document("id", ((Map) federationClient).get("id"))); + } + } + parameters.putNested(QueryParams.FEDERATION_CLIENTS.key(), federationParamList, false); + } + + private void fixFederationServerForRemoval(ObjectMap parameters) { + if 
(parameters.get(QueryParams.FEDERATION_SERVERS.key()) == null) { + return; + } + List federationParamList = new LinkedList<>(); + for (Object federationServer : parameters.getAsList(QueryParams.FEDERATION_SERVERS.key())) { + if (federationServer instanceof FederationClient) { + federationParamList.add(new Document("id", ((FederationClient) federationServer).getId())); + } else { + federationParamList.add(new Document("id", ((Map) federationServer).get("id"))); + } + } + parameters.putNested(QueryParams.FEDERATION_SERVERS.key(), federationParamList, false); + } + private void fixAuthOriginsForRemoval(ObjectMap parameters) { if (parameters.get(QueryParams.CONFIGURATION_AUTHENTICATION_ORIGINS.key()) == null) { return; diff --git a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/CatalogManager.java b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/CatalogManager.java index 030505395d..9ae1c3a120 100644 --- a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/CatalogManager.java +++ b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/CatalogManager.java @@ -37,7 +37,7 @@ import org.opencb.opencga.catalog.io.IOManagerFactory; import org.opencb.opencga.catalog.migration.MigrationManager; import org.opencb.opencga.catalog.utils.Constants; -import org.opencb.opencga.catalog.utils.JwtUtils; +import org.opencb.opencga.core.common.JwtUtils; import org.opencb.opencga.catalog.utils.ParamUtils; import org.opencb.opencga.core.common.PasswordUtils; import org.opencb.opencga.core.common.UriUtils; diff --git a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/OrganizationManager.java b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/OrganizationManager.java index 20ec12a7f2..1d9ade5d1e 100644 --- a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/OrganizationManager.java +++ b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/OrganizationManager.java 
@@ -19,7 +19,7 @@ import org.opencb.opencga.catalog.exceptions.CatalogParameterException; import org.opencb.opencga.catalog.io.CatalogIOManager; import org.opencb.opencga.catalog.utils.Constants; -import org.opencb.opencga.catalog.utils.JwtUtils; +import org.opencb.opencga.core.common.JwtUtils; import org.opencb.opencga.catalog.utils.ParamUtils; import org.opencb.opencga.catalog.utils.UuidUtils; import org.opencb.opencga.core.api.ParamConstants; @@ -32,6 +32,7 @@ import org.opencb.opencga.core.models.audit.AuditRecord; import org.opencb.opencga.core.models.common.Enums; import org.opencb.opencga.core.models.common.InternalStatus; +import org.opencb.opencga.core.models.federation.Federation; import org.opencb.opencga.core.models.organizations.*; import org.opencb.opencga.core.models.user.OrganizationUserUpdateParams; import org.opencb.opencga.core.models.user.User; @@ -546,6 +547,7 @@ private void validateOrganizationForCreation(Organization organization, String u organization.setOwner(userId); organization.setAdmins(Collections.emptyList()); organization.setProjects(Collections.emptyList()); + organization.setFederation(new Federation(Collections.emptyList(), Collections.emptyList())); if (organization.getConfiguration() == null) { organization.setConfiguration(new OrganizationConfiguration()); diff --git a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/UserManager.java b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/UserManager.java index 7e8e8cdffe..363ff2802d 100644 --- a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/UserManager.java +++ b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/UserManager.java @@ -167,7 +167,12 @@ public OpenCGAResult create(User user, String password, String token) thro throw new CatalogException("Unknown authentication origin id '" + account.getAuthentication() + "'"); } } else { - account.setAuthentication(new 
Account.AuthenticationOrigin(CatalogAuthenticationManager.OPENCGA, false)); + if (account.getAuthentication() != null) { + account.getAuthentication().setId(AuthenticationOrigin.AuthenticationType.OPENCGA.name()); + } else { + account.setAuthentication(new Account.AuthenticationOrigin(AuthenticationOrigin.AuthenticationType.OPENCGA.name(), false, + false)); + } } // Set password expiration diff --git a/opencga-catalog/src/test/java/org/opencb/opencga/catalog/managers/CatalogManagerExternalResource.java b/opencga-catalog/src/test/java/org/opencb/opencga/catalog/managers/CatalogManagerExternalResource.java index 09c94ee33d..ecba360950 100644 --- a/opencga-catalog/src/test/java/org/opencb/opencga/catalog/managers/CatalogManagerExternalResource.java +++ b/opencga-catalog/src/test/java/org/opencb/opencga/catalog/managers/CatalogManagerExternalResource.java @@ -30,7 +30,6 @@ import org.opencb.opencga.core.common.UriUtils; import org.opencb.opencga.core.config.Configuration; -import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; import java.net.URI; @@ -93,7 +92,8 @@ public Path clearOpenCGAHome(String testName) throws IOException { // Pedigree graph analysis Path analysisPath = Files.createDirectories(opencgaHome.resolve("analysis/pedigree-graph")).toAbsolutePath(); - FileInputStream inputStream = new FileInputStream("../opencga-app/app/analysis/pedigree-graph/ped.R"); + InputStream inputStream = getClass().getResource("/pedigree-graph/ped.R").openStream(); +// FileInputStream inputStream = new FileInputStream("../opencga-app/app/analysis/pedigree-graph/ped.R"); Files.copy(inputStream, analysisPath.resolve("ped.R"), StandardCopyOption.REPLACE_EXISTING); return opencgaHome; } diff --git a/opencga-catalog/src/test/java/org/opencb/opencga/catalog/utils/JwtUtilsTest.java b/opencga-catalog/src/test/java/org/opencb/opencga/catalog/utils/JwtUtilsTest.java index 056d14e4c0..2af4685b66 100644 --- 
a/opencga-catalog/src/test/java/org/opencb/opencga/catalog/utils/JwtUtilsTest.java +++ b/opencga-catalog/src/test/java/org/opencb/opencga/catalog/utils/JwtUtilsTest.java @@ -2,6 +2,7 @@ import org.junit.Test; import org.junit.experimental.categories.Category; +import org.opencb.opencga.core.common.JwtUtils; import org.opencb.opencga.core.testclassification.duration.ShortTests; @Category(ShortTests.class) diff --git a/opencga-catalog/src/test/resources/pedigree-graph/ped.R b/opencga-catalog/src/test/resources/pedigree-graph/ped.R new file mode 100644 index 0000000000..4fc2d51803 --- /dev/null +++ b/opencga-catalog/src/test/resources/pedigree-graph/ped.R @@ -0,0 +1,202 @@ +suppressMessages(library(optparse)) +suppressMessages(library(jsonlite)) +suppressMessages(library(kinship2)) +set.seed(42) + +# Load file data +load_file_data <- function(ped_fpath, no_fix_parents) { + # Load all pedigree + family_ped <- read.table(ped_fpath, header=TRUE, + colClasses = c(id="character", + dadid="character", + momid="character", + relation="character")) + + # Fixing bug where if all sexes are 3 (unknown) or 4 (terminated) the package fails + if (all(family_ped$sex > 2)) { + if (any(!is.na(family_ped$dadid))) { + father <- family_ped$dadid[!is.na(family_ped$dadid)][1] + family_ped[family_ped$id == father, ]$sex <- 1 + } else if (any(!is.na(family_ped$momid))) { + mother <- family_ped$momid[!is.na(family_ped$momid)][1] + family_ped[family_ped$id == mother, ]$sex <- 2 + } + } + + # Add missing parents + if (!no_fix_parents) { + parents_fixed <- with(family_ped, fixParents(id, dadid, momid, sex, missid=0)) # Fix parents + if (nrow(parents_fixed) > nrow(family_ped)) { # If new parents are added + # Create new rows to be able to merge family_ped and parents_fixed + na_df <- data.frame(matrix(NA, nrow = nrow(parents_fixed)-nrow(family_ped), ncol = ncol(family_ped))) + colnames(na_df) <- colnames(family_ped) + na_df$status <- 0 # set status "0=alive/missing" + # Rename added dadid and 
momid from 0 to NA + parents_fixed[parents_fixed$dadid == 0 & !is.na(parents_fixed$dadid),]$dadid <- NA + parents_fixed[parents_fixed$momid == 0 & !is.na(parents_fixed$momid),]$momid <- NA + # Add NA rows to original family_ped + family_ped <- rbind(family_ped, na_df) + } + family_ped <- cbind(parents_fixed, family_ped[5:ncol(family_ped)]) # Substitute new parents_fixed info to original family_ped + family_ped<- cbind(family_ped[, c("id", "dadid", "momid", "sex")], family_ped[, 5:ncol(family_ped)]) # Reorder columns + } + + # Extract affected info + affected <- as.data.frame(family_ped[, grepl("affected", names(family_ped))]) + if (ncol(affected) == 1) { + affected <- family_ped$affected + } else { + colnames(affected) <- unlist(lapply(colnames(affected), function (x) sub('affected.', '', x))) + affected <- as.matrix(affected) + } + + # Extract relation info + columns <- c("id1", "id2", "code") + rel_df <- data.frame(matrix(nrow = 0, ncol = length(columns))) # Create empty relationship data frame + colnames(rel_df) = columns + relation <- family_ped[!is.na(family_ped$relation), c("id", "relation")] # Remove NA and subset + if(nrow(relation) > 0) { + group_and_rel <- strsplit(as.character(relation$relation),',') # Split group and relationship code + relation <- data.frame(relation$id, do.call(rbind, group_and_rel)) # Combine id, group and relationship code + colnames(relation) <- c("id", "group", "code") + agg <- aggregate(id~group, data = relation, paste0, collapse=",") # Concatenate ids by group + for (row in 1:nrow(agg)) { # Get pair combinations of all ids in the same group + ids <- strsplit(as.character(agg$id),',') + ids_comb_matrix <- t(do.call(cbind, lapply(ids, function(x) combn(x,2)))) + } + codes <- do.call(rbind, lapply(ids_comb_matrix[,1], function(x) relation[relation$id==x, ]$code)) # Get code for each pair of ids + rel_matrix = cbind(ids_comb_matrix, codes) # Merge pair of ids and their corresponding code + colnames(rel_matrix) <- c("id1", "id2", 
"code") + rel_df <- as.data.frame(rel_matrix) # Convert matrix to data frame + rel_df$code <- as.numeric(rel_df$code) # Relationship code has to be numeric + } + + ped_info <- list("family_ped" = family_ped, "affected" = affected, "relation" = rel_df) + return(ped_info) +} + +# Generate the plot +create_plot <- function(ped_all, affected, no_legend, legend_pos, plot_title) { + if (legend_pos %in% c("topright", "bottomright")) { + margins <- c(5.1, 4.1, 4.1, 10.1) # bottom, left, top, right + } else { + margins <- c(5.1, 12.1, 4.1, 2.1) + } + if (no_legend | ncol(as.data.frame(affected)) == 1) { + plot_df <- plot(ped_all) + } else{ + plot_df <- plot(ped_all, + mar=margins, + density = seq(-1, 90, 90/ncol(affected)), + angle = rev(seq(-1, 90, 90/ncol(affected)))) + pedigree.legend(ped_all, location=legend_pos, radius=plot_df$boxh) + } + if (!is.null(plot_title)) { + title(main = plot_title) + } + return(plot_df) +} + +# Main function +plot_pedigree <- function(ped_fpath, out_dir, plot_format, coords_format, + no_legend, legend_pos, no_fix_parents, plot_title, + proband) { + # Gather pedigree info + ped_info <- load_file_data(ped_fpath, no_fix_parents) + family_ped <- ped_info$family_ped + affected <- ped_info$affected + relation <- ped_info$relation + + # Add arrow if proband exists + ids <- family_ped$id + if (!is.null(proband)) { + ids[ids==proband] <- paste(proband, intToUtf8(8599), sep='\n') + } + + # Create pedigree + ped_all <- pedigree(id=ids, + dadid=family_ped$dadid, + momid=family_ped$momid, + sex=family_ped$sex, + affected=affected, + status = family_ped$status, + relation = relation, + missid = 0) + + # Print pedigree plot + plot_formats <- unlist(strsplit(plot_format, ',')) + if ("png" %in% plot_formats) { + png(file=paste(out_dir, "pedigree.png", sep='/')) + plot_df <- create_plot(ped_all, affected, no_legend, legend_pos, plot_title) + garbage <- dev.off() + } + if ("svg" %in% plot_formats) { + svg(file=paste(out_dir, "pedigree.svg", sep='/')) + plot_df 
<- create_plot(ped_all, affected, no_legend, legend_pos, plot_title) + garbage <-dev.off() + } + + # Adding coordinates + ped_coords <- cbind(family_ped, round(plot_df$x, 2), round(plot_df$y, 2)) + colnames(ped_coords) <- c(colnames(family_ped), c("x", "y")) + + # Adding spouses + spouses <- as.vector(t(plot_df$plist$spouse)) # Get spouses + ind_order <- as.vector(t(plot_df$plist$nid)) # Get ind order in plot + spouses <- spouses[ind_order!=0] # Remove empty positions + ind_order <- ind_order[ind_order!=0] # Remove empty positions + ped_coords$spouse <- NA # Add new spouse column to family ped + for (i in 1:length(spouses)) { + if (spouses[i] == 1) { # If ind is linked to next ind + ped_coords[ind_order[i],]$spouse <- c(ped_coords[ind_order[i+1],]$id) + } + if (i!=1 && spouses[i] == 1 && spouses[i-1] == 1) { # If ind is linked to previous and next ind + ped_coords[ind_order[i],]$spouse <- paste(c(ped_coords[ind_order[i-1],]$id, ped_coords[ind_order[i],]$spouse), collapse=',') + } + if (i!=1 && spouses[i] == 0 && spouses[i-1] == 1) { # If ind is linked to previous ind + ped_coords[ind_order[i],]$spouse <- c(ped_coords[ind_order[i-1],]$id) + } + } + + # Output file with coordinates + coords_formats <- unlist(strsplit(coords_format, ',')) + if ("tsv" %in% coords_formats) { + write.table(ped_coords, file=paste(out_dir, "ped_coords.tsv", sep='/'), sep = '\t', quote=FALSE, row.names=FALSE) + } + if ("json" %in% coords_formats) { + write_json(ped_coords, path=paste(out_dir, "ped_coords.json", sep='/'), na='null') + } +} + + +# Command line interface +option_list <- list( + make_option(c("--plot_format"), type="character", default="svg", + help="Plot format, options: [\"svg\", \"png\"]. Default: \"svg\""), + make_option(c("--coords_format"), type="character", default="json", + help="Coords file format, options: [\"json\", \"tsv\"]. 
Default: \"json\""), + make_option(c("--no_legend"), type="logical", default=FALSE, action = "store_true", + help="Removes plot legend"), + make_option(c("--legend_pos"), type="character", default="topright", + help="Legend position, options: [\"bottomright\", \"bottomleft\", \"topleft\", \"topright\"]. Default: \"topright\""), + make_option(c("--no_fix_parents"), type="logical", default=FALSE, action = "store_true", + help="Stop automatic addition of missing parents"), + make_option(c("--plot_title"), type="character", default=NULL, help="Plot title"), + make_option(c("--proband"), type="character", default=NULL, help="Proband ID") +) +parser <- OptionParser(usage = "%prog ped_fpath out_dir [options]", option_list=option_list) +arguments <- parse_args(parser, positional_arguments = 2) +opt <- arguments$options +args <- arguments$args + +# Run main function +plot_pedigree(args[1], + args[2], + plot_format=opt$plot_format, + coords_format=opt$coords_format, + no_legend=opt$no_legend, + legend_pos=opt$legend_pos, + no_fix_parents=opt$no_fix_parents, + plot_title=opt$plot_title, + proband=opt$proband) + diff --git a/opencga-client/pom.xml b/opencga-client/pom.xml index ee6e9dbf30..1dbc5b79c5 100644 --- a/opencga-client/pom.xml +++ b/opencga-client/pom.xml @@ -98,67 +98,26 @@ jakarta.ws.rs jakarta.ws.rs-api - - org.opencb.opencga - opencga-catalog - test - - - org.opencb.opencga - opencga-catalog - test - test-jar - org.opencb.opencga opencga-server - test - - - org.eclipse.jetty - jetty-server - test - - - org.eclipse.jetty - jetty-servlet - test - - - org.glassfish.jersey.core - jersey-server - test - - - org.glassfish.jersey.containers - jersey-container-servlet-core - test - - - junit - junit - - - - - io.jsonwebtoken - jjwt-impl - runtime - - - org.glassfish.jersey.inject - jersey-hk2 - - - org.eclipse.jetty - jetty-webapp - test - - - io.jsonwebtoken - jjwt-jackson - runtime - + compile + + + + + + + + + + + + + + + + diff --git 
a/opencga-client/src/main/java/org/opencb/opencga/client/rest/OpenCGAClient.java b/opencga-client/src/main/java/org/opencb/opencga/client/rest/OpenCGAClient.java index 263bb99277..0cf41782ef 100644 --- a/opencga-client/src/main/java/org/opencb/opencga/client/rest/OpenCGAClient.java +++ b/opencga-client/src/main/java/org/opencb/opencga/client/rest/OpenCGAClient.java @@ -20,8 +20,9 @@ import io.jsonwebtoken.Jwts; import org.apache.commons.lang3.StringUtils; import org.opencb.commons.datastore.core.Event; -import org.opencb.opencga.client.config.ClientConfiguration; -import org.opencb.opencga.client.exceptions.ClientException; +import org.opencb.opencga.core.client.ParentClient; +import org.opencb.opencga.core.config.client.ClientConfiguration; +import org.opencb.opencga.core.exceptions.ClientException; import org.opencb.opencga.client.rest.clients.*; import org.opencb.opencga.core.models.user.AuthenticationResponse; import org.opencb.opencga.core.models.user.LoginParams; @@ -43,7 +44,7 @@ public class OpenCGAClient { protected String refreshToken; protected ClientConfiguration clientConfiguration; - protected Map clients; + protected Map clients; protected boolean throwExceptionOnError; public OpenCGAClient(ClientConfiguration clientConfiguration) { @@ -155,7 +156,7 @@ public MetaClient getMetaClient() { } @SuppressWarnings("unchecked") - protected T getClient(Class clazz, Supplier constructor) { + protected T getClient(Class clazz, Supplier constructor) { return (T) clients.computeIfAbsent(clazz.getName(), (k) -> { T t = constructor.get(); t.setThrowExceptionOnError(throwExceptionOnError); diff --git a/opencga-client/src/main/java/org/opencb/opencga/client/rest/clients/AdminClient.java b/opencga-client/src/main/java/org/opencb/opencga/client/rest/clients/AdminClient.java index 928fd36134..41d59974fc 100644 --- a/opencga-client/src/main/java/org/opencb/opencga/client/rest/clients/AdminClient.java +++ 
b/opencga-client/src/main/java/org/opencb/opencga/client/rest/clients/AdminClient.java @@ -17,9 +17,10 @@ package org.opencb.opencga.client.rest.clients; import org.opencb.commons.datastore.core.ObjectMap; -import org.opencb.opencga.client.config.ClientConfiguration; -import org.opencb.opencga.client.exceptions.ClientException; import org.opencb.opencga.client.rest.*; +import org.opencb.opencga.core.client.ParentClient; +import org.opencb.opencga.core.config.client.ClientConfiguration; +import org.opencb.opencga.core.exceptions.ClientException; import org.opencb.opencga.core.models.Acl; import org.opencb.opencga.core.models.admin.GroupSyncParams; import org.opencb.opencga.core.models.admin.InstallationParams; @@ -47,7 +48,7 @@ * This class contains methods for the Admin webservices. * PATH: admin */ -public class AdminClient extends AbstractParentClient { +public class AdminClient extends ParentClient { public AdminClient(String token, ClientConfiguration configuration) { super(token, configuration); diff --git a/opencga-client/src/main/java/org/opencb/opencga/client/rest/clients/AlignmentClient.java b/opencga-client/src/main/java/org/opencb/opencga/client/rest/clients/AlignmentClient.java index a79559f5af..45702bdd0a 100644 --- a/opencga-client/src/main/java/org/opencb/opencga/client/rest/clients/AlignmentClient.java +++ b/opencga-client/src/main/java/org/opencb/opencga/client/rest/clients/AlignmentClient.java @@ -20,9 +20,10 @@ import org.opencb.biodata.models.alignment.GeneCoverageStats; import org.opencb.biodata.models.alignment.RegionCoverage; import org.opencb.commons.datastore.core.ObjectMap; -import org.opencb.opencga.client.config.ClientConfiguration; -import org.opencb.opencga.client.exceptions.ClientException; import org.opencb.opencga.client.rest.*; +import org.opencb.opencga.core.client.ParentClient; +import org.opencb.opencga.core.config.client.ClientConfiguration; +import org.opencb.opencga.core.exceptions.ClientException; import 
org.opencb.opencga.core.models.alignment.AlignmentGeneCoverageStatsParams; import org.opencb.opencga.core.models.alignment.AlignmentIndexParams; import org.opencb.opencga.core.models.alignment.AlignmentQcParams; @@ -50,7 +51,7 @@ * This class contains methods for the Alignment webservices. * PATH: analysis/alignment */ -public class AlignmentClient extends AbstractParentClient { +public class AlignmentClient extends ParentClient { public AlignmentClient(String token, ClientConfiguration configuration) { super(token, configuration); diff --git a/opencga-client/src/main/java/org/opencb/opencga/client/rest/clients/ClinicalAnalysisClient.java b/opencga-client/src/main/java/org/opencb/opencga/client/rest/clients/ClinicalAnalysisClient.java index 7e94ff8272..a1bdebaad9 100644 --- a/opencga-client/src/main/java/org/opencb/opencga/client/rest/clients/ClinicalAnalysisClient.java +++ b/opencga-client/src/main/java/org/opencb/opencga/client/rest/clients/ClinicalAnalysisClient.java @@ -19,9 +19,10 @@ import org.opencb.biodata.models.clinical.interpretation.ClinicalVariant; import org.opencb.commons.datastore.core.FacetField; import org.opencb.commons.datastore.core.ObjectMap; -import org.opencb.opencga.client.config.ClientConfiguration; -import org.opencb.opencga.client.exceptions.ClientException; import org.opencb.opencga.client.rest.*; +import org.opencb.opencga.core.client.ParentClient; +import org.opencb.opencga.core.config.client.ClientConfiguration; +import org.opencb.opencga.core.exceptions.ClientException; import org.opencb.opencga.core.models.analysis.knockout.KnockoutByGeneSummary; import org.opencb.opencga.core.models.analysis.knockout.KnockoutByIndividual; import org.opencb.opencga.core.models.analysis.knockout.KnockoutByIndividualSummary; @@ -65,7 +66,7 @@ * This class contains methods for the ClinicalAnalysis webservices. 
* PATH: analysis/clinical */ -public class ClinicalAnalysisClient extends AbstractParentClient { +public class ClinicalAnalysisClient extends ParentClient { public ClinicalAnalysisClient(String token, ClientConfiguration configuration) { super(token, configuration); diff --git a/opencga-client/src/main/java/org/opencb/opencga/client/rest/clients/CohortClient.java b/opencga-client/src/main/java/org/opencb/opencga/client/rest/clients/CohortClient.java index 0bcb27158a..50fc3f3313 100644 --- a/opencga-client/src/main/java/org/opencb/opencga/client/rest/clients/CohortClient.java +++ b/opencga-client/src/main/java/org/opencb/opencga/client/rest/clients/CohortClient.java @@ -18,9 +18,10 @@ import java.lang.Object; import org.opencb.commons.datastore.core.ObjectMap; -import org.opencb.opencga.client.config.ClientConfiguration; -import org.opencb.opencga.client.exceptions.ClientException; import org.opencb.opencga.client.rest.*; +import org.opencb.opencga.core.client.ParentClient; +import org.opencb.opencga.core.config.client.ClientConfiguration; +import org.opencb.opencga.core.exceptions.ClientException; import org.opencb.opencga.core.models.cohort.Cohort; import org.opencb.opencga.core.models.cohort.CohortAclEntryList; import org.opencb.opencga.core.models.cohort.CohortAclUpdateParams; @@ -46,7 +47,7 @@ * This class contains methods for the Cohort webservices. 
* PATH: cohorts */ -public class CohortClient extends AbstractParentClient { +public class CohortClient extends ParentClient { public CohortClient(String token, ClientConfiguration configuration) { super(token, configuration); diff --git a/opencga-client/src/main/java/org/opencb/opencga/client/rest/clients/DiseasePanelClient.java b/opencga-client/src/main/java/org/opencb/opencga/client/rest/clients/DiseasePanelClient.java index 15d71d9859..e50211636e 100644 --- a/opencga-client/src/main/java/org/opencb/opencga/client/rest/clients/DiseasePanelClient.java +++ b/opencga-client/src/main/java/org/opencb/opencga/client/rest/clients/DiseasePanelClient.java @@ -18,9 +18,10 @@ import java.lang.Object; import org.opencb.commons.datastore.core.ObjectMap; -import org.opencb.opencga.client.config.ClientConfiguration; -import org.opencb.opencga.client.exceptions.ClientException; import org.opencb.opencga.client.rest.*; +import org.opencb.opencga.core.client.ParentClient; +import org.opencb.opencga.core.config.client.ClientConfiguration; +import org.opencb.opencga.core.exceptions.ClientException; import org.opencb.opencga.core.models.job.Job; import org.opencb.opencga.core.models.panel.Panel; import org.opencb.opencga.core.models.panel.PanelAclEntryList; @@ -45,7 +46,7 @@ * This class contains methods for the DiseasePanel webservices. 
* PATH: panels */ -public class DiseasePanelClient extends AbstractParentClient { +public class DiseasePanelClient extends ParentClient { public DiseasePanelClient(String token, ClientConfiguration configuration) { super(token, configuration); diff --git a/opencga-client/src/main/java/org/opencb/opencga/client/rest/clients/FamilyClient.java b/opencga-client/src/main/java/org/opencb/opencga/client/rest/clients/FamilyClient.java index 368cf2eb4a..d552462d32 100644 --- a/opencga-client/src/main/java/org/opencb/opencga/client/rest/clients/FamilyClient.java +++ b/opencga-client/src/main/java/org/opencb/opencga/client/rest/clients/FamilyClient.java @@ -18,9 +18,10 @@ import java.lang.Object; import org.opencb.commons.datastore.core.ObjectMap; -import org.opencb.opencga.client.config.ClientConfiguration; -import org.opencb.opencga.client.exceptions.ClientException; import org.opencb.opencga.client.rest.*; +import org.opencb.opencga.core.client.ParentClient; +import org.opencb.opencga.core.config.client.ClientConfiguration; +import org.opencb.opencga.core.exceptions.ClientException; import org.opencb.opencga.core.models.common.TsvAnnotationParams; import org.opencb.opencga.core.models.family.Family; import org.opencb.opencga.core.models.family.FamilyAclEntryList; @@ -45,7 +46,7 @@ * This class contains methods for the Family webservices. 
* PATH: families */ -public class FamilyClient extends AbstractParentClient { +public class FamilyClient extends ParentClient { public FamilyClient(String token, ClientConfiguration configuration) { super(token, configuration); diff --git a/opencga-client/src/main/java/org/opencb/opencga/client/rest/clients/FileClient.java b/opencga-client/src/main/java/org/opencb/opencga/client/rest/clients/FileClient.java index dcf3ae18ef..47c6f47c66 100644 --- a/opencga-client/src/main/java/org/opencb/opencga/client/rest/clients/FileClient.java +++ b/opencga-client/src/main/java/org/opencb/opencga/client/rest/clients/FileClient.java @@ -19,9 +19,10 @@ import java.io.DataInputStream; import java.lang.Object; import org.opencb.commons.datastore.core.ObjectMap; -import org.opencb.opencga.client.config.ClientConfiguration; -import org.opencb.opencga.client.exceptions.ClientException; import org.opencb.opencga.client.rest.*; +import org.opencb.opencga.core.client.ParentClient; +import org.opencb.opencga.core.config.client.ClientConfiguration; +import org.opencb.opencga.core.exceptions.ClientException; import org.opencb.opencga.core.models.common.TsvAnnotationParams; import org.opencb.opencga.core.models.file.File; import org.opencb.opencga.core.models.file.FileAclEntryList; @@ -53,7 +54,7 @@ * This class contains methods for the File webservices. 
* PATH: files */ -public class FileClient extends AbstractParentClient { +public class FileClient extends ParentClient { public FileClient(String token, ClientConfiguration configuration) { super(token, configuration); diff --git a/opencga-client/src/main/java/org/opencb/opencga/client/rest/clients/GA4GHClient.java b/opencga-client/src/main/java/org/opencb/opencga/client/rest/clients/GA4GHClient.java index e1eb10bd8a..44c272e1dc 100644 --- a/opencga-client/src/main/java/org/opencb/opencga/client/rest/clients/GA4GHClient.java +++ b/opencga-client/src/main/java/org/opencb/opencga/client/rest/clients/GA4GHClient.java @@ -17,9 +17,10 @@ package org.opencb.opencga.client.rest.clients; import org.opencb.commons.datastore.core.ObjectMap; -import org.opencb.opencga.client.config.ClientConfiguration; -import org.opencb.opencga.client.exceptions.ClientException; import org.opencb.opencga.client.rest.*; +import org.opencb.opencga.core.client.ParentClient; +import org.opencb.opencga.core.config.client.ClientConfiguration; +import org.opencb.opencga.core.exceptions.ClientException; import org.opencb.opencga.core.response.RestResponse; @@ -37,7 +38,7 @@ * This class contains methods for the GA4GH webservices. 
* PATH: ga4gh */ -public class GA4GHClient extends AbstractParentClient { +public class GA4GHClient extends ParentClient { public GA4GHClient(String token, ClientConfiguration configuration) { super(token, configuration); diff --git a/opencga-client/src/main/java/org/opencb/opencga/client/rest/clients/IndividualClient.java b/opencga-client/src/main/java/org/opencb/opencga/client/rest/clients/IndividualClient.java index 571f7b8953..99212c4ad2 100644 --- a/opencga-client/src/main/java/org/opencb/opencga/client/rest/clients/IndividualClient.java +++ b/opencga-client/src/main/java/org/opencb/opencga/client/rest/clients/IndividualClient.java @@ -18,9 +18,10 @@ import java.lang.Object; import org.opencb.commons.datastore.core.ObjectMap; -import org.opencb.opencga.client.config.ClientConfiguration; -import org.opencb.opencga.client.exceptions.ClientException; import org.opencb.opencga.client.rest.*; +import org.opencb.opencga.core.client.ParentClient; +import org.opencb.opencga.core.config.client.ClientConfiguration; +import org.opencb.opencga.core.exceptions.ClientException; import org.opencb.opencga.core.models.common.TsvAnnotationParams; import org.opencb.opencga.core.models.individual.Individual; import org.opencb.opencga.core.models.individual.IndividualAclEntryList; @@ -45,7 +46,7 @@ * This class contains methods for the Individual webservices. 
* PATH: individuals */ -public class IndividualClient extends AbstractParentClient { +public class IndividualClient extends ParentClient { public IndividualClient(String token, ClientConfiguration configuration) { super(token, configuration); diff --git a/opencga-client/src/main/java/org/opencb/opencga/client/rest/clients/JobClient.java b/opencga-client/src/main/java/org/opencb/opencga/client/rest/clients/JobClient.java index 77c04be505..de9fec56dd 100644 --- a/opencga-client/src/main/java/org/opencb/opencga/client/rest/clients/JobClient.java +++ b/opencga-client/src/main/java/org/opencb/opencga/client/rest/clients/JobClient.java @@ -18,9 +18,10 @@ import java.lang.Object; import org.opencb.commons.datastore.core.ObjectMap; -import org.opencb.opencga.client.config.ClientConfiguration; -import org.opencb.opencga.client.exceptions.ClientException; import org.opencb.opencga.client.rest.*; +import org.opencb.opencga.core.client.ParentClient; +import org.opencb.opencga.core.config.client.ClientConfiguration; +import org.opencb.opencga.core.exceptions.ClientException; import org.opencb.opencga.core.models.file.FileContent; import org.opencb.opencga.core.models.job.Job; import org.opencb.opencga.core.models.job.JobAclEntryList; @@ -46,7 +47,7 @@ * This class contains methods for the Job webservices. 
* PATH: jobs */ -public class JobClient extends AbstractParentClient { +public class JobClient extends ParentClient { public JobClient(String token, ClientConfiguration configuration) { super(token, configuration); diff --git a/opencga-client/src/main/java/org/opencb/opencga/client/rest/clients/MetaClient.java b/opencga-client/src/main/java/org/opencb/opencga/client/rest/clients/MetaClient.java index 8401614fec..82f22e4dd2 100644 --- a/opencga-client/src/main/java/org/opencb/opencga/client/rest/clients/MetaClient.java +++ b/opencga-client/src/main/java/org/opencb/opencga/client/rest/clients/MetaClient.java @@ -18,9 +18,10 @@ import java.util.List; import org.opencb.commons.datastore.core.ObjectMap; -import org.opencb.opencga.client.config.ClientConfiguration; -import org.opencb.opencga.client.exceptions.ClientException; import org.opencb.opencga.client.rest.*; +import org.opencb.opencga.core.client.ParentClient; +import org.opencb.opencga.core.config.client.ClientConfiguration; +import org.opencb.opencga.core.exceptions.ClientException; import org.opencb.opencga.core.response.RestResponse; @@ -38,7 +39,7 @@ * This class contains methods for the Meta webservices. 
* PATH: meta */ -public class MetaClient extends AbstractParentClient { +public class MetaClient extends ParentClient { public MetaClient(String token, ClientConfiguration configuration) { super(token, configuration); diff --git a/opencga-client/src/main/java/org/opencb/opencga/client/rest/clients/OrganizationClient.java b/opencga-client/src/main/java/org/opencb/opencga/client/rest/clients/OrganizationClient.java index f8fe3d8d7a..3b1f6ffb46 100644 --- a/opencga-client/src/main/java/org/opencb/opencga/client/rest/clients/OrganizationClient.java +++ b/opencga-client/src/main/java/org/opencb/opencga/client/rest/clients/OrganizationClient.java @@ -17,9 +17,10 @@ package org.opencb.opencga.client.rest.clients; import org.opencb.commons.datastore.core.ObjectMap; -import org.opencb.opencga.client.config.ClientConfiguration; -import org.opencb.opencga.client.exceptions.ClientException; import org.opencb.opencga.client.rest.*; +import org.opencb.opencga.core.client.ParentClient; +import org.opencb.opencga.core.config.client.ClientConfiguration; +import org.opencb.opencga.core.exceptions.ClientException; import org.opencb.opencga.core.models.notes.Note; import org.opencb.opencga.core.models.notes.NoteCreateParams; import org.opencb.opencga.core.models.notes.NoteUpdateParams; @@ -47,7 +48,7 @@ * This class contains methods for the Organization webservices. 
* PATH: organizations */ -public class OrganizationClient extends AbstractParentClient { +public class OrganizationClient extends ParentClient { public OrganizationClient(String token, ClientConfiguration configuration) { super(token, configuration); diff --git a/opencga-client/src/main/java/org/opencb/opencga/client/rest/clients/ProjectClient.java b/opencga-client/src/main/java/org/opencb/opencga/client/rest/clients/ProjectClient.java index 53ecaea461..59f62efda6 100644 --- a/opencga-client/src/main/java/org/opencb/opencga/client/rest/clients/ProjectClient.java +++ b/opencga-client/src/main/java/org/opencb/opencga/client/rest/clients/ProjectClient.java @@ -17,9 +17,10 @@ package org.opencb.opencga.client.rest.clients; import org.opencb.commons.datastore.core.ObjectMap; -import org.opencb.opencga.client.config.ClientConfiguration; -import org.opencb.opencga.client.exceptions.ClientException; import org.opencb.opencga.client.rest.*; +import org.opencb.opencga.core.client.ParentClient; +import org.opencb.opencga.core.config.client.ClientConfiguration; +import org.opencb.opencga.core.exceptions.ClientException; import org.opencb.opencga.core.models.project.Project; import org.opencb.opencga.core.models.project.ProjectCreateParams; import org.opencb.opencga.core.models.project.ProjectUpdateParams; @@ -41,7 +42,7 @@ * This class contains methods for the Project webservices. 
* PATH: projects */ -public class ProjectClient extends AbstractParentClient { +public class ProjectClient extends ParentClient { public ProjectClient(String token, ClientConfiguration configuration) { super(token, configuration); diff --git a/opencga-client/src/main/java/org/opencb/opencga/client/rest/clients/SampleClient.java b/opencga-client/src/main/java/org/opencb/opencga/client/rest/clients/SampleClient.java index 811dcbfe08..c932677d6e 100644 --- a/opencga-client/src/main/java/org/opencb/opencga/client/rest/clients/SampleClient.java +++ b/opencga-client/src/main/java/org/opencb/opencga/client/rest/clients/SampleClient.java @@ -18,9 +18,10 @@ import java.lang.Object; import org.opencb.commons.datastore.core.ObjectMap; -import org.opencb.opencga.client.config.ClientConfiguration; -import org.opencb.opencga.client.exceptions.ClientException; import org.opencb.opencga.client.rest.*; +import org.opencb.opencga.core.client.ParentClient; +import org.opencb.opencga.core.config.client.ClientConfiguration; +import org.opencb.opencga.core.exceptions.ClientException; import org.opencb.opencga.core.models.common.TsvAnnotationParams; import org.opencb.opencga.core.models.job.Job; import org.opencb.opencga.core.models.sample.Sample; @@ -45,7 +46,7 @@ * This class contains methods for the Sample webservices. 
* PATH: samples */ -public class SampleClient extends AbstractParentClient { +public class SampleClient extends ParentClient { public SampleClient(String token, ClientConfiguration configuration) { super(token, configuration); diff --git a/opencga-client/src/main/java/org/opencb/opencga/client/rest/clients/StudyClient.java b/opencga-client/src/main/java/org/opencb/opencga/client/rest/clients/StudyClient.java index a073df3855..7f70388244 100644 --- a/opencga-client/src/main/java/org/opencb/opencga/client/rest/clients/StudyClient.java +++ b/opencga-client/src/main/java/org/opencb/opencga/client/rest/clients/StudyClient.java @@ -17,9 +17,10 @@ package org.opencb.opencga.client.rest.clients; import org.opencb.commons.datastore.core.ObjectMap; -import org.opencb.opencga.client.config.ClientConfiguration; -import org.opencb.opencga.client.exceptions.ClientException; import org.opencb.opencga.client.rest.*; +import org.opencb.opencga.core.client.ParentClient; +import org.opencb.opencga.core.config.client.ClientConfiguration; +import org.opencb.opencga.core.exceptions.ClientException; import org.opencb.opencga.core.models.AclEntryList; import org.opencb.opencga.core.models.audit.AuditRecord; import org.opencb.opencga.core.models.job.Job; @@ -57,7 +58,7 @@ * This class contains methods for the Study webservices. 
* PATH: studies */ -public class StudyClient extends AbstractParentClient { +public class StudyClient extends ParentClient { public StudyClient(String token, ClientConfiguration configuration) { super(token, configuration); diff --git a/opencga-client/src/main/java/org/opencb/opencga/client/rest/clients/UserClient.java b/opencga-client/src/main/java/org/opencb/opencga/client/rest/clients/UserClient.java index ee290b40d2..bf19ad07b0 100644 --- a/opencga-client/src/main/java/org/opencb/opencga/client/rest/clients/UserClient.java +++ b/opencga-client/src/main/java/org/opencb/opencga/client/rest/clients/UserClient.java @@ -17,9 +17,10 @@ package org.opencb.opencga.client.rest.clients; import org.opencb.commons.datastore.core.ObjectMap; -import org.opencb.opencga.client.config.ClientConfiguration; -import org.opencb.opencga.client.exceptions.ClientException; import org.opencb.opencga.client.rest.*; +import org.opencb.opencga.core.client.ParentClient; +import org.opencb.opencga.core.config.client.ClientConfiguration; +import org.opencb.opencga.core.exceptions.ClientException; import org.opencb.opencga.core.models.user.AuthenticationResponse; import org.opencb.opencga.core.models.user.ConfigUpdateParams; import org.opencb.opencga.core.models.user.FilterUpdateParams; @@ -46,7 +47,7 @@ * This class contains methods for the User webservices. 
* PATH: users */ -public class UserClient extends AbstractParentClient { +public class UserClient extends ParentClient { public UserClient(String token, ClientConfiguration configuration) { super(token, configuration); diff --git a/opencga-client/src/main/java/org/opencb/opencga/client/rest/clients/VariantClient.java b/opencga-client/src/main/java/org/opencb/opencga/client/rest/clients/VariantClient.java index 6a68ae8ea8..4d8d31c2db 100644 --- a/opencga-client/src/main/java/org/opencb/opencga/client/rest/clients/VariantClient.java +++ b/opencga-client/src/main/java/org/opencb/opencga/client/rest/clients/VariantClient.java @@ -25,9 +25,10 @@ import org.opencb.commons.datastore.core.FacetField; import org.opencb.commons.datastore.core.ObjectMap; import org.opencb.commons.datastore.core.QueryResponse; -import org.opencb.opencga.client.config.ClientConfiguration; -import org.opencb.opencga.client.exceptions.ClientException; import org.opencb.opencga.client.rest.*; +import org.opencb.opencga.core.client.ParentClient; +import org.opencb.opencga.core.config.client.ClientConfiguration; +import org.opencb.opencga.core.exceptions.ClientException; import org.opencb.opencga.core.models.analysis.knockout.KnockoutByGene; import org.opencb.opencga.core.models.analysis.knockout.KnockoutByIndividual; import org.opencb.opencga.core.models.clinical.ExomiserWrapperParams; @@ -72,7 +73,7 @@ * This class contains methods for the Variant webservices. 
* PATH: analysis/variant */ -public class VariantClient extends AbstractParentClient { +public class VariantClient extends ParentClient { public VariantClient(String token, ClientConfiguration configuration) { super(token, configuration); diff --git a/opencga-client/src/main/java/org/opencb/opencga/client/rest/clients/VariantOperationClient.java b/opencga-client/src/main/java/org/opencb/opencga/client/rest/clients/VariantOperationClient.java index 97c999d192..5d232f5276 100644 --- a/opencga-client/src/main/java/org/opencb/opencga/client/rest/clients/VariantOperationClient.java +++ b/opencga-client/src/main/java/org/opencb/opencga/client/rest/clients/VariantOperationClient.java @@ -17,11 +17,12 @@ package org.opencb.opencga.client.rest.clients; import org.opencb.commons.datastore.core.ObjectMap; -import org.opencb.opencga.client.config.ClientConfiguration; -import org.opencb.opencga.client.exceptions.ClientException; import org.opencb.opencga.client.rest.*; +import org.opencb.opencga.core.client.ParentClient; +import org.opencb.opencga.core.config.client.ClientConfiguration; import org.opencb.opencga.core.config.storage.CellBaseConfiguration; import org.opencb.opencga.core.config.storage.SampleIndexConfiguration; +import org.opencb.opencga.core.exceptions.ClientException; import org.opencb.opencga.core.models.job.Job; import org.opencb.opencga.core.models.operations.variant.JulieParams; import org.opencb.opencga.core.models.operations.variant.VariantAggregateFamilyParams; @@ -62,7 +63,7 @@ * This class contains methods for the VariantOperation webservices. 
* PATH: operation */ -public class VariantOperationClient extends AbstractParentClient { +public class VariantOperationClient extends ParentClient { public VariantOperationClient(String token, ClientConfiguration configuration) { super(token, configuration); diff --git a/opencga-client/src/test/java/org/opencb/opencga/client/config/ClientConfigurationTest.java b/opencga-client/src/test/java/org/opencb/opencga/client/config/ClientConfigurationTest.java deleted file mode 100644 index 93bb369848..0000000000 --- a/opencga-client/src/test/java/org/opencb/opencga/client/config/ClientConfigurationTest.java +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright 2015-2017 OpenCB - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.opencb.opencga.client.config; - -import org.junit.Test; -import org.junit.experimental.categories.Category; -import org.opencb.opencga.core.testclassification.duration.ShortTests; - -import java.io.FileOutputStream; -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; - -/** - * Created by imedina on 04/05/16. 
- */ -@Category(ShortTests.class) -public class ClientConfigurationTest { - - @Test - public void testDefault() { - - ClientConfiguration config = new ClientConfiguration(); - List hostConfigs = new ArrayList<>(); - hostConfigs.add(new HostConfig("opencga", "localhost:9090/opencga")); - RestConfig restConfig = new RestConfig(hostConfigs, true, new QueryRestConfig(200, 2000)); - GrpcConfig grpcConfig = new GrpcConfig("localhost:9091"); - - config.setRest(restConfig); - config.setGrpc(grpcConfig); - - try { - config.serialize(new FileOutputStream("/tmp/client-configuration-test.yml")); - } catch (IOException e) { - e.printStackTrace(); - } - } - - @Test - public void testLoad() throws Exception { - ClientConfiguration storageConfiguration = - ClientConfiguration.load(getClass().getResource("/client-configuration.yml").openStream()); - System.out.println("clientConfiguration = " + storageConfiguration); - } -} \ No newline at end of file diff --git a/opencga-client/src/test/java/org/opencb/opencga/client/rest/OpenCGAClientTest.java b/opencga-client/src/test/java/org/opencb/opencga/client/rest/OpenCGAClientTest.java deleted file mode 100644 index 45cb56c91d..0000000000 --- a/opencga-client/src/test/java/org/opencb/opencga/client/rest/OpenCGAClientTest.java +++ /dev/null @@ -1,36 +0,0 @@ -package org.opencb.opencga.client.rest; - -import io.jsonwebtoken.Jwts; -import io.jsonwebtoken.SignatureAlgorithm; -import org.junit.Test; -import org.junit.experimental.categories.Category; -import org.opencb.opencga.core.testclassification.duration.ShortTests; - -import java.time.Instant; -import java.util.Date; - -import static org.junit.Assert.assertEquals; - -@Category(ShortTests.class) -public class OpenCGAClientTest { - - @Test - public void testGetUserFromToken() { - String token = Jwts.builder().signWith(SignatureAlgorithm.HS256, "dummydummydummydummydummydummydummydummydummydummydummydummy") - .setExpiration(Date.from(Instant.now().plusMillis(1000))) - .setSubject("joe") - 
.compact(); - - assertEquals("joe", OpenCGAClient.getUserFromToken(token)); - } - - @Test - public void testGetUserFromTokenExpired() { - String token = Jwts.builder().signWith(SignatureAlgorithm.HS256, "dummydummydummydummydummydummydummydummydummydummydummydummy") - .setExpiration(Date.from(Instant.ofEpochMilli(System.currentTimeMillis() - 1000))) - .setSubject("joe") - .compact(); - - assertEquals("joe", OpenCGAClient.getUserFromToken(token)); - } -} \ No newline at end of file diff --git a/opencga-client/src/test/java/org/opencb/opencga/client/rest/UserClientTest.java b/opencga-client/src/test/java/org/opencb/opencga/client/rest/UserClientTest.java deleted file mode 100644 index 4a70188070..0000000000 --- a/opencga-client/src/test/java/org/opencb/opencga/client/rest/UserClientTest.java +++ /dev/null @@ -1,91 +0,0 @@ -/* - * Copyright 2015-2017 OpenCB - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.opencb.opencga.client.rest; - -import org.junit.Before; -import org.junit.Ignore; -import org.junit.Rule; -import org.junit.Test; -import org.junit.experimental.categories.Category; -import org.junit.rules.ExpectedException; -import org.opencb.opencga.catalog.exceptions.CatalogException; -import org.opencb.opencga.client.exceptions.ClientException; -import org.opencb.opencga.client.rest.clients.UserClient; -import org.opencb.opencga.core.models.user.AuthenticationResponse; -import org.opencb.opencga.core.models.user.LoginParams; -import org.opencb.opencga.core.models.user.PasswordChangeParams; -import org.opencb.opencga.core.models.user.User; -import org.opencb.opencga.core.response.RestResponse; -import org.opencb.opencga.core.testclassification.duration.ShortTests; - -import static org.junit.Assert.*; - -/** - * Created by imedina on 04/05/16. - */ -@Ignore -@Category(ShortTests.class) -public class UserClientTest extends WorkEnvironmentTest { - - private UserClient userClient; - @Rule - public ExpectedException thrown = ExpectedException.none(); - - @Before - public void before() throws Throwable { - super.before(); - userClient = openCGAClient.getUserClient(); - } - - @Test - public void login() throws ClientException { - AuthenticationResponse response = openCGAClient.login(organizationId, "user1", "user1_pass"); - assertEquals(response.getToken(), openCGAClient.getToken()); - - thrown.expect(CatalogException.class); - thrown.expectMessage("Bad user or password"); - openCGAClient.login(organizationId, "user1", "wrong_password"); - } - - @Test - public void logout() throws ClientException { - System.out.println("token = " + userClient.login(new LoginParams(organizationId, "user1", "user1_pass"), null).firstResult().getToken()); - assertNotNull(openCGAClient.getToken()); - openCGAClient.logout(); - assertEquals(null, openCGAClient.getToken()); - } - - @Test - public void get() throws Exception { - RestResponse login = userClient.info(null, 
null); - assertNotNull(login.firstResult()); - assertEquals(1, login.allResultsSize()); - } - - @Test - public void changePassword() throws Exception { - userClient.password(new PasswordChangeParams("user1", "user1_pass", "user1_newPass")); - String lastSessionId = openCGAClient.getToken(); - AuthenticationResponse response = openCGAClient.login(organizationId, openCGAClient.getUserId(), "user1_newPass"); - assertNotEquals(lastSessionId, response.getToken()); - - thrown.expect(CatalogException.class); - thrown.expectMessage("Bad user or password"); - userClient.password(new PasswordChangeParams("user1", "wrongOldPassword", "anyPassword")); - } - -} \ No newline at end of file diff --git a/opencga-client/src/test/java/org/opencb/opencga/client/rest/WSTestServer.java b/opencga-client/src/test/java/org/opencb/opencga/client/rest/WSTestServer.java deleted file mode 100644 index 29b8ac6486..0000000000 --- a/opencga-client/src/test/java/org/opencb/opencga/client/rest/WSTestServer.java +++ /dev/null @@ -1,99 +0,0 @@ -/* - * Copyright 2015-2017 OpenCB - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.opencb.opencga.client.rest; - -import org.eclipse.jetty.server.Server; -import org.eclipse.jetty.servlet.ServletContextHandler; -import org.eclipse.jetty.servlet.ServletHolder; -import org.glassfish.jersey.server.ResourceConfig; -import org.glassfish.jersey.servlet.ServletContainer; -import org.opencb.opencga.catalog.managers.CatalogManager; -import org.opencb.opencga.catalog.managers.CatalogManagerExternalResource; -import org.opencb.opencga.catalog.managers.CatalogManagerTest; - -import java.io.FileOutputStream; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.Paths; - -/** - * Created by imedina on 9/25/14. - */ -public class WSTestServer { - - private Server server; - private String restURL; - public static final int PORT = 8890; - public static final String DATABASE_PREFIX = "opencga_server_test_"; - - private CatalogManager catalogManager; - private CatalogManagerExternalResource catalogManagerResource; - - public void initServer() throws Exception { - - ResourceConfig resourceConfig = new ResourceConfig(); - resourceConfig.packages(false, "org.opencb.opencga.server.ws"); - resourceConfig.property("jersey.config.server.provider.packages", "org.opencb.opencga.server.ws;com.wordnik.swagger.jersey.listing;com.jersey.jaxb;com.fasterxml.jackson.jaxrs.json"); - resourceConfig.property("jersey.config.server.provider.classnames", "org.glassfish.jersey.media.multipart.MultiPartFeature"); - - ServletContainer sc = new ServletContainer(resourceConfig); - ServletHolder sh = new ServletHolder(sc); - - server = new Server(PORT); - - ServletContextHandler context = new ServletContextHandler(server, null, ServletContextHandler.SESSIONS); - context.addServlet(sh, "/opencga/webservices/rest/*"); - - System.err.println("Starting server"); - server.start(); - System.err.println("Waiting for connections"); - System.out.println(server.getState()); - - System.out.println(server.getURI()); - } - - public void shutdownServer() throws 
Exception { - System.err.println("Shutdown server"); - server.stop(); - server.join(); - } - - public void setUp() throws Exception { - //Create test environment. Override OpenCGA_Home - Path opencgaHome = Paths.get("/tmp/opencga-server-test"); - System.setProperty("app.home", opencgaHome.toString()); -// Config.setOpenCGAHome(opencgaHome.toString()); - - Files.createDirectories(opencgaHome); - Files.createDirectories(opencgaHome.resolve("conf")); - - CatalogManagerTest catalogManagerTest = new CatalogManagerTest(); - catalogManagerResource = catalogManagerTest.catalogManagerResource; - catalogManagerResource.before(); - - catalogManagerResource.getConfiguration().serialize(new FileOutputStream(opencgaHome.resolve("conf").resolve("configuration.yml").toFile())); -// InputStream inputStream = new ByteArrayInputStream((AnalysisJobExecutor.OPENCGA_ANALYSIS_JOB_EXECUTOR + "=LOCAL" + "\n" + -// AnalysisFileIndexer.OPENCGA_ANALYSIS_STORAGE_DATABASE_PREFIX + "=" + DATABASE_PREFIX).getBytes()); -// Files.copy(inputStream, opencgaHome.resolve("conf").resolve("analysis.properties"), StandardCopyOption.REPLACE_EXISTING); -// inputStream = OpenCGAWSServerTest.class.getClassLoader().getResourceAsStream("storage-configuration.yml"); -// Files.copy(inputStream, opencgaHome.resolve("conf").resolve("storage-configuration.yml"), StandardCopyOption.REPLACE_EXISTING); - - catalogManagerTest.setUpCatalogManager(catalogManagerResource.getCatalogManager()); //Clear and setup CatalogDatabase - catalogManager = catalogManagerResource.getCatalogManager(); - } -} - diff --git a/opencga-client/src/test/java/org/opencb/opencga/client/rest/WorkEnvironmentTest.java b/opencga-client/src/test/java/org/opencb/opencga/client/rest/WorkEnvironmentTest.java deleted file mode 100644 index 9e1f4c14bd..0000000000 --- a/opencga-client/src/test/java/org/opencb/opencga/client/rest/WorkEnvironmentTest.java +++ /dev/null @@ -1,114 +0,0 @@ -/* - * Copyright 2015-2017 OpenCB - * - * Licensed under the Apache 
License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.opencb.opencga.client.rest; - -import org.apache.commons.lang3.RandomStringUtils; -import org.junit.experimental.categories.Category; -import org.junit.rules.ExternalResource; -import org.opencb.opencga.TestParamConstants; -import org.opencb.opencga.catalog.managers.CatalogManager; -import org.opencb.opencga.catalog.utils.CatalogDemo; -import org.opencb.opencga.client.config.ClientConfiguration; -import org.opencb.opencga.core.config.Configuration; -import org.opencb.opencga.core.config.storage.StorageConfiguration; -import org.opencb.opencga.core.testclassification.duration.ShortTests; -import org.opencb.opencga.server.RestServer; - -import java.io.FileInputStream; -import java.io.FileOutputStream; -import java.io.InputStream; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.Paths; -import java.nio.file.StandardCopyOption; - -/** - * Created by pfurio on 09/06/16. 
- */ -@Category(ShortTests.class) -public class WorkEnvironmentTest extends ExternalResource { - - protected String organizationId = "test"; - protected OpenCGAClient openCGAClient; - protected Path opencgaHome; - protected CatalogManager catalogManager; - protected Configuration configuration; - protected StorageConfiguration storageConfiguration; - protected ClientConfiguration clientConfiguration; - protected RestServer restServer; - - @Override - protected void before() throws Throwable { - super.before(); - isolateOpenCGA(); - } - - private void isolateOpenCGA() throws Exception { - opencgaHome = Paths.get("target/test-data").resolve("junit_opencga_home_" + RandomStringUtils.randomAlphabetic(10)); - Files.createDirectories(opencgaHome); - storageConfiguration = StorageConfiguration.load(getClass().getResource("/storage-configuration.yml").openStream()); - configuration = Configuration.load(getClass().getResource("/configuration-test.yml").openStream()); - configuration.setWorkspace(opencgaHome.resolve("sessions").toUri().toString()); - - // Copy the conf files - Files.createDirectories(opencgaHome.resolve("conf")); -// InputStream inputStream = getClass().getResource("/configuration-test.yml").openStream(); -// Files.copy(inputStream, opencgaHome.resolve("conf").resolve("configuration.yml"), StandardCopyOption.REPLACE_EXISTING); - configuration.serialize(new FileOutputStream(opencgaHome.resolve("conf").resolve("configuration.yml").toString())); - - InputStream inputStream = getClass().getResource("/storage-configuration.yml").openStream(); - Files.copy(inputStream, opencgaHome.resolve("conf").resolve("storage-configuration.yml"), StandardCopyOption.REPLACE_EXISTING); - - inputStream = getClass().getResource("/configuration-test.yml").openStream(); - Files.copy(inputStream, opencgaHome.resolve("conf").resolve("configuration.yml"), StandardCopyOption.REPLACE_EXISTING); - - inputStream = getClass().getResource("/analysis.properties").openStream(); - 
Files.copy(inputStream, opencgaHome.resolve("conf").resolve("analysis.properties"), StandardCopyOption.REPLACE_EXISTING); - - // Copy the configuration and example demo files - Files.createDirectories(opencgaHome.resolve("examples")); - inputStream = new FileInputStream("../opencga-app/app/misc/examples/20130606_g1k.ped"); - Files.copy(inputStream, opencgaHome.resolve("examples").resolve("20130606_g1k.ped"), StandardCopyOption.REPLACE_EXISTING); - - inputStream = new FileInputStream("../opencga-app/app/misc/examples/1k.chr1.phase3_shapeit2_mvncall_integrated_v5.20130502" + - ".genotypes.vcf.gz"); - Files.copy(inputStream, opencgaHome.resolve("examples") - .resolve("1k.chr1.phase3_shapeit2_mvncall_integrated_v5.20130502.genotypes.vcf.gz"), StandardCopyOption.REPLACE_EXISTING); - - catalogManager = new CatalogManager(configuration); - - CatalogDemo.createDemoDatabase(catalogManager, organizationId, TestParamConstants.ADMIN_PASSWORD, true); - - restServer = new RestServer(opencgaHome); - restServer.start(); - -// catalogManager = new CatalogManager(configuration); - clientConfiguration = ClientConfiguration.load(getClass().getResourceAsStream("/client-configuration-test.yml")); - openCGAClient = new OpenCGAClient(organizationId, "user1", "user1_pass", clientConfiguration); - } - - @Override - protected void after() { - super.after(); - try { - restServer.stop(); - catalogManager.close(); - } catch (Exception e) { - e.printStackTrace(); - } - } -} diff --git a/opencga-core/pom.xml b/opencga-core/pom.xml index c1fb9ac506..2a00c35954 100644 --- a/opencga-core/pom.xml +++ b/opencga-core/pom.xml @@ -116,6 +116,23 @@ com.github.samtools htsjdk + + net.minidev + json-smart + + + org.glassfish.jersey.core + jersey-client + + + org.glassfish.jersey.media + jersey-media-multipart + + + org.glassfish.jersey.media + jersey-media-json-jackson + + org.apache.httpcomponents diff --git a/opencga-core/src/main/java/org/opencb/opencga/core/api/FieldConstants.java 
b/opencga-core/src/main/java/org/opencb/opencga/core/api/FieldConstants.java index c10ef15649..9c139140c7 100644 --- a/opencga-core/src/main/java/org/opencb/opencga/core/api/FieldConstants.java +++ b/opencga-core/src/main/java/org/opencb/opencga/core/api/FieldConstants.java @@ -44,6 +44,8 @@ public class FieldConstants { public static final String ORGANIZATION_OWNER_DESCRIPTION = "Owner of the organization."; public static final String ORGANIZATION_ADMINS_DESCRIPTION = "Administrative users of the organization."; public static final String ORGANIZATION_PROJECTS_DESCRIPTION = "Projects the organization holds."; + public static final String ORGANIZATION_FEDERATION_DESCRIPTION = "Object containing any federated clients or servers."; + public static final String ORGANIZATION_CREDENTIALS_DESCRIPTION = "List of credentials that may be used by the Organization."; public static final String ORGANIZATION_NOTES_DESCRIPTION = "Notes of organization scope."; // public static final String ORGANIZATION_AUTHENTICATION_ORIGINS_DESCRIPTION = "Authentication origins used by the organization. 
This " // + "contains all the configuration necessary to be able to communicate with the external authentication origins."; @@ -69,6 +71,26 @@ public class FieldConstants { public static final String NOTES_USER_ID_PARAM = "userId"; public static final String NOTES_VISIBILITY_PARAM = "visibility"; + //Federation + public static final String FEDERATION_CLIENT_ID_DESCRIPTION = "Unique ID to identify the federation server."; + public static final String FEDERATION_CLIENT_DESCRIPTION_DESCRIPTION = "Description of the federation server."; + public static final String FEDERATION_CLIENT_EMAIL_DESCRIPTION = "Email of the federation server."; + public static final String FEDERATION_CLIENT_URL_DESCRIPTION = "URL of the federation server."; + public static final String FEDERATION_CLIENT_ORGANIZATION_ID_DESCRIPTION = "Organization ID from the federation server."; + public static final String FEDERATION_CLIENT_USER_ID_DESCRIPTION = "User ID to access the federation server."; + public static final String FEDERATION_CLIENT_PASSWORD_DESCRIPTION = "User password to access the federation server."; + public static final String FEDERATION_CLIENT_TOKEN_DESCRIPTION = "Token to access the federation server."; + public static final String FEDERATION_CLIENT_SECRET_KEY_DESCRIPTION = "Secret key auto-generated by the federation server."; + + public static final String FEDERATION_SERVER_ID_DESCRIPTION = "Unique ID to identify the federation client."; + public static final String FEDERATION_SERVER_DESCRIPTION_DESCRIPTION = "Description of the federation client."; + public static final String FEDERATION_SERVER_EMAIL_DESCRIPTION = "Contact email of the federation client."; + public static final String FEDERATION_SERVER_USER_ID_DESCRIPTION = "User ID to be used by the federation client."; + public static final String FEDERATION_SERVER_SECRET_KEY_DESCRIPTION = "Secret key shared with the federation client to gain access" + + " to the server."; + public static final String 
FEDERATION_SERVER_ACTIVE_DESCRIPTION = "Flag to indicate if the federation client account is active."; + public static final String FEDERATION_SERVER_EXPIRATION_TIME_DESCRIPTION = "Expiration time of the federation client account."; + //Sample public static final String SAMPLE_ID_DESCRIPTION = "Sample ID."; public static final String SAMPLE_PROCESSING_DESCRIPTION = "Describes how the sample was processed in the lab."; diff --git a/opencga-core/src/main/java/org/opencb/opencga/core/client/GenericClient.java b/opencga-core/src/main/java/org/opencb/opencga/core/client/GenericClient.java new file mode 100644 index 0000000000..72c24969f1 --- /dev/null +++ b/opencga-core/src/main/java/org/opencb/opencga/core/client/GenericClient.java @@ -0,0 +1,86 @@ +package org.opencb.opencga.core.client; + +import org.opencb.commons.datastore.core.ObjectMap; +import org.opencb.opencga.core.config.client.ClientConfiguration; +import org.opencb.opencga.core.exceptions.ClientException; +import org.opencb.opencga.core.models.user.AuthenticationResponse; +import org.opencb.opencga.core.models.user.LoginParams; +import org.opencb.opencga.core.response.RestResponse; + +import java.util.Map; + +public class GenericClient extends ParentClient { + + public GenericClient(String token, ClientConfiguration clientConfiguration) { + super(token, clientConfiguration); + } + + /** + * Get an anonymous token to gain access to the system. + * @param organization Organization id. + * @return a RestResponse object. + * @throws ClientException ClientException if there is any server error. + */ + public RestResponse anonymous(String organization) throws ClientException { + ObjectMap params = new ObjectMap(); + params.putIfNotNull("organization", organization); + return execute("users", null, null, null, "anonymous", params, POST, AuthenticationResponse.class); + } + + /** + * Get identified and gain access to the system. + * @param data JSON containing the authentication parameters. 
+ * @return a RestResponse object. + * @throws ClientException ClientException if there is any server error. + */ + public RestResponse login(LoginParams data) throws ClientException { + ObjectMap params = new ObjectMap(); + params.put("body", data); + return execute("users", null, null, null, "login", params, POST, AuthenticationResponse.class); + } + + @Override + public RestResponse execute(String category, String action, Map params, String method, Class clazz) + throws ClientException { + return super.execute(category, action, params, method, clazz); + } + + public RestResponse execute(String category, String action, Map params, Object body, String method, + Class clazz) throws ClientException { + if (body != null) { + params = params == null ? new ObjectMap() : params; + params.put("body", body); + } + return super.execute(category, action, params, method, clazz); + } + + @Override + public RestResponse execute(String category, String id, String action, Map params, String method, Class clazz) + throws ClientException { + return super.execute(category, id, action, params, method, clazz); + } + + public RestResponse execute(String category, String id, String action, Map params, Object body, String method, + Class clazz) throws ClientException { + if (body != null) { + params = params == null ? new ObjectMap() : params; + params.put("body", body); + } + return super.execute(category, id, action, params, method, clazz); + } + + @Override + public RestResponse execute(String category1, String id1, String category2, String id2, String action, + Map params, String method, Class clazz) throws ClientException { + return super.execute(category1, id1, category2, id2, action, params, method, clazz); + } + + public RestResponse execute(String category1, String id1, String category2, String id2, String action, + Map params, Object body, String method, Class clazz) throws ClientException { + if (body != null) { + params = params == null ? 
new ObjectMap() : params; + params.put("body", body); + } + return super.execute(category1, id1, category2, id2, action, params, method, clazz); + } +} diff --git a/opencga-client/src/main/java/org/opencb/opencga/client/rest/AbstractParentClient.java b/opencga-core/src/main/java/org/opencb/opencga/core/client/ParentClient.java similarity index 96% rename from opencga-client/src/main/java/org/opencb/opencga/client/rest/AbstractParentClient.java rename to opencga-core/src/main/java/org/opencb/opencga/core/client/ParentClient.java index 007c5f7f2e..1e92b808a0 100644 --- a/opencga-client/src/main/java/org/opencb/opencga/client/rest/AbstractParentClient.java +++ b/opencga-core/src/main/java/org/opencb/opencga/core/client/ParentClient.java @@ -14,7 +14,7 @@ * limitations under the License. */ - package org.opencb.opencga.client.rest; + package org.opencb.opencga.core.client; import com.fasterxml.jackson.core.JsonParseException; import com.fasterxml.jackson.core.JsonProcessingException; @@ -32,9 +32,9 @@ import org.opencb.commons.datastore.core.Event; import org.opencb.commons.datastore.core.ObjectMap; import org.opencb.commons.datastore.core.QueryOptions; - import org.opencb.opencga.client.config.ClientConfiguration; - import org.opencb.opencga.client.exceptions.ClientException; import org.opencb.opencga.core.common.JacksonUtils; + import org.opencb.opencga.core.config.client.ClientConfiguration; + import org.opencb.opencga.core.exceptions.ClientException; import org.opencb.opencga.core.response.OpenCGAResult; import org.opencb.opencga.core.response.QueryType; import org.opencb.opencga.core.response.RestResponse; @@ -63,18 +63,18 @@ /** * Created by imedina on 04/05/16. 
*/ - public abstract class AbstractParentClient { - - protected static final String GET = "GET"; - protected static final String POST = "POST"; - protected static final String DELETE = "DELETE"; - protected static final int DEFAULT_BATCH_SIZE = 200; - protected static final int DEFAULT_LIMIT = 2000; - protected static final int DEFAULT_SKIP = 0; - protected static final int DEFAULT_CONNECT_TIMEOUT = 1000; - protected static final int DEFAULT_READ_TIMEOUT = 30000; - protected static final int DEFAULT_UPLOAD_TIMEOUT = 5400000; - protected static final String COOKIES = "cookies"; + public abstract class ParentClient { + + public static final String GET = "GET"; + public static final String POST = "POST"; + public static final String DELETE = "DELETE"; + public static final int DEFAULT_BATCH_SIZE = 200; + public static final int DEFAULT_LIMIT = 2000; + public static final int DEFAULT_SKIP = 0; + public static final int DEFAULT_CONNECT_TIMEOUT = 1000; + public static final int DEFAULT_READ_TIMEOUT = 30000; + public static final int DEFAULT_UPLOAD_TIMEOUT = 5400000; + public static final String COOKIES = "cookies"; protected final Client client; protected final ObjectMapper jsonObjectMapper; @@ -86,12 +86,12 @@ public abstract class AbstractParentClient { protected String token; private boolean throwExceptionOnError = false; - protected AbstractParentClient(String token, ClientConfiguration clientConfiguration) { + protected ParentClient(String token, ClientConfiguration clientConfiguration) { this.token = token; this.clientConfiguration = clientConfiguration; this.logger = LoggerFactory.getLogger(this.getClass()); - this.privateLogger = LoggerFactory.getLogger(AbstractParentClient.class); + this.privateLogger = LoggerFactory.getLogger(ParentClient.class); this.client = createRestClient(); @@ -161,7 +161,7 @@ public boolean verify(String hostname, SSLSession sslSession) { return clientBuilder.build(); } - public AbstractParentClient setThrowExceptionOnError(boolean 
throwExceptionOnError) { + public ParentClient setThrowExceptionOnError(boolean throwExceptionOnError) { this.throwExceptionOnError = throwExceptionOnError; return this; } @@ -666,7 +666,7 @@ protected void checkErrors(RestResponse restResponse, Response.StatusType } } - public AbstractParentClient setToken(String token) { + public ParentClient setToken(String token) { this.token = token; return this; } diff --git a/opencga-core/src/main/java/org/opencb/opencga/core/common/JacksonUtils.java b/opencga-core/src/main/java/org/opencb/opencga/core/common/JacksonUtils.java index e5f542454e..b396fed114 100644 --- a/opencga-core/src/main/java/org/opencb/opencga/core/common/JacksonUtils.java +++ b/opencga-core/src/main/java/org/opencb/opencga/core/common/JacksonUtils.java @@ -35,6 +35,10 @@ import org.opencb.opencga.core.models.common.mixins.VariantStatsJsonMixin; import org.opencb.opencga.core.models.family.Family; import org.opencb.opencga.core.models.family.FamilyMixin; +import org.opencb.opencga.core.models.federation.FederationServer; +import org.opencb.opencga.core.models.federation.FederationServerMixin; +import org.opencb.opencga.core.models.federation.FederationClient; +import org.opencb.opencga.core.models.federation.FederationClientMixin; import org.opencb.opencga.core.models.file.File; import org.opencb.opencga.core.models.file.FileMixin; import org.opencb.opencga.core.models.individual.Individual; @@ -109,6 +113,8 @@ private static ObjectMapper generateUpdateObjectMapper(JsonFactory jf) { objectMapper.addMixIn(VariableSet.class, PrivateUidMixin.class); objectMapper.addMixIn(ClinicalAnalysis.class, PrivateUidMixin.class); objectMapper.addMixIn(Interpretation.class, PrivateUidMixin.class); + objectMapper.addMixIn(FederationServer.class, FederationServerMixin.class); + objectMapper.addMixIn(FederationClient.class, FederationClientMixin.class); objectMapper.setSerializationInclusion(JsonInclude.Include.NON_NULL); @@ -143,6 +149,8 @@ private static ObjectMapper 
generateOpenCGAObjectMapper(JsonFactory jf) { objectMapper.addMixIn(VariableSet.class, PrivateUidMixin.class); objectMapper.addMixIn(ClinicalAnalysis.class, PrivateUidMixin.class); objectMapper.addMixIn(Interpretation.class, PrivateUidMixin.class); + objectMapper.addMixIn(FederationServer.class, FederationServerMixin.class); + objectMapper.addMixIn(FederationClient.class, FederationClientMixin.class); objectMapper.setSerializationInclusion(JsonInclude.Include.NON_NULL); return objectMapper; diff --git a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/utils/JwtUtils.java b/opencga-core/src/main/java/org/opencb/opencga/core/common/JwtUtils.java similarity index 62% rename from opencga-catalog/src/main/java/org/opencb/opencga/catalog/utils/JwtUtils.java rename to opencga-core/src/main/java/org/opencb/opencga/core/common/JwtUtils.java index 8f40b5e9ef..f732114f62 100644 --- a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/utils/JwtUtils.java +++ b/opencga-core/src/main/java/org/opencb/opencga/core/common/JwtUtils.java @@ -1,16 +1,18 @@ -package org.opencb.opencga.catalog.utils; +package org.opencb.opencga.core.common; import io.jsonwebtoken.SignatureAlgorithm; import net.minidev.json.JSONObject; import net.minidev.json.parser.JSONParser; import net.minidev.json.parser.ParseException; +import org.apache.commons.collections4.CollectionUtils; import org.apache.commons.lang3.StringUtils; +import org.opencb.opencga.core.models.JwtPayload; import javax.crypto.spec.SecretKeySpec; -import java.util.Base64; -import java.util.Calendar; -import java.util.Date; +import java.util.*; + +import static org.opencb.opencga.core.models.JwtPayload.FEDERATIONS; public class JwtUtils { @@ -50,4 +52,22 @@ public static Date getExpirationDate(String token) { } return res; } + + public static List getFederations(Map claims) { + List o = (List) claims.get(FEDERATIONS); + if (CollectionUtils.isNotEmpty(o)) { + List federationList = new ArrayList<>(o.size()); + for (Object 
federationObject : o) { + if (federationObject instanceof Map) { + String id = ((Map) federationObject).get("id"); + List projectIds = ((Map>) federationObject).get("projectIds"); + List studyIds = ((Map>) federationObject).get("studyIds"); + federationList.add(new JwtPayload.Federation(id, projectIds, studyIds)); + } + } + return federationList; + } else { + return Collections.emptyList(); + } + } } diff --git a/opencga-client/src/main/java/org/opencb/opencga/client/config/ClientConfiguration.java b/opencga-core/src/main/java/org/opencb/opencga/core/config/client/ClientConfiguration.java similarity index 95% rename from opencga-client/src/main/java/org/opencb/opencga/client/config/ClientConfiguration.java rename to opencga-core/src/main/java/org/opencb/opencga/core/config/client/ClientConfiguration.java index e7fa5f54a9..c84e7ceaab 100644 --- a/opencga-client/src/main/java/org/opencb/opencga/client/config/ClientConfiguration.java +++ b/opencga-core/src/main/java/org/opencb/opencga/core/config/client/ClientConfiguration.java @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -package org.opencb.opencga.client.config; +package org.opencb.opencga.core.config.client; import com.fasterxml.jackson.core.JsonFactory; import com.fasterxml.jackson.databind.DeserializationFeature; @@ -22,7 +22,7 @@ import com.fasterxml.jackson.dataformat.yaml.YAMLFactory; import org.apache.commons.collections4.CollectionUtils; import org.opencb.commons.utils.FileUtils; -import org.opencb.opencga.client.exceptions.ClientException; +import org.opencb.opencga.core.exceptions.ClientException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -30,6 +30,7 @@ import java.io.InputStream; import java.io.OutputStream; import java.nio.file.Path; +import java.util.Collections; import java.util.Map; /** @@ -49,6 +50,12 @@ public ClientConfiguration() { logger = LoggerFactory.getLogger(ClientConfiguration.class); } + public ClientConfiguration(String opencgaUrl) { + logger = LoggerFactory.getLogger(ClientConfiguration.class); + HostConfig hostConfig = new HostConfig("opencga", opencgaUrl); + this.rest = new RestConfig(Collections.singletonList(hostConfig), false, null); + } + public static ClientConfiguration load(Path clientConfigurationPath) throws IOException { logger.debug("Loading client configuration file from '{}'", clientConfigurationPath.toString()); InputStream inputStream = FileUtils.newInputStream(clientConfigurationPath); diff --git a/opencga-client/src/main/java/org/opencb/opencga/client/config/GrpcConfig.java b/opencga-core/src/main/java/org/opencb/opencga/core/config/client/GrpcConfig.java similarity index 95% rename from opencga-client/src/main/java/org/opencb/opencga/client/config/GrpcConfig.java rename to opencga-core/src/main/java/org/opencb/opencga/core/config/client/GrpcConfig.java index 1f69fdb96a..4cc7a735bb 100644 --- a/opencga-client/src/main/java/org/opencb/opencga/client/config/GrpcConfig.java +++ b/opencga-core/src/main/java/org/opencb/opencga/core/config/client/GrpcConfig.java @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -package org.opencb.opencga.client.config; +package org.opencb.opencga.core.config.client; /** * Created by imedina on 04/05/16. diff --git a/opencga-client/src/main/java/org/opencb/opencga/client/config/HostConfig.java b/opencga-core/src/main/java/org/opencb/opencga/core/config/client/HostConfig.java similarity index 94% rename from opencga-client/src/main/java/org/opencb/opencga/client/config/HostConfig.java rename to opencga-core/src/main/java/org/opencb/opencga/core/config/client/HostConfig.java index 0fb82197b3..6c053b5a47 100644 --- a/opencga-client/src/main/java/org/opencb/opencga/client/config/HostConfig.java +++ b/opencga-core/src/main/java/org/opencb/opencga/core/config/client/HostConfig.java @@ -1,4 +1,4 @@ -package org.opencb.opencga.client.config; +package org.opencb.opencga.core.config.client; public class HostConfig { diff --git a/opencga-client/src/main/java/org/opencb/opencga/client/config/QueryRestConfig.java b/opencga-core/src/main/java/org/opencb/opencga/core/config/client/QueryRestConfig.java similarity index 96% rename from opencga-client/src/main/java/org/opencb/opencga/client/config/QueryRestConfig.java rename to opencga-core/src/main/java/org/opencb/opencga/core/config/client/QueryRestConfig.java index da06d95372..cbbe5367d3 100644 --- a/opencga-client/src/main/java/org/opencb/opencga/client/config/QueryRestConfig.java +++ b/opencga-core/src/main/java/org/opencb/opencga/core/config/client/QueryRestConfig.java @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -package org.opencb.opencga.client.config; +package org.opencb.opencga.core.config.client; public class QueryRestConfig { diff --git a/opencga-client/src/main/java/org/opencb/opencga/client/config/RestConfig.java b/opencga-core/src/main/java/org/opencb/opencga/core/config/client/RestConfig.java similarity index 98% rename from opencga-client/src/main/java/org/opencb/opencga/client/config/RestConfig.java rename to opencga-core/src/main/java/org/opencb/opencga/core/config/client/RestConfig.java index 8b7dca8903..a518f1c938 100644 --- a/opencga-client/src/main/java/org/opencb/opencga/client/config/RestConfig.java +++ b/opencga-core/src/main/java/org/opencb/opencga/core/config/client/RestConfig.java @@ -14,7 +14,7 @@ * limitations under the License. */ -package org.opencb.opencga.client.config; +package org.opencb.opencga.core.config.client; import java.util.List; diff --git a/opencga-client/src/main/java/org/opencb/opencga/client/exceptions/ClientException.java b/opencga-core/src/main/java/org/opencb/opencga/core/exceptions/ClientException.java similarity index 95% rename from opencga-client/src/main/java/org/opencb/opencga/client/exceptions/ClientException.java rename to opencga-core/src/main/java/org/opencb/opencga/core/exceptions/ClientException.java index 9454ba16e0..836ff8ae4f 100644 --- a/opencga-client/src/main/java/org/opencb/opencga/client/exceptions/ClientException.java +++ b/opencga-core/src/main/java/org/opencb/opencga/core/exceptions/ClientException.java @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -package org.opencb.opencga.client.exceptions; +package org.opencb.opencga.core.exceptions; import org.opencb.commons.datastore.core.result.Error; diff --git a/opencga-core/src/main/java/org/opencb/opencga/core/models/JwtPayload.java b/opencga-core/src/main/java/org/opencb/opencga/core/models/JwtPayload.java index 4d8df0f6f0..d9dca79389 100644 --- a/opencga-core/src/main/java/org/opencb/opencga/core/models/JwtPayload.java +++ b/opencga-core/src/main/java/org/opencb/opencga/core/models/JwtPayload.java @@ -4,25 +4,29 @@ import org.apache.commons.lang3.StringUtils; import org.opencb.commons.datastore.core.ObjectMap; import org.opencb.opencga.core.common.JacksonUtils; +import org.opencb.opencga.core.common.JwtUtils; import org.opencb.opencga.core.config.AuthenticationOrigin; import java.util.Base64; import java.util.Date; +import java.util.List; public class JwtPayload { private final String userId; private final String organization; private final AuthenticationOrigin.AuthenticationType authOrigin; - private final String issuer; // Issuer of the JWT token. - private final Date issuedAt; // Time when the JWT was issued. - private final Date expirationTime; // Expiration time of the JWT. + private final String issuer; // Issuer of the JWT token. + private final Date issuedAt; // Time when the JWT was issued. + private final Date expirationTime; // Expiration time of the JWT. + private final List federations; // Federation information containing the projects and studies the user has access to. 
private final String token; public static final String AUTH_ORIGIN = "authOrigin"; + public static final String FEDERATIONS = "federations"; public JwtPayload(String userId, String organization, AuthenticationOrigin.AuthenticationType authOrigin, String issuer, Date issuedAt, - Date expirationTime, String token) { + Date expirationTime, List federationList, String token) { this.token = token; this.userId = userId; this.organization = organization; @@ -30,6 +34,7 @@ public JwtPayload(String userId, String organization, AuthenticationOrigin.Authe this.issuer = issuer; this.issuedAt = issuedAt; this.expirationTime = expirationTime; + this.federations = federationList; } /** @@ -81,6 +86,8 @@ public JwtPayload(String token) { } else { this.expirationTime = null; } + + this.federations = JwtUtils.getFederations(claimsMap); } } @@ -93,6 +100,7 @@ public String toString() { sb.append(", issuer='").append(issuer).append('\''); sb.append(", issuedAt=").append(issuedAt); sb.append(", expirationTime=").append(expirationTime); + sb.append(", federations=").append(federations); sb.append(", token='").append(token).append('\''); sb.append('}'); return sb.toString(); @@ -133,4 +141,61 @@ public Date getExpirationTime() { public String getToken() { return token; } + + public List getFederations() { + return federations; + } + + public static class Federation { + private String id; + private List projectIds; + private List studyIds; + + public Federation() { + } + + public Federation(String id, List projectIds, List studyIds) { + this.id = id; + this.projectIds = projectIds; + this.studyIds = studyIds; + } + + @Override + public String toString() { + final StringBuilder sb = new StringBuilder("Federation{"); + sb.append("id='").append(id).append('\''); + sb.append(", projectIds=").append(projectIds); + sb.append(", studyIds=").append(studyIds); + sb.append('}'); + return sb.toString(); + } + + public String getId() { + return id; + } + + public Federation setId(String id) { + 
this.id = id; + return this; + } + + public List getProjectIds() { + return projectIds; + } + + public Federation setProjectIds(List projectIds) { + this.projectIds = projectIds; + return this; + } + + public List getStudyIds() { + return studyIds; + } + + public Federation setStudyIds(List studyIds) { + this.studyIds = studyIds; + return this; + } + } + } diff --git a/opencga-core/src/main/java/org/opencb/opencga/core/models/common/Enums.java b/opencga-core/src/main/java/org/opencb/opencga/core/models/common/Enums.java index 2777222715..708970ca24 100644 --- a/opencga-core/src/main/java/org/opencb/opencga/core/models/common/Enums.java +++ b/opencga-core/src/main/java/org/opencb/opencga/core/models/common/Enums.java @@ -259,6 +259,9 @@ public enum Action { IMPORT_EXTERNAL_GROUP_OF_USERS, SYNC_EXTERNAL_GROUP_OF_USERS, + CREATE_FEDERATION_CLIENT, + EXPOSE_FEDERATION_SERVER, + // RGA RESET_RGA_INDEXES, UPDATE_RGA_INDEX, diff --git a/opencga-core/src/main/java/org/opencb/opencga/core/models/federation/Federation.java b/opencga-core/src/main/java/org/opencb/opencga/core/models/federation/Federation.java new file mode 100644 index 0000000000..69e82f3958 --- /dev/null +++ b/opencga-core/src/main/java/org/opencb/opencga/core/models/federation/Federation.java @@ -0,0 +1,44 @@ +package org.opencb.opencga.core.models.federation; + +import java.util.List; + +public class Federation { + + private List clients; + private List servers; + + public Federation() { + } + + public Federation(List clients, List servers) { + this.clients = clients; + this.servers = servers; + } + + @Override + public String toString() { + final StringBuilder sb = new StringBuilder("Federation{"); + sb.append("clients=").append(clients); + sb.append(", servers=").append(servers); + sb.append('}'); + return sb.toString(); + } + + public List getClients() { + return clients; + } + + public Federation setClients(List clients) { + this.clients = clients; + return this; + } + + public List getServers() { + 
return servers; + } + + public Federation setServers(List servers) { + this.servers = servers; + return this; + } +} diff --git a/opencga-core/src/main/java/org/opencb/opencga/core/models/federation/FederationClient.java b/opencga-core/src/main/java/org/opencb/opencga/core/models/federation/FederationClient.java new file mode 100644 index 0000000000..50190c1003 --- /dev/null +++ b/opencga-core/src/main/java/org/opencb/opencga/core/models/federation/FederationClient.java @@ -0,0 +1,145 @@ +package org.opencb.opencga.core.models.federation; + +import org.opencb.commons.annotations.DataField; +import org.opencb.opencga.core.api.FieldConstants; + +public class FederationClient { + + @DataField(id = "id", required = true, indexed = true, unique = true, immutable = true, + description = FieldConstants.FEDERATION_CLIENT_ID_DESCRIPTION) + private String id; + + @DataField(id = "description", description = FieldConstants.FEDERATION_CLIENT_DESCRIPTION_DESCRIPTION) + private String description; + + @DataField(id = "email", description = FieldConstants.FEDERATION_CLIENT_EMAIL_DESCRIPTION) + private String email; + + @DataField(id = "url", description = FieldConstants.FEDERATION_CLIENT_URL_DESCRIPTION) + private String url; + + @DataField(id = "organizationId", description = FieldConstants.FEDERATION_CLIENT_ORGANIZATION_ID_DESCRIPTION) + private String organizationId; + + @DataField(id = "userId", description = FieldConstants.FEDERATION_CLIENT_USER_ID_DESCRIPTION) + private String userId; + + @DataField(id = "password", description = FieldConstants.FEDERATION_CLIENT_PASSWORD_DESCRIPTION) + private String password; + + @DataField(id = "token", description = FieldConstants.FEDERATION_CLIENT_TOKEN_DESCRIPTION) + private String token; + + @DataField(id = "secretKey", description = FieldConstants.FEDERATION_CLIENT_SECRET_KEY_DESCRIPTION) + private String secretKey; + + public FederationClient() { + } + + public FederationClient(String id, String description, String email, String 
url, String organizationId, String userId, String password, + String secretKey) { + this.id = id; + this.description = description; + this.email = email; + this.url = url; + this.organizationId = organizationId; + this.userId = userId; + this.password = password; + this.secretKey = secretKey; + } + + @Override + public String toString() { + final StringBuilder sb = new StringBuilder("FederationClient{"); + sb.append("id='").append(id).append('\''); + sb.append(", description='").append(description).append('\''); + sb.append(", email='").append(email).append('\''); + sb.append(", url='").append(url).append('\''); + sb.append(", organizationId='").append(organizationId).append('\''); + sb.append(", userId='").append(userId).append('\''); + sb.append(", password='").append("xxxxxxxx").append('\''); + sb.append(", secretKey='").append("xxxxxxxx").append('\''); + sb.append('}'); + return sb.toString(); + } + + public String getId() { + return id; + } + + public FederationClient setId(String id) { + this.id = id; + return this; + } + + public String getDescription() { + return description; + } + + public FederationClient setDescription(String description) { + this.description = description; + return this; + } + + public String getEmail() { + return email; + } + + public FederationClient setEmail(String email) { + this.email = email; + return this; + } + + public String getUrl() { + return url; + } + + public FederationClient setUrl(String url) { + this.url = url; + return this; + } + + public String getOrganizationId() { + return organizationId; + } + + public FederationClient setOrganizationId(String organizationId) { + this.organizationId = organizationId; + return this; + } + + public String getUserId() { + return userId; + } + + public FederationClient setUserId(String userId) { + this.userId = userId; + return this; + } + + public String getPassword() { + return password; + } + + public FederationClient setPassword(String password) { + this.password = password; + 
return this; + } + + public String getSecretKey() { + return secretKey; + } + + public FederationClient setSecretKey(String secretKey) { + this.secretKey = secretKey; + return this; + } + + public String getToken() { + return token; + } + + public void setToken(String token) { + this.token = token; + } +} diff --git a/opencga-core/src/main/java/org/opencb/opencga/core/models/federation/FederationClientMixin.java b/opencga-core/src/main/java/org/opencb/opencga/core/models/federation/FederationClientMixin.java new file mode 100644 index 0000000000..5feada2abd --- /dev/null +++ b/opencga-core/src/main/java/org/opencb/opencga/core/models/federation/FederationClientMixin.java @@ -0,0 +1,7 @@ +package org.opencb.opencga.core.models.federation; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; + +@JsonIgnoreProperties({"password", "secretKey"}) +public class FederationClientMixin { +} diff --git a/opencga-core/src/main/java/org/opencb/opencga/core/models/federation/FederationServer.java b/opencga-core/src/main/java/org/opencb/opencga/core/models/federation/FederationServer.java new file mode 100644 index 0000000000..0379fcbbe8 --- /dev/null +++ b/opencga-core/src/main/java/org/opencb/opencga/core/models/federation/FederationServer.java @@ -0,0 +1,120 @@ +package org.opencb.opencga.core.models.federation; + +import org.opencb.commons.annotations.DataField; +import org.opencb.opencga.core.api.FieldConstants; + +public class FederationServer { + + @DataField(id = "id", required = true, indexed = true, unique = true, immutable = true, + description = FieldConstants.FEDERATION_SERVER_ID_DESCRIPTION) + private String id; + + @DataField(id = "description", description = FieldConstants.FEDERATION_SERVER_DESCRIPTION_DESCRIPTION) + private String description; + + @DataField(id = "email", description = FieldConstants.FEDERATION_SERVER_EMAIL_DESCRIPTION) + private String email; + + @DataField(id = "userId", description = 
FieldConstants.FEDERATION_SERVER_USER_ID_DESCRIPTION) + private String userId; + + @DataField(id = "active", description = FieldConstants.FEDERATION_SERVER_ACTIVE_DESCRIPTION) + private boolean active; + + @DataField(id = "expirationTime", description = FieldConstants.FEDERATION_SERVER_EXPIRATION_TIME_DESCRIPTION) + private String expirationTime; + + @DataField(id = "secretKey", description = FieldConstants.FEDERATION_SERVER_SECRET_KEY_DESCRIPTION) + private String secretKey; + + public FederationServer() { + } + + public FederationServer(String id, String description, String email, String userId, boolean active, String expirationTime, + String secretKey) { + this.id = id; + this.description = description; + this.email = email; + this.userId = userId; + this.active = active; + this.expirationTime = expirationTime; + this.secretKey = secretKey; + } + + @Override + public String toString() { + final StringBuilder sb = new StringBuilder("FederationServer{"); + sb.append("id='").append(id).append('\''); + sb.append(", description='").append(description).append('\''); + sb.append(", email='").append(email).append('\''); + sb.append(", userId='").append(userId).append('\''); + sb.append(", active=").append(active); + sb.append(", expirationTime='").append(expirationTime).append('\''); + sb.append(", secretKey='").append("xxxxxxxx").append('\''); + sb.append('}'); + return sb.toString(); + } + + public String getId() { + return id; + } + + public FederationServer setId(String id) { + this.id = id; + return this; + } + + public String getDescription() { + return description; + } + + public FederationServer setDescription(String description) { + this.description = description; + return this; + } + + public String getEmail() { + return email; + } + + public FederationServer setEmail(String email) { + this.email = email; + return this; + } + + public String getUserId() { + return userId; + } + + public FederationServer setUserId(String userId) { + this.userId = userId; + 
return this; + } + + public boolean isActive() { + return active; + } + + public FederationServer setActive(boolean active) { + this.active = active; + return this; + } + + public String getExpirationTime() { + return expirationTime; + } + + public FederationServer setExpirationTime(String expirationTime) { + this.expirationTime = expirationTime; + return this; + } + + public String getSecretKey() { + return secretKey; + } + + public FederationServer setSecretKey(String secretKey) { + this.secretKey = secretKey; + return this; + } +} diff --git a/opencga-core/src/main/java/org/opencb/opencga/core/models/federation/FederationServerCreateParams.java b/opencga-core/src/main/java/org/opencb/opencga/core/models/federation/FederationServerCreateParams.java new file mode 100644 index 0000000000..4f9eb24e16 --- /dev/null +++ b/opencga-core/src/main/java/org/opencb/opencga/core/models/federation/FederationServerCreateParams.java @@ -0,0 +1,77 @@ +package org.opencb.opencga.core.models.federation; + +import org.opencb.commons.annotations.DataField; +import org.opencb.opencga.core.api.FieldConstants; + +public class FederationServerCreateParams { + + @DataField(id = "id", required = true, indexed = true, unique = true, immutable = true, + description = FieldConstants.FEDERATION_CLIENT_ID_DESCRIPTION) + private String id; + + @DataField(id = "description", description = FieldConstants.FEDERATION_CLIENT_DESCRIPTION_DESCRIPTION) + private String description; + + @DataField(id = "email", description = FieldConstants.FEDERATION_CLIENT_EMAIL_DESCRIPTION) + private String email; + + @DataField(id = "userId", description = FieldConstants.FEDERATION_CLIENT_USER_ID_DESCRIPTION) + private String userId; + + public FederationServerCreateParams() { + } + + public FederationServerCreateParams(String id, String description, String email, String userId) { + this.id = id; + this.description = description; + this.email = email; + this.userId = userId; + } + + @Override + public String 
toString() { + final StringBuilder sb = new StringBuilder("FederationServerCreateParams{"); + sb.append("id='").append(id).append('\''); + sb.append(", description='").append(description).append('\''); + sb.append(", email='").append(email).append('\''); + sb.append(", userId='").append(userId).append('\''); + sb.append('}'); + return sb.toString(); + } + + public String getId() { + return id; + } + + public FederationServerCreateParams setId(String id) { + this.id = id; + return this; + } + + public String getDescription() { + return description; + } + + public FederationServerCreateParams setDescription(String description) { + this.description = description; + return this; + } + + public String getEmail() { + return email; + } + + public FederationServerCreateParams setEmail(String email) { + this.email = email; + return this; + } + + public String getUserId() { + return userId; + } + + public FederationServerCreateParams setUserId(String userId) { + this.userId = userId; + return this; + } +} diff --git a/opencga-core/src/main/java/org/opencb/opencga/core/models/federation/FederationServerMixin.java b/opencga-core/src/main/java/org/opencb/opencga/core/models/federation/FederationServerMixin.java new file mode 100644 index 0000000000..3fdbe1af91 --- /dev/null +++ b/opencga-core/src/main/java/org/opencb/opencga/core/models/federation/FederationServerMixin.java @@ -0,0 +1,7 @@ +package org.opencb.opencga.core.models.federation; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; + +@JsonIgnoreProperties({"secretKey"}) +public class FederationServerMixin { +} diff --git a/opencga-core/src/main/java/org/opencb/opencga/core/models/organizations/Organization.java b/opencga-core/src/main/java/org/opencb/opencga/core/models/organizations/Organization.java index 55520c9b83..f019586f33 100644 --- a/opencga-core/src/main/java/org/opencb/opencga/core/models/organizations/Organization.java +++ 
b/opencga-core/src/main/java/org/opencb/opencga/core/models/organizations/Organization.java @@ -3,6 +3,7 @@ import org.opencb.commons.annotations.DataClass; import org.opencb.commons.annotations.DataField; import org.opencb.opencga.core.api.FieldConstants; +import org.opencb.opencga.core.models.federation.Federation; import org.opencb.opencga.core.models.notes.Note; import org.opencb.opencga.core.models.project.Project; @@ -39,6 +40,9 @@ public class Organization { @DataField(id = "projects", description = FieldConstants.ORGANIZATION_PROJECTS_DESCRIPTION) private List projects; + @DataField(id = "federation", description = FieldConstants.ORGANIZATION_FEDERATION_DESCRIPTION) + private Federation federation; + @DataField(id = "notes", description = FieldConstants.ORGANIZATION_NOTES_DESCRIPTION) private List notes; @@ -80,6 +84,7 @@ public String toString() { sb.append(", creationDate='").append(creationDate).append('\''); sb.append(", modificationDate='").append(modificationDate).append('\''); sb.append(", projects=").append(projects); + sb.append(", federation=").append(federation); sb.append(", notes=").append(notes); sb.append(", configuration=").append(configuration); sb.append(", internal=").append(internal); @@ -151,6 +156,15 @@ public Organization setModificationDate(String modificationDate) { return this; } + public Federation getFederation() { + return federation; + } + + public Organization setFederation(Federation federation) { + this.federation = federation; + return this; + } + public List getProjects() { return projects; } diff --git a/opencga-core/src/main/java/org/opencb/opencga/core/models/user/Account.java b/opencga-core/src/main/java/org/opencb/opencga/core/models/user/Account.java index 293ade331b..16b2bca684 100644 --- a/opencga-core/src/main/java/org/opencb/opencga/core/models/user/Account.java +++ b/opencga-core/src/main/java/org/opencb/opencga/core/models/user/Account.java @@ -113,13 +113,19 @@ public Account 
setAuthentication(AuthenticationOrigin authentication) { public static class AuthenticationOrigin { private String id; + private boolean federation; private boolean application; public AuthenticationOrigin() { } public AuthenticationOrigin(String id, boolean application) { + this(id, false, application); + } + + public AuthenticationOrigin(String id, boolean federation, boolean application) { this.id = id; + this.federation = federation; this.application = application; } @@ -127,6 +133,7 @@ public AuthenticationOrigin(String id, boolean application) { public String toString() { final StringBuilder sb = new StringBuilder("AuthenticationOrigin{"); sb.append("id='").append(id).append('\''); + sb.append(", federation=").append(federation); sb.append(", application=").append(application); sb.append('}'); return sb.toString(); @@ -141,6 +148,14 @@ public AuthenticationOrigin setId(String id) { return this; } + public boolean isFederation() { + return federation; + } + + public void setFederation(boolean federation) { + this.federation = federation; + } + public boolean getApplication() { return application; } diff --git a/opencga-server/src/main/java/org/opencb/opencga/server/generator/writers/cli/ExecutorsCliRestApiWriter.java b/opencga-server/src/main/java/org/opencb/opencga/server/generator/writers/cli/ExecutorsCliRestApiWriter.java index faa3dd8626..8dbbfe1fe3 100644 --- a/opencga-server/src/main/java/org/opencb/opencga/server/generator/writers/cli/ExecutorsCliRestApiWriter.java +++ b/opencga-server/src/main/java/org/opencb/opencga/server/generator/writers/cli/ExecutorsCliRestApiWriter.java @@ -58,7 +58,7 @@ protected String getClassImports(String key) { imports.add("com.fasterxml.jackson.databind.DeserializationFeature"); imports.add("org.opencb.opencga.app.cli.main.*"); imports.add("org.opencb.opencga.core.response.RestResponse"); - imports.add("org.opencb.opencga.client.exceptions.ClientException"); + 
imports.add("org.opencb.opencga.core.exceptions.ClientException"); imports.add("org.opencb.commons.datastore.core.ObjectMap"); imports.add("org.opencb.opencga.catalog.exceptions.CatalogAuthenticationException"); imports.add("org.opencb.opencga.core.common.JacksonUtils"); diff --git a/pom.xml b/pom.xml index 911c6ab2dc..d5d8e72053 100644 --- a/pom.xml +++ b/pom.xml @@ -49,7 +49,7 @@ 4.0.0-SNAPSHOT 6.0.0-SNAPSHOT 4.0.0-SNAPSHOT - 0.2.0 + 0.2.0 2.14.3 2.30.1 @@ -61,7 +61,7 @@ 2.17.2 8.11.3 0.11.2 - + 9.4.53.v20231009 ${jetty.version} 28.0-jre @@ -1429,6 +1429,23 @@ https://uk.ws.zettagenomics.com/cellbase/ v5.8
+ + + + org.apache.maven.plugins + maven-source-plugin + 3.2.0 + + + attach-sources + + jar-no-fork + + + + + + From d0877444a92745a68179c7fdb30c801bb7e60653 Mon Sep 17 00:00:00 2001 From: pfurio Date: Tue, 14 Jan 2025 15:04:07 +0100 Subject: [PATCH 096/122] catalog: store federation info in token, #TASK-7192 --- .../authentication/AuthenticationManager.java | 61 +++++++++++++++--- .../AzureADAuthenticationManager.java | 8 ++- .../CatalogAuthenticationManager.java | 19 +++--- .../auth/authentication/JwtManager.java | 6 +- .../LDAPAuthenticationManager.java | 21 ++++-- .../SSOAuthenticationManager.java | 19 ++++-- .../azure/AuthenticationFactory.java | 7 +- .../catalog/db/api/ProjectDBAdaptor.java | 1 + .../catalog/db/api/StudyDBAdaptor.java | 1 + .../db/mongodb/ProjectMongoDBAdaptor.java | 1 + .../db/mongodb/StudyMongoDBAdaptor.java | 1 + .../catalog/managers/ProjectManager.java | 6 ++ .../catalog/managers/StudyManager.java | 11 ++-- .../catalog/utils/FederationUtils.java | 32 ++++++++++ .../src/main/resources/catalog-indexes.txt | 4 +- .../authentication/JwtSessionManagerTest.java | 4 +- .../opencga/core/api/FieldConstants.java | 5 ++ .../opencga/core/client/GenericClient.java | 10 +++ .../opencb/opencga/core/common/JwtUtils.java | 6 +- .../opencga/core/models/JwtPayload.java | 18 +++--- .../models/federation/FederationClient.java | 18 +++++- .../federation/FederationClientMixin.java | 2 +- .../federation/FederationClientRef.java | 64 +++++++++++++++++++ .../opencga/core/models/project/Project.java | 25 ++++++-- .../core/models/project/ProjectInternal.java | 17 ++++- .../opencga/core/models/study/Study.java | 12 ++++ .../core/models/study/StudyInternal.java | 18 +++++- .../opencga/server/rest/OpenCGAWSServer.java | 6 +- 28 files changed, 330 insertions(+), 73 deletions(-) create mode 100644 opencga-catalog/src/main/java/org/opencb/opencga/catalog/utils/FederationUtils.java create mode 100644 
opencga-core/src/main/java/org/opencb/opencga/core/models/federation/FederationClientRef.java diff --git a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/auth/authentication/AuthenticationManager.java b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/auth/authentication/AuthenticationManager.java index d93cf48c4a..7b25cddf83 100644 --- a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/auth/authentication/AuthenticationManager.java +++ b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/auth/authentication/AuthenticationManager.java @@ -17,10 +17,16 @@ package org.opencb.opencga.catalog.auth.authentication; import org.apache.commons.lang3.StringUtils; +import org.opencb.commons.datastore.core.Query; +import org.opencb.commons.datastore.core.QueryOptions; +import org.opencb.opencga.catalog.db.DBAdaptorFactory; +import org.opencb.opencga.catalog.db.api.ProjectDBAdaptor; import org.opencb.opencga.catalog.exceptions.CatalogAuthenticationException; import org.opencb.opencga.catalog.exceptions.CatalogException; import org.opencb.opencga.core.api.ParamConstants; import org.opencb.opencga.core.models.JwtPayload; +import org.opencb.opencga.core.models.project.Project; +import org.opencb.opencga.core.models.study.Study; import org.opencb.opencga.core.models.user.AuthenticationResponse; import org.opencb.opencga.core.models.user.User; import org.opencb.opencga.core.response.OpenCGAResult; @@ -30,10 +36,7 @@ import javax.crypto.spec.SecretKeySpec; import java.io.Closeable; import java.security.Key; -import java.util.Collections; -import java.util.Date; -import java.util.List; -import java.util.Map; +import java.util.*; /** * @author Jacobo Coll <jacobo167@gmail.com> @@ -42,6 +45,7 @@ public abstract class AuthenticationManager implements Closeable { protected JwtManager jwtManager; + protected final DBAdaptorFactory dbAdaptorFactory; private final long expiration; protected Logger logger; @@ -49,7 +53,8 @@ public abstract class 
AuthenticationManager implements Closeable { protected int DEFAULT_CONNECTION_TIMEOUT = 500; // In milliseconds protected int DEFAULT_READ_TIMEOUT = 1000; // In milliseconds - AuthenticationManager(long expiration) { + AuthenticationManager(DBAdaptorFactory dbAdaptorFactory, long expiration) { + this.dbAdaptorFactory = dbAdaptorFactory; this.expiration = expiration; // Any class extending this one must properly initialise JwtManager @@ -165,9 +170,10 @@ public abstract void changePassword(String organizationId, String userId, String * * @param organizationId Organization id. * @param userId user. + * @throws CatalogAuthenticationException CatalogAuthenticationException * @return A token. */ - public String createToken(String organizationId, String userId) { + public String createToken(String organizationId, String userId) throws CatalogAuthenticationException { return createToken(organizationId, userId, Collections.emptyMap(), expiration); } @@ -177,9 +183,10 @@ public String createToken(String organizationId, String userId) { * @param organizationId Organization id. * @param userId user. * @param expiration expiration time. + * @throws CatalogAuthenticationException CatalogAuthenticationException * @return A token. */ - public String createToken(String organizationId, String userId, long expiration) { + public String createToken(String organizationId, String userId, long expiration) throws CatalogAuthenticationException { return createToken(organizationId, userId, Collections.emptyMap(), expiration); } @@ -189,9 +196,10 @@ public String createToken(String organizationId, String userId, long expiration) * @param organizationId Organization id. * @param userId user. * @param claims claims. + * @throws CatalogAuthenticationException CatalogAuthenticationException * @return A token. 
*/ - public String createToken(String organizationId, String userId, Map claims) { + public String createToken(String organizationId, String userId, Map claims) throws CatalogAuthenticationException { return createToken(organizationId, userId, claims, expiration); } @@ -202,9 +210,11 @@ public String createToken(String organizationId, String userId, Map claims, long expiration); + public abstract String createToken(String organizationId, String userId, Map claims, long expiration) + throws CatalogAuthenticationException; /** * Create a token for the user with no expiration time. @@ -212,12 +222,43 @@ public String createToken(String organizationId, String userId, Map claims); + public abstract String createNonExpiringToken(String organizationId, String userId, Map claims) + throws CatalogAuthenticationException; public Date getExpirationDate(String token) throws CatalogAuthenticationException { return jwtManager.getExpiration(token); } + protected List getFederations(String organizationId, String userId) + throws CatalogAuthenticationException { + Query query = new Query(ProjectDBAdaptor.QueryParams.INTERNAL_FEDERATED.key(), true); + OpenCGAResult result; + try { + result = dbAdaptorFactory.getCatalogProjectDbAdaptor(organizationId).get(query, QueryOptions.empty(), userId); + } catch (Exception e) { + throw new CatalogAuthenticationException("Could not obtain federated projects for user " + userId, e); + } + if (result.getNumResults() == 0) { + return Collections.emptyList(); + } + + // Build the federations list + Map federationMap = new HashMap<>(); + for (Project project : result.getResults()) { + federationMap.putIfAbsent(project.getFederation().getId(), new JwtPayload.FederationJwtPayload(project.getFederation().getId(), + new LinkedList<>(), new LinkedList<>())); + JwtPayload.FederationJwtPayload federation = federationMap.get(project.getFederation().getId()); + federation.getProjectIds().add(project.getFqn()); + 
federation.getProjectIds().add(project.getUuid()); + for (Study study : project.getStudies()) { + federation.getStudyIds().add(study.getFqn()); + federation.getStudyIds().add(study.getUuid()); + } + } + return new ArrayList<>(federationMap.values()); + } + } diff --git a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/auth/authentication/AzureADAuthenticationManager.java b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/auth/authentication/AzureADAuthenticationManager.java index 6a3a0480c2..f5445c4628 100644 --- a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/auth/authentication/AzureADAuthenticationManager.java +++ b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/auth/authentication/AzureADAuthenticationManager.java @@ -37,6 +37,7 @@ import org.apache.logging.log4j.core.config.Configurator; import org.opencb.commons.datastore.core.ObjectMap; import org.opencb.opencga.catalog.auth.authentication.azure.AuthenticationProvider; +import org.opencb.opencga.catalog.db.DBAdaptorFactory; import org.opencb.opencga.catalog.exceptions.CatalogAuthenticationException; import org.opencb.opencga.catalog.exceptions.CatalogException; import org.opencb.opencga.core.common.TimeUtils; @@ -71,8 +72,9 @@ public class AzureADAuthenticationManager extends AuthenticationManager { private Map> filters; private Map publicKeyMap; - public AzureADAuthenticationManager(AuthenticationOrigin authenticationOrigin) throws CatalogException { - super(0L); + public AzureADAuthenticationManager(AuthenticationOrigin authenticationOrigin, DBAdaptorFactory dbAdaptorFactory) + throws CatalogException { + super(dbAdaptorFactory, 0L); this.originId = authenticationOrigin.getId(); @@ -152,7 +154,7 @@ public static void validateAuthenticationOriginConfiguration(AuthenticationOrigi throw new CatalogException("Unknown authentication type. 
Expected type '" + AuthenticationOrigin.AuthenticationType.AzureAD + "' but received '" + authenticationOrigin.getType() + "'."); } - AzureADAuthenticationManager azureADAuthenticationManager = new AzureADAuthenticationManager(authenticationOrigin); + AzureADAuthenticationManager azureADAuthenticationManager = new AzureADAuthenticationManager(authenticationOrigin, null); azureADAuthenticationManager.close(); } diff --git a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/auth/authentication/CatalogAuthenticationManager.java b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/auth/authentication/CatalogAuthenticationManager.java index 61f9fef807..8e42c91911 100644 --- a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/auth/authentication/CatalogAuthenticationManager.java +++ b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/auth/authentication/CatalogAuthenticationManager.java @@ -28,6 +28,7 @@ import org.opencb.opencga.core.common.PasswordUtils; import org.opencb.opencga.core.config.AuthenticationOrigin; import org.opencb.opencga.core.config.Email; +import org.opencb.opencga.core.models.JwtPayload; import org.opencb.opencga.core.models.user.AuthenticationResponse; import org.opencb.opencga.core.models.user.User; import org.opencb.opencga.core.response.OpenCGAResult; @@ -48,14 +49,11 @@ public class CatalogAuthenticationManager extends AuthenticationManager { public static final String OPENCGA = "OPENCGA"; private final Email emailConfig; - private final DBAdaptorFactory dbAdaptorFactory; - public CatalogAuthenticationManager(DBAdaptorFactory dbAdaptorFactory, Email emailConfig, String algorithm, String secretKeyString, long expiration) { - super(expiration); + super(dbAdaptorFactory, expiration); this.emailConfig = emailConfig; - this.dbAdaptorFactory = dbAdaptorFactory; SignatureAlgorithm signatureAlgorithm = SignatureAlgorithm.valueOf(algorithm); Key secretKey = this.converStringToKeyObject(secretKeyString, 
signatureAlgorithm.getJcaName()); @@ -112,13 +110,18 @@ public void newPassword(String organizationId, String userId, String newPassword } @Override - public String createToken(String organizationId, String userId, Map claims, long expiration) { - return jwtManager.createJWTToken(organizationId, AuthenticationOrigin.AuthenticationType.OPENCGA, userId, claims, expiration); + public String createToken(String organizationId, String userId, Map claims, long expiration) + throws CatalogAuthenticationException { + List federations = getFederations(organizationId, userId); + return jwtManager.createJWTToken(organizationId, AuthenticationOrigin.AuthenticationType.OPENCGA, userId, claims, federations, + expiration); } @Override - public String createNonExpiringToken(String organizationId, String userId, Map claims) { - return jwtManager.createJWTToken(organizationId, AuthenticationOrigin.AuthenticationType.OPENCGA, userId, claims, 0L); + public String createNonExpiringToken(String organizationId, String userId, Map claims) + throws CatalogAuthenticationException { + List federations = getFederations(organizationId, userId); + return jwtManager.createJWTToken(organizationId, AuthenticationOrigin.AuthenticationType.OPENCGA, userId, claims, federations, 0L); } @Override diff --git a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/auth/authentication/JwtManager.java b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/auth/authentication/JwtManager.java index 81fdc1c2eb..91ccd1619c 100644 --- a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/auth/authentication/JwtManager.java +++ b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/auth/authentication/JwtManager.java @@ -17,6 +17,7 @@ package org.opencb.opencga.catalog.auth.authentication; import io.jsonwebtoken.*; +import org.apache.commons.collections4.CollectionUtils; import org.opencb.opencga.catalog.exceptions.CatalogAuthenticationException; import org.opencb.opencga.core.common.JwtUtils; 
import org.opencb.opencga.core.config.AuthenticationOrigin; @@ -89,7 +90,7 @@ public JwtManager setPublicKey(Key publicKey) { } public String createJWTToken(String organizationId, AuthenticationOrigin.AuthenticationType type, String userId, - Map claims, long expiration) { + Map claims, List federations, long expiration) { long currentTime = System.currentTimeMillis(); JwtBuilder jwtBuilder = Jwts.builder(); @@ -99,6 +100,9 @@ public String createJWTToken(String organizationId, AuthenticationOrigin.Authent if (type != null) { jwtBuilder.addClaims(Collections.singletonMap(AUTH_ORIGIN, type)); } + if (CollectionUtils.isNotEmpty(federations)) { + jwtBuilder.addClaims(Collections.singletonMap(JwtPayload.FEDERATIONS, federations)); + } jwtBuilder.setSubject(userId) .setAudience(organizationId) diff --git a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/auth/authentication/LDAPAuthenticationManager.java b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/auth/authentication/LDAPAuthenticationManager.java index f79eb85b19..9d1ab62a93 100644 --- a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/auth/authentication/LDAPAuthenticationManager.java +++ b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/auth/authentication/LDAPAuthenticationManager.java @@ -22,11 +22,13 @@ import org.apache.commons.lang3.concurrent.BasicThreadFactory; import org.apache.commons.lang3.time.StopWatch; import org.opencb.commons.datastore.core.ObjectMap; +import org.opencb.opencga.catalog.db.DBAdaptorFactory; import org.opencb.opencga.catalog.exceptions.CatalogAuthenticationException; import org.opencb.opencga.catalog.exceptions.CatalogException; import org.opencb.opencga.catalog.utils.ParamUtils; import org.opencb.opencga.core.common.TimeUtils; import org.opencb.opencga.core.config.AuthenticationOrigin; +import org.opencb.opencga.core.models.JwtPayload; import org.opencb.opencga.core.models.organizations.TokenConfiguration; import 
org.opencb.opencga.core.models.user.*; import org.opencb.opencga.core.response.OpenCGAResult; @@ -68,8 +70,9 @@ public class LDAPAuthenticationManager extends AuthenticationManager { private String host; private boolean ldaps; - public LDAPAuthenticationManager(AuthenticationOrigin authenticationOrigin, String algorithm, String secretKeyString, long expiration) { - super(expiration); + public LDAPAuthenticationManager(AuthenticationOrigin authenticationOrigin, String algorithm, String secretKeyString, + DBAdaptorFactory dbAdaptorFactory, long expiration) { + super(dbAdaptorFactory, expiration); this.logger = LoggerFactory.getLogger(LDAPAuthenticationManager.class); this.host = authenticationOrigin.getHost(); @@ -135,7 +138,7 @@ public static void validateAuthenticationOriginConfiguration(AuthenticationOrigi TokenConfiguration defaultTokenConfig = TokenConfiguration.init(); LDAPAuthenticationManager ldapAuthenticationManager = new LDAPAuthenticationManager(authenticationOrigin, - defaultTokenConfig.getAlgorithm(), defaultTokenConfig.getSecretKey(), defaultTokenConfig.getExpiration()); + defaultTokenConfig.getAlgorithm(), defaultTokenConfig.getSecretKey(), null, defaultTokenConfig.getExpiration()); DirContext dirContext = ldapAuthenticationManager.getDirContext(ldapAuthenticationManager.getDefaultEnv(), 1); if (dirContext == null) { throw new CatalogException("LDAP: Could not connect to the LDAP server using the provided configuration."); @@ -234,13 +237,17 @@ public void newPassword(String organizationId, String userId, String newPassword } @Override - public String createToken(String organizationId, String userId, Map claims, long expiration) { - return jwtManager.createJWTToken(organizationId, AuthenticationType.LDAP, userId, claims, expiration); + public String createToken(String organizationId, String userId, Map claims, long expiration) + throws CatalogAuthenticationException { + List federations = getFederations(organizationId, userId); + return 
jwtManager.createJWTToken(organizationId, AuthenticationType.LDAP, userId, claims, federations, expiration); } @Override - public String createNonExpiringToken(String organizationId, String userId, Map claims) { - return jwtManager.createJWTToken(organizationId, AuthenticationType.LDAP, userId, claims, 0L); + public String createNonExpiringToken(String organizationId, String userId, Map claims) + throws CatalogAuthenticationException { + List federations = getFederations(organizationId, userId); + return jwtManager.createJWTToken(organizationId, AuthenticationType.LDAP, userId, claims, federations, 0L); } /* Private methods */ diff --git a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/auth/authentication/SSOAuthenticationManager.java b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/auth/authentication/SSOAuthenticationManager.java index e676029e41..2edf385a33 100644 --- a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/auth/authentication/SSOAuthenticationManager.java +++ b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/auth/authentication/SSOAuthenticationManager.java @@ -2,9 +2,11 @@ import io.jsonwebtoken.SignatureAlgorithm; import org.apache.commons.lang3.NotImplementedException; +import org.opencb.opencga.catalog.db.DBAdaptorFactory; import org.opencb.opencga.catalog.exceptions.CatalogAuthenticationException; import org.opencb.opencga.catalog.exceptions.CatalogException; import org.opencb.opencga.core.config.AuthenticationOrigin; +import org.opencb.opencga.core.models.JwtPayload; import org.opencb.opencga.core.models.user.AuthenticationResponse; import org.opencb.opencga.core.models.user.User; import org.opencb.opencga.core.response.OpenCGAResult; @@ -17,8 +19,8 @@ public class SSOAuthenticationManager extends AuthenticationManager { - public SSOAuthenticationManager(String algorithm, String secretKeyString, long expiration) { - super(expiration); + public SSOAuthenticationManager(String algorithm, String 
secretKeyString, DBAdaptorFactory dbAdaptorFactory, long expiration) { + super(dbAdaptorFactory, expiration); SignatureAlgorithm signatureAlgorithm = SignatureAlgorithm.valueOf(algorithm); Key secretKey = this.converStringToKeyObject(secretKeyString, signatureAlgorithm.getJcaName()); @@ -71,13 +73,18 @@ public void newPassword(String organizationId, String userId, String newPassword } @Override - public String createToken(String organizationId, String userId, Map claims, long expiration) { - return jwtManager.createJWTToken(organizationId, AuthenticationOrigin.AuthenticationType.SSO, userId, claims, expiration); + public String createToken(String organizationId, String userId, Map claims, long expiration) + throws CatalogAuthenticationException { + List federations = getFederations(organizationId, userId); + return jwtManager.createJWTToken(organizationId, AuthenticationOrigin.AuthenticationType.SSO, userId, claims, federations, + expiration); } @Override - public String createNonExpiringToken(String organizationId, String userId, Map claims) { - return jwtManager.createJWTToken(organizationId, AuthenticationOrigin.AuthenticationType.SSO, userId, claims, 0L); + public String createNonExpiringToken(String organizationId, String userId, Map claims) + throws CatalogAuthenticationException { + List federations = getFederations(organizationId, userId); + return jwtManager.createJWTToken(organizationId, AuthenticationOrigin.AuthenticationType.SSO, userId, claims, federations, 0L); } @Override diff --git a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/auth/authentication/azure/AuthenticationFactory.java b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/auth/authentication/azure/AuthenticationFactory.java index 7172401fda..c4c7e8daa6 100644 --- a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/auth/authentication/azure/AuthenticationFactory.java +++ 
b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/auth/authentication/azure/AuthenticationFactory.java @@ -51,10 +51,11 @@ public void configureOrganizationAuthenticationManager(Organization organization switch (authOrigin.getType()) { case LDAP: tmpAuthenticationManagerMap.put(authOrigin.getId(), - new LDAPAuthenticationManager(authOrigin, algorithm, secretKey, expiration)); + new LDAPAuthenticationManager(authOrigin, algorithm, secretKey, catalogDBAdaptorFactory, expiration)); break; case AzureAD: - tmpAuthenticationManagerMap.put(authOrigin.getId(), new AzureADAuthenticationManager(authOrigin)); + tmpAuthenticationManagerMap.put(authOrigin.getId(), new AzureADAuthenticationManager(authOrigin, + catalogDBAdaptorFactory)); break; case OPENCGA: CatalogAuthenticationManager catalogAuthenticationManager = @@ -65,7 +66,7 @@ public void configureOrganizationAuthenticationManager(Organization organization break; case SSO: tmpAuthenticationManagerMap.put(authOrigin.getId(), new SSOAuthenticationManager(algorithm, secretKey, - expiration)); + catalogDBAdaptorFactory, expiration)); break; default: logger.warn("Unexpected authentication origin type '{}' for id '{}' found in organization '{}'. 
" diff --git a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/api/ProjectDBAdaptor.java b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/api/ProjectDBAdaptor.java index 1ff667bdba..e8107017ef 100644 --- a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/api/ProjectDBAdaptor.java +++ b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/api/ProjectDBAdaptor.java @@ -55,6 +55,7 @@ enum QueryParams implements QueryParam { ORGANISM_ASSEMBLY("organism.assembly", TEXT, ""), CURRENT_RELEASE("currentRelease", INTEGER, ""), FQN("fqn", TEXT, ""), + INTERNAL_FEDERATED("internal.federated", BOOLEAN, ""), INTERNAL_STATUS("internal.status", TEXT_ARRAY, ""), INTERNAL_STATUS_ID("internal.status.id", TEXT, ""), INTERNAL_STATUS_MSG("internal.status.msg", TEXT, ""), diff --git a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/api/StudyDBAdaptor.java b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/api/StudyDBAdaptor.java index 0afd3f255d..ff925097f1 100644 --- a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/api/StudyDBAdaptor.java +++ b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/api/StudyDBAdaptor.java @@ -426,6 +426,7 @@ enum QueryParams implements QueryParam { STATUS_ID("status.id", TEXT, ""), STATUS_DATE("status.date", TEXT, ""), STATUS_DESCRIPTION("status.description", TEXT, ""), + INTERNAL_FEDERATED("internal.federated", BOOLEAN, ""), INTERNAL_STATUS("internal.status", TEXT_ARRAY, ""), INTERNAL_STATUS_ID("internal.status.id", TEXT, ""), INTERNAL_STATUS_DATE("internal.status.date", TEXT, ""), diff --git a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/mongodb/ProjectMongoDBAdaptor.java b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/mongodb/ProjectMongoDBAdaptor.java index 3a91bc2628..d6ea97b3af 100644 --- a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/mongodb/ProjectMongoDBAdaptor.java +++ 
b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/mongodb/ProjectMongoDBAdaptor.java @@ -869,6 +869,7 @@ private Bson parseQuery(Query query) throws CatalogDBException { case ORGANISM_SCIENTIFIC_NAME: case ORGANISM_COMMON_NAME: case ORGANISM_ASSEMBLY: + case INTERNAL_FEDERATED: case INTERNAL_STATUS_MSG: case INTERNAL_STATUS_DATE: case INTERNAL_DATASTORES: diff --git a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/mongodb/StudyMongoDBAdaptor.java b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/mongodb/StudyMongoDBAdaptor.java index c91fd4c366..7cf1ec343c 100644 --- a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/mongodb/StudyMongoDBAdaptor.java +++ b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/mongodb/StudyMongoDBAdaptor.java @@ -1829,6 +1829,7 @@ private Bson parseQuery(Query query) throws CatalogDBException { case UUID: case NAME: case DESCRIPTION: + case INTERNAL_FEDERATED: case INTERNAL_STATUS_DATE: case DATASTORES: case SIZE: diff --git a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/ProjectManager.java b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/ProjectManager.java index 9259185281..92e36d3a79 100644 --- a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/ProjectManager.java +++ b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/ProjectManager.java @@ -37,6 +37,7 @@ import org.opencb.opencga.core.models.audit.AuditRecord; import org.opencb.opencga.core.models.cohort.Cohort; import org.opencb.opencga.core.models.common.Enums; +import org.opencb.opencga.core.models.federation.FederationClientRef; import org.opencb.opencga.core.models.individual.Individual; import org.opencb.opencga.core.models.project.*; import org.opencb.opencga.core.models.sample.Sample; @@ -212,6 +213,11 @@ private void validateProjectForCreation(String organizationId, Project project) ProjectDBAdaptor.QueryParams.MODIFICATION_DATE.key())); 
project.setCurrentRelease(1); project.setInternal(ProjectInternal.init()); + project.setFederation(ParamUtils.defaultObject(project.getFederation(), new FederationClientRef())); + if (StringUtils.isNotEmpty(project.getFederation().getId())) { + FederationUtils.validateFederationId(organizationId, project.getFederation().getId(), catalogDBAdaptorFactory); + project.getInternal().setFederated(true); + } project.setAttributes(ParamUtils.defaultObject(project.getAttributes(), HashMap::new)); project.setFqn(FqnUtils.buildFqn(organizationId, project.getId())); diff --git a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/StudyManager.java b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/StudyManager.java index 70f933f643..56b342f3bd 100644 --- a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/StudyManager.java +++ b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/StudyManager.java @@ -37,10 +37,7 @@ import org.opencb.opencga.catalog.io.CatalogIOManager; import org.opencb.opencga.catalog.io.IOManager; import org.opencb.opencga.catalog.io.IOManagerFactory; -import org.opencb.opencga.catalog.utils.AnnotationUtils; -import org.opencb.opencga.catalog.utils.CatalogFqn; -import org.opencb.opencga.catalog.utils.ParamUtils; -import org.opencb.opencga.catalog.utils.UuidUtils; +import org.opencb.opencga.catalog.utils.*; import org.opencb.opencga.core.api.ParamConstants; import org.opencb.opencga.core.common.JacksonUtils; import org.opencb.opencga.core.common.TimeUtils; @@ -53,6 +50,7 @@ import org.opencb.opencga.core.models.cohort.CohortPermissions; import org.opencb.opencga.core.models.common.Enums; import org.opencb.opencga.core.models.family.FamilyPermissions; +import org.opencb.opencga.core.models.federation.FederationClientRef; import org.opencb.opencga.core.models.file.File; import org.opencb.opencga.core.models.file.FileInternal; import org.opencb.opencga.core.models.file.FilePermissions; @@ -428,6 
+426,11 @@ public OpenCGAResult create(String projectStr, Study study, QueryOptions study.setSources(ParamUtils.defaultObject(study.getSources(), Collections::emptyList)); study.setDescription(ParamUtils.defaultString(study.getDescription(), "")); study.setInternal(StudyInternal.init()); + study.setFederation(ParamUtils.defaultObject(study.getFederation(), new FederationClientRef("", "", ""))); + if (StringUtils.isNotEmpty(study.getFederation().getId())) { + FederationUtils.validateFederationId(organizationId, study.getFederation().getId(), catalogDBAdaptorFactory); + study.getInternal().setFederated(true); + } study.setStatus(ParamUtils.defaultObject(study.getStatus(), Status::new)); study.setCreationDate(ParamUtils.checkDateOrGetCurrentDate(study.getCreationDate(), StudyDBAdaptor.QueryParams.CREATION_DATE.key())); diff --git a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/utils/FederationUtils.java b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/utils/FederationUtils.java new file mode 100644 index 0000000000..33d6cddc38 --- /dev/null +++ b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/utils/FederationUtils.java @@ -0,0 +1,32 @@ +package org.opencb.opencga.catalog.utils; + +import org.apache.commons.collections4.CollectionUtils; +import org.opencb.commons.datastore.core.QueryOptions; +import org.opencb.opencga.catalog.db.DBAdaptorFactory; +import org.opencb.opencga.catalog.db.api.OrganizationDBAdaptor; +import org.opencb.opencga.catalog.exceptions.CatalogDBException; +import org.opencb.opencga.catalog.exceptions.CatalogParameterException; +import org.opencb.opencga.core.models.organizations.Organization; + +public class FederationUtils { + + private static final QueryOptions INCLUDE_ORG_FEDERATION = new QueryOptions(QueryOptions.INCLUDE, + OrganizationDBAdaptor.QueryParams.FEDERATION.key()); + + public static void validateFederationId(String organizationId, String federationId, DBAdaptorFactory dbAdaptorFactory) + throws 
CatalogParameterException { + ParamUtils.checkParameter(organizationId, "organizationId"); + ParamUtils.checkParameter(federationId, "federationId"); + Organization organization = null; + try { + organization = dbAdaptorFactory.getCatalogOrganizationDBAdaptor(organizationId).get(INCLUDE_ORG_FEDERATION).first(); + } catch (CatalogDBException e) { + throw new CatalogParameterException("Organization " + organizationId + " not found", e); + } + if (organization.getFederation() == null || CollectionUtils.isEmpty(organization.getFederation().getClients()) + || organization.getFederation().getClients().stream().noneMatch(f -> f.getId().equals(federationId))) { + throw new CatalogParameterException("Organization " + organizationId + " is not federated with " + federationId); + } + } + +} diff --git a/opencga-catalog/src/main/resources/catalog-indexes.txt b/opencga-catalog/src/main/resources/catalog-indexes.txt index 2beb12d1c2..9f51f7a80b 100644 --- a/opencga-catalog/src/main/resources/catalog-indexes.txt +++ b/opencga-catalog/src/main/resources/catalog-indexes.txt @@ -10,10 +10,11 @@ {"collections": ["note", "note_archive"], "fields": {"_creationDate": 1, "studyUid": 1}, "options": {}} {"collections": ["note", "note_archive"], "fields": {"_modificationDate": 1, "studyUid": 1}, "options": {}} -{"collections": ["project"], "fields": {"id": 1}, "options": {"unique": true}} +{"collections": ["project"], "fields": {"id": 1}, "options": {}} {"collections": ["project"], "fields": {"uid": 1}, "options": {"unique": true}} {"collections": ["project"], "fields": {"uuid": 1}, "options": {"unique": true}} {"collections": ["project"], "fields": {"fqn": 1}, "options": {"unique": true}} +{"collections": ["project"], "fields": {"internal.federated": 1}, "options": {}} {"collections": ["user"], "fields": {"id": 1}, "options": {"unique": true}} @@ -25,6 +26,7 @@ {"collections": ["study"], "fields": {"groups.userIds": 1, "uid": 1}, "options": {"unique": true}} {"collections": ["study"], 
"fields": {"_acl": 1}, "options": {}} {"collections": ["study"], "fields": {"_project.uid": 1}, "options": {}} +{"collections": ["study"], "fields": {"internal.federated": 1}, "options": {}} {"collections": ["study"], "fields": {"variableSets.id": 1, "uid": 1}, "options": {"unique": true}} {"collections": ["job"], "fields": {"uuid": 1}, "options": {"unique": true}} diff --git a/opencga-catalog/src/test/java/org/opencb/opencga/catalog/auth/authentication/JwtSessionManagerTest.java b/opencga-catalog/src/test/java/org/opencb/opencga/catalog/auth/authentication/JwtSessionManagerTest.java index c9d2ca8aec..2ba03ee135 100644 --- a/opencga-catalog/src/test/java/org/opencb/opencga/catalog/auth/authentication/JwtSessionManagerTest.java +++ b/opencga-catalog/src/test/java/org/opencb/opencga/catalog/auth/authentication/JwtSessionManagerTest.java @@ -53,7 +53,7 @@ public void setUp() throws Exception { @Test public void testCreateJWTToken() throws Exception { - jwtToken = jwtSessionManager.createJWTToken(organizationId, null, "testUser", Collections.emptyMap(), 60L); + jwtToken = jwtSessionManager.createJWTToken(organizationId, null, "testUser", Collections.emptyMap(), null, 60L); } @Test @@ -81,7 +81,7 @@ public void testInvalidSecretKey() throws CatalogAuthenticationException { @Test public void testNonExpiringToken() throws CatalogException { - String nonExpiringToken = jwtSessionManager.createJWTToken(organizationId, null, "System", null, -1L); + String nonExpiringToken = jwtSessionManager.createJWTToken(organizationId, null, "System", null, null, -1L); assertEquals(jwtSessionManager.getUser(nonExpiringToken), "System"); assertNull(jwtSessionManager.getExpiration(nonExpiringToken)); } diff --git a/opencga-core/src/main/java/org/opencb/opencga/core/api/FieldConstants.java b/opencga-core/src/main/java/org/opencb/opencga/core/api/FieldConstants.java index 9c139140c7..b913f1d79d 100644 --- a/opencga-core/src/main/java/org/opencb/opencga/core/api/FieldConstants.java +++ 
b/opencga-core/src/main/java/org/opencb/opencga/core/api/FieldConstants.java @@ -74,6 +74,7 @@ public class FieldConstants { //Federation public static final String FEDERATION_CLIENT_ID_DESCRIPTION = "Unique ID to identify the federation server."; public static final String FEDERATION_CLIENT_DESCRIPTION_DESCRIPTION = "Description of the federation server."; + public static final String FEDERATION_CLIENT_VERSION_DESCRIPTION = "OpenCGA version of the federation server."; public static final String FEDERATION_CLIENT_EMAIL_DESCRIPTION = "Email of the federation server."; public static final String FEDERATION_CLIENT_URL_DESCRIPTION = "URL of the federation server."; public static final String FEDERATION_CLIENT_ORGANIZATION_ID_DESCRIPTION = "Organization ID from the federation server."; @@ -415,10 +416,12 @@ public class FieldConstants { //Project public static final String PROJECT_FQN = "Full Qualified Name (organization@projectId)."; public static final String PROJECT_ORGANISM = "Organism to which the project belongs."; + public static final String PROJECT_FEDERATION = "Object containing federation details if the project is not local."; public static final String PROJECT_STUDIES = "Project study list."; //ProjectInternal public static final String PROJECT_INTERNAL_DATA_STORES = "Default value is VARIANT."; + public static final String PROJECT_INTERNAL_FEDERATED = "Flag indicating if the project is imported from a federated installation."; public static final String PROJECT_CELLBASE = "Cellbase configuration."; //StudyNotification @@ -427,6 +430,7 @@ public class FieldConstants { //Study public static final String STUDY_ALIAS = "Study alias."; public static final String STUDY_SIZE = "Study size."; + public static final String STUDY_FEDERATION = "Object containing federation details if the study is not local."; public static final String STUDY_NOTIFICATION = "Object represents study notification."; public static final String STUDY_GROUPS = "A List with related groups."; 
public static final String STUDY_FILES = "A List with related files."; @@ -453,6 +457,7 @@ public class FieldConstants { //StudyInternal public static final String STUDY_INTERNAL_INDEX = "Study index."; + public static final String STUDY_INTERNAL_FEDERATED = "Flag indicating if the study is imported from a federated installation."; public static final String STUDY_INTERNAL_CONFIGURATION = "Study configuration."; //AdditionalInfo diff --git a/opencga-core/src/main/java/org/opencb/opencga/core/client/GenericClient.java b/opencga-core/src/main/java/org/opencb/opencga/core/client/GenericClient.java index 72c24969f1..74b62ae6ce 100644 --- a/opencga-core/src/main/java/org/opencb/opencga/core/client/GenericClient.java +++ b/opencga-core/src/main/java/org/opencb/opencga/core/client/GenericClient.java @@ -15,6 +15,16 @@ public GenericClient(String token, ClientConfiguration clientConfiguration) { super(token, clientConfiguration); } + /** + * Returns info about current OpenCGA code. + * @return a RestResponse object. + * @throws ClientException ClientException if there is any server error. + */ + public RestResponse about() throws ClientException { + ObjectMap params = new ObjectMap(); + return execute("meta", null, null, null, "about", params, GET, ObjectMap.class); + } + /** * Get an anonymous token to gain access to the system. * @param organization Organization id. 
diff --git a/opencga-core/src/main/java/org/opencb/opencga/core/common/JwtUtils.java b/opencga-core/src/main/java/org/opencb/opencga/core/common/JwtUtils.java index f732114f62..213e5f7ac0 100644 --- a/opencga-core/src/main/java/org/opencb/opencga/core/common/JwtUtils.java +++ b/opencga-core/src/main/java/org/opencb/opencga/core/common/JwtUtils.java @@ -53,16 +53,16 @@ public static Date getExpirationDate(String token) { return res; } - public static List getFederations(Map claims) { + public static List getFederations(Map claims) { List o = (List) claims.get(FEDERATIONS); if (CollectionUtils.isNotEmpty(o)) { - List federationList = new ArrayList<>(o.size()); + List federationList = new ArrayList<>(o.size()); for (Object federationObject : o) { if (federationObject instanceof Map) { String id = ((Map) federationObject).get("id"); List projectIds = ((Map>) federationObject).get("projectIds"); List studyIds = ((Map>) federationObject).get("studyIds"); - federationList.add(new JwtPayload.Federation(id, projectIds, studyIds)); + federationList.add(new JwtPayload.FederationJwtPayload(id, projectIds, studyIds)); } } return federationList; diff --git a/opencga-core/src/main/java/org/opencb/opencga/core/models/JwtPayload.java b/opencga-core/src/main/java/org/opencb/opencga/core/models/JwtPayload.java index d9dca79389..6e348b9d43 100644 --- a/opencga-core/src/main/java/org/opencb/opencga/core/models/JwtPayload.java +++ b/opencga-core/src/main/java/org/opencb/opencga/core/models/JwtPayload.java @@ -19,14 +19,14 @@ public class JwtPayload { private final String issuer; // Issuer of the JWT token. private final Date issuedAt; // Time when the JWT was issued. private final Date expirationTime; // Expiration time of the JWT. - private final List federations; // Federation information containing the projects and studies the user has access to. + private final List federations; // Federation information containing the projects and studies the user has access to. 
private final String token; public static final String AUTH_ORIGIN = "authOrigin"; public static final String FEDERATIONS = "federations"; public JwtPayload(String userId, String organization, AuthenticationOrigin.AuthenticationType authOrigin, String issuer, Date issuedAt, - Date expirationTime, List federationList, String token) { + Date expirationTime, List federationList, String token) { this.token = token; this.userId = userId; this.organization = organization; @@ -142,19 +142,19 @@ public String getToken() { return token; } - public List getFederations() { + public List getFederations() { return federations; } - public static class Federation { + public static class FederationJwtPayload { private String id; private List projectIds; private List studyIds; - public Federation() { + public FederationJwtPayload() { } - public Federation(String id, List projectIds, List studyIds) { + public FederationJwtPayload(String id, List projectIds, List studyIds) { this.id = id; this.projectIds = projectIds; this.studyIds = studyIds; @@ -174,7 +174,7 @@ public String getId() { return id; } - public Federation setId(String id) { + public FederationJwtPayload setId(String id) { this.id = id; return this; } @@ -183,7 +183,7 @@ public List getProjectIds() { return projectIds; } - public Federation setProjectIds(List projectIds) { + public FederationJwtPayload setProjectIds(List projectIds) { this.projectIds = projectIds; return this; } @@ -192,7 +192,7 @@ public List getStudyIds() { return studyIds; } - public Federation setStudyIds(List studyIds) { + public FederationJwtPayload setStudyIds(List studyIds) { this.studyIds = studyIds; return this; } diff --git a/opencga-core/src/main/java/org/opencb/opencga/core/models/federation/FederationClient.java b/opencga-core/src/main/java/org/opencb/opencga/core/models/federation/FederationClient.java index 50190c1003..e581e23e45 100644 --- a/opencga-core/src/main/java/org/opencb/opencga/core/models/federation/FederationClient.java +++ 
b/opencga-core/src/main/java/org/opencb/opencga/core/models/federation/FederationClient.java @@ -12,6 +12,9 @@ public class FederationClient { @DataField(id = "description", description = FieldConstants.FEDERATION_CLIENT_DESCRIPTION_DESCRIPTION) private String description; + @DataField(id = "version", description = FieldConstants.FEDERATION_CLIENT_VERSION_DESCRIPTION) + private String version; + @DataField(id = "email", description = FieldConstants.FEDERATION_CLIENT_EMAIL_DESCRIPTION) private String email; @@ -36,15 +39,17 @@ public class FederationClient { public FederationClient() { } - public FederationClient(String id, String description, String email, String url, String organizationId, String userId, String password, - String secretKey) { + public FederationClient(String id, String description, String version, String email, String url, String organizationId, String userId, + String password, String token, String secretKey) { this.id = id; this.description = description; + this.version = version; this.email = email; this.url = url; this.organizationId = organizationId; this.userId = userId; this.password = password; + this.token = token; this.secretKey = secretKey; } @@ -53,6 +58,7 @@ public String toString() { final StringBuilder sb = new StringBuilder("FederationClient{"); sb.append("id='").append(id).append('\''); sb.append(", description='").append(description).append('\''); + sb.append(", version='").append(version).append('\''); sb.append(", email='").append(email).append('\''); sb.append(", url='").append(url).append('\''); sb.append(", organizationId='").append(organizationId).append('\''); @@ -81,6 +87,14 @@ public FederationClient setDescription(String description) { return this; } + public String getVersion() { + return version; + } + + public void setVersion(String version) { + this.version = version; + } + public String getEmail() { return email; } diff --git 
a/opencga-core/src/main/java/org/opencb/opencga/core/models/federation/FederationClientMixin.java b/opencga-core/src/main/java/org/opencb/opencga/core/models/federation/FederationClientMixin.java index 5feada2abd..33186c144c 100644 --- a/opencga-core/src/main/java/org/opencb/opencga/core/models/federation/FederationClientMixin.java +++ b/opencga-core/src/main/java/org/opencb/opencga/core/models/federation/FederationClientMixin.java @@ -2,6 +2,6 @@ import com.fasterxml.jackson.annotation.JsonIgnoreProperties; -@JsonIgnoreProperties({"password", "secretKey"}) +@JsonIgnoreProperties({"password", "secretKey", "token"}) public class FederationClientMixin { } diff --git a/opencga-core/src/main/java/org/opencb/opencga/core/models/federation/FederationClientRef.java b/opencga-core/src/main/java/org/opencb/opencga/core/models/federation/FederationClientRef.java new file mode 100644 index 0000000000..5140270888 --- /dev/null +++ b/opencga-core/src/main/java/org/opencb/opencga/core/models/federation/FederationClientRef.java @@ -0,0 +1,64 @@ +package org.opencb.opencga.core.models.federation; + +import org.opencb.commons.annotations.DataField; +import org.opencb.opencga.core.api.FieldConstants; + +public class FederationClientRef { + + @DataField(id = "id", required = true, indexed = true, unique = true, immutable = true, + description = FieldConstants.FEDERATION_CLIENT_ID_DESCRIPTION) + private String id; + + @DataField(id = "description", description = FieldConstants.FEDERATION_CLIENT_DESCRIPTION_DESCRIPTION) + private String description; + + @DataField(id = "version", description = FieldConstants.FEDERATION_CLIENT_VERSION_DESCRIPTION) + private String version; + + public FederationClientRef() { + } + + public FederationClientRef(String id) { + this(id, "", ""); + } + + public FederationClientRef(String id, String description, String version) { + this.id = id; + this.description = description; + this.version = version; + } + + @Override + public String toString() { + final 
StringBuilder sb = new StringBuilder("FederationClientRef{"); + sb.append("id='").append(id).append('\''); + sb.append(", description='").append(description).append('\''); + sb.append(", version='").append(version).append('\''); + sb.append('}'); + return sb.toString(); + } + + public String getId() { + return id; + } + + public void setId(String id) { + this.id = id; + } + + public String getDescription() { + return description; + } + + public void setDescription(String description) { + this.description = description; + } + + public String getVersion() { + return version; + } + + public void setVersion(String version) { + this.version = version; + } +} diff --git a/opencga-core/src/main/java/org/opencb/opencga/core/models/project/Project.java b/opencga-core/src/main/java/org/opencb/opencga/core/models/project/Project.java index 0fd4c17287..9ca3f6d4f5 100644 --- a/opencga-core/src/main/java/org/opencb/opencga/core/models/project/Project.java +++ b/opencga-core/src/main/java/org/opencb/opencga/core/models/project/Project.java @@ -22,6 +22,7 @@ import org.opencb.opencga.core.common.TimeUtils; import org.opencb.opencga.core.config.storage.CellBaseConfiguration; import org.opencb.opencga.core.models.PrivateFields; +import org.opencb.opencga.core.models.federation.FederationClientRef; import org.opencb.opencga.core.models.study.Study; import java.util.HashMap; @@ -106,6 +107,8 @@ public class Project extends PrivateFields { description = FieldConstants.GENERIC_RELEASE_DESCRIPTION) private int currentRelease; + @DataField(id = "federation", description = FieldConstants.PROJECT_FEDERATION) + private FederationClientRef federation; @DataField(id = "studies", description = FieldConstants.PROJECT_STUDIES) @@ -163,19 +166,20 @@ public Project(String id, String name, String creationDate, String modificationD // Clone a project public Project(Project project) { - this(project.getUid(), project.getId(), project.getUuid(), project.getName(), project.getFqn(), 
project.getCreationDate(), - project.getModificationDate(), project.getDescription(), project.getOrganism(), project.getCellbase(), - project.getCurrentRelease(), project.getStudies(), project.getInternal(), project.getAttributes()); + this(project.getUid(), project.getId(), project.getUuid(), project.getName(), project.getFqn(), project.getFederation(), + project.getCreationDate(), project.getModificationDate(), project.getDescription(), project.getOrganism(), + project.getCellbase(), project.getCurrentRelease(), project.getStudies(), project.getInternal(), project.getAttributes()); } - public Project(long uid, String id, String uuid, String name, String fqn, String creationDate, String modificationDate, - String description, ProjectOrganism organism, CellBaseConfiguration cellbase, int currentRelease, List studies, - ProjectInternal internal, Map attributes) { + public Project(long uid, String id, String uuid, String name, String fqn, FederationClientRef federation, String creationDate, + String modificationDate, String description, ProjectOrganism organism, CellBaseConfiguration cellbase, + int currentRelease, List studies, ProjectInternal internal, Map attributes) { super(uid); this.id = id; this.uuid = uuid; this.name = name; this.fqn = fqn; + this.federation = federation; this.creationDate = creationDate; this.modificationDate = modificationDate; this.description = description; @@ -200,6 +204,7 @@ public String toString() { sb.append(", organism=").append(organism); sb.append(", cellbase=").append(cellbase); sb.append(", currentRelease=").append(currentRelease); + sb.append(", federation=").append(federation); sb.append(", studies=").append(studies); sb.append(", internal=").append(internal); sb.append(", attributes=").append(attributes); @@ -294,6 +299,14 @@ public Project setOrganism(ProjectOrganism organism) { return this; } + public FederationClientRef getFederation() { + return federation; + } + + public void setFederation(FederationClientRef federation) 
{ + this.federation = federation; + } + public ProjectInternal getInternal() { return internal; } diff --git a/opencga-core/src/main/java/org/opencb/opencga/core/models/project/ProjectInternal.java b/opencga-core/src/main/java/org/opencb/opencga/core/models/project/ProjectInternal.java index 4fcae0a9ff..7a381530fe 100644 --- a/opencga-core/src/main/java/org/opencb/opencga/core/models/project/ProjectInternal.java +++ b/opencga-core/src/main/java/org/opencb/opencga/core/models/project/ProjectInternal.java @@ -28,17 +28,21 @@ public class ProjectInternal extends Internal { description = FieldConstants.PROJECT_INTERNAL_DATA_STORES) private Datastores datastores; + @DataField(id = "federated", indexed = true, description = FieldConstants.PROJECT_INTERNAL_FEDERATED) + private boolean federated; public ProjectInternal() { } - public ProjectInternal(InternalStatus status, String registrationDate, String modificationDate, Datastores datastores) { + public ProjectInternal(InternalStatus status, String registrationDate, String modificationDate, Datastores datastores, + boolean federated) { super(status, registrationDate, modificationDate); this.datastores = datastores; + this.federated = federated; } public static ProjectInternal init() { - return new ProjectInternal(new InternalStatus(), TimeUtils.getTime(), TimeUtils.getTime(), new Datastores()); + return new ProjectInternal(new InternalStatus(), TimeUtils.getTime(), TimeUtils.getTime(), new Datastores(), false); } @Override @@ -48,6 +52,7 @@ public String toString() { sb.append(", registrationDate='").append(registrationDate).append('\''); sb.append(", modificationDate='").append(lastModified).append('\''); sb.append(", datastores=").append(datastores); + sb.append(", federated=").append(federated); sb.append('}'); return sb.toString(); } @@ -88,4 +93,12 @@ public ProjectInternal setLastModified(String lastModified) { this.lastModified = lastModified; return this; } + + public boolean isFederated() { + return federated; + 
} + + public void setFederated(boolean federated) { + this.federated = federated; + } } diff --git a/opencga-core/src/main/java/org/opencb/opencga/core/models/study/Study.java b/opencga-core/src/main/java/org/opencb/opencga/core/models/study/Study.java index 7f36ebe054..087044c0b5 100644 --- a/opencga-core/src/main/java/org/opencb/opencga/core/models/study/Study.java +++ b/opencga-core/src/main/java/org/opencb/opencga/core/models/study/Study.java @@ -27,6 +27,7 @@ import org.opencb.opencga.core.models.common.AdditionalInfo; import org.opencb.opencga.core.models.common.Enums; import org.opencb.opencga.core.models.common.ExternalSource; +import org.opencb.opencga.core.models.federation.FederationClientRef; import org.opencb.opencga.core.models.notes.Note; import java.net.URI; @@ -101,6 +102,8 @@ public class Study extends PrivateFields { description = FieldConstants.PROJECT_FQN) private String fqn; + @DataField(id = "federation", description = FieldConstants.STUDY_FEDERATION) + private FederationClientRef federation; @DataField(id = "notification", description = FieldConstants.STUDY_NOTIFICATION) @@ -225,6 +228,7 @@ public String toString() { sb.append(", alias='").append(alias).append('\''); sb.append(", creationDate='").append(creationDate).append('\''); sb.append(", modificationDate='").append(modificationDate).append('\''); + sb.append(", federation=").append(federation); sb.append(", description='").append(description).append('\''); sb.append(", size=").append(size); sb.append(", fqn='").append(fqn).append('\''); @@ -305,6 +309,14 @@ public Study setModificationDate(String modificationDate) { return this; } + public FederationClientRef getFederation() { + return federation; + } + + public void setFederation(FederationClientRef federation) { + this.federation = federation; + } + public String getDescription() { return description; } diff --git a/opencga-core/src/main/java/org/opencb/opencga/core/models/study/StudyInternal.java 
b/opencga-core/src/main/java/org/opencb/opencga/core/models/study/StudyInternal.java index d87b6cb7f6..6870bc4f9d 100644 --- a/opencga-core/src/main/java/org/opencb/opencga/core/models/study/StudyInternal.java +++ b/opencga-core/src/main/java/org/opencb/opencga/core/models/study/StudyInternal.java @@ -39,18 +39,23 @@ public class StudyInternal extends Internal { description = FieldConstants.STUDY_INTERNAL_CONFIGURATION) private StudyConfiguration configuration; + @DataField(id = "federated", indexed = true, description = FieldConstants.STUDY_INTERNAL_FEDERATED) + private boolean federated; + public StudyInternal() { } public StudyInternal(InternalStatus status, String registrationDate, String modificationDate, StudyIndex index, - StudyConfiguration configuration) { + StudyConfiguration configuration, boolean federated) { super(status, registrationDate, modificationDate); this.index = index; this.configuration = configuration; + this.federated = federated; } public static StudyInternal init() { - return new StudyInternal(new InternalStatus(), TimeUtils.getTime(), TimeUtils.getTime(), StudyIndex.init(), StudyConfiguration.init()); + return new StudyInternal(new InternalStatus(), TimeUtils.getTime(), TimeUtils.getTime(), StudyIndex.init(), + StudyConfiguration.init(), false); } @Override @@ -61,6 +66,7 @@ public String toString() { sb.append(", status=").append(status); sb.append(", index=").append(index); sb.append(", configuration=").append(configuration); + sb.append(", federated=").append(federated); sb.append('}'); return sb.toString(); } @@ -100,4 +106,12 @@ public StudyInternal setConfiguration(StudyConfiguration configuration) { this.configuration = configuration; return this; } + + public boolean isFederated() { + return federated; + } + + public void setFederated(boolean federated) { + this.federated = federated; + } } diff --git a/opencga-server/src/main/java/org/opencb/opencga/server/rest/OpenCGAWSServer.java 
b/opencga-server/src/main/java/org/opencb/opencga/server/rest/OpenCGAWSServer.java index 2df5a8d71f..c8231bdddb 100644 --- a/opencga-server/src/main/java/org/opencb/opencga/server/rest/OpenCGAWSServer.java +++ b/opencga-server/src/main/java/org/opencb/opencga/server/rest/OpenCGAWSServer.java @@ -756,7 +756,7 @@ protected Response createOkResponse(Object obj, List events) { return response; } - private Response.Status getResponseStatus(List> list) { + protected Response.Status getResponseStatus(List> list) { if (list != null) { for (OpenCGAResult openCGAResult : list) { if (CollectionUtils.isNotEmpty(openCGAResult.getEvents())) { @@ -794,11 +794,11 @@ protected Response createOkResponse(InputStream o1, MediaType o2, String fileNam return buildResponse(Response.ok(o1, o2).header("content-disposition", "attachment; filename =" + fileName)); } - void logResponse(Response.StatusType statusInfo) { + protected void logResponse(Response.StatusType statusInfo) { logResponse(statusInfo, null, startTime, requestDescription); } - void logResponse(Response.StatusType statusInfo, RestResponse queryResponse) { + protected void logResponse(Response.StatusType statusInfo, RestResponse queryResponse) { logResponse(statusInfo, queryResponse, startTime, requestDescription); } From fbd759b78e83515fbf6dcd08f45daf402e4a143a Mon Sep 17 00:00:00 2001 From: pfurio Date: Thu, 16 Jan 2025 13:29:24 +0100 Subject: [PATCH 097/122] catalog: methods to update federation server/client params, #TASK-7192 --- .../authentication/AuthenticationManager.java | 97 +++++++++++++++++-- .../AzureADAuthenticationManager.java | 9 +- .../CatalogAuthenticationManager.java | 18 +++- .../auth/authentication/JwtManager.java | 18 ++-- .../LDAPAuthenticationManager.java | 30 +++++- .../SSOAuthenticationManager.java | 13 ++- .../azure/AuthenticationFactory.java | 51 ++++++++-- .../catalog/db/api/OrganizationDBAdaptor.java | 5 + .../mongodb/OrganizationMongoDBAdaptor.java | 41 ++++++-- 
.../catalog/managers/ProjectManager.java | 4 +- .../catalog/managers/StudyManager.java | 4 +- .../opencga/catalog/managers/UserManager.java | 24 +++-- .../authentication/JwtSessionManagerTest.java | 4 +- .../opencga/core/common/JacksonUtils.java | 14 ++- .../opencga/core/models/common/Enums.java | 2 + .../core/models/federation/Federation.java | 14 +-- ...lient.java => FederationClientParams.java} | 30 +++--- ....java => FederationClientParamsMixin.java} | 2 +- ...ef.java => FederationClientParamsRef.java} | 8 +- ...erver.java => FederationServerParams.java} | 22 ++--- ....java => FederationServerParamsMixin.java} | 2 +- .../opencga/core/models/project/Project.java | 10 +- .../opencga/core/models/study/Study.java | 8 +- .../opencga/core/models/user/Account.java | 4 + 24 files changed, 329 insertions(+), 105 deletions(-) rename opencga-core/src/main/java/org/opencb/opencga/core/models/federation/{FederationClient.java => FederationClientParams.java} (79%) rename opencga-core/src/main/java/org/opencb/opencga/core/models/federation/{FederationClientMixin.java => FederationClientParamsMixin.java} (80%) rename opencga-core/src/main/java/org/opencb/opencga/core/models/federation/{FederationClientRef.java => FederationClientParamsRef.java} (87%) rename opencga-core/src/main/java/org/opencb/opencga/core/models/federation/{FederationServer.java => FederationServerParams.java} (80%) rename opencga-core/src/main/java/org/opencb/opencga/core/models/federation/{FederationServerMixin.java => FederationServerParamsMixin.java} (78%) diff --git a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/auth/authentication/AuthenticationManager.java b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/auth/authentication/AuthenticationManager.java index 7b25cddf83..27ba16b0f7 100644 --- a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/auth/authentication/AuthenticationManager.java +++ 
b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/auth/authentication/AuthenticationManager.java @@ -33,6 +33,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import javax.annotation.Nullable; import javax.crypto.spec.SecretKeySpec; import java.io.Closeable; import java.security.Key; @@ -78,6 +79,20 @@ Key converStringToKeyObject(String keyString, String jcaAlgorithm) { public abstract AuthenticationResponse authenticate(String organizationId, String userId, String password) throws CatalogAuthenticationException; + /** + * Authenticate the user against the Authentication server. + * + * @param organizationId Organization id. + * @param userId User to authenticate. + * @param password Password. + * @param secretKey Secret key to apply to the token. + * @return AuthenticationResponse object. + * @throws CatalogAuthenticationException if any of the credentials are wrong or the access is denied + * for any other reason. + */ + public abstract AuthenticationResponse authenticate(String organizationId, String userId, String password, String secretKey) + throws CatalogAuthenticationException; + /** * Authenticate the user against the Authentication server. * @@ -95,6 +110,25 @@ public AuthenticationResponse refreshToken(String refreshToken) throws CatalogAu } } + /** + * Checks that the token is valid. + * + * @param token token that has been assigned to a user. + * @param secretKey secret key to be used for the token validation (may be null). + * @throws CatalogAuthenticationException when the token does not correspond to any user, is expired or has been altered. 
+ */ + public void validateToken(String token, @Nullable String secretKey) throws CatalogAuthenticationException { + if (StringUtils.isEmpty(token) || "null".equalsIgnoreCase(token)) { + throw new CatalogAuthenticationException("Token is null or empty."); + } + Key privateKey = null; + if (secretKey != null) { + privateKey = converStringToKeyObject(secretKey, jwtManager.getAlgorithm().getJcaName()); + } + + jwtManager.validateToken(token, privateKey); + } + /** * Obtains the userId corresponding to the token. * @@ -174,7 +208,7 @@ public abstract void changePassword(String organizationId, String userId, String * @return A token. */ public String createToken(String organizationId, String userId) throws CatalogAuthenticationException { - return createToken(organizationId, userId, Collections.emptyMap(), expiration); + return createToken(organizationId, userId, Collections.emptyMap(), expiration, (Key) null); } /** @@ -182,12 +216,12 @@ public String createToken(String organizationId, String userId) throws CatalogAu * * @param organizationId Organization id. * @param userId user. - * @param expiration expiration time. + * @param secretKey secret key to be used for the token generation. * @throws CatalogAuthenticationException CatalogAuthenticationException * @return A token. */ - public String createToken(String organizationId, String userId, long expiration) throws CatalogAuthenticationException { - return createToken(organizationId, userId, Collections.emptyMap(), expiration); + public String createToken(String organizationId, String userId, String secretKey) throws CatalogAuthenticationException { + return createToken(organizationId, userId, Collections.emptyMap(), expiration, secretKey); } /** @@ -200,7 +234,57 @@ public String createToken(String organizationId, String userId, long expiration) * @return A token. 
*/ public String createToken(String organizationId, String userId, Map claims) throws CatalogAuthenticationException { - return createToken(organizationId, userId, claims, expiration); + return createToken(organizationId, userId, claims, expiration, (Key) null); + } + + /** + * Create a token for the user with default expiration time. + * + * @param organizationId Organization id. + * @param userId user. + * @param claims claims. + * @param secretKey secret key to be used for the token generation. + * @throws CatalogAuthenticationException CatalogAuthenticationException + * @return A token. + */ + public String createToken(String organizationId, String userId, Map claims, String secretKey) + throws CatalogAuthenticationException { + return createToken(organizationId, userId, claims, expiration, secretKey); + } + + /** + * Create a token for the user. + * + * @param organizationId Organization id. + * @param userId user. + * @param claims claims. + * @param expiration Expiration time in seconds. + * @throws CatalogAuthenticationException CatalogAuthenticationException + * @return A token. + */ + public String createToken(String organizationId, String userId, Map claims, long expiration) + throws CatalogAuthenticationException { + return createToken(organizationId, userId, claims, expiration, (Key) null); + } + + /** + * Create a token for the user. + * + * @param organizationId Organization id. + * @param userId user. + * @param claims claims. + * @param expiration Expiration time in seconds. + * @param secretKey Secret key to be used for the token generation. + * @throws CatalogAuthenticationException CatalogAuthenticationException + * @return A token. 
+ */ + public String createToken(String organizationId, String userId, Map claims, long expiration, String secretKey) + throws CatalogAuthenticationException { + Key privateKey = null; + if (secretKey != null) { + privateKey = converStringToKeyObject(secretKey, jwtManager.getAlgorithm().getJcaName()); + } + return createToken(organizationId, userId, claims, expiration, privateKey); } /** @@ -210,10 +294,11 @@ public String createToken(String organizationId, String userId, Map claims, long expiration) + public abstract String createToken(String organizationId, String userId, Map claims, long expiration, Key secretKey) throws CatalogAuthenticationException; /** diff --git a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/auth/authentication/AzureADAuthenticationManager.java b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/auth/authentication/AzureADAuthenticationManager.java index f5445c4628..7728a138ba 100644 --- a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/auth/authentication/AzureADAuthenticationManager.java +++ b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/auth/authentication/AzureADAuthenticationManager.java @@ -50,6 +50,7 @@ import java.io.IOException; import java.io.InputStream; import java.net.URL; +import java.security.Key; import java.security.PublicKey; import java.security.cert.CertificateException; import java.security.cert.CertificateFactory; @@ -260,6 +261,12 @@ public AuthenticationResponse authenticate(String organizationId, String userId, } } + @Override + public AuthenticationResponse authenticate(String organizationId, String userId, String password, String secretKey) + throws CatalogAuthenticationException { + throw new UnsupportedOperationException("AzureAD creates its own tokens. 
Please, call to the other authenticate method."); + } + @Override public AuthenticationResponse refreshToken(String refreshToken) throws CatalogAuthenticationException { AuthenticationContext context; @@ -422,7 +429,7 @@ public void newPassword(String organizationId, String userId, String newPassword } @Override - public String createToken(String organizationId, String userId, Map claims, long expiration) { + public String createToken(String organizationId, String userId, Map claims, long expiration, Key secretKey) { // Tokens are generated by Azure via authorization code or user-password throw new UnsupportedOperationException("Tokens are generated by Azure via authorization code or user-password"); } diff --git a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/auth/authentication/CatalogAuthenticationManager.java b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/auth/authentication/CatalogAuthenticationManager.java index 8e42c91911..60af601a22 100644 --- a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/auth/authentication/CatalogAuthenticationManager.java +++ b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/auth/authentication/CatalogAuthenticationManager.java @@ -84,6 +84,17 @@ public AuthenticationResponse authenticate(String organizationId, String userId, } } + @Override + public AuthenticationResponse authenticate(String organizationId, String userId, String password, String secretKey) + throws CatalogAuthenticationException { + try { + dbAdaptorFactory.getCatalogUserDBAdaptor(organizationId).authenticate(userId, password); + return new AuthenticationResponse(createToken(organizationId, userId, secretKey)); + } catch (CatalogDBException e) { + throw new CatalogAuthenticationException("Could not validate '" + userId + "' password\n" + e.getMessage(), e); + } + } + @Override public List getUsersFromRemoteGroup(String group) throws CatalogException { throw new UnsupportedOperationException(); @@ -110,18 +121,19 @@ public 
void newPassword(String organizationId, String userId, String newPassword } @Override - public String createToken(String organizationId, String userId, Map claims, long expiration) + public String createToken(String organizationId, String userId, Map claims, long expiration, Key secretKey) throws CatalogAuthenticationException { List federations = getFederations(organizationId, userId); return jwtManager.createJWTToken(organizationId, AuthenticationOrigin.AuthenticationType.OPENCGA, userId, claims, federations, - expiration); + secretKey, expiration); } @Override public String createNonExpiringToken(String organizationId, String userId, Map claims) throws CatalogAuthenticationException { List federations = getFederations(organizationId, userId); - return jwtManager.createJWTToken(organizationId, AuthenticationOrigin.AuthenticationType.OPENCGA, userId, claims, federations, 0L); + return jwtManager.createJWTToken(organizationId, AuthenticationOrigin.AuthenticationType.OPENCGA, userId, claims, federations, + null, 0L); } @Override diff --git a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/auth/authentication/JwtManager.java b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/auth/authentication/JwtManager.java index 91ccd1619c..f3c69cdec0 100644 --- a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/auth/authentication/JwtManager.java +++ b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/auth/authentication/JwtManager.java @@ -90,7 +90,8 @@ public JwtManager setPublicKey(Key publicKey) { } public String createJWTToken(String organizationId, AuthenticationOrigin.AuthenticationType type, String userId, - Map claims, List federations, long expiration) { + Map claims, List federations, Key secretKey, + long expiration) { long currentTime = System.currentTimeMillis(); JwtBuilder jwtBuilder = Jwts.builder(); @@ -108,7 +109,7 @@ public String createJWTToken(String organizationId, AuthenticationOrigin.Authent .setAudience(organizationId) 
.setIssuer("OpenCGA") .setIssuedAt(new Date(currentTime)) - .signWith(privateKey, algorithm); + .signWith(secretKey != null ? secretKey : privateKey, algorithm); // Set the expiration in number of seconds only if 'expiration' is greater than 0 if (expiration > 0) { @@ -119,7 +120,7 @@ public String createJWTToken(String organizationId, AuthenticationOrigin.Authent } public void validateToken(String token) throws CatalogAuthenticationException { - validateToken(token, this.publicKey); + parseClaims(token); } public void validateToken(String token, Key publicKey) throws CatalogAuthenticationException { @@ -127,7 +128,7 @@ public void validateToken(String token, Key publicKey) throws CatalogAuthenticat } public JwtPayload getPayload(String token) throws CatalogAuthenticationException { - Claims body = parseClaims(token, publicKey).getBody(); + Claims body = parseClaims(token).getBody(); return new JwtPayload(body.getSubject(), body.getAudience(), getAuthOrigin(body), body.getIssuer(), body.getIssuedAt(), body.getExpiration(), JwtUtils.getFederations(body), token); } @@ -164,7 +165,7 @@ public String getUser(String token, Key publicKey) throws CatalogAuthenticationE } public String getUser(String token, String fieldKey) throws CatalogAuthenticationException { - return String.valueOf(parseClaims(token, publicKey).getBody().get(fieldKey)); + return String.valueOf(parseClaims(token).getBody().get(fieldKey)); } public List getGroups(String token, String fieldKey) throws CatalogAuthenticationException { @@ -197,9 +198,14 @@ public Object getClaim(String token, String claimId, Key publicKey) throws Catal return parseClaims(token, publicKey).getBody().get(claimId); } + private Jws parseClaims(String token) throws CatalogAuthenticationException { + return parseClaims(token, null); + } + private Jws parseClaims(String token, Key publicKey) throws CatalogAuthenticationException { + Key key = publicKey != null ? 
publicKey : this.publicKey; try { - return Jwts.parser().setSigningKey(publicKey).parseClaimsJws(token); + return Jwts.parser().setSigningKey(key).parseClaimsJws(token); } catch (ExpiredJwtException e) { logger.error("JWT Error: '{}'", e.getMessage(), e); throw CatalogAuthenticationException.tokenExpired(token); diff --git a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/auth/authentication/LDAPAuthenticationManager.java b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/auth/authentication/LDAPAuthenticationManager.java index 9d1ab62a93..731aff9df2 100644 --- a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/auth/authentication/LDAPAuthenticationManager.java +++ b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/auth/authentication/LDAPAuthenticationManager.java @@ -171,6 +171,30 @@ public AuthenticationResponse authenticate(String organizationId, String userId, return new AuthenticationResponse(createToken(organizationId, userId, claims)); } + @Override + public AuthenticationResponse authenticate(String organizationId, String userId, String password, String secretKey) + throws CatalogAuthenticationException { + Map claims = new HashMap<>(); + + List userInfoFromLDAP = getUserInfoFromLDAP(Arrays.asList(userId), usersSearch); + if (userInfoFromLDAP.isEmpty()) { + throw new CatalogAuthenticationException("LDAP: The user id " + userId + " could not be found."); + } + + String rdn = getDN(userInfoFromLDAP.get(0)); + claims.put(OPENCGA_DISTINGUISHED_NAME, rdn); + + // Attempt to authenticate + Hashtable env = getEnv(rdn, password); + try { + getDirContext(env).close(); + } catch (NamingException e) { + throw wrapException(e); + } + + return new AuthenticationResponse(createToken(organizationId, userId, claims, secretKey)); + } + @Override public List getUsersFromRemoteGroup(String group) throws CatalogException { List usersFromLDAP = getUsersFromLDAPGroup(group, groupsSearch); @@ -237,17 +261,17 @@ public void newPassword(String 
organizationId, String userId, String newPassword } @Override - public String createToken(String organizationId, String userId, Map claims, long expiration) + public String createToken(String organizationId, String userId, Map claims, long expiration, Key secretKey) throws CatalogAuthenticationException { List federations = getFederations(organizationId, userId); - return jwtManager.createJWTToken(organizationId, AuthenticationType.LDAP, userId, claims, federations, expiration); + return jwtManager.createJWTToken(organizationId, AuthenticationType.LDAP, userId, claims, federations, secretKey, expiration); } @Override public String createNonExpiringToken(String organizationId, String userId, Map claims) throws CatalogAuthenticationException { List federations = getFederations(organizationId, userId); - return jwtManager.createJWTToken(organizationId, AuthenticationType.LDAP, userId, claims, federations, 0L); + return jwtManager.createJWTToken(organizationId, AuthenticationType.LDAP, userId, claims, federations, null, 0L); } /* Private methods */ diff --git a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/auth/authentication/SSOAuthenticationManager.java b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/auth/authentication/SSOAuthenticationManager.java index 2edf385a33..30a5fe1fb6 100644 --- a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/auth/authentication/SSOAuthenticationManager.java +++ b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/auth/authentication/SSOAuthenticationManager.java @@ -42,6 +42,12 @@ public AuthenticationResponse authenticate(String organizationId, String userId, throw new NotImplementedException("Authentication should be done through SSO"); } + @Override + public AuthenticationResponse authenticate(String organizationId, String userId, String password, String secretKey) + throws CatalogAuthenticationException { + throw new NotImplementedException("Authentication should be done through SSO"); + } + 
@Override public List getUsersFromRemoteGroup(String group) throws CatalogException { throw new NotImplementedException("Operation not implemented"); @@ -73,18 +79,19 @@ public void newPassword(String organizationId, String userId, String newPassword } @Override - public String createToken(String organizationId, String userId, Map claims, long expiration) + public String createToken(String organizationId, String userId, Map claims, long expiration, Key secretKey) throws CatalogAuthenticationException { List federations = getFederations(organizationId, userId); return jwtManager.createJWTToken(organizationId, AuthenticationOrigin.AuthenticationType.SSO, userId, claims, federations, - expiration); + secretKey, expiration); } @Override public String createNonExpiringToken(String organizationId, String userId, Map claims) throws CatalogAuthenticationException { List federations = getFederations(organizationId, userId); - return jwtManager.createJWTToken(organizationId, AuthenticationOrigin.AuthenticationType.SSO, userId, claims, federations, 0L); + return jwtManager.createJWTToken(organizationId, AuthenticationOrigin.AuthenticationType.SSO, userId, claims, federations, null, + 0L); } @Override diff --git a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/auth/authentication/azure/AuthenticationFactory.java b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/auth/authentication/azure/AuthenticationFactory.java index c4c7e8daa6..b6b272fa5f 100644 --- a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/auth/authentication/azure/AuthenticationFactory.java +++ b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/auth/authentication/azure/AuthenticationFactory.java @@ -10,7 +10,11 @@ import org.opencb.opencga.catalog.utils.ParamUtils; import org.opencb.opencga.core.config.AuthenticationOrigin; import org.opencb.opencga.core.config.Configuration; +import org.opencb.opencga.core.models.JwtPayload; +import 
org.opencb.opencga.core.models.federation.FederationClientParams; +import org.opencb.opencga.core.models.federation.FederationServerParams; import org.opencb.opencga.core.models.organizations.Organization; +import org.opencb.opencga.core.models.user.Account; import org.opencb.opencga.core.models.user.AuthenticationResponse; import org.opencb.opencga.core.models.user.User; import org.opencb.opencga.core.response.OpenCGAResult; @@ -99,15 +103,50 @@ public String createToken(String organizationId, String authOriginId, String use return getOrganizationAuthenticationManager(organizationId, authOriginId).createToken(organizationId, userId); } - public void validateToken(String organizationId, String authOriginId, String token) throws CatalogException { - getOrganizationAuthenticationManager(organizationId, authOriginId).getUserId(token); + public void validateToken(String organizationId, Account.AuthenticationOrigin authenticationOrigin, JwtPayload jwtPayload) + throws CatalogException { + String secretKey = null; + if (authenticationOrigin.isFederation()) { + // The user is a federated user, so we need to use a different secret key + secretKey = getFederationSecretKey(organizationId, jwtPayload.getUserId()); + } + getOrganizationAuthenticationManager(organizationId, authenticationOrigin.getId()).validateToken(jwtPayload.getToken(), secretKey); } - public AuthenticationResponse authenticate(String organizationId, String authenticationOriginId, String userId, String password) - throws CatalogException { + public AuthenticationResponse authenticate(String organizationId, Account.AuthenticationOrigin authenticationOrigin, String userId, + String password) throws CatalogException { AuthenticationManager organizationAuthenticationManager = getOrganizationAuthenticationManager(organizationId, - authenticationOriginId); - return organizationAuthenticationManager.authenticate(organizationId, userId, password); + authenticationOrigin.getId()); + if 
(authenticationOrigin.isFederation()) { + // The user is a federated user, so we need to use a different secret key + String secretKey = getFederationSecretKey(organizationId, userId); + return organizationAuthenticationManager.authenticate(organizationId, userId, password, secretKey); + } else { + return organizationAuthenticationManager.authenticate(organizationId, userId, password); + } + } + + private String getFederationSecretKey(String organizationId, String userId) throws CatalogException { + QueryOptions options = new QueryOptions(QueryOptions.INCLUDE, OrganizationDBAdaptor.QueryParams.FEDERATION.key()); + Organization organization = catalogDBAdaptorFactory.getCatalogOrganizationDBAdaptor(organizationId).get(options).first(); + if (organization.getFederation() == null) { + throw new CatalogException("Could not find federation information for federated user '" + userId + "'"); + } + if (CollectionUtils.isNotEmpty(organization.getFederation().getServers())) { + for (FederationServerParams server : organization.getFederation().getServers()) { + if (server.getUserId().equals(userId)) { + return server.getSecretKey(); + } + } + } + if (CollectionUtils.isNotEmpty(organization.getFederation().getClients())) { + for (FederationClientParams client : organization.getFederation().getClients()) { + if (client.getUserId().equals(userId)) { + return client.getSecretKey(); + } + } + } + throw new CatalogException("Could not find federation information for federated user '" + userId + "'"); } public void changePassword(String organizationId, String authOriginId, String userId, String oldPassword, String newPassword) diff --git a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/api/OrganizationDBAdaptor.java b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/api/OrganizationDBAdaptor.java index 2c30ab281b..1ebf62b22d 100644 --- a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/api/OrganizationDBAdaptor.java +++ 
b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/api/OrganizationDBAdaptor.java @@ -167,6 +167,11 @@ public static QueryParams getParam(String key) { // OpenCGAResult update(String organizationId, ObjectMap parameters, QueryOptions queryOptions) throws CatalogDBException, CatalogParameterException, CatalogAuthorizationException; + + OpenCGAResult updateFederationServerParams(String federationId, ObjectMap params) throws CatalogDBException; + + OpenCGAResult updateFederationClientParams(String federationId, ObjectMap params) throws CatalogDBException; + // // OpenCGAResult update(Query query, ObjectMap parameters, QueryOptions queryOptions) throws CatalogDBException; // diff --git a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/mongodb/OrganizationMongoDBAdaptor.java b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/mongodb/OrganizationMongoDBAdaptor.java index 2df45729a5..fe3d4da6fd 100644 --- a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/mongodb/OrganizationMongoDBAdaptor.java +++ b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/mongodb/OrganizationMongoDBAdaptor.java @@ -27,8 +27,8 @@ import org.opencb.opencga.core.common.TimeUtils; import org.opencb.opencga.core.config.AuthenticationOrigin; import org.opencb.opencga.core.config.Configuration; -import org.opencb.opencga.core.models.federation.FederationServer; -import org.opencb.opencga.core.models.federation.FederationClient; +import org.opencb.opencga.core.models.federation.FederationClientParams; +import org.opencb.opencga.core.models.federation.FederationServerParams; import org.opencb.opencga.core.models.organizations.Organization; import org.opencb.opencga.core.response.OpenCGAResult; import org.slf4j.LoggerFactory; @@ -412,8 +412,8 @@ private void fixFederationClientForRemoval(ObjectMap parameters) { } List federationParamList = new LinkedList<>(); for (Object federationClient : parameters.getAsList(QueryParams.FEDERATION_CLIENTS.key())) { 
- if (federationClient instanceof FederationServer) { - federationParamList.add(new Document("id", ((FederationServer) federationClient).getId())); + if (federationClient instanceof FederationServerParams) { + federationParamList.add(new Document("id", ((FederationServerParams) federationClient).getId())); } else { federationParamList.add(new Document("id", ((Map) federationClient).get("id"))); } @@ -427,8 +427,8 @@ private void fixFederationServerForRemoval(ObjectMap parameters) { } List federationParamList = new LinkedList<>(); for (Object federationServer : parameters.getAsList(QueryParams.FEDERATION_SERVERS.key())) { - if (federationServer instanceof FederationClient) { - federationParamList.add(new Document("id", ((FederationClient) federationServer).getId())); + if (federationServer instanceof FederationClientParams) { + federationParamList.add(new Document("id", ((FederationClientParams) federationServer).getId())); } else { federationParamList.add(new Document("id", ((Map) federationServer).get("id"))); } @@ -451,6 +451,35 @@ private void fixAuthOriginsForRemoval(ObjectMap parameters) { parameters.putNested(QueryParams.CONFIGURATION_AUTHENTICATION_ORIGINS.key(), authOriginParamList, false); } + @Override + public OpenCGAResult updateFederationServerParams(String federationId, ObjectMap params) throws CatalogDBException { + return updateFederationParams(QueryParams.FEDERATION_SERVERS.key(), federationId, params); + } + + @Override + public OpenCGAResult updateFederationClientParams(String federationId, ObjectMap params) throws CatalogDBException { + return updateFederationParams(QueryParams.FEDERATION_CLIENTS.key(), federationId, params); + } + + private OpenCGAResult updateFederationParams(String prefix, String federationId, ObjectMap params) + throws CatalogDBException { + long tmpStartTime = startQuery(); + + Document query = new Document(prefix + ".id", federationId); + Document paramsUpdate = new Document(); + for (Map.Entry entry : params.entrySet()) { 
+ paramsUpdate.put(prefix + ".$." + entry.getKey(), entry.getValue()); + } + Document update = new Document("$set", paramsUpdate); + + DataResult updateResult = organizationCollection.update(null, query, update, null); + if (updateResult.getNumMatches() == 0) { + throw new CatalogDBException("Federation '" + federationId + "' not found."); + } + + return endWrite(tmpStartTime, updateResult); + } + @Override public OpenCGAResult delete(Organization organization) throws CatalogDBException { return null; diff --git a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/ProjectManager.java b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/ProjectManager.java index 92e36d3a79..8a86eb1f4d 100644 --- a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/ProjectManager.java +++ b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/ProjectManager.java @@ -37,7 +37,7 @@ import org.opencb.opencga.core.models.audit.AuditRecord; import org.opencb.opencga.core.models.cohort.Cohort; import org.opencb.opencga.core.models.common.Enums; -import org.opencb.opencga.core.models.federation.FederationClientRef; +import org.opencb.opencga.core.models.federation.FederationClientParamsRef; import org.opencb.opencga.core.models.individual.Individual; import org.opencb.opencga.core.models.project.*; import org.opencb.opencga.core.models.sample.Sample; @@ -213,7 +213,7 @@ private void validateProjectForCreation(String organizationId, Project project) ProjectDBAdaptor.QueryParams.MODIFICATION_DATE.key())); project.setCurrentRelease(1); project.setInternal(ProjectInternal.init()); - project.setFederation(ParamUtils.defaultObject(project.getFederation(), new FederationClientRef())); + project.setFederation(ParamUtils.defaultObject(project.getFederation(), new FederationClientParamsRef())); if (StringUtils.isNotEmpty(project.getFederation().getId())) { FederationUtils.validateFederationId(organizationId, project.getFederation().getId(), 
catalogDBAdaptorFactory); project.getInternal().setFederated(true); diff --git a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/StudyManager.java b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/StudyManager.java index 56b342f3bd..9f45f18ded 100644 --- a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/StudyManager.java +++ b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/StudyManager.java @@ -50,7 +50,7 @@ import org.opencb.opencga.core.models.cohort.CohortPermissions; import org.opencb.opencga.core.models.common.Enums; import org.opencb.opencga.core.models.family.FamilyPermissions; -import org.opencb.opencga.core.models.federation.FederationClientRef; +import org.opencb.opencga.core.models.federation.FederationClientParamsRef; import org.opencb.opencga.core.models.file.File; import org.opencb.opencga.core.models.file.FileInternal; import org.opencb.opencga.core.models.file.FilePermissions; @@ -426,7 +426,7 @@ public OpenCGAResult create(String projectStr, Study study, QueryOptions study.setSources(ParamUtils.defaultObject(study.getSources(), Collections::emptyList)); study.setDescription(ParamUtils.defaultString(study.getDescription(), "")); study.setInternal(StudyInternal.init()); - study.setFederation(ParamUtils.defaultObject(study.getFederation(), new FederationClientRef("", "", ""))); + study.setFederation(ParamUtils.defaultObject(study.getFederation(), new FederationClientParamsRef("", "", ""))); if (StringUtils.isNotEmpty(study.getFederation().getId())) { FederationUtils.validateFederationId(organizationId, study.getFederation().getId(), catalogDBAdaptorFactory); study.getInternal().setFederated(true); diff --git a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/UserManager.java b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/UserManager.java index 363ff2802d..23e2eace64 100644 --- 
a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/UserManager.java +++ b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/UserManager.java @@ -63,8 +63,9 @@ */ public class UserManager extends AbstractManager { - static final QueryOptions INCLUDE_INTERNAL = new QueryOptions(QueryOptions.INCLUDE, Arrays.asList(UserDBAdaptor.QueryParams.ID.key(), - UserDBAdaptor.QueryParams.INTERNAL.key(), UserDBAdaptor.QueryParams.DEPRECATED_ACCOUNT.key())); + public static final QueryOptions INCLUDE_INTERNAL = new QueryOptions(QueryOptions.INCLUDE, + Arrays.asList(UserDBAdaptor.QueryParams.ID.key(), UserDBAdaptor.QueryParams.INTERNAL.key(), + UserDBAdaptor.QueryParams.DEPRECATED_ACCOUNT.key())); protected static Logger logger = LoggerFactory.getLogger(UserManager.class); private final CatalogIOManager catalogIOManager; private final AuthenticationFactory authenticationFactory; @@ -290,9 +291,9 @@ public JwtPayload validateToken(String token) throws CatalogException { ParamUtils.checkParameter(jwtPayload.getUserId(), "jwt user"); ParamUtils.checkParameter(jwtPayload.getOrganization(), "jwt organization"); - String authOrigin; + Account.AuthenticationOrigin authOrigin; if (ParamConstants.ANONYMOUS_USER_ID.equals(jwtPayload.getUserId())) { - authOrigin = CatalogAuthenticationManager.OPENCGA; + authOrigin = new Account.AuthenticationOrigin(CatalogAuthenticationManager.OPENCGA, false, false); } else { OpenCGAResult userResult = getUserDBAdaptor(jwtPayload.getOrganization()).get(jwtPayload.getUserId(), INCLUDE_INTERNAL); @@ -300,10 +301,10 @@ public JwtPayload validateToken(String token) throws CatalogException { throw new CatalogException("User '" + jwtPayload.getUserId() + "' could not be found."); } User user = userResult.first(); - authOrigin = user.getInternal().getAccount().getAuthentication().getId(); + authOrigin = user.getInternal().getAccount().getAuthentication(); } - authenticationFactory.validateToken(jwtPayload.getOrganization(), 
authOrigin, token); + authenticationFactory.validateToken(jwtPayload.getOrganization(), authOrigin, jwtPayload); return jwtPayload; } @@ -807,6 +808,7 @@ public AuthenticationResponse login(String organizationId, String username, Stri ParamUtils.checkParameter(password, "password"); String authId = null; + String userId = null; AuthenticationResponse response = null; if (StringUtils.isEmpty(organizationId)) { @@ -826,6 +828,7 @@ public AuthenticationResponse login(String organizationId, String username, Stri OpenCGAResult userOpenCGAResult = getUserDBAdaptor(organizationId).get(username, INCLUDE_INTERNAL); if (userOpenCGAResult.getNumResults() == 1) { User user = userOpenCGAResult.first(); + userId = user.getId(); // Only local OPENCGA users that are not superadmins can be automatically banned or their accounts be expired boolean userCanBeBanned = !ParamConstants.ADMIN_ORGANIZATION.equals(organizationId) && CatalogAuthenticationManager.OPENCGA.equals(user.getInternal().getAccount().getAuthentication().getId()); @@ -861,10 +864,10 @@ public AuthenticationResponse login(String organizationId, String username, Stri } } } - User user1 = userOpenCGAResult.first(); - authId = user1.getInternal().getAccount().getAuthentication().getId(); + Account.AuthenticationOrigin authentication = user.getInternal().getAccount().getAuthentication(); + authId = user.getInternal().getAccount().getAuthentication().getId(); try { - response = authenticationFactory.authenticate(organizationId, authId, username, password); + response = authenticationFactory.authenticate(organizationId, authentication, username, password); } catch (CatalogAuthenticationException e) { if (userCanBeBanned) { // We can only lock the account if it is not the root user @@ -901,11 +904,13 @@ public AuthenticationResponse login(String organizationId, String username, Stri try { response = authenticationManager.authenticate(organizationId, username, password); authId = entry.getKey(); + userId = 
authenticationManager.getUserId(response.getToken()); break; } catch (CatalogAuthenticationException e) { logger.debug("Attempted authentication failed with {} for user '{}'\n{}", entry.getKey(), username, e.getMessage(), e); } } + } if (response == null) { @@ -916,7 +921,6 @@ public AuthenticationResponse login(String organizationId, String username, Stri auditManager.auditUser(organizationId, username, Enums.Action.LOGIN, username, new AuditRecord.Status(AuditRecord.Status.Result.SUCCESS)); - String userId = authenticationFactory.getUserId(organizationId, authId, response.getToken()); if (!CatalogAuthenticationManager.OPENCGA.equals(authId) && !CatalogAuthenticationManager.INTERNAL.equals(authId)) { // External authorization try { diff --git a/opencga-catalog/src/test/java/org/opencb/opencga/catalog/auth/authentication/JwtSessionManagerTest.java b/opencga-catalog/src/test/java/org/opencb/opencga/catalog/auth/authentication/JwtSessionManagerTest.java index 2ba03ee135..4b0fd82eca 100644 --- a/opencga-catalog/src/test/java/org/opencb/opencga/catalog/auth/authentication/JwtSessionManagerTest.java +++ b/opencga-catalog/src/test/java/org/opencb/opencga/catalog/auth/authentication/JwtSessionManagerTest.java @@ -53,7 +53,7 @@ public void setUp() throws Exception { @Test public void testCreateJWTToken() throws Exception { - jwtToken = jwtSessionManager.createJWTToken(organizationId, null, "testUser", Collections.emptyMap(), null, 60L); + jwtToken = jwtSessionManager.createJWTToken(organizationId, null, "testUser", Collections.emptyMap(), null, null, 60L); } @Test @@ -81,7 +81,7 @@ public void testInvalidSecretKey() throws CatalogAuthenticationException { @Test public void testNonExpiringToken() throws CatalogException { - String nonExpiringToken = jwtSessionManager.createJWTToken(organizationId, null, "System", null, null, -1L); + String nonExpiringToken = jwtSessionManager.createJWTToken(organizationId, null, "System", null, null, null, -1L); 
assertEquals(jwtSessionManager.getUser(nonExpiringToken), "System"); assertNull(jwtSessionManager.getExpiration(nonExpiringToken)); } diff --git a/opencga-core/src/main/java/org/opencb/opencga/core/common/JacksonUtils.java b/opencga-core/src/main/java/org/opencb/opencga/core/common/JacksonUtils.java index b396fed114..115f8d4e03 100644 --- a/opencga-core/src/main/java/org/opencb/opencga/core/common/JacksonUtils.java +++ b/opencga-core/src/main/java/org/opencb/opencga/core/common/JacksonUtils.java @@ -35,10 +35,10 @@ import org.opencb.opencga.core.models.common.mixins.VariantStatsJsonMixin; import org.opencb.opencga.core.models.family.Family; import org.opencb.opencga.core.models.family.FamilyMixin; -import org.opencb.opencga.core.models.federation.FederationServer; -import org.opencb.opencga.core.models.federation.FederationServerMixin; -import org.opencb.opencga.core.models.federation.FederationClient; -import org.opencb.opencga.core.models.federation.FederationClientMixin; +import org.opencb.opencga.core.models.federation.FederationServerParams; +import org.opencb.opencga.core.models.federation.FederationServerParamsMixin; +import org.opencb.opencga.core.models.federation.FederationClientParams; +import org.opencb.opencga.core.models.federation.FederationClientParamsMixin; import org.opencb.opencga.core.models.file.File; import org.opencb.opencga.core.models.file.FileMixin; import org.opencb.opencga.core.models.individual.Individual; @@ -113,8 +113,6 @@ private static ObjectMapper generateUpdateObjectMapper(JsonFactory jf) { objectMapper.addMixIn(VariableSet.class, PrivateUidMixin.class); objectMapper.addMixIn(ClinicalAnalysis.class, PrivateUidMixin.class); objectMapper.addMixIn(Interpretation.class, PrivateUidMixin.class); - objectMapper.addMixIn(FederationServer.class, FederationServerMixin.class); - objectMapper.addMixIn(FederationClient.class, FederationClientMixin.class); objectMapper.setSerializationInclusion(JsonInclude.Include.NON_NULL); @@ -149,8 +147,8 
@@ private static ObjectMapper generateOpenCGAObjectMapper(JsonFactory jf) { objectMapper.addMixIn(VariableSet.class, PrivateUidMixin.class); objectMapper.addMixIn(ClinicalAnalysis.class, PrivateUidMixin.class); objectMapper.addMixIn(Interpretation.class, PrivateUidMixin.class); - objectMapper.addMixIn(FederationServer.class, FederationServerMixin.class); - objectMapper.addMixIn(FederationClient.class, FederationClientMixin.class); + objectMapper.addMixIn(FederationServerParams.class, FederationServerParamsMixin.class); + objectMapper.addMixIn(FederationClientParams.class, FederationClientParamsMixin.class); objectMapper.setSerializationInclusion(JsonInclude.Include.NON_NULL); return objectMapper; diff --git a/opencga-core/src/main/java/org/opencb/opencga/core/models/common/Enums.java b/opencga-core/src/main/java/org/opencb/opencga/core/models/common/Enums.java index 708970ca24..0c9267bd33 100644 --- a/opencga-core/src/main/java/org/opencb/opencga/core/models/common/Enums.java +++ b/opencga-core/src/main/java/org/opencb/opencga/core/models/common/Enums.java @@ -215,6 +215,8 @@ public enum Action { CHANGE_USER_CONFIG, FETCH_USER_CONFIG, + UPDATE_FEDERATION_SECRET_KEY, + INCREMENT_PROJECT_RELEASE, FETCH_STUDY_GROUPS, diff --git a/opencga-core/src/main/java/org/opencb/opencga/core/models/federation/Federation.java b/opencga-core/src/main/java/org/opencb/opencga/core/models/federation/Federation.java index 69e82f3958..0ed053c6bd 100644 --- a/opencga-core/src/main/java/org/opencb/opencga/core/models/federation/Federation.java +++ b/opencga-core/src/main/java/org/opencb/opencga/core/models/federation/Federation.java @@ -4,13 +4,13 @@ public class Federation { - private List clients; - private List servers; + private List clients; + private List servers; public Federation() { } - public Federation(List clients, List servers) { + public Federation(List clients, List servers) { this.clients = clients; this.servers = servers; } @@ -24,20 +24,20 @@ public String toString() { 
return sb.toString(); } - public List getClients() { + public List getClients() { return clients; } - public Federation setClients(List clients) { + public Federation setClients(List clients) { this.clients = clients; return this; } - public List getServers() { + public List getServers() { return servers; } - public Federation setServers(List servers) { + public Federation setServers(List servers) { this.servers = servers; return this; } diff --git a/opencga-core/src/main/java/org/opencb/opencga/core/models/federation/FederationClient.java b/opencga-core/src/main/java/org/opencb/opencga/core/models/federation/FederationClientParams.java similarity index 79% rename from opencga-core/src/main/java/org/opencb/opencga/core/models/federation/FederationClient.java rename to opencga-core/src/main/java/org/opencb/opencga/core/models/federation/FederationClientParams.java index e581e23e45..9ab0f0a317 100644 --- a/opencga-core/src/main/java/org/opencb/opencga/core/models/federation/FederationClient.java +++ b/opencga-core/src/main/java/org/opencb/opencga/core/models/federation/FederationClientParams.java @@ -3,7 +3,7 @@ import org.opencb.commons.annotations.DataField; import org.opencb.opencga.core.api.FieldConstants; -public class FederationClient { +public class FederationClientParams { @DataField(id = "id", required = true, indexed = true, unique = true, immutable = true, description = FieldConstants.FEDERATION_CLIENT_ID_DESCRIPTION) @@ -36,11 +36,11 @@ public class FederationClient { @DataField(id = "secretKey", description = FieldConstants.FEDERATION_CLIENT_SECRET_KEY_DESCRIPTION) private String secretKey; - public FederationClient() { + public FederationClientParams() { } - public FederationClient(String id, String description, String version, String email, String url, String organizationId, String userId, - String password, String token, String secretKey) { + public FederationClientParams(String id, String description, String version, String email, String url, String 
organizationId, + String userId, String password, String token, String secretKey) { this.id = id; this.description = description; this.version = version; @@ -73,7 +73,7 @@ public String getId() { return id; } - public FederationClient setId(String id) { + public FederationClientParams setId(String id) { this.id = id; return this; } @@ -82,7 +82,7 @@ public String getDescription() { return description; } - public FederationClient setDescription(String description) { + public FederationClientParams setDescription(String description) { this.description = description; return this; } @@ -91,15 +91,16 @@ public String getVersion() { return version; } - public void setVersion(String version) { + public FederationClientParams setVersion(String version) { this.version = version; + return this; } public String getEmail() { return email; } - public FederationClient setEmail(String email) { + public FederationClientParams setEmail(String email) { this.email = email; return this; } @@ -108,7 +109,7 @@ public String getUrl() { return url; } - public FederationClient setUrl(String url) { + public FederationClientParams setUrl(String url) { this.url = url; return this; } @@ -117,7 +118,7 @@ public String getOrganizationId() { return organizationId; } - public FederationClient setOrganizationId(String organizationId) { + public FederationClientParams setOrganizationId(String organizationId) { this.organizationId = organizationId; return this; } @@ -126,7 +127,7 @@ public String getUserId() { return userId; } - public FederationClient setUserId(String userId) { + public FederationClientParams setUserId(String userId) { this.userId = userId; return this; } @@ -135,7 +136,7 @@ public String getPassword() { return password; } - public FederationClient setPassword(String password) { + public FederationClientParams setPassword(String password) { this.password = password; return this; } @@ -144,7 +145,7 @@ public String getSecretKey() { return secretKey; } - public FederationClient 
setSecretKey(String secretKey) { + public FederationClientParams setSecretKey(String secretKey) { this.secretKey = secretKey; return this; } @@ -153,7 +154,8 @@ public String getToken() { return token; } - public void setToken(String token) { + public FederationClientParams setToken(String token) { this.token = token; + return this; } } diff --git a/opencga-core/src/main/java/org/opencb/opencga/core/models/federation/FederationClientMixin.java b/opencga-core/src/main/java/org/opencb/opencga/core/models/federation/FederationClientParamsMixin.java similarity index 80% rename from opencga-core/src/main/java/org/opencb/opencga/core/models/federation/FederationClientMixin.java rename to opencga-core/src/main/java/org/opencb/opencga/core/models/federation/FederationClientParamsMixin.java index 33186c144c..24df5661fe 100644 --- a/opencga-core/src/main/java/org/opencb/opencga/core/models/federation/FederationClientMixin.java +++ b/opencga-core/src/main/java/org/opencb/opencga/core/models/federation/FederationClientParamsMixin.java @@ -3,5 +3,5 @@ import com.fasterxml.jackson.annotation.JsonIgnoreProperties; @JsonIgnoreProperties({"password", "secretKey", "token"}) -public class FederationClientMixin { +public class FederationClientParamsMixin { } diff --git a/opencga-core/src/main/java/org/opencb/opencga/core/models/federation/FederationClientRef.java b/opencga-core/src/main/java/org/opencb/opencga/core/models/federation/FederationClientParamsRef.java similarity index 87% rename from opencga-core/src/main/java/org/opencb/opencga/core/models/federation/FederationClientRef.java rename to opencga-core/src/main/java/org/opencb/opencga/core/models/federation/FederationClientParamsRef.java index 5140270888..f9ce500986 100644 --- a/opencga-core/src/main/java/org/opencb/opencga/core/models/federation/FederationClientRef.java +++ b/opencga-core/src/main/java/org/opencb/opencga/core/models/federation/FederationClientParamsRef.java @@ -3,7 +3,7 @@ import 
org.opencb.commons.annotations.DataField; import org.opencb.opencga.core.api.FieldConstants; -public class FederationClientRef { +public class FederationClientParamsRef { @DataField(id = "id", required = true, indexed = true, unique = true, immutable = true, description = FieldConstants.FEDERATION_CLIENT_ID_DESCRIPTION) @@ -15,14 +15,14 @@ public class FederationClientRef { @DataField(id = "version", description = FieldConstants.FEDERATION_CLIENT_VERSION_DESCRIPTION) private String version; - public FederationClientRef() { + public FederationClientParamsRef() { } - public FederationClientRef(String id) { + public FederationClientParamsRef(String id) { this(id, "", ""); } - public FederationClientRef(String id, String description, String version) { + public FederationClientParamsRef(String id, String description, String version) { this.id = id; this.description = description; this.version = version; diff --git a/opencga-core/src/main/java/org/opencb/opencga/core/models/federation/FederationServer.java b/opencga-core/src/main/java/org/opencb/opencga/core/models/federation/FederationServerParams.java similarity index 80% rename from opencga-core/src/main/java/org/opencb/opencga/core/models/federation/FederationServer.java rename to opencga-core/src/main/java/org/opencb/opencga/core/models/federation/FederationServerParams.java index 0379fcbbe8..e9e657460b 100644 --- a/opencga-core/src/main/java/org/opencb/opencga/core/models/federation/FederationServer.java +++ b/opencga-core/src/main/java/org/opencb/opencga/core/models/federation/FederationServerParams.java @@ -3,7 +3,7 @@ import org.opencb.commons.annotations.DataField; import org.opencb.opencga.core.api.FieldConstants; -public class FederationServer { +public class FederationServerParams { @DataField(id = "id", required = true, indexed = true, unique = true, immutable = true, description = FieldConstants.FEDERATION_SERVER_ID_DESCRIPTION) @@ -27,11 +27,11 @@ public class FederationServer { @DataField(id = 
"secretKey", description = FieldConstants.FEDERATION_SERVER_SECRET_KEY_DESCRIPTION) private String secretKey; - public FederationServer() { + public FederationServerParams() { } - public FederationServer(String id, String description, String email, String userId, boolean active, String expirationTime, - String secretKey) { + public FederationServerParams(String id, String description, String email, String userId, boolean active, String expirationTime, + String secretKey) { this.id = id; this.description = description; this.email = email; @@ -59,7 +59,7 @@ public String getId() { return id; } - public FederationServer setId(String id) { + public FederationServerParams setId(String id) { this.id = id; return this; } @@ -68,7 +68,7 @@ public String getDescription() { return description; } - public FederationServer setDescription(String description) { + public FederationServerParams setDescription(String description) { this.description = description; return this; } @@ -77,7 +77,7 @@ public String getEmail() { return email; } - public FederationServer setEmail(String email) { + public FederationServerParams setEmail(String email) { this.email = email; return this; } @@ -86,7 +86,7 @@ public String getUserId() { return userId; } - public FederationServer setUserId(String userId) { + public FederationServerParams setUserId(String userId) { this.userId = userId; return this; } @@ -95,7 +95,7 @@ public boolean isActive() { return active; } - public FederationServer setActive(boolean active) { + public FederationServerParams setActive(boolean active) { this.active = active; return this; } @@ -104,7 +104,7 @@ public String getExpirationTime() { return expirationTime; } - public FederationServer setExpirationTime(String expirationTime) { + public FederationServerParams setExpirationTime(String expirationTime) { this.expirationTime = expirationTime; return this; } @@ -113,7 +113,7 @@ public String getSecretKey() { return secretKey; } - public FederationServer 
setSecretKey(String secretKey) { + public FederationServerParams setSecretKey(String secretKey) { this.secretKey = secretKey; return this; } diff --git a/opencga-core/src/main/java/org/opencb/opencga/core/models/federation/FederationServerMixin.java b/opencga-core/src/main/java/org/opencb/opencga/core/models/federation/FederationServerParamsMixin.java similarity index 78% rename from opencga-core/src/main/java/org/opencb/opencga/core/models/federation/FederationServerMixin.java rename to opencga-core/src/main/java/org/opencb/opencga/core/models/federation/FederationServerParamsMixin.java index 3fdbe1af91..fb593eb917 100644 --- a/opencga-core/src/main/java/org/opencb/opencga/core/models/federation/FederationServerMixin.java +++ b/opencga-core/src/main/java/org/opencb/opencga/core/models/federation/FederationServerParamsMixin.java @@ -3,5 +3,5 @@ import com.fasterxml.jackson.annotation.JsonIgnoreProperties; @JsonIgnoreProperties({"secretKey"}) -public class FederationServerMixin { +public class FederationServerParamsMixin { } diff --git a/opencga-core/src/main/java/org/opencb/opencga/core/models/project/Project.java b/opencga-core/src/main/java/org/opencb/opencga/core/models/project/Project.java index 9ca3f6d4f5..2b4641dce2 100644 --- a/opencga-core/src/main/java/org/opencb/opencga/core/models/project/Project.java +++ b/opencga-core/src/main/java/org/opencb/opencga/core/models/project/Project.java @@ -22,7 +22,7 @@ import org.opencb.opencga.core.common.TimeUtils; import org.opencb.opencga.core.config.storage.CellBaseConfiguration; import org.opencb.opencga.core.models.PrivateFields; -import org.opencb.opencga.core.models.federation.FederationClientRef; +import org.opencb.opencga.core.models.federation.FederationClientParamsRef; import org.opencb.opencga.core.models.study.Study; import java.util.HashMap; @@ -108,7 +108,7 @@ public class Project extends PrivateFields { private int currentRelease; @DataField(id = "federation", description = 
FieldConstants.PROJECT_FEDERATION) - private FederationClientRef federation; + private FederationClientParamsRef federation; @DataField(id = "studies", description = FieldConstants.PROJECT_STUDIES) @@ -171,7 +171,7 @@ public Project(Project project) { project.getCellbase(), project.getCurrentRelease(), project.getStudies(), project.getInternal(), project.getAttributes()); } - public Project(long uid, String id, String uuid, String name, String fqn, FederationClientRef federation, String creationDate, + public Project(long uid, String id, String uuid, String name, String fqn, FederationClientParamsRef federation, String creationDate, String modificationDate, String description, ProjectOrganism organism, CellBaseConfiguration cellbase, int currentRelease, List studies, ProjectInternal internal, Map attributes) { super(uid); @@ -299,11 +299,11 @@ public Project setOrganism(ProjectOrganism organism) { return this; } - public FederationClientRef getFederation() { + public FederationClientParamsRef getFederation() { return federation; } - public void setFederation(FederationClientRef federation) { + public void setFederation(FederationClientParamsRef federation) { this.federation = federation; } diff --git a/opencga-core/src/main/java/org/opencb/opencga/core/models/study/Study.java b/opencga-core/src/main/java/org/opencb/opencga/core/models/study/Study.java index 087044c0b5..de1785b713 100644 --- a/opencga-core/src/main/java/org/opencb/opencga/core/models/study/Study.java +++ b/opencga-core/src/main/java/org/opencb/opencga/core/models/study/Study.java @@ -27,7 +27,7 @@ import org.opencb.opencga.core.models.common.AdditionalInfo; import org.opencb.opencga.core.models.common.Enums; import org.opencb.opencga.core.models.common.ExternalSource; -import org.opencb.opencga.core.models.federation.FederationClientRef; +import org.opencb.opencga.core.models.federation.FederationClientParamsRef; import org.opencb.opencga.core.models.notes.Note; import java.net.URI; @@ -103,7 +103,7 
@@ public class Study extends PrivateFields { private String fqn; @DataField(id = "federation", description = FieldConstants.STUDY_FEDERATION) - private FederationClientRef federation; + private FederationClientParamsRef federation; @DataField(id = "notification", description = FieldConstants.STUDY_NOTIFICATION) @@ -309,11 +309,11 @@ public Study setModificationDate(String modificationDate) { return this; } - public FederationClientRef getFederation() { + public FederationClientParamsRef getFederation() { return federation; } - public void setFederation(FederationClientRef federation) { + public void setFederation(FederationClientParamsRef federation) { this.federation = federation; } diff --git a/opencga-core/src/main/java/org/opencb/opencga/core/models/user/Account.java b/opencga-core/src/main/java/org/opencb/opencga/core/models/user/Account.java index 16b2bca684..6ca4eb6037 100644 --- a/opencga-core/src/main/java/org/opencb/opencga/core/models/user/Account.java +++ b/opencga-core/src/main/java/org/opencb/opencga/core/models/user/Account.java @@ -56,6 +56,10 @@ public Account() { this.authentication = null; } + public Account(String expirationDate, AuthenticationOrigin authentication) { + this(expirationDate, new Password(), 0, authentication); + } + public Account(String expirationDate, Password password, int failedAttempts, AuthenticationOrigin authentication) { this.expirationDate = expirationDate; this.password = password; From 6ddaa78ef6b9a43ed73faa3092331b1746b2b2c1 Mon Sep 17 00:00:00 2001 From: pfurio Date: Fri, 17 Jan 2025 11:45:14 +0100 Subject: [PATCH 098/122] wip --- .../OrganizationMongoDBAdaptorFactory.java | 2 +- .../FederationServerCreateParams.java | 77 ------------------- 2 files changed, 1 insertion(+), 78 deletions(-) delete mode 100644 opencga-core/src/main/java/org/opencb/opencga/core/models/federation/FederationServerCreateParams.java diff --git 
a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/mongodb/OrganizationMongoDBAdaptorFactory.java b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/mongodb/OrganizationMongoDBAdaptorFactory.java index 9884bbccc6..be76b21970 100644 --- a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/mongodb/OrganizationMongoDBAdaptorFactory.java +++ b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/mongodb/OrganizationMongoDBAdaptorFactory.java @@ -121,7 +121,7 @@ public class OrganizationMongoDBAdaptorFactory { private final NoteMongoDBAdaptor notesDBAdaptor; private final OrganizationMongoDBAdaptor organizationDBAdaptor; - private UserMongoDBAdaptor userDBAdaptor; + private final UserMongoDBAdaptor userDBAdaptor; private final ProjectMongoDBAdaptor projectDBAdaptor; private final StudyMongoDBAdaptor studyDBAdaptor; private final IndividualMongoDBAdaptor individualDBAdaptor; diff --git a/opencga-core/src/main/java/org/opencb/opencga/core/models/federation/FederationServerCreateParams.java b/opencga-core/src/main/java/org/opencb/opencga/core/models/federation/FederationServerCreateParams.java deleted file mode 100644 index 4f9eb24e16..0000000000 --- a/opencga-core/src/main/java/org/opencb/opencga/core/models/federation/FederationServerCreateParams.java +++ /dev/null @@ -1,77 +0,0 @@ -package org.opencb.opencga.core.models.federation; - -import org.opencb.commons.annotations.DataField; -import org.opencb.opencga.core.api.FieldConstants; - -public class FederationServerCreateParams { - - @DataField(id = "id", required = true, indexed = true, unique = true, immutable = true, - description = FieldConstants.FEDERATION_CLIENT_ID_DESCRIPTION) - private String id; - - @DataField(id = "description", description = FieldConstants.FEDERATION_CLIENT_DESCRIPTION_DESCRIPTION) - private String description; - - @DataField(id = "email", description = FieldConstants.FEDERATION_CLIENT_EMAIL_DESCRIPTION) - private String email; - - @DataField(id = 
"userId", description = FieldConstants.FEDERATION_CLIENT_USER_ID_DESCRIPTION) - private String userId; - - public FederationServerCreateParams() { - } - - public FederationServerCreateParams(String id, String description, String email, String userId) { - this.id = id; - this.description = description; - this.email = email; - this.userId = userId; - } - - @Override - public String toString() { - final StringBuilder sb = new StringBuilder("FederationServerCreateParams{"); - sb.append("id='").append(id).append('\''); - sb.append(", description='").append(description).append('\''); - sb.append(", email='").append(email).append('\''); - sb.append(", userId='").append(userId).append('\''); - sb.append('}'); - return sb.toString(); - } - - public String getId() { - return id; - } - - public FederationServerCreateParams setId(String id) { - this.id = id; - return this; - } - - public String getDescription() { - return description; - } - - public FederationServerCreateParams setDescription(String description) { - this.description = description; - return this; - } - - public String getEmail() { - return email; - } - - public FederationServerCreateParams setEmail(String email) { - this.email = email; - return this; - } - - public String getUserId() { - return userId; - } - - public FederationServerCreateParams setUserId(String userId) { - this.userId = userId; - return this; - } -} From 74dda6731ac762e100ac706908bea033f9482a5f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Fri, 17 Jan 2025 21:29:19 +0000 Subject: [PATCH 099/122] storage: Fix hadoop compatibility issues. 
#TASK-7320 --- .../opencga/storage/hadoop/HBaseCompatApi.java | 2 ++ .../opencga/storage/hadoop/HBaseCompat.java | 6 ++++++ .../opencga/storage/hadoop/HBaseCompat.java | 6 ++++++ .../pom.xml | 2 +- .../opencga/storage/hadoop/HBaseCompat.java | 6 ++++++ .../opencga-storage-hadoop-core/pom.xml | 3 +++ .../hadoop/variant/io/VariantExporterDriver.java | 4 ++-- .../hadoop/variant/mr/StreamVariantDriver.java | 4 ++-- .../variant/mr/VariantFileOutputFormat.java | 16 ++++++++++++++-- 9 files changed, 42 insertions(+), 7 deletions(-) diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-api/src/main/java/org/opencb/opencga/storage/hadoop/HBaseCompatApi.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-api/src/main/java/org/opencb/opencga/storage/hadoop/HBaseCompatApi.java index 48ab8846f6..b9d6a9dd31 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-api/src/main/java/org/opencb/opencga/storage/hadoop/HBaseCompatApi.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-api/src/main/java/org/opencb/opencga/storage/hadoop/HBaseCompatApi.java @@ -36,4 +36,6 @@ public static HBaseCompatApi getInstance() { public abstract List getServerList(Admin admin) throws IOException; public abstract byte[][] getTableStartKeys(Admin admin, Table table) throws IOException; + + public abstract boolean isSnappyAvailable(); } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.0/src/main/java/org/opencb/opencga/storage/hadoop/HBaseCompat.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.0/src/main/java/org/opencb/opencga/storage/hadoop/HBaseCompat.java index bb74dabf4c..923c061c83 100644 --- 
a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.0/src/main/java/org/opencb/opencga/storage/hadoop/HBaseCompat.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.0/src/main/java/org/opencb/opencga/storage/hadoop/HBaseCompat.java @@ -7,6 +7,7 @@ import org.apache.hadoop.hbase.client.RegionInfo; import org.apache.hadoop.hbase.client.Table; import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.io.compress.SnappyCodec; import org.opencb.opencga.storage.hadoop.variant.annotation.phoenix.PhoenixCompat; import org.opencb.opencga.storage.hadoop.variant.annotation.phoenix.PhoenixCompatApi; @@ -45,4 +46,9 @@ public byte[][] getTableStartKeys(Admin admin, Table table) throws IOException { } return startKeys; } + + @Override + public boolean isSnappyAvailable() { + return SnappyCodec.isNativeCodeLoaded(); + } } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.2/src/main/java/org/opencb/opencga/storage/hadoop/HBaseCompat.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.2/src/main/java/org/opencb/opencga/storage/hadoop/HBaseCompat.java index f7cf534508..455d57aa26 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.2/src/main/java/org/opencb/opencga/storage/hadoop/HBaseCompat.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.2/src/main/java/org/opencb/opencga/storage/hadoop/HBaseCompat.java @@ -7,6 +7,7 @@ import org.apache.hadoop.hbase.client.RegionInfo; import org.apache.hadoop.hbase.client.Table; import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.io.compress.SnappyCodec; import org.opencb.opencga.storage.hadoop.variant.annotation.phoenix.PhoenixCompat; 
import org.opencb.opencga.storage.hadoop.variant.annotation.phoenix.PhoenixCompatApi; @@ -45,4 +46,9 @@ public byte[][] getTableStartKeys(Admin admin, Table table) throws IOException { } return startKeys; } + + @Override + public boolean isSnappyAvailable() { + return SnappyCodec.isNativeCodeLoaded(); + } } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.4/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.4/pom.xml index a86f33deb4..66134df8dc 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.4/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.4/pom.xml @@ -32,7 +32,7 @@ 2.4.17 - 2.10.0 + 3.3.4 5.1.3 1.1.0 diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.4/src/main/java/org/opencb/opencga/storage/hadoop/HBaseCompat.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.4/src/main/java/org/opencb/opencga/storage/hadoop/HBaseCompat.java index 194b47b779..d680840cb3 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.4/src/main/java/org/opencb/opencga/storage/hadoop/HBaseCompat.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.4/src/main/java/org/opencb/opencga/storage/hadoop/HBaseCompat.java @@ -38,4 +38,10 @@ public List getServerList(Admin admin) throws IOException { public byte[][] getTableStartKeys(Admin admin, Table table) throws IOException { return table.getRegionLocator().getStartKeys(); } + + @Override + public boolean isSnappyAvailable() { + // [HADOOP-17125] - Using snappy-java in SnappyCodec - 3.3.1, 3.4.0 + return true; + } } 
diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/pom.xml index f41b6d7b0f..48b381275c 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/pom.xml @@ -448,6 +448,9 @@ org.apache.tephra:tephra-core + + org.apache.tephra:tephra-core-shaded + com.lmax:disruptor diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/io/VariantExporterDriver.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/io/VariantExporterDriver.java index a26dd84d6b..d288fbf609 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/io/VariantExporterDriver.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/io/VariantExporterDriver.java @@ -9,7 +9,6 @@ import org.apache.hadoop.io.compress.CompressionCodec; import org.apache.hadoop.io.compress.DeflateCodec; import org.apache.hadoop.io.compress.GzipCodec; -import org.apache.hadoop.io.compress.SnappyCodec; import org.apache.hadoop.mapred.JobContext; import org.apache.hadoop.mapreduce.*; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; @@ -24,6 +23,7 @@ import org.opencb.biodata.models.variant.avro.GeneCancerAssociation; import org.opencb.biodata.models.variant.avro.VariantAvro; import org.opencb.opencga.storage.core.variant.io.VariantWriterFactory; +import org.opencb.opencga.storage.hadoop.HBaseCompat; import org.opencb.opencga.storage.hadoop.variant.AbstractVariantsTableDriver; import org.opencb.opencga.storage.hadoop.variant.mr.VariantFileOutputFormat; import 
org.opencb.opencga.storage.hadoop.variant.mr.VariantLocusKey; @@ -144,7 +144,7 @@ protected void setupJob(Job job) throws IOException { LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class); outputFormatClass = LazyOutputFormat.class; } - if (SnappyCodec.isNativeCodeLoaded()) { + if (HBaseCompat.getInstance().isSnappyAvailable()) { FileOutputFormat.setCompressOutput(job, true); // FIXME: SnappyCodec might not be available in client side // FileOutputFormat.setOutputCompressorClass(job, SnappyCodec.class); diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantDriver.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantDriver.java index 960196f6f1..e25c9be620 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantDriver.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/StreamVariantDriver.java @@ -5,7 +5,6 @@ import org.apache.hadoop.io.compress.CompressionCodec; import org.apache.hadoop.io.compress.DeflateCodec; import org.apache.hadoop.io.compress.GzipCodec; -import org.apache.hadoop.io.compress.SnappyCodec; import org.apache.hadoop.mapred.JobContext; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.OutputFormat; @@ -17,6 +16,7 @@ import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; import org.apache.hadoop.util.Tool; import org.opencb.opencga.storage.core.variant.io.VariantWriterFactory; +import org.opencb.opencga.storage.hadoop.HBaseCompat; import org.opencb.opencga.storage.hadoop.utils.ValueOnlyTextOutputFormat; import org.opencb.opencga.storage.hadoop.variant.io.VariantDriver; import org.slf4j.Logger; @@ -164,7 +164,7 @@ protected void 
setupJob(Job job) throws IOException { outputFormatClass = LazyOutputFormat.class; job.setOutputFormatClass(ValueOnlyTextOutputFormat.class); - if (SnappyCodec.isNativeCodeLoaded()) { + if (HBaseCompat.getInstance().isSnappyAvailable()) { FileOutputFormat.setCompressOutput(job, true); // FIXME: SnappyCodec might not be available in client side // FileOutputFormat.setOutputCompressorClass(job, SnappyCodec.class); diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/VariantFileOutputFormat.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/VariantFileOutputFormat.java index 248bcc5d16..445dfeb587 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/VariantFileOutputFormat.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/VariantFileOutputFormat.java @@ -20,7 +20,6 @@ import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.azurebfs.services.AbfsOutputStream; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.io.compress.CompressionCodec; import org.apache.hadoop.io.compress.GzipCodec; @@ -54,6 +53,15 @@ */ public class VariantFileOutputFormat extends FileOutputFormat { + private static Class abfsOutputStreamClass; + + static { + try { + abfsOutputStreamClass = Class.forName("org.apache.hadoop.fs.azurebfs.services.AbfsOutputStream"); + } catch (ClassNotFoundException e) { + abfsOutputStreamClass = null; + } + } public static final String VARIANT_OUTPUT_FORMAT = "variant.output_format"; @@ -74,7 +82,7 @@ public RecordWriter getRecordWriter(TaskAttemptContext jo FileSystem fs = file.getFileSystem(conf); FSDataOutputStream fsOs 
= fs.create(file, false); OutputStream out; - if (fsOs.getWrappedStream() instanceof AbfsOutputStream) { + if (isAbfsOutputStream(fsOs)) { // Disable flush on ABFS. See HADOOP-16548 out = new FilterOutputStream(fsOs) { @Override @@ -92,6 +100,10 @@ public void flush() throws IOException { return new VariantRecordWriter(configureWriter(job, countingOut), countingOut); } + private static boolean isAbfsOutputStream(FSDataOutputStream fsOs) { + return abfsOutputStreamClass != null && abfsOutputStreamClass.isInstance(fsOs.getWrappedStream()); + } + private DataWriter configureWriter(final TaskAttemptContext job, OutputStream fileOut) throws IOException { // job.getCounter(VcfDataWriter.class.getName(), "failed").increment(0); // init final Configuration conf = job.getConfiguration(); From e1d58353e1aedfc524720105f5e38d6d7b505242 Mon Sep 17 00:00:00 2001 From: pfurio Date: Mon, 20 Jan 2025 12:11:21 +0100 Subject: [PATCH 100/122] app: add federation migration, #TASK-7192 --- .../catalog/FederationChangesMigration.java | 69 +++++++++++++++++++ .../azure/AuthenticationFactory.java | 4 +- .../catalog/managers/AuditManager.java | 7 ++ .../db/mongodb/AuditMongoDBAdaptorTest.java | 6 +- .../opencga/core/api/FieldConstants.java | 4 +- .../core/models/audit/AuditRecord.java | 8 +-- .../opencga/core/models/common/Enums.java | 6 -- .../federation/FederationClientParams.java | 18 ++--- .../FederationClientParamsMixin.java | 2 +- .../federation/FederationServerParams.java | 33 +++------ .../FederationServerParamsMixin.java | 2 +- 11 files changed, 107 insertions(+), 52 deletions(-) create mode 100644 opencga-app/src/main/java/org/opencb/opencga/app/migrations/v4/v4_0_0/catalog/FederationChangesMigration.java diff --git a/opencga-app/src/main/java/org/opencb/opencga/app/migrations/v4/v4_0_0/catalog/FederationChangesMigration.java b/opencga-app/src/main/java/org/opencb/opencga/app/migrations/v4/v4_0_0/catalog/FederationChangesMigration.java new file mode 100644 index 
0000000000..47ce704da7 --- /dev/null +++ b/opencga-app/src/main/java/org/opencb/opencga/app/migrations/v4/v4_0_0/catalog/FederationChangesMigration.java @@ -0,0 +1,69 @@ +package org.opencb.opencga.app.migrations.v4.v4_0_0.catalog; + +import com.mongodb.client.MongoCollection; +import com.mongodb.client.model.Filters; +import com.mongodb.client.model.Updates; +import org.bson.Document; +import org.bson.conversions.Bson; +import org.opencb.opencga.catalog.db.mongodb.OrganizationMongoDBAdaptorFactory; +import org.opencb.opencga.catalog.migration.Migration; +import org.opencb.opencga.catalog.migration.MigrationTool; + +import java.util.Arrays; +import java.util.Collections; + +@Migration(id = "federationChanges__task_7192", + description = "Federation changes, #TASK-7192", version = "4.0.0", + language = Migration.MigrationLanguage.JAVA, domain = Migration.MigrationDomain.CATALOG, date = 20250120) +public class FederationChangesMigration extends MigrationTool { + + /* + * [NEW] Organization -> federation: {clients: [], servers: []} + * [NEW] Project -> federation: {id: "", description: "", version: ""} + * -> internal.federated: [true|false] + * [NEW] Study -> federation: {id: "", description: "", version: ""} + * -> internal.federated: [true|false] + * [NEW] User -> internal.account.authentication.federation: [true|false] + */ + + @Override + protected void run() throws Exception { + // Organization update + MongoCollection orgCollection = getMongoCollection(OrganizationMongoDBAdaptorFactory.ORGANIZATION_COLLECTION); + Bson query = Filters.exists("federation", false); + Bson update = Updates.set("federation", new Document() + .append("clients", Collections.emptyList()) + .append("servers", Collections.emptyList()) + ); + orgCollection.updateMany(query, update); + + // Project and Study + Bson projectStudyQuery = Filters.exists("federation", false); + Bson projectStudyUpdate = Updates.combine( + Updates.set("federation", new Document() + .append("id", "") + 
.append("description", "") + .append("version", "") + ), + Updates.set("internal.federated", false) + ); + for (String collectionStr : Arrays.asList(OrganizationMongoDBAdaptorFactory.PROJECT_COLLECTION, + OrganizationMongoDBAdaptorFactory.DELETED_PROJECT_COLLECTION, OrganizationMongoDBAdaptorFactory.STUDY_COLLECTION, + OrganizationMongoDBAdaptorFactory.DELETED_STUDY_COLLECTION)) { + getMongoCollection(collectionStr).updateMany(projectStudyQuery, projectStudyUpdate); + } + + // User + Bson userQuery = Filters.exists("internal.account.authentication.federation", false); + Bson userUpdate = Updates.set("internal.account.authentication.federation", false); + for (String collectionStr : Arrays.asList(OrganizationMongoDBAdaptorFactory.USER_COLLECTION, + OrganizationMongoDBAdaptorFactory.DELETED_USER_COLLECTION)) { + getMongoCollection(collectionStr).updateMany(userQuery, userUpdate); + } + + // Drop project id index (no longer unique) + Document oldIndex = new Document() + .append("id", 1); + dropIndex(OrganizationMongoDBAdaptorFactory.PROJECT_COLLECTION, oldIndex); + } +} diff --git a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/auth/authentication/azure/AuthenticationFactory.java b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/auth/authentication/azure/AuthenticationFactory.java index b6b272fa5f..48bf2ee07a 100644 --- a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/auth/authentication/azure/AuthenticationFactory.java +++ b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/auth/authentication/azure/AuthenticationFactory.java @@ -135,14 +135,14 @@ private String getFederationSecretKey(String organizationId, String userId) thro if (CollectionUtils.isNotEmpty(organization.getFederation().getServers())) { for (FederationServerParams server : organization.getFederation().getServers()) { if (server.getUserId().equals(userId)) { - return server.getSecretKey(); + return server.getSecurityKey(); } } } if 
(CollectionUtils.isNotEmpty(organization.getFederation().getClients())) { for (FederationClientParams client : organization.getFederation().getClients()) { if (client.getUserId().equals(userId)) { - return client.getSecretKey(); + return client.getSecurityKey(); } } } diff --git a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/AuditManager.java b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/AuditManager.java index 6c25baa36f..7d991dff2a 100644 --- a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/AuditManager.java +++ b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/AuditManager.java @@ -200,6 +200,13 @@ public void audit(String organizationId, String operationId, String userId, Enum public void audit(String organizationId, String operationId, String userId, Enums.Action action, Enums.Resource resource, String resourceId, String resourceUuid, String studyId, String studyUuid, ObjectMap params, AuditRecord.Status status, ObjectMap attributes) { + audit(organizationId, operationId, userId, action.name(), resource, resourceId, resourceUuid, studyId, studyUuid, params, status, + attributes); + } + + protected void audit(String organizationId, String operationId, String userId, String action, Enums.Resource resource, + String resourceId, String resourceUuid, String studyId, String studyUuid, ObjectMap params, AuditRecord.Status status, + ObjectMap attributes) { String apiVersion = GitRepositoryState.getInstance().getBuildVersion(); Date date = TimeUtils.getDate(); diff --git a/opencga-catalog/src/test/java/org/opencb/opencga/catalog/db/mongodb/AuditMongoDBAdaptorTest.java b/opencga-catalog/src/test/java/org/opencb/opencga/catalog/db/mongodb/AuditMongoDBAdaptorTest.java index 228ca61f98..68a165ba7a 100644 --- a/opencga-catalog/src/test/java/org/opencb/opencga/catalog/db/mongodb/AuditMongoDBAdaptorTest.java +++ 
b/opencga-catalog/src/test/java/org/opencb/opencga/catalog/db/mongodb/AuditMongoDBAdaptorTest.java @@ -37,17 +37,17 @@ public class AuditMongoDBAdaptorTest extends AbstractMongoDBAdaptorTest { public void testInsertAuditRecord() throws Exception { dbAdaptorFactory.getCatalogAuditDbAdaptor(organizationId) .insertAuditRecord(new AuditRecord(UuidUtils.generateOpenCgaUuid(UuidUtils.Entity.AUDIT), - UuidUtils.generateOpenCgaUuid(UuidUtils.Entity.AUDIT), "user", "api", Enums.Action.CREATE, + UuidUtils.generateOpenCgaUuid(UuidUtils.Entity.AUDIT), "user", "api", Enums.Action.CREATE.name(), Enums.Resource.SAMPLE, "sampleId", "sampleUuid", "studyId", "studyUuid", new ObjectMap(), new AuditRecord.Status(AuditRecord.Status.Result.SUCCESS), TimeUtils.getDate(), new ObjectMap())); dbAdaptorFactory.getCatalogAuditDbAdaptor(organizationId) .insertAuditRecord(new AuditRecord(UuidUtils.generateOpenCgaUuid(UuidUtils.Entity.AUDIT), - UuidUtils.generateOpenCgaUuid(UuidUtils.Entity.AUDIT), "user", "api", Enums.Action.CREATE, + UuidUtils.generateOpenCgaUuid(UuidUtils.Entity.AUDIT), "user", "api", Enums.Action.CREATE.name(), Enums.Resource.SAMPLE, "sampleId2", "sampleUuid2", "studyId", "studyUuid", new ObjectMap(), new AuditRecord.Status(AuditRecord.Status.Result.SUCCESS), TimeUtils.getDate(), new ObjectMap())); dbAdaptorFactory.getCatalogAuditDbAdaptor(organizationId) .insertAuditRecord(new AuditRecord(UuidUtils.generateOpenCgaUuid(UuidUtils.Entity.AUDIT), - UuidUtils.generateOpenCgaUuid(UuidUtils.Entity.AUDIT), "user", "api", Enums.Action.CREATE, + UuidUtils.generateOpenCgaUuid(UuidUtils.Entity.AUDIT), "user", "api", Enums.Action.CREATE.name(), Enums.Resource.SAMPLE, "sampleId3", "sampleUuid3", "studyId", "studyUuid", new ObjectMap(), new AuditRecord.Status(AuditRecord.Status.Result.SUCCESS), TimeUtils.getDate(), new ObjectMap())); } diff --git a/opencga-core/src/main/java/org/opencb/opencga/core/api/FieldConstants.java 
b/opencga-core/src/main/java/org/opencb/opencga/core/api/FieldConstants.java index b913f1d79d..0b22a122b2 100644 --- a/opencga-core/src/main/java/org/opencb/opencga/core/api/FieldConstants.java +++ b/opencga-core/src/main/java/org/opencb/opencga/core/api/FieldConstants.java @@ -81,13 +81,13 @@ public class FieldConstants { public static final String FEDERATION_CLIENT_USER_ID_DESCRIPTION = "User ID to access the federation server."; public static final String FEDERATION_CLIENT_PASSWORD_DESCRIPTION = "User password to access the federation server."; public static final String FEDERATION_CLIENT_TOKEN_DESCRIPTION = "Token to access the federation server."; - public static final String FEDERATION_CLIENT_SECRET_KEY_DESCRIPTION = "Secret key auto-generated by the federation server."; + public static final String FEDERATION_CLIENT_SECURITY_KEY_DESCRIPTION = "Security key auto-generated by the federation server."; public static final String FEDERATION_SERVER_ID_DESCRIPTION = "Unique ID to identify the federation client."; public static final String FEDERATION_SERVER_DESCRIPTION_DESCRIPTION = "Description of the federation client."; public static final String FEDERATION_SERVER_EMAIL_DESCRIPTION = "Contact email of the federation client."; public static final String FEDERATION_SERVER_USER_ID_DESCRIPTION = "User ID to be used by the federation client."; - public static final String FEDERATION_SERVER_SECRET_KEY_DESCRIPTION = "Secret key shared with the federation client to gain access" + public static final String FEDERATION_SERVER_SECURITY_KEY_DESCRIPTION = "Security key shared with the federation client to gain access" + " to the server."; public static final String FEDERATION_SERVER_ACTIVE_DESCRIPTION = "Flag to indicate if the federation client account is active."; public static final String FEDERATION_SERVER_EXPIRATION_TIME_DESCRIPTION = "Expiration time of the federation client account."; diff --git 
a/opencga-core/src/main/java/org/opencb/opencga/core/models/audit/AuditRecord.java b/opencga-core/src/main/java/org/opencb/opencga/core/models/audit/AuditRecord.java index f8ba4b924b..11ba4454ed 100644 --- a/opencga-core/src/main/java/org/opencb/opencga/core/models/audit/AuditRecord.java +++ b/opencga-core/src/main/java/org/opencb/opencga/core/models/audit/AuditRecord.java @@ -47,7 +47,7 @@ public class AuditRecord { /** * Action performed (CREATE, SEARCH, DOWNLOAD...). */ - private Enums.Action action; + private String action; /** * Involved resource (User, Study, Sample, File...). @@ -93,7 +93,7 @@ public class AuditRecord { public AuditRecord() { } - public AuditRecord(String id, String operationId, String userId, String apiVersion, Enums.Action action, Enums.Resource resource, + public AuditRecord(String id, String operationId, String userId, String apiVersion, String action, Enums.Resource resource, String resourceId, String resourceUuid, String studyId, String studyUuid, ObjectMap params, Status status, Date date, ObjectMap attributes) { this.id = id; @@ -169,11 +169,11 @@ public AuditRecord setApiVersion(String apiVersion) { return this; } - public Enums.Action getAction() { + public String getAction() { return action; } - public AuditRecord setAction(Enums.Action action) { + public AuditRecord setAction(String action) { this.action = action; return this; } diff --git a/opencga-core/src/main/java/org/opencb/opencga/core/models/common/Enums.java b/opencga-core/src/main/java/org/opencb/opencga/core/models/common/Enums.java index 0c9267bd33..0617d6c65a 100644 --- a/opencga-core/src/main/java/org/opencb/opencga/core/models/common/Enums.java +++ b/opencga-core/src/main/java/org/opencb/opencga/core/models/common/Enums.java @@ -214,9 +214,6 @@ public enum Action { RESET_USER_PASSWORD, CHANGE_USER_CONFIG, FETCH_USER_CONFIG, - - UPDATE_FEDERATION_SECRET_KEY, - INCREMENT_PROJECT_RELEASE, FETCH_STUDY_GROUPS, @@ -261,9 +258,6 @@ public enum Action { 
IMPORT_EXTERNAL_GROUP_OF_USERS, SYNC_EXTERNAL_GROUP_OF_USERS, - CREATE_FEDERATION_CLIENT, - EXPOSE_FEDERATION_SERVER, - // RGA RESET_RGA_INDEXES, UPDATE_RGA_INDEX, diff --git a/opencga-core/src/main/java/org/opencb/opencga/core/models/federation/FederationClientParams.java b/opencga-core/src/main/java/org/opencb/opencga/core/models/federation/FederationClientParams.java index 9ab0f0a317..459079178d 100644 --- a/opencga-core/src/main/java/org/opencb/opencga/core/models/federation/FederationClientParams.java +++ b/opencga-core/src/main/java/org/opencb/opencga/core/models/federation/FederationClientParams.java @@ -33,14 +33,14 @@ public class FederationClientParams { @DataField(id = "token", description = FieldConstants.FEDERATION_CLIENT_TOKEN_DESCRIPTION) private String token; - @DataField(id = "secretKey", description = FieldConstants.FEDERATION_CLIENT_SECRET_KEY_DESCRIPTION) - private String secretKey; + @DataField(id = "securityKey", description = FieldConstants.FEDERATION_CLIENT_SECURITY_KEY_DESCRIPTION) + private String securityKey; public FederationClientParams() { } public FederationClientParams(String id, String description, String version, String email, String url, String organizationId, - String userId, String password, String token, String secretKey) { + String userId, String password, String token, String securityKey) { this.id = id; this.description = description; this.version = version; @@ -50,7 +50,7 @@ public FederationClientParams(String id, String description, String version, Str this.userId = userId; this.password = password; this.token = token; - this.secretKey = secretKey; + this.securityKey = securityKey; } @Override @@ -64,7 +64,7 @@ public String toString() { sb.append(", organizationId='").append(organizationId).append('\''); sb.append(", userId='").append(userId).append('\''); sb.append(", password='").append("xxxxxxxx").append('\''); - sb.append(", secretKey='").append("xxxxxxxx").append('\''); + sb.append(", 
securityKey='").append("xxxxxxxx").append('\''); sb.append('}'); return sb.toString(); } @@ -141,12 +141,12 @@ public FederationClientParams setPassword(String password) { return this; } - public String getSecretKey() { - return secretKey; + public String getSecurityKey() { + return securityKey; } - public FederationClientParams setSecretKey(String secretKey) { - this.secretKey = secretKey; + public FederationClientParams setSecurityKey(String securityKey) { + this.securityKey = securityKey; return this; } diff --git a/opencga-core/src/main/java/org/opencb/opencga/core/models/federation/FederationClientParamsMixin.java b/opencga-core/src/main/java/org/opencb/opencga/core/models/federation/FederationClientParamsMixin.java index 24df5661fe..ecf0be95ca 100644 --- a/opencga-core/src/main/java/org/opencb/opencga/core/models/federation/FederationClientParamsMixin.java +++ b/opencga-core/src/main/java/org/opencb/opencga/core/models/federation/FederationClientParamsMixin.java @@ -2,6 +2,6 @@ import com.fasterxml.jackson.annotation.JsonIgnoreProperties; -@JsonIgnoreProperties({"password", "secretKey", "token"}) +@JsonIgnoreProperties({"password", "securityKey", "token"}) public class FederationClientParamsMixin { } diff --git a/opencga-core/src/main/java/org/opencb/opencga/core/models/federation/FederationServerParams.java b/opencga-core/src/main/java/org/opencb/opencga/core/models/federation/FederationServerParams.java index e9e657460b..59860fd729 100644 --- a/opencga-core/src/main/java/org/opencb/opencga/core/models/federation/FederationServerParams.java +++ b/opencga-core/src/main/java/org/opencb/opencga/core/models/federation/FederationServerParams.java @@ -21,24 +21,19 @@ public class FederationServerParams { @DataField(id = "active", description = FieldConstants.FEDERATION_SERVER_ACTIVE_DESCRIPTION) private boolean active; - @DataField(id = "expirationTime", description = FieldConstants.FEDERATION_SERVER_EXPIRATION_TIME_DESCRIPTION) - private String expirationTime; - 
- @DataField(id = "secretKey", description = FieldConstants.FEDERATION_SERVER_SECRET_KEY_DESCRIPTION) - private String secretKey; + @DataField(id = "securityKey", description = FieldConstants.FEDERATION_SERVER_SECURITY_KEY_DESCRIPTION) + private String securityKey; public FederationServerParams() { } - public FederationServerParams(String id, String description, String email, String userId, boolean active, String expirationTime, - String secretKey) { + public FederationServerParams(String id, String description, String email, String userId, boolean active, String securityKey) { this.id = id; this.description = description; this.email = email; this.userId = userId; this.active = active; - this.expirationTime = expirationTime; - this.secretKey = secretKey; + this.securityKey = securityKey; } @Override @@ -49,8 +44,7 @@ public String toString() { sb.append(", email='").append(email).append('\''); sb.append(", userId='").append(userId).append('\''); sb.append(", active=").append(active); - sb.append(", expirationTime='").append(expirationTime).append('\''); - sb.append(", secretKey='").append("xxxxxxxx").append('\''); + sb.append(", securityKey='").append("xxxxxxxx").append('\''); sb.append('}'); return sb.toString(); } @@ -100,21 +94,12 @@ public FederationServerParams setActive(boolean active) { return this; } - public String getExpirationTime() { - return expirationTime; - } - - public FederationServerParams setExpirationTime(String expirationTime) { - this.expirationTime = expirationTime; - return this; - } - - public String getSecretKey() { - return secretKey; + public String getSecurityKey() { + return securityKey; } - public FederationServerParams setSecretKey(String secretKey) { - this.secretKey = secretKey; + public FederationServerParams setSecurityKey(String securityKey) { + this.securityKey = securityKey; return this; } } diff --git a/opencga-core/src/main/java/org/opencb/opencga/core/models/federation/FederationServerParamsMixin.java 
b/opencga-core/src/main/java/org/opencb/opencga/core/models/federation/FederationServerParamsMixin.java index fb593eb917..76d918a416 100644 --- a/opencga-core/src/main/java/org/opencb/opencga/core/models/federation/FederationServerParamsMixin.java +++ b/opencga-core/src/main/java/org/opencb/opencga/core/models/federation/FederationServerParamsMixin.java @@ -2,6 +2,6 @@ import com.fasterxml.jackson.annotation.JsonIgnoreProperties; -@JsonIgnoreProperties({"secretKey"}) +@JsonIgnoreProperties({"securityKey"}) public class FederationServerParamsMixin { } From a2dc0781239b685ffea318671438e04cb7227ae2 Mon Sep 17 00:00:00 2001 From: pfurio Date: Mon, 20 Jan 2025 16:29:52 +0100 Subject: [PATCH 101/122] catalog: check federated users can't access fed studies, #TASK-7192 --- .../catalog/db/api/ProjectDBAdaptor.java | 1 + .../catalog/db/api/StudyDBAdaptor.java | 2 + .../mongodb/OrganizationMongoDBAdaptor.java | 11 +++ .../ProjectCatalogMongoDBIterator.java | 54 ++++++++++++ .../StudyCatalogMongoDBIterator.java | 88 ++++++++++++++----- .../catalog/managers/AbstractManager.java | 21 +++++ .../catalog/managers/CohortManager.java | 9 +- .../catalog/managers/FamilyManager.java | 9 +- .../opencga/catalog/managers/FileManager.java | 9 +- .../catalog/managers/IndividualManager.java | 9 +- .../opencga/catalog/managers/JobManager.java | 9 +- .../catalog/managers/OrganizationManager.java | 20 ++++- .../catalog/managers/PanelManager.java | 9 +- .../catalog/managers/SampleManager.java | 9 +- .../catalog/managers/StudyManager.java | 30 ++++++- 15 files changed, 258 insertions(+), 32 deletions(-) diff --git a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/api/ProjectDBAdaptor.java b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/api/ProjectDBAdaptor.java index e8107017ef..dc90249809 100644 --- a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/api/ProjectDBAdaptor.java +++ 
b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/api/ProjectDBAdaptor.java @@ -48,6 +48,7 @@ enum QueryParams implements QueryParam { MODIFICATION_DATE("modificationDate", DATE, ""), DESCRIPTION("description", TEXT_ARRAY, ""), ORGANIZATION("organization", TEXT, ""), + FEDERATION("federation", OBJECT, ""), CELLBASE("cellbase", OBJECT, ""), ORGANISM("organism", TEXT_ARRAY, ""), ORGANISM_SCIENTIFIC_NAME("organism.scientificName", TEXT, ""), diff --git a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/api/StudyDBAdaptor.java b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/api/StudyDBAdaptor.java index ff925097f1..3d2e0e95b3 100644 --- a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/api/StudyDBAdaptor.java +++ b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/api/StudyDBAdaptor.java @@ -420,12 +420,14 @@ enum QueryParams implements QueryParam { MODIFICATION_DATE("modificationDate", DATE, ""), DESCRIPTION("description", TEXT, ""), TYPE("type", OBJECT, ""), + FEDERATION("federation", OBJECT, ""), SOURCES("sources", TEXT_ARRAY, ""), NOTES("notes", OBJECT, ""), STATUS("status", TEXT_ARRAY, ""), STATUS_ID("status.id", TEXT, ""), STATUS_DATE("status.date", TEXT, ""), STATUS_DESCRIPTION("status.description", TEXT, ""), + INTERNAL("internal", OBJECT, ""), INTERNAL_FEDERATED("internal.federated", BOOLEAN, ""), INTERNAL_STATUS("internal.status", TEXT_ARRAY, ""), INTERNAL_STATUS_ID("internal.status.id", TEXT, ""), diff --git a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/mongodb/OrganizationMongoDBAdaptor.java b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/mongodb/OrganizationMongoDBAdaptor.java index fe3d4da6fd..2f17c1101d 100644 --- a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/mongodb/OrganizationMongoDBAdaptor.java +++ b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/mongodb/OrganizationMongoDBAdaptor.java @@ -119,6 +119,12 @@ OpenCGAResult 
get(ClientSession clientSession, String user, QueryO } } + public OpenCGAResult nativeGet(ClientSession clientSession, String user, QueryOptions options) throws CatalogDBException { + long startTime = startQuery(); + try (DBIterator dbIterator = nativeIterator(clientSession, user, options)) { + return endQuery(startTime, dbIterator); + } + } @Override public OpenCGAResult update(String organizationId, ObjectMap parameters, QueryOptions queryOptions) @@ -495,6 +501,11 @@ public DBIterator iterator(ClientSession clientSession, String use return new OrganizationCatalogMongoDBIterator<>(mongoCursor, clientSession, organizationConverter, dbAdaptorFactory, options, user); } + public DBIterator nativeIterator(ClientSession clientSession, String user, QueryOptions options) throws CatalogDBException { + MongoDBIterator mongoCursor = getMongoCursor(clientSession, options); + return new OrganizationCatalogMongoDBIterator<>(mongoCursor, clientSession, null, dbAdaptorFactory, options, user); + } + private MongoDBIterator getMongoCursor(ClientSession clientSession, QueryOptions options) { QueryOptions qOptions; if (options != null) { diff --git a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/mongodb/iterators/ProjectCatalogMongoDBIterator.java b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/mongodb/iterators/ProjectCatalogMongoDBIterator.java index 8cfde189a0..44a4971093 100644 --- a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/mongodb/iterators/ProjectCatalogMongoDBIterator.java +++ b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/mongodb/iterators/ProjectCatalogMongoDBIterator.java @@ -7,9 +7,11 @@ import org.opencb.commons.datastore.core.QueryOptions; import org.opencb.commons.datastore.mongodb.GenericDocumentComplexConverter; import org.opencb.commons.datastore.mongodb.MongoDBIterator; +import org.opencb.opencga.catalog.db.api.OrganizationDBAdaptor; import org.opencb.opencga.catalog.db.api.ProjectDBAdaptor; import 
org.opencb.opencga.catalog.db.api.StudyDBAdaptor; import org.opencb.opencga.catalog.db.mongodb.MongoDBAdaptor; +import org.opencb.opencga.catalog.db.mongodb.OrganizationMongoDBAdaptor; import org.opencb.opencga.catalog.db.mongodb.OrganizationMongoDBAdaptorFactory; import org.opencb.opencga.catalog.db.mongodb.StudyMongoDBAdaptor; import org.opencb.opencga.catalog.exceptions.CatalogAuthorizationException; @@ -26,10 +28,12 @@ public class ProjectCatalogMongoDBIterator extends CatalogMongoDBIterator private final QueryOptions options; private boolean includeStudyInfo; + private final OrganizationMongoDBAdaptor organizationMongoDBAdaptor; private final StudyMongoDBAdaptor studyDBAdaptor; private QueryOptions studyQueryOptions; private final Queue projectListBuffer; + private List federationClients; private final Logger logger; @@ -42,6 +46,7 @@ public ProjectCatalogMongoDBIterator(MongoDBIterator mongoCursor, Clie QueryOptions options, String user) { super(mongoCursor, clientSession, converter, null); + this.organizationMongoDBAdaptor = dbAdaptorFactory.getCatalogOrganizationDBAdaptor(); this.studyDBAdaptor = dbAdaptorFactory.getCatalogStudyDBAdaptor(); this.options = options != null ? new QueryOptions(options) : new QueryOptions(); @@ -52,6 +57,7 @@ public ProjectCatalogMongoDBIterator(MongoDBIterator mongoCursor, Clie this.user = user; + this.federationClients = null; this.projectListBuffer = new LinkedList<>(); this.logger = LoggerFactory.getLogger(ProjectCatalogMongoDBIterator.class); } @@ -97,6 +103,7 @@ private void fetchNextBatch() { } } + addFederationRef(); if (!projectUidSet.isEmpty()) { OpenCGAResult studyResult; Query studyQuery = new Query(StudyDBAdaptor.QueryParams.PROJECT_UID.key(), new ArrayList<>(projectUidSet)); @@ -132,6 +139,53 @@ private void fetchNextBatch() { } } + private List getFederationClients() { + if (federationClients == null) { + // The study is federated. 
We need to fetch the information from the corresponding collection + QueryOptions orgOptions = new QueryOptions(QueryOptions.INCLUDE, OrganizationDBAdaptor.QueryParams.FEDERATION_CLIENTS.key()); + try { + Document organization = organizationMongoDBAdaptor.nativeGet(clientSession, user, orgOptions).first(); + Document orgFederation = organization.get(OrganizationDBAdaptor.QueryParams.FEDERATION.key(), Document.class); + if (orgFederation == null) { + logger.warn("Federation information could not be filled in. Organization was not found."); + // Remove null so we don't try to fetch the information again + federationClients = Collections.emptyList(); + return federationClients; + } + federationClients = orgFederation.getList("clients", Document.class); + if (federationClients == null) { + logger.warn("Federation information could not be filled in. Federation clients were not found."); + // Remove null so we don't try to fetch the information again + federationClients = Collections.emptyList(); + } + } catch (CatalogDBException e) { + logger.warn("Could not obtain the Organization information", e); + } + } + return federationClients; + } + + private void addFederationRef() { + projectListBuffer.forEach(project -> { + Document federation = project.get(ProjectDBAdaptor.QueryParams.FEDERATION.key(), Document.class); + if (federation == null) { + return; + } + String federationId = federation.getString("id"); + if (StringUtils.isEmpty(federationId)) { + return; + } + List federationClients = getFederationClients(); + for (Document client : federationClients) { + String clientId = client.getString("id"); + if (federationId.equals(clientId)) { + project.put(ProjectDBAdaptor.QueryParams.FEDERATION.key(), client); + return; + } + } + }); + } + private boolean includeStudyInfo() { if (options == null) { return true; diff --git a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/mongodb/iterators/StudyCatalogMongoDBIterator.java 
b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/mongodb/iterators/StudyCatalogMongoDBIterator.java index 957fb94a25..adf5a24e81 100644 --- a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/mongodb/iterators/StudyCatalogMongoDBIterator.java +++ b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/mongodb/iterators/StudyCatalogMongoDBIterator.java @@ -24,9 +24,11 @@ import org.opencb.commons.datastore.mongodb.GenericDocumentComplexConverter; import org.opencb.commons.datastore.mongodb.MongoDBIterator; import org.opencb.opencga.catalog.db.api.NoteDBAdaptor; +import org.opencb.opencga.catalog.db.api.OrganizationDBAdaptor; import org.opencb.opencga.catalog.db.api.StudyDBAdaptor; import org.opencb.opencga.catalog.db.mongodb.AuthorizationMongoDBUtils; import org.opencb.opencga.catalog.db.mongodb.NoteMongoDBAdaptor; +import org.opencb.opencga.catalog.db.mongodb.OrganizationMongoDBAdaptor; import org.opencb.opencga.catalog.db.mongodb.OrganizationMongoDBAdaptorFactory; import org.opencb.opencga.catalog.exceptions.CatalogDBException; import org.opencb.opencga.catalog.utils.ParamUtils; @@ -48,6 +50,7 @@ public class StudyCatalogMongoDBIterator extends CatalogMongoDBIterator { private final Function studyFilter; private final String user; private final QueryOptions options; + private final OrganizationMongoDBAdaptor organizationMongoDBAdaptor; private final NoteMongoDBAdaptor noteMongoDBAdaptor; private Document previousDocument; @@ -62,6 +65,7 @@ public StudyCatalogMongoDBIterator(MongoDBIterator mongoCursor, Client this.studyFilter = studyFilter; this.options = ParamUtils.defaultObject(options, QueryOptions::new); this.user = user; + this.organizationMongoDBAdaptor = dbAdaptorFactory.getCatalogOrganizationDBAdaptor(); this.noteMongoDBAdaptor = dbAdaptorFactory.getCatalogNotesDBAdaptor(); this.logger = LoggerFactory.getLogger(StudyCatalogMongoDBIterator.class); @@ -84,26 +88,8 @@ private void getNextStudy() { } if (previousDocument != null) { 
- List noteField = Collections.singletonList(StudyDBAdaptor.QueryParams.NOTES.key()); - if (includeField(options, noteField)) { - Query query = new Query() - .append(NoteDBAdaptor.QueryParams.STUDY_UID.key(), previousDocument.get(StudyDBAdaptor.QueryParams.UID.key())) - .append(NoteDBAdaptor.QueryParams.SCOPE.key(), Note.Scope.STUDY.name()); - if (!isAtLeastStudyAdmin(previousDocument)) { - query.append(NoteDBAdaptor.QueryParams.VISIBILITY.key(), Note.Visibility.PUBLIC.name()); - } - - QueryOptions noteOptions = createInnerQueryOptionsForVersionedEntity(options, - StudyDBAdaptor.QueryParams.NOTES.key(), true); - noteOptions.put(QueryOptions.LIMIT, 1000); - try { - OpenCGAResult result = noteMongoDBAdaptor.nativeGet(clientSession, query, noteOptions); - previousDocument.put(StudyDBAdaptor.QueryParams.NOTES.key(), result.getResults()); - } catch (CatalogDBException e) { - logger.warn("Could not obtain the organization notes", e); - } - } - + addStudyNotes(); + addFederationRef(); addAclInformation(previousDocument, options); } } else { @@ -111,6 +97,68 @@ private void getNextStudy() { } } + private void addStudyNotes() { + List noteField = Collections.singletonList(StudyDBAdaptor.QueryParams.NOTES.key()); + if (includeField(options, noteField)) { + Query query = new Query() + .append(NoteDBAdaptor.QueryParams.STUDY_UID.key(), previousDocument.get(StudyDBAdaptor.QueryParams.UID.key())) + .append(NoteDBAdaptor.QueryParams.SCOPE.key(), Note.Scope.STUDY.name()); + if (!isAtLeastStudyAdmin(previousDocument)) { + query.append(NoteDBAdaptor.QueryParams.VISIBILITY.key(), Note.Visibility.PUBLIC.name()); + } + + QueryOptions noteOptions = createInnerQueryOptionsForVersionedEntity(options, + StudyDBAdaptor.QueryParams.NOTES.key(), true); + noteOptions.put(QueryOptions.LIMIT, 1000); + try { + OpenCGAResult result = noteMongoDBAdaptor.nativeGet(clientSession, query, noteOptions); + previousDocument.put(StudyDBAdaptor.QueryParams.NOTES.key(), result.getResults()); + } catch 
(CatalogDBException e) { + logger.warn("Could not obtain the organization notes", e); + } + } + } + + private void addFederationRef() { + Document federation = previousDocument.get(StudyDBAdaptor.QueryParams.FEDERATION.key(), Document.class); + if (federation == null) { + return; + } + String federationId = federation.getString("id"); + if (StringUtils.isEmpty(federationId)) { + return; + } + + // The study is federated. We need to fetch the information from the corresponding collection + QueryOptions orgOptions = new QueryOptions(QueryOptions.INCLUDE, OrganizationDBAdaptor.QueryParams.FEDERATION_CLIENTS.key()); + try { + Document organization = organizationMongoDBAdaptor.nativeGet(clientSession, user, orgOptions).first(); + Document orgFederation = organization.get(OrganizationDBAdaptor.QueryParams.FEDERATION.key(), Document.class); + if (orgFederation == null) { + logger.warn("Study {} is federated but federation information could not be filled in. Organization was not found.", + previousDocument.getString("fqn")); + return; + } + List clients = orgFederation.getList("clients", Document.class); + if (clients == null) { + logger.warn("Study {} is federated but federation information could not be filled in. Federation clients were not found.", + previousDocument.getString("fqn")); + return; + } + for (Document client : clients) { + String clientId = client.getString("id"); + if (federationId.equals(clientId)) { + previousDocument.put(StudyDBAdaptor.QueryParams.FEDERATION.key(), client); + return; + } + } + logger.warn("Study {} is federated but federation information could not be filled in. 
Federation information was not found.", + previousDocument.getString("fqn")); + } catch (CatalogDBException e) { + logger.warn("Could not obtain the Organization information", e); + } + } + @Override public boolean hasNext() { return this.previousDocument != null; diff --git a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/AbstractManager.java b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/AbstractManager.java index da7566340a..e881c7a308 100644 --- a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/AbstractManager.java +++ b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/AbstractManager.java @@ -28,10 +28,12 @@ import org.opencb.opencga.catalog.exceptions.CatalogException; import org.opencb.opencga.catalog.exceptions.CatalogParameterException; import org.opencb.opencga.catalog.models.InternalGetDataResult; +import org.opencb.opencga.catalog.utils.ParamUtils; import org.opencb.opencga.core.api.ParamConstants; import org.opencb.opencga.core.config.Configuration; import org.opencb.opencga.core.models.IPrivateStudyUid; import org.opencb.opencga.core.models.study.Group; +import org.opencb.opencga.core.models.user.User; import org.opencb.opencga.core.response.OpenCGAResult; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -281,6 +283,25 @@ List getMissingFields(List original return differences; } + protected void checkIsNotAFederatedUser(String organizationId, List users) throws CatalogException { + if (CollectionUtils.isNotEmpty(users)) { + Query query = new Query(UserDBAdaptor.QueryParams.ID.key(), users); + OpenCGAResult result = catalogDBAdaptorFactory.getCatalogUserDBAdaptor(organizationId).get(query, + UserManager.INCLUDE_INTERNAL); + if (result.getNumResults() != users.size()) { + throw new CatalogException("Some users were not found."); + } + for (User user : result.getResults()) { + ParamUtils.checkObj(user.getInternal(), "internal"); + 
ParamUtils.checkObj(user.getInternal().getAccount(), "internal.account"); + ParamUtils.checkObj(user.getInternal().getAccount().getAuthentication(), "internal.account.authentication"); + if (user.getInternal().getAccount().getAuthentication().isFederation()) { + throw new CatalogException("User '" + user.getId() + "' is a federated user."); + } + } + } + } + /** * Checks if the list of members are all valid. *

diff --git a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/CohortManager.java b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/CohortManager.java index b7971c7610..de37fafe61 100644 --- a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/CohortManager.java +++ b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/CohortManager.java @@ -1349,7 +1349,7 @@ public OpenCGAResult> updateAcl(String studyId, CatalogFqn studyFqn = CatalogFqn.extractFqnFromStudy(studyId, tokenPayload); String organizationId = studyFqn.getOrganizationId(); String userId = tokenPayload.getUserId(organizationId); - Study study = studyManager.resolveId(studyId, StudyManager.INCLUDE_STUDY_UID, userId, organizationId); + Study study = studyManager.resolveId(studyId, StudyManager.INCLUDE_CONFIGURATION, userId, organizationId); ObjectMap auditParams = new ObjectMap() .append("studyId", studyId) @@ -1391,6 +1391,13 @@ public OpenCGAResult> updateAcl(String studyId, } authorizationManager.checkNotAssigningPermissionsToAdminsGroup(members); checkMembers(organizationId, study.getUid(), members); + if (study.getInternal().isFederated()) { + try { + checkIsNotAFederatedUser(organizationId, members); + } catch (CatalogException e) { + throw new CatalogException("Cannot provide access to federated users to a federated study.", e); + } + } List cohortUids = cohortList.stream().map(Cohort::getUid).collect(Collectors.toList()); diff --git a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/FamilyManager.java b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/FamilyManager.java index 0cfa9018ef..48465e2028 100644 --- a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/FamilyManager.java +++ b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/FamilyManager.java @@ -1235,7 +1235,7 @@ public OpenCGAResult> updateAcl(String studyId, CatalogFqn studyFqn = 
CatalogFqn.extractFqnFromStudy(studyId, tokenPayload); String organizationId = studyFqn.getOrganizationId(); String userId = tokenPayload.getUserId(organizationId); - Study study = studyManager.resolveId(studyId, userId, organizationId); + Study study = studyManager.resolveId(studyFqn, QueryOptions.empty(), tokenPayload); ObjectMap auditParams = new ObjectMap() .append("studyId", studyId) @@ -1301,6 +1301,13 @@ public OpenCGAResult> updateAcl(String studyId, } authorizationManager.checkNotAssigningPermissionsToAdminsGroup(members); checkMembers(organizationId, study.getUid(), members); + if (study.getInternal().isFederated()) { + try { + checkIsNotAFederatedUser(organizationId, members); + } catch (CatalogException e) { + throw new CatalogException("Cannot provide access to federated users to a federated study.", e); + } + } List aclParamsList = new LinkedList<>(); List familyUids = familyList.stream().map(Family::getUid).collect(Collectors.toList()); diff --git a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/FileManager.java b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/FileManager.java index fcbb16c4e9..9204c45932 100644 --- a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/FileManager.java +++ b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/FileManager.java @@ -2844,7 +2844,7 @@ public OpenCGAResult> updateAcl(String studyId, Li CatalogFqn studyFqn = CatalogFqn.extractFqnFromStudy(studyId, tokenPayload); String organizationId = studyFqn.getOrganizationId(); String userId = tokenPayload.getUserId(organizationId); - Study study = studyManager.resolveId(studyId, userId, organizationId); + Study study = studyManager.resolveId(studyFqn, QueryOptions.empty(), tokenPayload); ObjectMap auditParams = new ObjectMap() .append("studyId", studyId) @@ -2907,6 +2907,13 @@ public OpenCGAResult> updateAcl(String studyId, Li } authorizationManager.checkNotAssigningPermissionsToAdminsGroup(members); 
checkMembers(organizationId, study.getUid(), members); + if (study.getInternal().isFederated()) { + try { + checkIsNotAFederatedUser(organizationId, members); + } catch (CatalogException e) { + throw new CatalogException("Cannot provide access to federated users to a federated study.", e); + } + } List fileUids = extendedFileList.stream().map(File::getUid).collect(Collectors.toList()); AuthorizationManager.CatalogAclParams catalogAclParams = new AuthorizationManager.CatalogAclParams(fileUids, permissions, diff --git a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/IndividualManager.java b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/IndividualManager.java index cfe73a2788..d5ce39c56f 100644 --- a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/IndividualManager.java +++ b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/IndividualManager.java @@ -1333,7 +1333,7 @@ public OpenCGAResult> updateAcl(String study CatalogFqn studyFqn = CatalogFqn.extractFqnFromStudy(studyId, tokenPayload); String organizationId = studyFqn.getOrganizationId(); String userId = tokenPayload.getUserId(organizationId); - Study study = studyManager.resolveId(studyId, StudyManager.INCLUDE_STUDY_UID, userId, organizationId); + Study study = studyManager.resolveId(studyId, StudyManager.INCLUDE_CONFIGURATION, userId, organizationId); ObjectMap auditParams = new ObjectMap() .append("studyId", studyId) @@ -1392,6 +1392,13 @@ public OpenCGAResult> updateAcl(String study } authorizationManager.checkNotAssigningPermissionsToAdminsGroup(members); checkMembers(organizationId, study.getUid(), members); + if (study.getInternal().isFederated()) { + try { + checkIsNotAFederatedUser(organizationId, members); + } catch (CatalogException e) { + throw new CatalogException("Cannot provide access to federated users to a federated study.", e); + } + } List individualUids = 
individualList.stream().map(Individual::getUid).collect(Collectors.toList()); List aclParamsList = new LinkedList<>(); diff --git a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/JobManager.java b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/JobManager.java index fe7ffc18e4..5a3fb03ac7 100644 --- a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/JobManager.java +++ b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/JobManager.java @@ -1914,7 +1914,7 @@ public OpenCGAResult> updateAcl(String studyId, Lis CatalogFqn studyFqn = CatalogFqn.extractFqnFromStudy(studyId, tokenPayload); String organizationId = studyFqn.getOrganizationId(); String userId = tokenPayload.getUserId(organizationId); - Study study = studyManager.resolveId(studyId, userId, organizationId); + Study study = studyManager.resolveId(studyFqn, StudyManager.INCLUDE_CONFIGURATION, tokenPayload); ObjectMap auditParams = new ObjectMap() .append("studyId", studyId) @@ -1955,6 +1955,13 @@ public OpenCGAResult> updateAcl(String studyId, Lis } authorizationManager.checkNotAssigningPermissionsToAdminsGroup(members); checkMembers(organizationId, study.getUid(), members); + if (study.getInternal().isFederated()) { + try { + checkIsNotAFederatedUser(organizationId, members); + } catch (CatalogException e) { + throw new CatalogException("Cannot provide access to federated users to a federated study.", e); + } + } List jobUids = jobList.stream().map(Job::getUid).collect(Collectors.toList()); AuthorizationManager.CatalogAclParams catalogAclParams = new AuthorizationManager.CatalogAclParams(jobUids, permissions, diff --git a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/OrganizationManager.java b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/OrganizationManager.java index 1d9ade5d1e..de56778866 100644 --- a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/OrganizationManager.java +++ 
b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/OrganizationManager.java @@ -19,11 +19,11 @@ import org.opencb.opencga.catalog.exceptions.CatalogParameterException; import org.opencb.opencga.catalog.io.CatalogIOManager; import org.opencb.opencga.catalog.utils.Constants; -import org.opencb.opencga.core.common.JwtUtils; import org.opencb.opencga.catalog.utils.ParamUtils; import org.opencb.opencga.catalog.utils.UuidUtils; import org.opencb.opencga.core.api.ParamConstants; import org.opencb.opencga.core.common.GitRepositoryState; +import org.opencb.opencga.core.common.JwtUtils; import org.opencb.opencga.core.common.TimeUtils; import org.opencb.opencga.core.config.AuthenticationOrigin; import org.opencb.opencga.core.config.Configuration; @@ -255,6 +255,7 @@ public OpenCGAResult update(String organizationId, OrganizationUpd ParamUtils.checkObj(updateParams, "OrganizationUpdateParams"); if (StringUtils.isNotEmpty(updateParams.getOwner()) || CollectionUtils.isNotEmpty(updateParams.getAdmins())) { authorizationManager.checkIsAtLeastOrganizationOwner(organizationId, userId); + checkIsNotAFederatedUser(organizationId, updateParams); } else { authorizationManager.checkIsAtLeastOrganizationOwnerOrAdmin(organizationId, userId); } @@ -293,6 +294,23 @@ public OpenCGAResult update(String organizationId, OrganizationUpd return result; } + private void checkIsNotAFederatedUser(String organizationId, OrganizationUpdateParams updateParams) throws CatalogException { + Set users = new HashSet<>(); + if (StringUtils.isNotEmpty(updateParams.getOwner())) { + users.add(updateParams.getOwner()); + } + if (CollectionUtils.isNotEmpty(updateParams.getAdmins())) { + users.addAll(updateParams.getAdmins()); + } + if (CollectionUtils.isNotEmpty(users)) { + try { + checkIsNotAFederatedUser(organizationId, new ArrayList<>(users)); + } catch (CatalogException e) { + throw new CatalogException("Cannot set a federated user as owner or admin of an organization.", e); + } + } + } + 
public OpenCGAResult updateUser(@Nullable String organizationId, String userId, OrganizationUserUpdateParams updateParams, QueryOptions options, String token) throws CatalogException { JwtPayload tokenPayload = catalogManager.getUserManager().validateToken(token); diff --git a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/PanelManager.java b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/PanelManager.java index fddf86b32c..5ebc2b79de 100644 --- a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/PanelManager.java +++ b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/PanelManager.java @@ -1008,7 +1008,7 @@ public OpenCGAResult> updateAcl(String studyId, L CatalogFqn studyFqn = CatalogFqn.extractFqnFromStudy(studyId, tokenPayload); String organizationId = studyFqn.getOrganizationId(); String userId = tokenPayload.getUserId(organizationId); - Study study = studyManager.resolveId(studyId, userId, organizationId); + Study study = studyManager.resolveId(studyFqn, StudyManager.INCLUDE_CONFIGURATION, tokenPayload); ObjectMap auditParams = new ObjectMap() .append("studyId", studyId) @@ -1049,6 +1049,13 @@ public OpenCGAResult> updateAcl(String studyId, L } authorizationManager.checkNotAssigningPermissionsToAdminsGroup(members); checkMembers(organizationId, study.getUid(), members); + if (study.getInternal().isFederated()) { + try { + checkIsNotAFederatedUser(organizationId, members); + } catch (CatalogException e) { + throw new CatalogException("Cannot provide access to federated users to a federated study.", e); + } + } List panelUids = panelDataResult.getResults().stream().map(Panel::getUid).collect(Collectors.toList()); AuthorizationManager.CatalogAclParams catalogAclParams = new AuthorizationManager.CatalogAclParams(panelUids, permissions, diff --git a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/SampleManager.java 
b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/SampleManager.java index 4897680662..edb020652d 100644 --- a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/SampleManager.java +++ b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/SampleManager.java @@ -1391,7 +1391,7 @@ public OpenCGAResult> updateAcl(String studyStr, CatalogFqn studyFqn = CatalogFqn.extractFqnFromStudy(studyStr, tokenPayload); String organizationId = studyFqn.getOrganizationId(); String userId = tokenPayload.getUserId(organizationId); - Study study = studyManager.resolveId(studyStr, userId, organizationId); + Study study = studyManager.resolveId(studyFqn, StudyManager.INCLUDE_CONFIGURATION, tokenPayload); ObjectMap auditParams = new ObjectMap() .append("studyId", studyStr) @@ -1503,6 +1503,13 @@ public OpenCGAResult> updateAcl(String studyStr, } checkMembers(organizationId, study.getUid(), members); authorizationManager.checkNotAssigningPermissionsToAdminsGroup(members); + if (study.getInternal().isFederated()) { + try { + checkIsNotAFederatedUser(organizationId, members); + } catch (CatalogException e) { + throw new CatalogException("Cannot provide access to federated users to a federated study.", e); + } + } } catch (CatalogException e) { if (sampleStringList != null) { for (String sampleId : sampleStringList) { diff --git a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/StudyManager.java b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/StudyManager.java index 9f45f18ded..488c390d9d 100644 --- a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/StudyManager.java +++ b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/StudyManager.java @@ -111,9 +111,9 @@ public class StudyManager extends AbstractManager { static final QueryOptions INCLUDE_VARIABLE_SET = keepFieldInQueryOptions(INCLUDE_STUDY_IDS, StudyDBAdaptor.QueryParams.VARIABLE_SET.key()); static final QueryOptions 
INCLUDE_CONFIGURATION = keepFieldInQueryOptions(INCLUDE_STUDY_IDS, - StudyDBAdaptor.QueryParams.INTERNAL_CONFIGURATION.key()); + StudyDBAdaptor.QueryParams.INTERNAL.key()); static final QueryOptions INCLUDE_VARIABLE_SET_AND_CONFIGURATION = keepFieldsInQueryOptions(INCLUDE_STUDY_IDS, - Arrays.asList(StudyDBAdaptor.QueryParams.INTERNAL_CONFIGURATION.key(), StudyDBAdaptor.QueryParams.VARIABLE_SET.key())); + Arrays.asList(StudyDBAdaptor.QueryParams.INTERNAL.key(), StudyDBAdaptor.QueryParams.VARIABLE_SET.key())); protected Logger logger; @@ -1014,6 +1014,13 @@ public OpenCGAResult createGroup(String studyId, Group group, String toke } authorizationManager.checkCreateDeleteGroupPermissions(organizationId, study.getUid(), userId, group.getId()); + if (study.getInternal().isFederated()) { + try { + checkIsNotAFederatedUser(organizationId, group.getUserIds()); + } catch (CatalogException e) { + throw new CatalogException("Cannot provide access to federated users to a federated study.", e); + } + } // Check group exists if (existsGroup(organizationId, study.getUid(), group.getId())) { @@ -1193,6 +1200,14 @@ public OpenCGAResult updateGroup(String studyId, String groupId, ParamUti if (tmpUsers.size() > 0) { getUserDBAdaptor(organizationId).checkIds(tmpUsers); } + + if (study.getInternal().isFederated()) { + try { + checkIsNotAFederatedUser(organizationId, updateParams.getUsers()); + } catch (CatalogException e) { + throw new CatalogException("Cannot provide access to federated users to a federated study.", e); + } + } } else { updateParams.setUsers(Collections.emptyList()); } @@ -1765,6 +1780,13 @@ public OpenCGAResult> updateAcl(Strin } authorizationManager.checkNotAssigningPermissionsToAdminsGroup(members); checkMembers(organizationId, study.getUid(), members); + if (study.getInternal().isFederated()) { + try { + checkIsNotAFederatedUser(organizationId, members); + } catch (CatalogException e) { + throw new CatalogException("Cannot provide access to federated users to a 
federated study.", e); + } + } switch (action) { case SET: @@ -2029,7 +2051,7 @@ public OpenCGAResult uploadTemplate(String studyStr, String filename, In throw e; } catch (Exception e) { auditManager.auditCreate(organizationId, userId, Enums.Action.UPLOAD_TEMPLATE, Enums.Resource.STUDY, templateId, "", - study.getId(), study.getUuid(), auditParams, new AuditRecord.Status(AuditRecord.Status.Result.ERROR, + study.getId(), study.getUuid(), auditParams, new AuditRecord.Status(AuditRecord.Status.Result.ERROR, new Error(-1, "template upload", e.getMessage()))); throw e; } finally { @@ -2089,7 +2111,7 @@ public OpenCGAResult deleteTemplate(String studyStr, String templateId, throw e; } catch (Exception e) { auditManager.auditCreate(organizationId, userId, Enums.Action.DELETE_TEMPLATE, Enums.Resource.STUDY, templateId, "", - study.getId(), study.getUuid(), auditParams, new AuditRecord.Status(AuditRecord.Status.Result.ERROR, + study.getId(), study.getUuid(), auditParams, new AuditRecord.Status(AuditRecord.Status.Result.ERROR, new Error(-1, "template delete", e.getMessage()))); throw e; } From 727b48e80146b1216365163d5049e1404890f9fa Mon Sep 17 00:00:00 2001 From: pfurio Date: Tue, 21 Jan 2025 12:12:04 +0100 Subject: [PATCH 102/122] core: make MailUtils class more generic, #TASK-7192 --- .../CatalogAuthenticationManager.java | 10 +- .../opencb/opencga/core/common/MailUtils.java | 103 ++++++++---------- .../core/exceptions/MailException.java | 17 +++ 3 files changed, 63 insertions(+), 67 deletions(-) create mode 100644 opencga-core/src/main/java/org/opencb/opencga/core/exceptions/MailException.java diff --git a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/auth/authentication/CatalogAuthenticationManager.java b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/auth/authentication/CatalogAuthenticationManager.java index 60af601a22..1371b3c700 100644 --- 
a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/auth/authentication/CatalogAuthenticationManager.java +++ b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/auth/authentication/CatalogAuthenticationManager.java @@ -150,14 +150,10 @@ public OpenCGAResult resetPassword(String organizationId, String userId) throws throw new CatalogException("Could not retrieve the user e-mail."); } - String email = user.first().getEmail(); - - String mailUser = this.emailConfig.getUser(); - String mailPassword = this.emailConfig.getPassword(); - String mailHost = this.emailConfig.getHost(); - String mailPort = this.emailConfig.getPort(); try { - MailUtils.sendResetPasswordMail(email, newPassword, mailUser, mailPassword, mailHost, mailPort, userId); + String email = user.first().getEmail(); + String resetMailContent = MailUtils.getResetMailContent(userId, newPassword); + MailUtils.configure(this.emailConfig).sendMail(email, "XetaBase: Password Reset", resetMailContent); result = dbAdaptorFactory.getCatalogUserDBAdaptor(organizationId).resetPassword(userId, email, newPassword); } catch (Exception e) { throw new CatalogException("Email could not be sent.", e); diff --git a/opencga-core/src/main/java/org/opencb/opencga/core/common/MailUtils.java b/opencga-core/src/main/java/org/opencb/opencga/core/common/MailUtils.java index 0a5a1ad4f9..bdda1f4cab 100644 --- a/opencga-core/src/main/java/org/opencb/opencga/core/common/MailUtils.java +++ b/opencga-core/src/main/java/org/opencb/opencga/core/common/MailUtils.java @@ -16,6 +16,8 @@ package org.opencb.opencga.core.common; +import org.opencb.opencga.core.config.Email; +import org.opencb.opencga.core.exceptions.MailException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -25,19 +27,26 @@ import javax.mail.Transport; import javax.mail.internet.InternetAddress; import javax.mail.internet.MimeMessage; -import java.util.Date; import java.util.Properties; public class MailUtils { private static final Logger logger = 
LoggerFactory.getLogger(MailUtils.class); - public static void sendResetPasswordMail(String to, String newPassword, final String mailUser, final String mailPassword, - String mailHost, String mailPort, String userId) throws Exception { + private final Email email; + private MailUtils(Email emailConfig) { + this.email = emailConfig; + } + + public static MailUtils configure(Email emailConfig) { + return new MailUtils(emailConfig); + } + + public void sendMail(String targetMail, String subject, String content) throws MailException { Properties props = new Properties(); - props.put("mail.smtp.host", mailHost); - props.put("mail.smtp.port", mailPort); + props.put("mail.smtp.host", email.getHost()); + props.put("mail.smtp.port", email.getPort()); props.put("mail.smtp.auth", "true"); props.put("mail.smtp.ssl.enable", "true"); props.put("mail.smtp.starttls.enable", "true"); @@ -49,65 +58,39 @@ public static void sendResetPasswordMail(String to, String newPassword, final St Session session = Session.getInstance(props, new javax.mail.Authenticator() { protected PasswordAuthentication getPasswordAuthentication() { - return new PasswordAuthentication(mailUser, mailPassword); + return new PasswordAuthentication(email.getUser(), email.getPassword()); } }); - logger.info("Sending reset password from" + mailUser + " to " + to + " using " + mailHost + ":" + mailPort); - Message message = new MimeMessage(session); - message.setFrom(new InternetAddress(mailUser)); - message.setRecipients(Message.RecipientType.TO, - InternetAddress.parse(to)); - - message.setSubject("XetaBase: Password Reset"); - message.setText(getEmailContent(userId,newPassword)); - Transport.send(message); + logger.info("Sending email from '{}' to '{}' using '{}:{}'", email.getUser(), targetMail, email.getHost(), email.getPort()); + + try { + Message message = new MimeMessage(session); + message.setFrom(new InternetAddress(email.getUser())); + message.setRecipients(Message.RecipientType.TO, + 
InternetAddress.parse(targetMail)); + + message.setSubject(subject); + message.setText(content); + Transport.send(message); + } catch (Exception e) { + throw new MailException("Could not send email.", e); + } } - public static String getEmailContent(String userId, String temporaryPassword) { - StringBuilder sb = new StringBuilder(); - - sb.append("Hi ").append(userId).append(",\n\n"); - sb.append("We confirm that your password has been successfully reset.\n\n"); - sb.append("Please find your new login credentials below:\n\n"); - sb.append("User ID: ").append(userId).append("\n"); - sb.append("Temporary Password: ").append(temporaryPassword).append("\n\n"); - sb.append("For your security, we strongly recommend that you log in using the temporary password provided "); - sb.append("and promptly create a new password that is unique and known only to you. "); - sb.append("You can change your password by accessing \"Your Profile > Change Password\" in your User Profile.\n\n"); - sb.append("If you did not request a password reset, please contact our support team immediately at support@zettagenomics.com.\n\n"); - sb.append("Best regards,\n\n"); - sb.append("ZettaGenomics Support Team \n\n"); - - - - return sb.toString(); + public static String getResetMailContent(String userId, String temporaryPassword) { + return new StringBuilder() + .append("Hi ").append(userId).append(",\n\n") + .append("We confirm that your password has been successfully reset.\n\n") + .append("Please find your new login credentials below:\n\n") + .append("User ID: ").append(userId).append("\n") + .append("Temporary Password: ").append(temporaryPassword).append("\n\n") + .append("For your security, we strongly recommend that you log in using the temporary password provided ") + .append("and promptly create a new password that is unique and known only to you. 
") + .append("You can change your password by accessing \"Your Profile > Change Password\" in your User Profile.\n\n") + .append("If you did not request a password reset, please contact our support team immediately at support@zettagenomics.com.\n\n") + .append("Best regards,\n\n") + .append("ZettaGenomics Support Team \n\n") + .toString(); } - public static void sendMail(String smtpServer, String to, String from, String subject, String body) throws Exception { - - Properties props = System.getProperties(); - // -- Attaching to default Session, or we could start a new one -- - props.put("mail.smtp.host", smtpServer); - Session session = Session.getDefaultInstance(props, null); - // -- Create a new message -- - // Message msg = new javax.mail.Message(session); - Message msg = new MimeMessage(session); - // -- Set the FROM and TO fields -- - msg.setFrom(new InternetAddress(from)); - msg.setRecipients(Message.RecipientType.TO, InternetAddress.parse(to, false)); - // -- We could include CC recipients too -- - // if (cc != null) - // msg.setRecipients(Message.RecipientType.CC - // ,InternetAddress.parse(cc, false)); - // -- Set the subject and body text -- - msg.setSubject(subject); - msg.setText(body); - // -- Set some other header information -- - msg.setHeader("X-Mailer", "LOTONtechEmail"); - msg.setSentDate(new Date()); - // -- Send the message -- - Transport.send(msg); - System.out.println("Message sent OK."); - - } } diff --git a/opencga-core/src/main/java/org/opencb/opencga/core/exceptions/MailException.java b/opencga-core/src/main/java/org/opencb/opencga/core/exceptions/MailException.java new file mode 100644 index 0000000000..60c7b20273 --- /dev/null +++ b/opencga-core/src/main/java/org/opencb/opencga/core/exceptions/MailException.java @@ -0,0 +1,17 @@ +package org.opencb.opencga.core.exceptions; + +public class MailException extends Exception { + + public MailException(String message) { + super(message); + } + + public MailException(String message, Throwable 
cause) { + super(message, cause); + } + + public MailException(Throwable cause) { + super(cause); + } + +} From 0f799ec8a4d6c3527ae3434ccd620be8f823e4e3 Mon Sep 17 00:00:00 2001 From: pfurio Date: Tue, 21 Jan 2025 15:13:42 +0100 Subject: [PATCH 103/122] catalog: fix AuthenticationManager implementation, #TASK-7192 --- .../authentication/AuthenticationManager.java | 94 ++----------------- .../AzureADAuthenticationManager.java | 9 +- .../CatalogAuthenticationManager.java | 18 +--- .../auth/authentication/JwtManager.java | 9 +- .../LDAPAuthenticationManager.java | 30 +----- .../SSOAuthenticationManager.java | 13 +-- .../azure/AuthenticationFactory.java | 19 ++-- .../authentication/JwtSessionManagerTest.java | 4 +- 8 files changed, 27 insertions(+), 169 deletions(-) diff --git a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/auth/authentication/AuthenticationManager.java b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/auth/authentication/AuthenticationManager.java index 27ba16b0f7..054643155c 100644 --- a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/auth/authentication/AuthenticationManager.java +++ b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/auth/authentication/AuthenticationManager.java @@ -79,20 +79,6 @@ Key converStringToKeyObject(String keyString, String jcaAlgorithm) { public abstract AuthenticationResponse authenticate(String organizationId, String userId, String password) throws CatalogAuthenticationException; - /** - * Authenticate the user against the Authentication server. - * - * @param organizationId Organization id. - * @param userId User to authenticate - * @param password Password. - * @param secretKey Secret key to apply to the token. - * @return AuthenticationResponse object. - * @throws CatalogAuthenticationException CatalogAuthenticationException if any of the credentials are wrong or the access is denied - * for any other reason. 
- */ - public abstract AuthenticationResponse authenticate(String organizationId, String userId, String password, String secretKey) - throws CatalogAuthenticationException; - /** * Authenticate the user against the Authentication server. * @@ -114,19 +100,15 @@ public AuthenticationResponse refreshToken(String refreshToken) throws CatalogAu * Validates that the token is valid. * * @param token token that have been assigned to a user. - * @param secretKey secret key to be used for the token validation (may be null). + * @param securityKey key used to fully ensure it corresponds to that user. * @throws CatalogAuthenticationException when the token does not correspond to any user, is expired or has been altered. */ - public void validateToken(String token, @Nullable String secretKey) throws CatalogAuthenticationException { + public void validateToken(String token, @Nullable String securityKey) throws CatalogAuthenticationException { if (StringUtils.isEmpty(token) || "null".equalsIgnoreCase(token)) { throw new CatalogAuthenticationException("Token is null or empty."); } - Key privateKey = null; - if (secretKey != null) { - privateKey = converStringToKeyObject(secretKey, jwtManager.getAlgorithm().getJcaName()); - } - jwtManager.validateToken(token, privateKey); + jwtManager.validateToken(token); } /** @@ -208,20 +190,7 @@ public abstract void changePassword(String organizationId, String userId, String * @return A token. */ public String createToken(String organizationId, String userId) throws CatalogAuthenticationException { - return createToken(organizationId, userId, Collections.emptyMap(), expiration, (Key) null); - } - - /** - * Create a token for the user with default expiration time. - * - * @param organizationId Organization id. - * @param userId user. - * @param secretKey secret key to be used for the token generation. - * @throws CatalogAuthenticationException CatalogAuthenticationException - * @return A token. 
- */ - public String createToken(String organizationId, String userId, String secretKey) throws CatalogAuthenticationException { - return createToken(organizationId, userId, Collections.emptyMap(), expiration, secretKey); + return createToken(organizationId, userId, Collections.emptyMap(), expiration); } /** @@ -234,57 +203,7 @@ public String createToken(String organizationId, String userId, String secretKey * @return A token. */ public String createToken(String organizationId, String userId, Map claims) throws CatalogAuthenticationException { - return createToken(organizationId, userId, claims, expiration, (Key) null); - } - - /** - * Create a token for the user with default expiration time. - * - * @param organizationId Organization id. - * @param userId user. - * @param claims claims. - * @param secretKey secret key to be used for the token generation. - * @throws CatalogAuthenticationException CatalogAuthenticationException - * @return A token. - */ - public String createToken(String organizationId, String userId, Map claims, String secretKey) - throws CatalogAuthenticationException { - return createToken(organizationId, userId, claims, expiration, secretKey); - } - - /** - * Create a token for the user. - * - * @param organizationId Organization id. - * @param userId user. - * @param claims claims. - * @param expiration Expiration time in seconds. - * @throws CatalogAuthenticationException CatalogAuthenticationException - * @return A token. - */ - public String createToken(String organizationId, String userId, Map claims, long expiration) - throws CatalogAuthenticationException { - return createToken(organizationId, userId, claims, expiration, (Key) null); - } - - /** - * Create a token for the user. - * - * @param organizationId Organization id. - * @param userId user. - * @param claims claims. - * @param expiration Expiration time in seconds. - * @param secretKey Secret key to be used for the token generation. 
- * @throws CatalogAuthenticationException CatalogAuthenticationException - * @return A token. - */ - public String createToken(String organizationId, String userId, Map claims, long expiration, String secretKey) - throws CatalogAuthenticationException { - Key privateKey = null; - if (secretKey != null) { - privateKey = converStringToKeyObject(secretKey, jwtManager.getAlgorithm().getJcaName()); - } - return createToken(organizationId, userId, claims, expiration, privateKey); + return createToken(organizationId, userId, claims, expiration); } /** @@ -294,11 +213,10 @@ public String createToken(String organizationId, String userId, Map claims, long expiration, Key secretKey) + public abstract String createToken(String organizationId, String userId, Map claims, long expiration) throws CatalogAuthenticationException; /** diff --git a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/auth/authentication/AzureADAuthenticationManager.java b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/auth/authentication/AzureADAuthenticationManager.java index 7728a138ba..f5445c4628 100644 --- a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/auth/authentication/AzureADAuthenticationManager.java +++ b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/auth/authentication/AzureADAuthenticationManager.java @@ -50,7 +50,6 @@ import java.io.IOException; import java.io.InputStream; import java.net.URL; -import java.security.Key; import java.security.PublicKey; import java.security.cert.CertificateException; import java.security.cert.CertificateFactory; @@ -261,12 +260,6 @@ public AuthenticationResponse authenticate(String organizationId, String userId, } } - @Override - public AuthenticationResponse authenticate(String organizationId, String userId, String password, String secretKey) - throws CatalogAuthenticationException { - throw new UnsupportedOperationException("AzureAD creates its own tokens. 
Please, call to the other authenticate method."); - } - @Override public AuthenticationResponse refreshToken(String refreshToken) throws CatalogAuthenticationException { AuthenticationContext context; @@ -429,7 +422,7 @@ public void newPassword(String organizationId, String userId, String newPassword } @Override - public String createToken(String organizationId, String userId, Map claims, long expiration, Key secretKey) { + public String createToken(String organizationId, String userId, Map claims, long expiration) { // Tokens are generated by Azure via authorization code or user-password throw new UnsupportedOperationException("Tokens are generated by Azure via authorization code or user-password"); } diff --git a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/auth/authentication/CatalogAuthenticationManager.java b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/auth/authentication/CatalogAuthenticationManager.java index 1371b3c700..e6003a39a2 100644 --- a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/auth/authentication/CatalogAuthenticationManager.java +++ b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/auth/authentication/CatalogAuthenticationManager.java @@ -84,17 +84,6 @@ public AuthenticationResponse authenticate(String organizationId, String userId, } } - @Override - public AuthenticationResponse authenticate(String organizationId, String userId, String password, String secretKey) - throws CatalogAuthenticationException { - try { - dbAdaptorFactory.getCatalogUserDBAdaptor(organizationId).authenticate(userId, password); - return new AuthenticationResponse(createToken(organizationId, userId, secretKey)); - } catch (CatalogDBException e) { - throw new CatalogAuthenticationException("Could not validate '" + userId + "' password\n" + e.getMessage(), e); - } - } - @Override public List getUsersFromRemoteGroup(String group) throws CatalogException { throw new UnsupportedOperationException(); @@ -121,19 +110,18 @@ public 
void newPassword(String organizationId, String userId, String newPassword } @Override - public String createToken(String organizationId, String userId, Map claims, long expiration, Key secretKey) + public String createToken(String organizationId, String userId, Map claims, long expiration) throws CatalogAuthenticationException { List federations = getFederations(organizationId, userId); return jwtManager.createJWTToken(organizationId, AuthenticationOrigin.AuthenticationType.OPENCGA, userId, claims, federations, - secretKey, expiration); + expiration); } @Override public String createNonExpiringToken(String organizationId, String userId, Map claims) throws CatalogAuthenticationException { List federations = getFederations(organizationId, userId); - return jwtManager.createJWTToken(organizationId, AuthenticationOrigin.AuthenticationType.OPENCGA, userId, claims, federations, - null, 0L); + return jwtManager.createJWTToken(organizationId, AuthenticationOrigin.AuthenticationType.OPENCGA, userId, claims, federations, 0L); } @Override diff --git a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/auth/authentication/JwtManager.java b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/auth/authentication/JwtManager.java index f3c69cdec0..332be098dd 100644 --- a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/auth/authentication/JwtManager.java +++ b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/auth/authentication/JwtManager.java @@ -90,8 +90,7 @@ public JwtManager setPublicKey(Key publicKey) { } public String createJWTToken(String organizationId, AuthenticationOrigin.AuthenticationType type, String userId, - Map claims, List federations, Key secretKey, - long expiration) { + Map claims, List federations, long expiration) { long currentTime = System.currentTimeMillis(); JwtBuilder jwtBuilder = Jwts.builder(); @@ -109,7 +108,7 @@ public String createJWTToken(String organizationId, AuthenticationOrigin.Authent .setAudience(organizationId) 
.setIssuer("OpenCGA") .setIssuedAt(new Date(currentTime)) - .signWith(secretKey != null ? secretKey : privateKey, algorithm); + .signWith(privateKey, algorithm); // Set the expiration in number of seconds only if 'expiration' is greater than 0 if (expiration > 0) { @@ -123,10 +122,6 @@ public void validateToken(String token) throws CatalogAuthenticationException { parseClaims(token); } - public void validateToken(String token, Key publicKey) throws CatalogAuthenticationException { - parseClaims(token, publicKey); - } - public JwtPayload getPayload(String token) throws CatalogAuthenticationException { Claims body = parseClaims(token).getBody(); return new JwtPayload(body.getSubject(), body.getAudience(), getAuthOrigin(body), body.getIssuer(), body.getIssuedAt(), diff --git a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/auth/authentication/LDAPAuthenticationManager.java b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/auth/authentication/LDAPAuthenticationManager.java index 731aff9df2..9d1ab62a93 100644 --- a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/auth/authentication/LDAPAuthenticationManager.java +++ b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/auth/authentication/LDAPAuthenticationManager.java @@ -171,30 +171,6 @@ public AuthenticationResponse authenticate(String organizationId, String userId, return new AuthenticationResponse(createToken(organizationId, userId, claims)); } - @Override - public AuthenticationResponse authenticate(String organizationId, String userId, String password, String secretKey) - throws CatalogAuthenticationException { - Map claims = new HashMap<>(); - - List userInfoFromLDAP = getUserInfoFromLDAP(Arrays.asList(userId), usersSearch); - if (userInfoFromLDAP.isEmpty()) { - throw new CatalogAuthenticationException("LDAP: The user id " + userId + " could not be found."); - } - - String rdn = getDN(userInfoFromLDAP.get(0)); - claims.put(OPENCGA_DISTINGUISHED_NAME, rdn); - - // Attempt to 
authenticate - Hashtable env = getEnv(rdn, password); - try { - getDirContext(env).close(); - } catch (NamingException e) { - throw wrapException(e); - } - - return new AuthenticationResponse(createToken(organizationId, userId, claims, secretKey)); - } - @Override public List getUsersFromRemoteGroup(String group) throws CatalogException { List usersFromLDAP = getUsersFromLDAPGroup(group, groupsSearch); @@ -261,17 +237,17 @@ public void newPassword(String organizationId, String userId, String newPassword } @Override - public String createToken(String organizationId, String userId, Map claims, long expiration, Key secretKey) + public String createToken(String organizationId, String userId, Map claims, long expiration) throws CatalogAuthenticationException { List federations = getFederations(organizationId, userId); - return jwtManager.createJWTToken(organizationId, AuthenticationType.LDAP, userId, claims, federations, secretKey, expiration); + return jwtManager.createJWTToken(organizationId, AuthenticationType.LDAP, userId, claims, federations, expiration); } @Override public String createNonExpiringToken(String organizationId, String userId, Map claims) throws CatalogAuthenticationException { List federations = getFederations(organizationId, userId); - return jwtManager.createJWTToken(organizationId, AuthenticationType.LDAP, userId, claims, federations, null, 0L); + return jwtManager.createJWTToken(organizationId, AuthenticationType.LDAP, userId, claims, federations, 0L); } /* Private methods */ diff --git a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/auth/authentication/SSOAuthenticationManager.java b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/auth/authentication/SSOAuthenticationManager.java index 30a5fe1fb6..2edf385a33 100644 --- a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/auth/authentication/SSOAuthenticationManager.java +++ 
b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/auth/authentication/SSOAuthenticationManager.java @@ -42,12 +42,6 @@ public AuthenticationResponse authenticate(String organizationId, String userId, throw new NotImplementedException("Authentication should be done through SSO"); } - @Override - public AuthenticationResponse authenticate(String organizationId, String userId, String password, String secretKey) - throws CatalogAuthenticationException { - throw new NotImplementedException("Authentication should be done through SSO"); - } - @Override public List getUsersFromRemoteGroup(String group) throws CatalogException { throw new NotImplementedException("Operation not implemented"); @@ -79,19 +73,18 @@ public void newPassword(String organizationId, String userId, String newPassword } @Override - public String createToken(String organizationId, String userId, Map claims, long expiration, Key secretKey) + public String createToken(String organizationId, String userId, Map claims, long expiration) throws CatalogAuthenticationException { List federations = getFederations(organizationId, userId); return jwtManager.createJWTToken(organizationId, AuthenticationOrigin.AuthenticationType.SSO, userId, claims, federations, - secretKey, expiration); + expiration); } @Override public String createNonExpiringToken(String organizationId, String userId, Map claims) throws CatalogAuthenticationException { List federations = getFederations(organizationId, userId); - return jwtManager.createJWTToken(organizationId, AuthenticationOrigin.AuthenticationType.SSO, userId, claims, federations, null, - 0L); + return jwtManager.createJWTToken(organizationId, AuthenticationOrigin.AuthenticationType.SSO, userId, claims, federations, 0L); } @Override diff --git a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/auth/authentication/azure/AuthenticationFactory.java b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/auth/authentication/azure/AuthenticationFactory.java 
index 48bf2ee07a..211e9e3d44 100644 --- a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/auth/authentication/azure/AuthenticationFactory.java +++ b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/auth/authentication/azure/AuthenticationFactory.java @@ -105,28 +105,23 @@ public String createToken(String organizationId, String authOriginId, String use public void validateToken(String organizationId, Account.AuthenticationOrigin authenticationOrigin, JwtPayload jwtPayload) throws CatalogException { - String secretKey = null; + String securityKey = null; if (authenticationOrigin.isFederation()) { - // The user is a federated user, so we need to use a different secret key - secretKey = getFederationSecretKey(organizationId, jwtPayload.getUserId()); + // The user is a federated user, so the token should have been encrypted using the security key + securityKey = getFederationSecurityKey(organizationId, jwtPayload.getUserId()); } - getOrganizationAuthenticationManager(organizationId, authenticationOrigin.getId()).validateToken(jwtPayload.getToken(), secretKey); + getOrganizationAuthenticationManager(organizationId, authenticationOrigin.getId()).validateToken(jwtPayload.getToken(), + securityKey); } public AuthenticationResponse authenticate(String organizationId, Account.AuthenticationOrigin authenticationOrigin, String userId, String password) throws CatalogException { AuthenticationManager organizationAuthenticationManager = getOrganizationAuthenticationManager(organizationId, authenticationOrigin.getId()); - if (authenticationOrigin.isFederation()) { - // The user is a federated user, so we need to use a different secret key - String secretKey = getFederationSecretKey(organizationId, userId); - return organizationAuthenticationManager.authenticate(organizationId, userId, password, secretKey); - } else { - return organizationAuthenticationManager.authenticate(organizationId, userId, password); - } + return 
organizationAuthenticationManager.authenticate(organizationId, userId, password); } - private String getFederationSecretKey(String organizationId, String userId) throws CatalogException { + private String getFederationSecurityKey(String organizationId, String userId) throws CatalogException { QueryOptions options = new QueryOptions(QueryOptions.INCLUDE, OrganizationDBAdaptor.QueryParams.FEDERATION.key()); Organization organization = catalogDBAdaptorFactory.getCatalogOrganizationDBAdaptor(organizationId).get(options).first(); if (organization.getFederation() == null) { diff --git a/opencga-catalog/src/test/java/org/opencb/opencga/catalog/auth/authentication/JwtSessionManagerTest.java b/opencga-catalog/src/test/java/org/opencb/opencga/catalog/auth/authentication/JwtSessionManagerTest.java index 4b0fd82eca..2ba03ee135 100644 --- a/opencga-catalog/src/test/java/org/opencb/opencga/catalog/auth/authentication/JwtSessionManagerTest.java +++ b/opencga-catalog/src/test/java/org/opencb/opencga/catalog/auth/authentication/JwtSessionManagerTest.java @@ -53,7 +53,7 @@ public void setUp() throws Exception { @Test public void testCreateJWTToken() throws Exception { - jwtToken = jwtSessionManager.createJWTToken(organizationId, null, "testUser", Collections.emptyMap(), null, null, 60L); + jwtToken = jwtSessionManager.createJWTToken(organizationId, null, "testUser", Collections.emptyMap(), null, 60L); } @Test @@ -81,7 +81,7 @@ public void testInvalidSecretKey() throws CatalogAuthenticationException { @Test public void testNonExpiringToken() throws CatalogException { - String nonExpiringToken = jwtSessionManager.createJWTToken(organizationId, null, "System", null, null, null, -1L); + String nonExpiringToken = jwtSessionManager.createJWTToken(organizationId, null, "System", null, null, -1L); assertEquals(jwtSessionManager.getUser(nonExpiringToken), "System"); assertNull(jwtSessionManager.getExpiration(nonExpiringToken)); } From 81dda66f0feafa7fcc623354a4101ec8c0967558 Mon Sep 17 
00:00:00 2001 From: pfurio Date: Tue, 21 Jan 2025 17:13:01 +0100 Subject: [PATCH 104/122] catalog: validate token using security key, #TASK-7192 --- .../authentication/AuthenticationManager.java | 19 +++++- .../opencb/opencga/core/common/JwtUtils.java | 61 +++++++++++++++++++ 2 files changed, 79 insertions(+), 1 deletion(-) diff --git a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/auth/authentication/AuthenticationManager.java b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/auth/authentication/AuthenticationManager.java index 054643155c..bd0ada1614 100644 --- a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/auth/authentication/AuthenticationManager.java +++ b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/auth/authentication/AuthenticationManager.java @@ -19,11 +19,13 @@ import org.apache.commons.lang3.StringUtils; import org.opencb.commons.datastore.core.Query; import org.opencb.commons.datastore.core.QueryOptions; +import org.opencb.commons.utils.CryptoUtils; import org.opencb.opencga.catalog.db.DBAdaptorFactory; import org.opencb.opencga.catalog.db.api.ProjectDBAdaptor; import org.opencb.opencga.catalog.exceptions.CatalogAuthenticationException; import org.opencb.opencga.catalog.exceptions.CatalogException; import org.opencb.opencga.core.api.ParamConstants; +import org.opencb.opencga.core.common.JwtUtils; import org.opencb.opencga.core.models.JwtPayload; import org.opencb.opencga.core.models.project.Project; import org.opencb.opencga.core.models.study.Study; @@ -34,6 +36,7 @@ import org.slf4j.LoggerFactory; import javax.annotation.Nullable; +import javax.crypto.SecretKey; import javax.crypto.spec.SecretKeySpec; import java.io.Closeable; import java.security.Key; @@ -108,7 +111,21 @@ public void validateToken(String token, @Nullable String securityKey) throws Cat throw new CatalogAuthenticationException("Token is null or empty."); } - jwtManager.validateToken(token); + String tokenToValidate; + if 
(StringUtils.isNotEmpty(securityKey)) { + SecretKey secretKey = CryptoUtils.stringToSecretKey(securityKey); + JwtUtils.Token tokenObj = JwtUtils.getToken(token); + String validation; + try { + validation = CryptoUtils.decrypt(tokenObj.getVerifySignature(), secretKey); + } catch (Exception e) { + throw new CatalogAuthenticationException("Could not decrypt cyphered token.", e); + } + tokenToValidate = JwtUtils.generateToken(tokenObj.getHeader(), tokenObj.getPayload(), validation); + } else { + tokenToValidate = token; + } + jwtManager.validateToken(tokenToValidate); } /** diff --git a/opencga-core/src/main/java/org/opencb/opencga/core/common/JwtUtils.java b/opencga-core/src/main/java/org/opencb/opencga/core/common/JwtUtils.java index 213e5f7ac0..b385780a51 100644 --- a/opencga-core/src/main/java/org/opencb/opencga/core/common/JwtUtils.java +++ b/opencga-core/src/main/java/org/opencb/opencga/core/common/JwtUtils.java @@ -70,4 +70,65 @@ public static List getFederations(Map Date: Fri, 24 Jan 2025 15:48:04 +0100 Subject: [PATCH 105/122] catalog: return corresponding orgId when there's federation, #TASK-7192 --- .../opencb/opencga/catalog/utils/CatalogFqn.java | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/utils/CatalogFqn.java b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/utils/CatalogFqn.java index 4bd98981a8..23df560fa8 100644 --- a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/utils/CatalogFqn.java +++ b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/utils/CatalogFqn.java @@ -1,10 +1,14 @@ package org.opencb.opencga.catalog.utils; +import org.apache.commons.collections4.CollectionUtils; import org.apache.commons.lang3.StringUtils; import org.opencb.opencga.core.models.JwtPayload; +import java.util.HashSet; +import java.util.Set; import java.util.regex.Matcher; import java.util.regex.Pattern; +import java.util.stream.Collectors; public final 
class CatalogFqn { @@ -112,7 +116,17 @@ public static CatalogFqn extractFqnFromStudy(String studyStr, JwtPayload payload Matcher matcher = ORGANIZATION_PROJECT_STUDY_PATTERN.matcher(studyStr); if (matcher.find()) { // studyStr contains the full path (organization@project:study) - String organizationId = matcher.group(1); + + // Check for federated studies + Set federatedStudies = new HashSet<>(); + if (CollectionUtils.isNotEmpty(payload.getFederations())) { + federatedStudies = payload.getFederations().stream() + .flatMap(federation -> federation.getStudyIds().stream()) + .collect(Collectors.toSet()); + } + + // If the study is federated, the organization that we should be using (where it should be stored) is the one from the payload + String organizationId = federatedStudies.contains(studyStr) ? payload.getOrganization() : matcher.group(1); String projectId = matcher.group(2); String studyId = matcher.group(3); return new CatalogFqn(organizationId, studyStr) From 7bd9cb67899bc90e1d5b357dcce77c5c12fff046 Mon Sep 17 00:00:00 2001 From: mbleda Date: Tue, 28 Jan 2025 23:21:18 +0000 Subject: [PATCH 106/122] R client: typo fix #TASK-6369 --- opencga-client/src/main/R/DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/opencga-client/src/main/R/DESCRIPTION b/opencga-client/src/main/R/DESCRIPTION index fd130957f4..1b5e25a026 100644 --- a/opencga-client/src/main/R/DESCRIPTION +++ b/opencga-client/src/main/R/DESCRIPTION @@ -1,7 +1,7 @@ Package: opencgaR Type: Package Title: Querying Opencga Data -Version: Version: ${opencgar.version} +Version: ${opencgar.version} Author: Zetta Genomics Maintainer: Zetta Genomics Description: R client that contains classes and methods for working with From 702238de19858e0ffb590b8d019c3c079df3be45 Mon Sep 17 00:00:00 2001 From: JuanfeSanahuja Date: Wed, 29 Jan 2025 17:02:31 +0100 Subject: [PATCH 107/122] Prepare release 3.4.0 --- opencga-analysis/pom.xml | 2 +- opencga-app/pom.xml | 2 +- opencga-catalog/pom.xml | 2 
+- opencga-client/pom.xml | 2 +- opencga-clinical/pom.xml | 2 +- opencga-core/pom.xml | 2 +- opencga-master/pom.xml | 2 +- opencga-server/pom.xml | 2 +- opencga-storage/opencga-storage-app/pom.xml | 2 +- opencga-storage/opencga-storage-benchmark/pom.xml | 2 +- opencga-storage/opencga-storage-core/pom.xml | 2 +- .../opencga-storage-hadoop-compat-api/pom.xml | 2 +- .../opencga-storage-hadoop-compat-hbase2.0/pom.xml | 2 +- .../opencga-storage-hadoop-compat-hbase2.2/pom.xml | 2 +- .../opencga-storage-hadoop-compat-hbase2.4/pom.xml | 2 +- .../opencga-storage-hadoop-compat/pom.xml | 2 +- .../opencga-storage-hadoop-core/pom.xml | 2 +- .../opencga-storage-hadoop-lib-emr6.1/pom.xml | 2 +- .../opencga-storage-hadoop-lib-emr6.13/pom.xml | 2 +- .../opencga-storage-hadoop-lib-hdi5.1/pom.xml | 2 +- .../opencga-storage-hadoop-lib-hdp3.1/pom.xml | 2 +- .../opencga-storage-hadoop-lib/pom.xml | 2 +- opencga-storage/opencga-storage-hadoop/pom.xml | 2 +- opencga-storage/opencga-storage-server/pom.xml | 2 +- opencga-storage/pom.xml | 2 +- opencga-test/pom.xml | 2 +- pom.xml | 14 +++++++------- 27 files changed, 33 insertions(+), 33 deletions(-) diff --git a/opencga-analysis/pom.xml b/opencga-analysis/pom.xml index 24019f12dd..14b1404e60 100644 --- a/opencga-analysis/pom.xml +++ b/opencga-analysis/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 3.4.0-SNAPSHOT + 3.4.0 ../pom.xml diff --git a/opencga-app/pom.xml b/opencga-app/pom.xml index a86630c5ab..b5d40abf2d 100644 --- a/opencga-app/pom.xml +++ b/opencga-app/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 3.4.0-SNAPSHOT + 3.4.0 ../pom.xml diff --git a/opencga-catalog/pom.xml b/opencga-catalog/pom.xml index 31df518865..215db3ef31 100644 --- a/opencga-catalog/pom.xml +++ b/opencga-catalog/pom.xml @@ -23,7 +23,7 @@ org.opencb.opencga opencga - 3.4.0-SNAPSHOT + 3.4.0 ../pom.xml diff --git a/opencga-client/pom.xml b/opencga-client/pom.xml index 68d77922ca..4e0287294f 100644 --- a/opencga-client/pom.xml +++ 
b/opencga-client/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 3.4.0-SNAPSHOT + 3.4.0 ../pom.xml diff --git a/opencga-clinical/pom.xml b/opencga-clinical/pom.xml index a43011299c..ed80ade813 100644 --- a/opencga-clinical/pom.xml +++ b/opencga-clinical/pom.xml @@ -5,7 +5,7 @@ org.opencb.opencga opencga - 3.4.0-SNAPSHOT + 3.4.0 ../pom.xml 4.0.0 diff --git a/opencga-core/pom.xml b/opencga-core/pom.xml index 6707b15204..d91439c992 100644 --- a/opencga-core/pom.xml +++ b/opencga-core/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 3.4.0-SNAPSHOT + 3.4.0 ../pom.xml diff --git a/opencga-master/pom.xml b/opencga-master/pom.xml index 67cd743de8..58bbee5338 100644 --- a/opencga-master/pom.xml +++ b/opencga-master/pom.xml @@ -22,7 +22,7 @@ opencga org.opencb.opencga - 3.4.0-SNAPSHOT + 3.4.0 ../pom.xml diff --git a/opencga-server/pom.xml b/opencga-server/pom.xml index 1fcff2032a..2d9521d745 100644 --- a/opencga-server/pom.xml +++ b/opencga-server/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 3.4.0-SNAPSHOT + 3.4.0 ../pom.xml diff --git a/opencga-storage/opencga-storage-app/pom.xml b/opencga-storage/opencga-storage-app/pom.xml index 9d3555928b..b2a15d3bd0 100644 --- a/opencga-storage/opencga-storage-app/pom.xml +++ b/opencga-storage/opencga-storage-app/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage - 3.4.0-SNAPSHOT + 3.4.0 ../pom.xml diff --git a/opencga-storage/opencga-storage-benchmark/pom.xml b/opencga-storage/opencga-storage-benchmark/pom.xml index c9bd1a8856..e464730b9c 100644 --- a/opencga-storage/opencga-storage-benchmark/pom.xml +++ b/opencga-storage/opencga-storage-benchmark/pom.xml @@ -22,7 +22,7 @@ opencga-storage org.opencb.opencga - 3.4.0-SNAPSHOT + 3.4.0 ../pom.xml diff --git a/opencga-storage/opencga-storage-core/pom.xml b/opencga-storage/opencga-storage-core/pom.xml index 78e4374579..779ffc9ad5 100644 --- a/opencga-storage/opencga-storage-core/pom.xml +++ b/opencga-storage/opencga-storage-core/pom.xml @@ -22,7 +22,7 @@ 
org.opencb.opencga opencga-storage - 3.4.0-SNAPSHOT + 3.4.0 ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-api/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-api/pom.xml index f099046300..7767066aac 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-api/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-api/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage-hadoop-compat - 3.4.0-SNAPSHOT + 3.4.0 ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.0/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.0/pom.xml index dd8857dbcb..a72ca982d1 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.0/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.0/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage-hadoop-compat - 3.4.0-SNAPSHOT + 3.4.0 ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.2/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.2/pom.xml index 3aab4cb38e..ce0ae1fa54 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.2/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.2/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage-hadoop-compat - 3.4.0-SNAPSHOT + 3.4.0 ../pom.xml diff --git 
a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.4/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.4/pom.xml index 66134df8dc..8aba5b7020 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.4/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.4/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage-hadoop-compat - 3.4.0-SNAPSHOT + 3.4.0 ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/pom.xml index e975ebbc12..98f203777f 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/pom.xml @@ -23,7 +23,7 @@ org.opencb.opencga opencga-storage-hadoop - 3.4.0-SNAPSHOT + 3.4.0 ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/pom.xml index 48b381275c..32503f1208 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/pom.xml @@ -23,7 +23,7 @@ org.opencb.opencga opencga-storage-hadoop - 3.4.0-SNAPSHOT + 3.4.0 ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-emr6.1/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-emr6.1/pom.xml index 1c5876905e..a0367ec71b 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-emr6.1/pom.xml +++ 
b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-emr6.1/pom.xml @@ -7,7 +7,7 @@ org.opencb.opencga opencga-storage-hadoop-lib - 3.4.0-SNAPSHOT + 3.4.0 ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-emr6.13/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-emr6.13/pom.xml index 991c499dc8..33f5d55da8 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-emr6.13/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-emr6.13/pom.xml @@ -7,7 +7,7 @@ org.opencb.opencga opencga-storage-hadoop-lib - 3.4.0-SNAPSHOT + 3.4.0 ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-hdi5.1/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-hdi5.1/pom.xml index a960e1a068..4e3ff642d2 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-hdi5.1/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-hdi5.1/pom.xml @@ -7,7 +7,7 @@ org.opencb.opencga opencga-storage-hadoop-lib - 3.4.0-SNAPSHOT + 3.4.0 ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-hdp3.1/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-hdp3.1/pom.xml index ae6332feb3..3198f8fd7a 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-hdp3.1/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-hdp3.1/pom.xml @@ -7,7 +7,7 @@ org.opencb.opencga opencga-storage-hadoop-lib - 3.4.0-SNAPSHOT + 3.4.0 ../pom.xml diff 
--git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/pom.xml index db8d5b9aec..a3038f0d03 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/pom.xml @@ -23,7 +23,7 @@ org.opencb.opencga opencga-storage-hadoop - 3.4.0-SNAPSHOT + 3.4.0 ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/pom.xml b/opencga-storage/opencga-storage-hadoop/pom.xml index 80807b54c5..8ffcf93d7c 100644 --- a/opencga-storage/opencga-storage-hadoop/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/pom.xml @@ -23,7 +23,7 @@ org.opencb.opencga opencga-storage - 3.4.0-SNAPSHOT + 3.4.0 ../pom.xml diff --git a/opencga-storage/opencga-storage-server/pom.xml b/opencga-storage/opencga-storage-server/pom.xml index 3dd2a91dd4..342a0fab3b 100644 --- a/opencga-storage/opencga-storage-server/pom.xml +++ b/opencga-storage/opencga-storage-server/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage - 3.4.0-SNAPSHOT + 3.4.0 ../pom.xml diff --git a/opencga-storage/pom.xml b/opencga-storage/pom.xml index 3ab26cac21..4246a5a35b 100644 --- a/opencga-storage/pom.xml +++ b/opencga-storage/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 3.4.0-SNAPSHOT + 3.4.0 ../pom.xml diff --git a/opencga-test/pom.xml b/opencga-test/pom.xml index 8b5c1157ef..88d54e58bf 100644 --- a/opencga-test/pom.xml +++ b/opencga-test/pom.xml @@ -24,7 +24,7 @@ org.opencb.opencga opencga - 3.4.0-SNAPSHOT + 3.4.0 ../pom.xml diff --git a/pom.xml b/pom.xml index dbfd8bf025..065a89d755 100644 --- a/pom.xml +++ b/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 3.4.0-SNAPSHOT + 3.4.0 pom OpenCGA @@ -43,12 +43,12 @@ - 3.4.0_dev - 3.4.0_dev - 6.4.0-SNAPSHOT - 3.4.0-SNAPSHOT - 5.4.0-SNAPSHOT - 3.4.0-SNAPSHOT + 3.4.0 + 3.4.0 + 6.4.0 + 3.4.0 + 5.4.0 + 3.4.0 0.2.0 From 0aa804301bb2b9da748cea3655d6b8a4eb805583 Mon Sep 17 
00:00:00 2001 From: pfurio Date: Wed, 5 Feb 2025 11:51:07 +0100 Subject: [PATCH 108/122] catalog: extract new user validator, #TASK-7192 --- .../opencga/catalog/managers/UserManager.java | 75 ++++++++++--------- 1 file changed, 39 insertions(+), 36 deletions(-) diff --git a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/UserManager.java b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/UserManager.java index 23e2eace64..3b73074b5f 100644 --- a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/UserManager.java +++ b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/UserManager.java @@ -128,9 +128,46 @@ public OpenCGAResult create(User user, String password, String token) thro Organization organization = getOrganizationDBAdaptor(organizationId).get(OrganizationManager.INCLUDE_ORGANIZATION_CONFIGURATION) .first(); - + validateNewUser(user, password, organization.getConfiguration().getDefaultUserExpirationDate(), organizationId); ObjectMap auditParams = new ObjectMap("user", user); + if (!ParamConstants.ADMIN_ORGANIZATION.equals(organizationId) || !OPENCGA.equals(user.getId())) { + JwtPayload jwtPayload = validateToken(token); + // If it's not one of the SUPERADMIN users or the owner or one of the admins of the organisation, we should not allow it + if (!authorizationManager.isAtLeastOrganizationOwnerOrAdmin(organizationId, jwtPayload.getUserId(organizationId))) { + String errorMsg = "Please ask your administrator to create your account."; + auditManager.auditCreate(organizationId, user.getId(), Enums.Resource.USER, user.getId(), "", "", "", auditParams, + new AuditRecord.Status(AuditRecord.Status.Result.ERROR, new Error(0, "", errorMsg))); + throw new CatalogException(errorMsg); + } + } + + checkUserExists(organizationId, user.getId()); + + try { + if (StringUtils.isNotEmpty(password) && !PasswordUtils.isStrongPassword(password)) { + throw new CatalogException("Invalid password. 
" + PasswordUtils.PASSWORD_REQUIREMENT); + } + if (user.getProjects() != null && !user.getProjects().isEmpty()) { + throw new CatalogException("Creating user and projects in a single transaction is forbidden"); + } + + getUserDBAdaptor(organizationId).insert(user, password, QueryOptions.empty()); + + auditManager.auditCreate(organizationId, user.getId(), Enums.Resource.USER, user.getId(), "", "", "", auditParams, + new AuditRecord.Status(AuditRecord.Status.Result.SUCCESS)); + + return getUserDBAdaptor(organizationId).get(user.getId(), QueryOptions.empty()); + } catch (CatalogIOException | CatalogDBException e) { + auditManager.auditCreate(organizationId, user.getId(), Enums.Resource.USER, user.getId(), "", "", "", auditParams, + new AuditRecord.Status(AuditRecord.Status.Result.ERROR, e.getError())); + + throw e; + } + } + + public void validateNewUser(User user, String password, String defaultUserExpirationDate, String organizationId) + throws CatalogException { // Initialise fields ParamUtils.checkObj(user, "User"); ParamUtils.checkValidUserId(user.getId()); @@ -157,7 +194,7 @@ public OpenCGAResult create(User user, String password, String token) thro Account account = user.getInternal().getAccount(); account.setPassword(ParamUtils.defaultObject(account.getPassword(), Password::new)); if (StringUtils.isEmpty(account.getExpirationDate())) { - account.setExpirationDate(organization.getConfiguration().getDefaultUserExpirationDate()); + account.setExpirationDate(defaultUserExpirationDate); } else { // Validate expiration date is not over ParamUtils.checkDateIsNotExpired(account.getExpirationDate(), UserDBAdaptor.QueryParams.INTERNAL_ACCOUNT_EXPIRATION_DATE.key()); @@ -188,40 +225,6 @@ public OpenCGAResult create(User user, String password, String token) thro Date date = TimeUtils.addDaysToCurrentDate(configuration.getAccount().getPasswordExpirationDays()); account.getPassword().setExpirationDate(TimeUtils.getTime(date)); } - - if 
(!ParamConstants.ADMIN_ORGANIZATION.equals(organizationId) || !OPENCGA.equals(user.getId())) { - JwtPayload jwtPayload = validateToken(token); - // If it's not one of the SUPERADMIN users or the owner or one of the admins of the organisation, we should not allow it - if (!authorizationManager.isAtLeastOrganizationOwnerOrAdmin(organizationId, jwtPayload.getUserId(organizationId))) { - String errorMsg = "Please ask your administrator to create your account."; - auditManager.auditCreate(organizationId, user.getId(), Enums.Resource.USER, user.getId(), "", "", "", auditParams, - new AuditRecord.Status(AuditRecord.Status.Result.ERROR, new Error(0, "", errorMsg))); - throw new CatalogException(errorMsg); - } - } - - checkUserExists(organizationId, user.getId()); - - try { - if (StringUtils.isNotEmpty(password) && !PasswordUtils.isStrongPassword(password)) { - throw new CatalogException("Invalid password. " + PasswordUtils.PASSWORD_REQUIREMENT); - } - if (user.getProjects() != null && !user.getProjects().isEmpty()) { - throw new CatalogException("Creating user and projects in a single transaction is forbidden"); - } - - getUserDBAdaptor(organizationId).insert(user, password, QueryOptions.empty()); - - auditManager.auditCreate(organizationId, user.getId(), Enums.Resource.USER, user.getId(), "", "", "", auditParams, - new AuditRecord.Status(AuditRecord.Status.Result.SUCCESS)); - - return getUserDBAdaptor(organizationId).get(user.getId(), QueryOptions.empty()); - } catch (CatalogIOException | CatalogDBException e) { - auditManager.auditCreate(organizationId, user.getId(), Enums.Resource.USER, user.getId(), "", "", "", auditParams, - new AuditRecord.Status(AuditRecord.Status.Result.ERROR, e.getError())); - - throw e; - } } /** From 9d54b2b5e27888f86d99ef0ea2f0013d19b7e66d Mon Sep 17 00:00:00 2001 From: pfurio Date: Wed, 5 Feb 2025 13:59:21 +0100 Subject: [PATCH 109/122] catalog: fix study delete operation, #TASK-7192 --- .../catalog/db/mongodb/StudyMongoDBAdaptor.java | 13 
+++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/mongodb/StudyMongoDBAdaptor.java b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/mongodb/StudyMongoDBAdaptor.java index 7cf1ec343c..6b540af81e 100644 --- a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/mongodb/StudyMongoDBAdaptor.java +++ b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/mongodb/StudyMongoDBAdaptor.java @@ -1503,26 +1503,23 @@ OpenCGAResult privateDelete(ClientSession clientSession, Document studyD String studyId = studyDocument.getString(QueryParams.ID.key()); long studyUid = studyDocument.getLong(PRIVATE_UID); - long projectUid = studyDocument.getEmbedded(Arrays.asList(PRIVATE_PROJECT, PRIVATE_UID), -1L); logger.debug("Deleting study {} ({})", studyId, studyUid); // TODO: In the future, we will want to delete also all the files, samples, cohorts... associated // Add status DELETED - studyDocument.put(QueryParams.INTERNAL_STATUS.key(), getMongoDBDocument(new InternalStatus(InternalStatus.DELETED), "status")); + Document internal = studyDocument.get("internal", Document.class); + if (internal != null) { + internal.put("status", getMongoDBDocument(new InternalStatus(InternalStatus.DELETED), "status")); + } // Upsert the document into the DELETED collection - Bson query = new Document() - .append(QueryParams.ID.key(), studyId) - .append(PRIVATE_PROJECT_UID, projectUid); + Bson query = new Document(PRIVATE_UID, studyUid); deletedStudyCollection.update(clientSession, query, new Document("$set", studyDocument), new QueryOptions(MongoDBCollection.UPSERT, true)); // Delete the document from the main STUDY collection - query = new Document() - .append(PRIVATE_UID, studyUid) - .append(PRIVATE_PROJECT_UID, projectUid); DataResult remove = studyCollection.remove(clientSession, query, null); if (remove.getNumMatches() == 0) { throw new CatalogDBException("Study " + studyId + " not 
found"); From 0a7ee97009cd7cd0adb331e1e5c707bb35b5fdd9 Mon Sep 17 00:00:00 2001 From: pfurio Date: Wed, 5 Feb 2025 14:35:35 +0100 Subject: [PATCH 110/122] catalog: fix project delete implementation, #TASK-7192 --- .../db/mongodb/ProjectMongoDBAdaptor.java | 35 +++++++++++-------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/mongodb/ProjectMongoDBAdaptor.java b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/mongodb/ProjectMongoDBAdaptor.java index d6ea97b3af..788a0d0a36 100644 --- a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/mongodb/ProjectMongoDBAdaptor.java +++ b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/mongodb/ProjectMongoDBAdaptor.java @@ -426,9 +426,8 @@ OpenCGAResult privateDelete(ClientSession clientSession, Project projec long tmpStartTime = startQuery(); logger.debug("Deleting project {} ({})", project.getId(), project.getUid()); - StudyMongoDBAdaptor studyDBAdaptor = dbAdaptorFactory.getCatalogStudyDBAdaptor(); - // First, we delete the studies + StudyMongoDBAdaptor studyDBAdaptor = dbAdaptorFactory.getCatalogStudyDBAdaptor(); Query studyQuery = new Query(StudyDBAdaptor.QueryParams.PROJECT_UID.key(), project.getUid()); List studyList = studyDBAdaptor.nativeGet(clientSession, studyQuery, QueryOptions.empty()).getResults(); if (studyList != null) { @@ -437,25 +436,33 @@ OpenCGAResult privateDelete(ClientSession clientSession, Project projec } } - String deleteSuffix = INTERNAL_DELIMITER + "DELETED_" + TimeUtils.getTime(); - // Mark the study as deleted - ObjectMap updateParams = new ObjectMap() - .append(INTERNAL_STATUS_ID.key(), InternalStatus.DELETED) - .append(QueryParams.INTERNAL_STATUS_DATE.key(), TimeUtils.getTime()) - .append(QueryParams.ID.key(), project.getId() + deleteSuffix); + Query query = new Query(PRIVATE_UID, project.getUid()); + Document projectDoc = nativeGet(clientSession, query, QueryOptions.empty()).first(); + + 
// Add status DELETED + Document internal = projectDoc.get("internal", Document.class); + if (internal != null) { + internal.put("status", getMongoDBDocument(new InternalStatus(InternalStatus.DELETED), "status")); + } + + Bson bsonQuery = parseQuery(query); + + // Upsert the document into the DELETED collection + deletedProjectCollection.update(clientSession, bsonQuery, new Document("$set", projectDoc), + new QueryOptions(MongoDBCollection.UPSERT, true)); - DataResult result = privateUpdate(clientSession, project, updateParams); - if (result.getNumMatches() == 0) { + // Delete the document from the main PROJECT collection + DataResult remove = projectCollection.remove(clientSession, bsonQuery, null); + if (remove.getNumMatches() == 0) { throw new CatalogDBException("Project " + project.getId() + " not found"); } - List events = new ArrayList<>(); - if (result.getNumUpdated() == 0) { - events.add(new Event(Event.Type.WARNING, project.getId(), "Project was already deleted")); + if (remove.getNumDeleted() == 0) { + throw new CatalogDBException("Project " + project.getId() + " could not be deleted"); } logger.debug("Project {} successfully deleted", project.getId()); - return endWrite(tmpStartTime, 1, 0, 0, 1, events); + return endWrite(tmpStartTime, 1, 0, 0, 1, null); } OpenCGAResult setStatus(Query query, String status) throws CatalogDBException { From a79a62cbee18941a7e7afbefc37259d76f32051a Mon Sep 17 00:00:00 2001 From: pfurio Date: Wed, 5 Feb 2025 16:27:58 +0100 Subject: [PATCH 111/122] catalog: control some actions on federated studies, #TASK-7192 --- .../opencb/opencga/catalog/managers/StudyManager.java | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/StudyManager.java b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/StudyManager.java index 488c390d9d..a9b5e48a67 100644 --- a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/StudyManager.java +++ 
b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/StudyManager.java @@ -1187,6 +1187,12 @@ public OpenCGAResult updateGroup(String studyId, String groupId, ParamUti authorizationManager.checkUpdateGroupPermissions(organizationId, study.getUid(), userId, groupId, action); + if (study.getInternal().isFederated()) { + if (!groupId.equals(MEMBERS)) { + throw new CatalogException("Cannot modify groups other than the '" + MEMBERS + "' group in federated studies."); + } + } + if (CollectionUtils.isNotEmpty(updateParams.getUsers())) { List tmpUsers = updateParams.getUsers(); if (groupId.equals(MEMBERS) || groupId.equals(ADMINS)) { @@ -1727,6 +1733,9 @@ public OpenCGAResult> updateAcl(Strin if (action == null) { throw new CatalogException("Invalid action found. Please choose a valid action to be performed."); } + if (study.getInternal().isFederated()) { + throw new CatalogException("Cannot modify ACLs of a federated study."); + } List permissions = Collections.emptyList(); if (StringUtils.isNotEmpty(aclParams.getPermissions())) { From 3a1d87dff1346d3586a77538e0fbbec5b3d968c3 Mon Sep 17 00:00:00 2001 From: JuanfeSanahuja Date: Thu, 6 Feb 2025 12:38:03 +0100 Subject: [PATCH 112/122] Prepare port patch v3.4.0 -> v4.0.0 #TASK-7214 --- opencga-analysis/pom.xml | 2 +- opencga-app/pom.xml | 2 +- opencga-catalog/pom.xml | 2 +- opencga-client/pom.xml | 2 +- opencga-clinical/pom.xml | 2 +- opencga-core/pom.xml | 2 +- opencga-master/pom.xml | 2 +- opencga-server/pom.xml | 2 +- opencga-storage/opencga-storage-app/pom.xml | 2 +- opencga-storage/opencga-storage-benchmark/pom.xml | 2 +- opencga-storage/opencga-storage-core/pom.xml | 2 +- .../opencga-storage-hadoop-compat-api/pom.xml | 2 +- .../opencga-storage-hadoop-compat-hbase2.0/pom.xml | 2 +- .../opencga-storage-hadoop-compat-hbase2.2/pom.xml | 2 +- .../opencga-storage-hadoop-compat-hbase2.4/pom.xml | 2 +- .../opencga-storage-hadoop-compat/pom.xml | 2 +- .../opencga-storage-hadoop-core/pom.xml | 2 +- 
.../opencga-storage-hadoop-lib-emr6.1/pom.xml | 2 +- .../opencga-storage-hadoop-lib-emr6.13/pom.xml | 2 +- .../opencga-storage-hadoop-lib-hdi5.1/pom.xml | 2 +- .../opencga-storage-hadoop-lib-hdp3.1/pom.xml | 2 +- .../opencga-storage-hadoop-lib/pom.xml | 2 +- opencga-storage/opencga-storage-hadoop/pom.xml | 2 +- opencga-storage/opencga-storage-server/pom.xml | 2 +- opencga-storage/pom.xml | 2 +- opencga-test/pom.xml | 2 +- pom.xml | 14 +++++++------- 27 files changed, 33 insertions(+), 33 deletions(-) diff --git a/opencga-analysis/pom.xml b/opencga-analysis/pom.xml index 14b1404e60..ad1a6f765e 100644 --- a/opencga-analysis/pom.xml +++ b/opencga-analysis/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 3.4.0 + 4.0.0-SNAPSHOT ../pom.xml diff --git a/opencga-app/pom.xml b/opencga-app/pom.xml index b5d40abf2d..76ea292d61 100644 --- a/opencga-app/pom.xml +++ b/opencga-app/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 3.4.0 + 4.0.0-SNAPSHOT ../pom.xml diff --git a/opencga-catalog/pom.xml b/opencga-catalog/pom.xml index 215db3ef31..df6a3d06d7 100644 --- a/opencga-catalog/pom.xml +++ b/opencga-catalog/pom.xml @@ -23,7 +23,7 @@ org.opencb.opencga opencga - 3.4.0 + 4.0.0-SNAPSHOT ../pom.xml diff --git a/opencga-client/pom.xml b/opencga-client/pom.xml index 4e0287294f..ee6e9dbf30 100644 --- a/opencga-client/pom.xml +++ b/opencga-client/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 3.4.0 + 4.0.0-SNAPSHOT ../pom.xml diff --git a/opencga-clinical/pom.xml b/opencga-clinical/pom.xml index ed80ade813..226fd9bac1 100644 --- a/opencga-clinical/pom.xml +++ b/opencga-clinical/pom.xml @@ -5,7 +5,7 @@ org.opencb.opencga opencga - 3.4.0 + 4.0.0-SNAPSHOT ../pom.xml 4.0.0 diff --git a/opencga-core/pom.xml b/opencga-core/pom.xml index d91439c992..c1fb9ac506 100644 --- a/opencga-core/pom.xml +++ b/opencga-core/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 3.4.0 + 4.0.0-SNAPSHOT ../pom.xml diff --git a/opencga-master/pom.xml b/opencga-master/pom.xml index 
58bbee5338..ca1c6c3c21 100644 --- a/opencga-master/pom.xml +++ b/opencga-master/pom.xml @@ -22,7 +22,7 @@ opencga org.opencb.opencga - 3.4.0 + 4.0.0-SNAPSHOT ../pom.xml diff --git a/opencga-server/pom.xml b/opencga-server/pom.xml index 2d9521d745..0a226d06df 100644 --- a/opencga-server/pom.xml +++ b/opencga-server/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 3.4.0 + 4.0.0-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-app/pom.xml b/opencga-storage/opencga-storage-app/pom.xml index b2a15d3bd0..b4771cd977 100644 --- a/opencga-storage/opencga-storage-app/pom.xml +++ b/opencga-storage/opencga-storage-app/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage - 3.4.0 + 4.0.0-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-benchmark/pom.xml b/opencga-storage/opencga-storage-benchmark/pom.xml index e464730b9c..c6d485f309 100644 --- a/opencga-storage/opencga-storage-benchmark/pom.xml +++ b/opencga-storage/opencga-storage-benchmark/pom.xml @@ -22,7 +22,7 @@ opencga-storage org.opencb.opencga - 3.4.0 + 4.0.0-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-core/pom.xml b/opencga-storage/opencga-storage-core/pom.xml index 779ffc9ad5..0142ea1ddf 100644 --- a/opencga-storage/opencga-storage-core/pom.xml +++ b/opencga-storage/opencga-storage-core/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage - 3.4.0 + 4.0.0-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-api/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-api/pom.xml index 7767066aac..3b408a1127 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-api/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-api/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage-hadoop-compat - 3.4.0 
+ 4.0.0-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.0/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.0/pom.xml index a72ca982d1..ce30f130cf 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.0/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.0/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage-hadoop-compat - 3.4.0 + 4.0.0-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.2/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.2/pom.xml index ce0ae1fa54..145210cc55 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.2/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.2/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage-hadoop-compat - 3.4.0 + 4.0.0-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.4/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.4/pom.xml index 8aba5b7020..6f8c73de44 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.4/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.4/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage-hadoop-compat - 3.4.0 + 4.0.0-SNAPSHOT ../pom.xml diff --git 
a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/pom.xml index 98f203777f..05bdba5388 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/pom.xml @@ -23,7 +23,7 @@ org.opencb.opencga opencga-storage-hadoop - 3.4.0 + 4.0.0-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/pom.xml index 32503f1208..7d6cf6e355 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/pom.xml @@ -23,7 +23,7 @@ org.opencb.opencga opencga-storage-hadoop - 3.4.0 + 4.0.0-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-emr6.1/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-emr6.1/pom.xml index a0367ec71b..71c3d74464 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-emr6.1/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-emr6.1/pom.xml @@ -7,7 +7,7 @@ org.opencb.opencga opencga-storage-hadoop-lib - 3.4.0 + 4.0.0-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-emr6.13/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-emr6.13/pom.xml index 33f5d55da8..1752fa6a3d 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-emr6.13/pom.xml +++ 
b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-emr6.13/pom.xml @@ -7,7 +7,7 @@ org.opencb.opencga opencga-storage-hadoop-lib - 3.4.0 + 4.0.0-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-hdi5.1/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-hdi5.1/pom.xml index 4e3ff642d2..6556d12cbc 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-hdi5.1/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-hdi5.1/pom.xml @@ -7,7 +7,7 @@ org.opencb.opencga opencga-storage-hadoop-lib - 3.4.0 + 4.0.0-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-hdp3.1/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-hdp3.1/pom.xml index 3198f8fd7a..2d8e814b74 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-hdp3.1/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-hdp3.1/pom.xml @@ -7,7 +7,7 @@ org.opencb.opencga opencga-storage-hadoop-lib - 3.4.0 + 4.0.0-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/pom.xml index a3038f0d03..4753a66d85 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/pom.xml @@ -23,7 +23,7 @@ org.opencb.opencga opencga-storage-hadoop - 3.4.0 + 4.0.0-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/pom.xml b/opencga-storage/opencga-storage-hadoop/pom.xml index 8ffcf93d7c..acb4cd4b09 100644 --- 
a/opencga-storage/opencga-storage-hadoop/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/pom.xml @@ -23,7 +23,7 @@ org.opencb.opencga opencga-storage - 3.4.0 + 4.0.0-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-server/pom.xml b/opencga-storage/opencga-storage-server/pom.xml index 342a0fab3b..8d40be5a82 100644 --- a/opencga-storage/opencga-storage-server/pom.xml +++ b/opencga-storage/opencga-storage-server/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage - 3.4.0 + 4.0.0-SNAPSHOT ../pom.xml diff --git a/opencga-storage/pom.xml b/opencga-storage/pom.xml index 4246a5a35b..7e3cf23f5a 100644 --- a/opencga-storage/pom.xml +++ b/opencga-storage/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 3.4.0 + 4.0.0-SNAPSHOT ../pom.xml diff --git a/opencga-test/pom.xml b/opencga-test/pom.xml index 88d54e58bf..0dcd3f4db4 100644 --- a/opencga-test/pom.xml +++ b/opencga-test/pom.xml @@ -24,7 +24,7 @@ org.opencb.opencga opencga - 3.4.0 + 4.0.0-SNAPSHOT ../pom.xml diff --git a/pom.xml b/pom.xml index 065a89d755..adb16d2472 100644 --- a/pom.xml +++ b/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 3.4.0 + 4.0.0-SNAPSHOT pom OpenCGA @@ -43,12 +43,12 @@ - 3.4.0 - 3.4.0 - 6.4.0 - 3.4.0 - 5.4.0 - 3.4.0 + 4.0.0_dev + 4.0.0_dev + 7.0.0-SNAPSHOT + 4.0.0-SNAPSHOT + 6.0.0-SNAPSHOT + 4.0.0-SNAPSHOT 0.2.0 From e6d65447042c9b483c749f86891e7fe15ed1a80b Mon Sep 17 00:00:00 2001 From: pfurio Date: Thu, 6 Feb 2025 17:59:12 +0100 Subject: [PATCH 113/122] catalog: fix queries for federated data, #TASK-7192 --- .../catalog/managers/ProjectManager.java | 11 ++++++-- .../catalog/managers/StudyManager.java | 26 ++++++++++--------- 2 files changed, 23 insertions(+), 14 deletions(-) diff --git a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/ProjectManager.java b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/ProjectManager.java index 8a86eb1f4d..36102dc7c6 100644 --- 
a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/ProjectManager.java +++ b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/ProjectManager.java @@ -121,13 +121,20 @@ OpenCGAResult resolveId(CatalogFqn catalogFqn, QueryOptions options, Jw throw new CatalogException("Internal error. Missing project id or uuid."); } - OpenCGAResult projectDataResult = getProjectDBAdaptor(catalogFqn.getOrganizationId()).get(query, queryOptions, userId); + String organizationId = payload.getOrganization(); + String organizationFqn = catalogFqn.getOrganizationId(); + if (!organizationId.equals(organizationFqn) && !authorizationManager.isOpencgaAdministrator(payload)) { + // User may be trying to fetch a federated project + organizationFqn = organizationId; + } + + OpenCGAResult projectDataResult = getProjectDBAdaptor(organizationFqn).get(query, queryOptions, userId); if (projectDataResult.getNumResults() > 1) { throw new CatalogException("Please be more concrete with the project. 
More than one project found for " + userId + " user"); } else if (projectDataResult.getNumResults() == 1) { return projectDataResult; } else { - projectDataResult = getProjectDBAdaptor(catalogFqn.getOrganizationId()).get(query, queryOptions); + projectDataResult = getProjectDBAdaptor(organizationFqn).get(query, queryOptions); if (projectDataResult.getNumResults() == 0) { throw new CatalogException("No project found given '" + catalogFqn.getProvidedId() + "'."); } else { diff --git a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/StudyManager.java b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/StudyManager.java index a9b5e48a67..a03a5a54b4 100644 --- a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/StudyManager.java +++ b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/StudyManager.java @@ -223,15 +223,22 @@ private OpenCGAResult smartResolutor(CatalogFqn catalogFqn, QueryOptions fixQueryOptions(queryOptions, INCLUDE_STUDY_IDS.getAsStringList(QueryOptions.INCLUDE)); } + String payloadOrgId = payload.getOrganization(); + String fqnOrgId = catalogFqn.getOrganizationId(); + boolean isOpenCGAAdmin = authorizationManager.isOpencgaAdministrator(payload); + if (!payloadOrgId.equals(fqnOrgId) && !isOpenCGAAdmin) { + // User may be trying to fetch a federated project + fqnOrgId = payloadOrgId; + } + OpenCGAResult studyDataResult; - if (!payload.getOrganization().equals(catalogFqn.getOrganizationId())) { + if (isOpenCGAAdmin) { // If it is the administrator, we allow it without checking the user anymore - authorizationManager.checkIsOpencgaAdministrator(payload); - studyDataResult = getStudyDBAdaptor(catalogFqn.getOrganizationId()).get(query, queryOptions); + studyDataResult = getStudyDBAdaptor(fqnOrgId).get(query, queryOptions); } else { - studyDataResult = getStudyDBAdaptor(catalogFqn.getOrganizationId()).get(query, queryOptions, userId); + studyDataResult = getStudyDBAdaptor(fqnOrgId).get(query, 
queryOptions, userId); if (studyDataResult.getNumResults() == 0) { - studyDataResult = getStudyDBAdaptor(catalogFqn.getOrganizationId()).get(query, queryOptions); + studyDataResult = getStudyDBAdaptor(fqnOrgId).get(query, queryOptions); if (studyDataResult.getNumResults() != 0) { throw CatalogAuthorizationException.denyAny(userId, "view", "study"); } @@ -312,13 +319,8 @@ private OpenCGAResult smartResolutor(String studyStr, QueryOptions option } } - if (organizationFqn != null && !organizationId.equals(organizationFqn) - && !ParamConstants.ADMIN_ORGANIZATION.equals(organizationId)) { - logger.error("User '{}' belonging to organization '{}' requested access to organization '{}'", userId, organizationId, - organizationFqn); - throw new CatalogAuthorizationException("Cannot access data from a different organization."); - } else { - // If organization is not part of the FQN, assign it with the organization the user belongs to. + if (!organizationId.equals(organizationFqn) && !ParamConstants.ADMIN_ORGANIZATION.equals(organizationId)) { + // User may be trying to fetch a federated study organizationFqn = organizationId; } From f7fa1985275cded16825ca1638d7ebc69a4d7cfa Mon Sep 17 00:00:00 2001 From: pfurio Date: Fri, 7 Feb 2025 10:19:50 +0100 Subject: [PATCH 114/122] app: fix user migration, #TASK-6013 --- .../opencga/app/migrations/v3/v3_1_0/UserBanMigration.java | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/opencga-app/src/main/java/org/opencb/opencga/app/migrations/v3/v3_1_0/UserBanMigration.java b/opencga-app/src/main/java/org/opencb/opencga/app/migrations/v3/v3_1_0/UserBanMigration.java index b7d79b7391..6603e1113b 100644 --- a/opencga-app/src/main/java/org/opencb/opencga/app/migrations/v3/v3_1_0/UserBanMigration.java +++ b/opencga-app/src/main/java/org/opencb/opencga/app/migrations/v3/v3_1_0/UserBanMigration.java @@ -1,5 +1,6 @@ package org.opencb.opencga.app.migrations.v3.v3_1_0; +import com.mongodb.client.MongoCollection; import 
com.mongodb.client.model.Filters; import com.mongodb.client.model.Projections; import com.mongodb.client.model.UpdateOneModel; @@ -12,7 +13,7 @@ import org.opencb.opencga.core.common.TimeUtils; @Migration(id = "addFailedLoginAttemptsMigration", description = "Add failedAttempts to User #TASK-6013", version = "3.2.0", - language = Migration.MigrationLanguage.JAVA, domain = Migration.MigrationDomain.CATALOG, date = 20240419) + language = Migration.MigrationLanguage.JAVA, domain = Migration.MigrationDomain.CATALOG, date = 20240419, patch = 2) public class UserBanMigration extends MigrationTool { @Override @@ -36,6 +37,10 @@ protected void run() throws Exception { ) ); }); + + MongoCollection orgCollection = getMongoCollection(OrganizationMongoDBAdaptorFactory.ORGANIZATION_COLLECTION); + orgCollection.updateMany(Filters.exists("configuration.defaultUserExpirationDate", false), + Updates.set("configuration.defaultUserExpirationDate", Constants.DEFAULT_USER_EXPIRATION_DATE)); } } From 2b9bbd63405a5977786308608595cefa3abc92fc Mon Sep 17 00:00:00 2001 From: pfurio Date: Mon, 10 Feb 2025 13:28:09 +0100 Subject: [PATCH 115/122] catalog: add method to remove user from all groups in one go, #TASK-7192 --- .../v3/v3_1_0/UserBanMigration.java | 1 + .../MoveUserAccountToInternalMigration.java | 12 +++++++ .../catalog/db/api/StudyDBAdaptor.java | 2 ++ .../mongodb/OrganizationMongoDBAdaptor.java | 12 +++---- .../db/mongodb/StudyMongoDBAdaptor.java | 34 +++++++++++++++++-- 5 files changed, 53 insertions(+), 8 deletions(-) diff --git a/opencga-app/src/main/java/org/opencb/opencga/app/migrations/v3/v3_1_0/UserBanMigration.java b/opencga-app/src/main/java/org/opencb/opencga/app/migrations/v3/v3_1_0/UserBanMigration.java index 6603e1113b..aa0834da33 100644 --- a/opencga-app/src/main/java/org/opencb/opencga/app/migrations/v3/v3_1_0/UserBanMigration.java +++ b/opencga-app/src/main/java/org/opencb/opencga/app/migrations/v3/v3_1_0/UserBanMigration.java @@ -38,6 +38,7 @@ protected void run() 
throws Exception { ); }); + // Patch 2. Organization 'configuration.defaultUserExpirationDate' field was not set for existing organizations MongoCollection orgCollection = getMongoCollection(OrganizationMongoDBAdaptorFactory.ORGANIZATION_COLLECTION); orgCollection.updateMany(Filters.exists("configuration.defaultUserExpirationDate", false), Updates.set("configuration.defaultUserExpirationDate", Constants.DEFAULT_USER_EXPIRATION_DATE)); diff --git a/opencga-app/src/main/java/org/opencb/opencga/app/migrations/v3/v3_2_1/MoveUserAccountToInternalMigration.java b/opencga-app/src/main/java/org/opencb/opencga/app/migrations/v3/v3_2_1/MoveUserAccountToInternalMigration.java index 68f2cc00e7..d2a3b680e1 100644 --- a/opencga-app/src/main/java/org/opencb/opencga/app/migrations/v3/v3_2_1/MoveUserAccountToInternalMigration.java +++ b/opencga-app/src/main/java/org/opencb/opencga/app/migrations/v3/v3_2_1/MoveUserAccountToInternalMigration.java @@ -1,14 +1,17 @@ package org.opencb.opencga.app.migrations.v3.v3_2_1; +import com.mongodb.client.MongoCollection; import com.mongodb.client.model.Filters; import com.mongodb.client.model.Projections; import com.mongodb.client.model.UpdateOneModel; +import com.mongodb.client.model.Updates; import org.bson.Document; import org.bson.conversions.Bson; import org.opencb.opencga.catalog.db.mongodb.MongoDBAdaptor; import org.opencb.opencga.catalog.db.mongodb.OrganizationMongoDBAdaptorFactory; import org.opencb.opencga.catalog.migration.Migration; import org.opencb.opencga.catalog.migration.MigrationTool; +import org.opencb.opencga.catalog.utils.Constants; import java.util.Arrays; @@ -46,5 +49,14 @@ protected void run() throws Exception { bulk.add(new UpdateOneModel<>(Filters.eq("_id", document.get("_id")), updateDocument.toFinalUpdateDocument())); }); + + // Due to patch 2 from TASK-6013, default user expirationDate for some users was not set. 
+ MongoCollection userCollection = getMongoCollection(OrganizationMongoDBAdaptorFactory.USER_COLLECTION); + userCollection.updateMany( + Filters.or( + Filters.eq("internal.account.expirationDate", null), + Filters.eq("internal.account.expirationDate", "") + ), + Updates.set("internal.account.expirationDate", Constants.DEFAULT_USER_EXPIRATION_DATE)); } } diff --git a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/api/StudyDBAdaptor.java b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/api/StudyDBAdaptor.java index 3d2e0e95b3..4aeaad7251 100644 --- a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/api/StudyDBAdaptor.java +++ b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/api/StudyDBAdaptor.java @@ -252,6 +252,8 @@ OpenCGAResult setUsersToGroup(long studyId, String groupId, List OpenCGAResult removeUsersFromAllGroups(long studyId, List users) throws CatalogException; + OpenCGAResult removeUsersFromAllGroups(List users) throws CatalogException; + /** * Delete a group. 
* diff --git a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/mongodb/OrganizationMongoDBAdaptor.java b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/mongodb/OrganizationMongoDBAdaptor.java index 2f17c1101d..d81bcd2044 100644 --- a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/mongodb/OrganizationMongoDBAdaptor.java +++ b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/mongodb/OrganizationMongoDBAdaptor.java @@ -418,13 +418,13 @@ private void fixFederationClientForRemoval(ObjectMap parameters) { } List federationParamList = new LinkedList<>(); for (Object federationClient : parameters.getAsList(QueryParams.FEDERATION_CLIENTS.key())) { - if (federationClient instanceof FederationServerParams) { - federationParamList.add(new Document("id", ((FederationServerParams) federationClient).getId())); + if (federationClient instanceof FederationClientParams) { + federationParamList.add(new Document("id", ((FederationClientParams) federationClient).getId())); } else { federationParamList.add(new Document("id", ((Map) federationClient).get("id"))); } } - parameters.putNested(QueryParams.FEDERATION_CLIENTS.key(), federationParamList, false); + parameters.put(QueryParams.FEDERATION_CLIENTS.key(), federationParamList, false); } private void fixFederationServerForRemoval(ObjectMap parameters) { @@ -433,13 +433,13 @@ private void fixFederationServerForRemoval(ObjectMap parameters) { } List federationParamList = new LinkedList<>(); for (Object federationServer : parameters.getAsList(QueryParams.FEDERATION_SERVERS.key())) { - if (federationServer instanceof FederationClientParams) { - federationParamList.add(new Document("id", ((FederationClientParams) federationServer).getId())); + if (federationServer instanceof FederationServerParams) { + federationParamList.add(new Document("id", ((FederationServerParams) federationServer).getId())); } else { federationParamList.add(new Document("id", ((Map) federationServer).get("id"))); } } - 
parameters.putNested(QueryParams.FEDERATION_SERVERS.key(), federationParamList, false); + parameters.put(QueryParams.FEDERATION_SERVERS.key(), federationParamList, false); } private void fixAuthOriginsForRemoval(ObjectMap parameters) { diff --git a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/mongodb/StudyMongoDBAdaptor.java b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/mongodb/StudyMongoDBAdaptor.java index 6b540af81e..4d9582f70d 100644 --- a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/mongodb/StudyMongoDBAdaptor.java +++ b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/mongodb/StudyMongoDBAdaptor.java @@ -492,7 +492,7 @@ OpenCGAResult removeUsersFromAdminsGroup(ClientSession clientSession, Lis @Override public OpenCGAResult removeUsersFromAllGroups(long studyId, List users) throws CatalogException { - if (users == null || users.size() == 0) { + if (CollectionUtils.isEmpty(users)) { throw new CatalogDBException("Unable to remove users from groups. List of users is empty"); } @@ -507,7 +507,7 @@ public OpenCGAResult removeUsersFromAllGroups(long studyId, List Bson pull = Updates.pullAll("groups.$.userIds", users); // Pull those users while they are still there - DataResult update; + DataResult update; do { update = studyCollection.update(clientSession, query, pull, null); } while (update.getNumUpdated() > 0); @@ -520,6 +520,36 @@ public OpenCGAResult removeUsersFromAllGroups(long studyId, List } } + @Override + public OpenCGAResult removeUsersFromAllGroups(List users) throws CatalogException { + if (CollectionUtils.isEmpty(users)) { + throw new CatalogDBException("Unable to remove users from groups. 
List of users is empty"); + } + + try { + return runTransaction(clientSession -> { + long tmpStartTime = startQuery(); + logger.debug("Removing list of users '{}' from all groups from all studies", users); + + Document query = new Document() + .append(QueryParams.GROUP_USER_IDS.key(), new Document("$in", users)); + Bson pull = Updates.pullAll("groups.$.userIds", users); + + QueryOptions multi = new QueryOptions(MongoDBCollection.MULTI, true); + // Pull those users while they are still there + DataResult update; + do { + update = studyCollection.update(clientSession, query, pull, multi); + } while (update.getNumUpdated() > 0); + + return endWrite(tmpStartTime, -1, -1, null); + }); + } catch (Exception e) { + logger.error("Could not remove users from all groups from all studies. {}", e.getMessage()); + throw e; + } + } + @Override public OpenCGAResult deleteGroup(long studyId, String groupId) throws CatalogDBException { Bson queryBson = new Document() From 8a074154fbecaafe5748a0aff9ed9eddb75dafc3 Mon Sep 17 00:00:00 2001 From: pfurio Date: Mon, 10 Feb 2025 15:58:55 +0100 Subject: [PATCH 116/122] catalog: block user accounts even if token is valid, #TASK-7192 --- .../CatalogAuthenticationException.java | 20 ------ .../CatalogAuthorizationException.java | 20 ++++++ .../opencga/catalog/managers/UserManager.java | 63 ++++++++++--------- .../catalog/managers/UserManagerTest.java | 11 ++++ 4 files changed, 65 insertions(+), 49 deletions(-) diff --git a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/exceptions/CatalogAuthenticationException.java b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/exceptions/CatalogAuthenticationException.java index 8ac1b5c385..8cb658885d 100644 --- a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/exceptions/CatalogAuthenticationException.java +++ b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/exceptions/CatalogAuthenticationException.java @@ -61,26 +61,6 @@ public static CatalogAuthenticationException 
incorrectUserOrPassword(String doma return new CatalogAuthenticationException(domain + ": Incorrect user or password.", e); } - public static CatalogAuthenticationException userIsBanned(String userId) { - return new CatalogAuthenticationException("Too many login attempts. The account for user '" + userId + "' is banned." - + " Please, talk to your organization owner/administrator."); - } - - public static CatalogAuthenticationException userIsSuspended(String userId) { - return new CatalogAuthenticationException("The account for user '" + userId + "' is suspended. Please, talk to your organization" - + " owner/administrator."); - } - - public static CatalogAuthenticationException accountIsExpired(String userId, String expirationDate) { - return new CatalogAuthenticationException("The account for user '" + userId + "' expired on " + expirationDate + ". Please," - + " talk to your organization owner/administrator."); - } - - public static CatalogAuthenticationException passwordExpired(String userId, String expirationDate) { - return new CatalogAuthenticationException("The password for the user account '" + userId + "' expired on " + expirationDate - + ". 
Please, reset your password or talk to your organization owner/administrator."); - } - public static CatalogAuthenticationException userNotAllowed(String domain) { return new CatalogAuthenticationException(domain + ": User not allowed to access the system."); } diff --git a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/exceptions/CatalogAuthorizationException.java b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/exceptions/CatalogAuthorizationException.java index 546d311720..e8d173ff23 100644 --- a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/exceptions/CatalogAuthorizationException.java +++ b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/exceptions/CatalogAuthorizationException.java @@ -110,6 +110,26 @@ public static CatalogAuthorizationException denyAny(String userId, String permis + " cannot " + permission + " any " + resource + "."); } + public static CatalogAuthorizationException userIsBanned(String userId) { + return new CatalogAuthorizationException("Too many login attempts. The account for user '" + userId + "' is banned." + + " Please, talk to your organization owner/administrator."); + } + + public static CatalogAuthorizationException userIsSuspended(String userId) { + return new CatalogAuthorizationException("The account for user '" + userId + "' is suspended. Please, talk to your organization" + + " owner/administrator."); + } + + public static CatalogAuthorizationException accountIsExpired(String userId, String expirationDate) { + return new CatalogAuthorizationException("The account for user '" + userId + "' expired on " + expirationDate + ". Please," + + " talk to your organization owner/administrator."); + } + + public static CatalogAuthorizationException passwordExpired(String userId, String expirationDate) { + return new CatalogAuthorizationException("The password for the user account '" + userId + "' expired on " + expirationDate + + ". 
Please, reset your password or talk to your organization owner/administrator."); + } + public static CatalogAuthorizationException notOrganizationOwner() { return notOrganizationOwner("perform this action"); } diff --git a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/UserManager.java b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/UserManager.java index 9fc8cd7b70..092e6f70c9 100644 --- a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/UserManager.java +++ b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/UserManager.java @@ -321,6 +321,7 @@ public JwtPayload validateToken(String token) throws CatalogException { throw new CatalogException("User '" + jwtPayload.getUserId() + "' could not be found."); } User user = userResult.first(); + checkValidUserAccountStatus(user.getId(), user); authOrigin = user.getInternal().getAccount().getAuthentication(); } @@ -854,35 +855,7 @@ public AuthenticationResponse login(String organizationId, String username, Stri && CatalogAuthenticationManager.OPENCGA.equals(user.getInternal().getAccount().getAuthentication().getId()); // We check if (userCanBeBanned) { - // Check user is not banned, suspended or has an expired account - if (UserStatus.BANNED.equals(user.getInternal().getStatus().getId())) { - throw CatalogAuthenticationException.userIsBanned(username); - } - if (UserStatus.SUSPENDED.equals(user.getInternal().getStatus().getId())) { - throw CatalogAuthenticationException.userIsSuspended(username); - } - Account account2 = user.getInternal().getAccount(); - if (account2.getPassword().getExpirationDate() != null) { - Account account1 = user.getInternal().getAccount(); - Date passwordExpirationDate = TimeUtils.toDate(account1.getPassword().getExpirationDate()); - if (passwordExpirationDate == null) { - throw new CatalogException("Unexpected null 'passwordExpirationDate' for user '" + username + "'."); - } - if (passwordExpirationDate.before(new Date())) { 
- Account account = user.getInternal().getAccount(); - throw CatalogAuthenticationException.passwordExpired(username, account.getPassword().getExpirationDate()); - } - } - if (user.getInternal().getAccount().getExpirationDate() != null) { - Date date = TimeUtils.toDate(user.getInternal().getAccount().getExpirationDate()); - if (date == null) { - throw new CatalogException("Unexpected null 'expirationDate' for user '" + username + "'."); - } - if (date.before(new Date())) { - throw CatalogAuthenticationException.accountIsExpired(username, - user.getInternal().getAccount().getExpirationDate()); - } - } + checkValidUserAccountStatus(username, user); } Account.AuthenticationOrigin authentication = user.getInternal().getAccount().getAuthentication(); authId = user.getInternal().getAccount().getAuthentication().getId(); @@ -969,6 +942,38 @@ public AuthenticationResponse login(String organizationId, String username, Stri return response; } + private static void checkValidUserAccountStatus(String username, User user) throws CatalogException { + // Check user is not banned, suspended or has an expired account + if (UserStatus.BANNED.equals(user.getInternal().getStatus().getId())) { + throw CatalogAuthorizationException.userIsBanned(username); + } + if (UserStatus.SUSPENDED.equals(user.getInternal().getStatus().getId())) { + throw CatalogAuthorizationException.userIsSuspended(username); + } + Account account2 = user.getInternal().getAccount(); + if (account2.getPassword().getExpirationDate() != null) { + Account account1 = user.getInternal().getAccount(); + Date passwordExpirationDate = TimeUtils.toDate(account1.getPassword().getExpirationDate()); + if (passwordExpirationDate == null) { + throw new CatalogException("Unexpected null 'passwordExpirationDate' for user '" + username + "'."); + } + if (passwordExpirationDate.before(new Date())) { + Account account = user.getInternal().getAccount(); + throw CatalogAuthorizationException.passwordExpired(username, 
account.getPassword().getExpirationDate()); + } + } + if (user.getInternal().getAccount().getExpirationDate() != null) { + Date date = TimeUtils.toDate(user.getInternal().getAccount().getExpirationDate()); + if (date == null) { + throw new CatalogException("Unexpected null 'expirationDate' for user '" + username + "'."); + } + if (date.before(new Date())) { + throw CatalogAuthorizationException.accountIsExpired(username, + user.getInternal().getAccount().getExpirationDate()); + } + } + } + public AuthenticationResponse loginAnonymous(String organizationId) throws CatalogException { ParamUtils.checkParameter(organizationId, "organization id"); diff --git a/opencga-catalog/src/test/java/org/opencb/opencga/catalog/managers/UserManagerTest.java b/opencga-catalog/src/test/java/org/opencb/opencga/catalog/managers/UserManagerTest.java index 6f3e507b72..f9e93f4e10 100644 --- a/opencga-catalog/src/test/java/org/opencb/opencga/catalog/managers/UserManagerTest.java +++ b/opencga-catalog/src/test/java/org/opencb/opencga/catalog/managers/UserManagerTest.java @@ -285,6 +285,17 @@ public void changeUserStatusTest() throws CatalogException { assertTrue(dbException.getMessage().contains("not exist")); } + @Test + public void userIsImmediatelyBlockedTest() throws CatalogException { + OpenCGAResult studyOpenCGAResult = catalogManager.getStudyManager().get(studyFqn, QueryOptions.empty(), normalToken1); + assertEquals(1, studyOpenCGAResult.getNumResults()); + + catalogManager.getUserManager().changeStatus(organizationId, normalUserId1, UserStatus.SUSPENDED, QueryOptions.empty(), ownerToken); + CatalogAuthorizationException exception = assertThrows(CatalogAuthorizationException.class, + () -> catalogManager.getStudyManager().get(studyFqn, QueryOptions.empty(), normalToken1)); + assertTrue(exception.getMessage().contains("suspended")); + } + @Test public void loginExpiredAccountTest() throws CatalogException { // Expire account of normalUserId1 From 
775ef957e917737e1f5d96c7db7619c4ca5bb9f8 Mon Sep 17 00:00:00 2001 From: JuanfeSanahuja Date: Tue, 11 Feb 2025 11:14:15 +0100 Subject: [PATCH 117/122] fix merge cicd --- .github/workflows/pull-request-merged.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pull-request-merged.yml b/.github/workflows/pull-request-merged.yml index ac1a2651c0..cabfd4120c 100644 --- a/.github/workflows/pull-request-merged.yml +++ b/.github/workflows/pull-request-merged.yml @@ -20,4 +20,4 @@ jobs: run: | echo "Deleting docker images" python3 ./opencga-app/app/cloud/docker/docker-build.py delete --images base --tag ${{ github.head_ref }} - + secrets: inherit From 4b3a4d32053b32df3531d68d8e279a58debeedef Mon Sep 17 00:00:00 2001 From: pfurio Date: Tue, 11 Feb 2025 16:35:04 +0100 Subject: [PATCH 118/122] server: improve response builder, #TASK-7192 --- .../opencga/server/rest/OpenCGAWSServer.java | 46 +++++++++++++------ 1 file changed, 32 insertions(+), 14 deletions(-) diff --git a/opencga-server/src/main/java/org/opencb/opencga/server/rest/OpenCGAWSServer.java b/opencga-server/src/main/java/org/opencb/opencga/server/rest/OpenCGAWSServer.java index c8231bdddb..b0bfbbb785 100644 --- a/opencga-server/src/main/java/org/opencb/opencga/server/rest/OpenCGAWSServer.java +++ b/opencga-server/src/main/java/org/opencb/opencga/server/rest/OpenCGAWSServer.java @@ -747,27 +747,45 @@ protected Response createOkResponse(Object obj, List events) { for (OpenCGAResult openCGAResult : list) { setFederationServer(openCGAResult, uriInfo.getBaseUri().toString()); } - Response.Status status = getResponseStatus(list); - queryResponse.setResponses(list); + Response.Status status = getResponseStatus(queryResponse); Response response = Response.fromResponse(createJsonResponse(queryResponse)).status(status).build(); logResponse(response.getStatusInfo(), queryResponse); return response; } - protected Response.Status getResponseStatus(List> list) { - if (list != null) { - for 
(OpenCGAResult openCGAResult : list) { - if (CollectionUtils.isNotEmpty(openCGAResult.getEvents())) { - for (Event event : openCGAResult.getEvents()) { - if (event.getType().equals(Event.Type.ERROR)) { - if (event.getMessage().contains("denied")) { - return Response.Status.UNAUTHORIZED; - } else { - return Response.Status.BAD_REQUEST; - } - } + protected Response createResponse(RestResponse restResponse) { + Response.Status status = getResponseStatus(restResponse); + Response response = Response.fromResponse(createJsonResponse(restResponse)).status(status).build(); + logResponse(response.getStatusInfo(), restResponse); + return response; + } + + protected Response.Status getResponseStatus(RestResponse restResponse) { + Response.Status responseStatus = getResponseStatus(restResponse.getEvents()); + if (responseStatus != Response.Status.OK) { + return responseStatus; + } + if (CollectionUtils.isNotEmpty(restResponse.getResponses())) { + for (OpenCGAResult response : restResponse.getResponses()) { + responseStatus = getResponseStatus(response.getEvents()); + if (responseStatus != Response.Status.OK) { + return responseStatus; + } + } + } + return Response.Status.OK; + } + + protected Response.Status getResponseStatus(List eventList) { + if (CollectionUtils.isNotEmpty(eventList)) { + for (Event event : eventList) { + if (event.getType().equals(Event.Type.ERROR)) { + if (event.getMessage().contains("denied")) { + return Response.Status.UNAUTHORIZED; + } else { + return Response.Status.BAD_REQUEST; } } } From 4906e241f1672f2c094116ea69f4a102ea4655f6 Mon Sep 17 00:00:00 2001 From: pfurio Date: Wed, 12 Feb 2025 15:14:23 +0100 Subject: [PATCH 119/122] core: add fields to client mixin, #TASK-7192 --- .../core/models/federation/FederationClientParamsMixin.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/opencga-core/src/main/java/org/opencb/opencga/core/models/federation/FederationClientParamsMixin.java 
b/opencga-core/src/main/java/org/opencb/opencga/core/models/federation/FederationClientParamsMixin.java index ecf0be95ca..9589ecb536 100644 --- a/opencga-core/src/main/java/org/opencb/opencga/core/models/federation/FederationClientParamsMixin.java +++ b/opencga-core/src/main/java/org/opencb/opencga/core/models/federation/FederationClientParamsMixin.java @@ -2,6 +2,6 @@ import com.fasterxml.jackson.annotation.JsonIgnoreProperties; -@JsonIgnoreProperties({"password", "securityKey", "token"}) +@JsonIgnoreProperties({"email", "url", "organizationId", "userId", "password", "securityKey", "token"}) public class FederationClientParamsMixin { } From ab7ec95d02d3e3245733d6ddf43a41ab283305ff Mon Sep 17 00:00:00 2001 From: JuanfeSanahuja Date: Wed, 12 Feb 2025 17:42:46 +0100 Subject: [PATCH 120/122] fix merge cicd --- .github/workflows/pull-request-approved.yml | 2 +- .github/workflows/pull-request-merged.yml | 10 ++++++--- .../workflows/scripts/get-xetabase-branch.sh | 22 ++++++++++--------- 3 files changed, 20 insertions(+), 14 deletions(-) diff --git a/.github/workflows/pull-request-approved.yml b/.github/workflows/pull-request-approved.yml index cf72bc1490..5094b5ade5 100644 --- a/.github/workflows/pull-request-approved.yml +++ b/.github/workflows/pull-request-approved.yml @@ -25,7 +25,7 @@ jobs: chmod +x ./.github/workflows/scripts/get-xetabase-branch.sh echo "github.event.pull_request.base.ref: ${{ github.event.pull_request.base.ref }}" echo "github.event.pull_request.head.ref: ${{ github.event.pull_request.head.ref }}" - xetabase_branch=$(./.github/workflows/scripts/get-xetabase-branch.sh ${{ github.event.pull_request.base.ref }}) + xetabase_branch=$(./.github/workflows/scripts/get-xetabase-branch.sh ${{ github.event.pull_request.base.ref }}) ${{ github.event.pull_request.head.ref }} echo "__Xetabase ref:__ \"${xetabase_branch}\"" | tee -a ${GITHUB_STEP_SUMMARY} echo "xetabase_branch: ${xetabase_branch}" echo "xetabase_branch=${xetabase_branch}" >> $GITHUB_OUTPUT diff 
--git a/.github/workflows/pull-request-merged.yml b/.github/workflows/pull-request-merged.yml index cabfd4120c..3a1c2da035 100644 --- a/.github/workflows/pull-request-merged.yml +++ b/.github/workflows/pull-request-merged.yml @@ -16,8 +16,12 @@ jobs: - uses: actions/checkout@v4 with: fetch-depth: '10' - - name: delete docker images + - uses: docker/login-action@v2 + with: + username: ${{ secrets.DOCKER_HUB_USER }} + password: ${{ secrets.DOCKER_HUB_PASSWORD }} + - name: Delete in Docker Hub run: | - echo "Deleting docker images" + echo "Deleting docker image ${{ github.head_ref }}" python3 ./opencga-app/app/cloud/docker/docker-build.py delete --images base --tag ${{ github.head_ref }} - secrets: inherit + diff --git a/.github/workflows/scripts/get-xetabase-branch.sh b/.github/workflows/scripts/get-xetabase-branch.sh index 40ad247ecf..11129eb96d 100644 --- a/.github/workflows/scripts/get-xetabase-branch.sh +++ b/.github/workflows/scripts/get-xetabase-branch.sh @@ -3,24 +3,25 @@ # Function to calculate the corresponding branch of Xetabase project get_xetabase_branch() { # Input parameter (branch name) - input_branch="$1" + target_branch="$1" + current_branch="$1" # If the branch begins with 'TASK' and exists in the opencga-enterprise repository, I return it - if [[ $input_branch == TASK* ]]; then - if [ "$(git ls-remote "https://$ZETTA_REPO_ACCESS_TOKEN@github.com/zetta-genomics/opencga-enterprise.git" "$input_branch" )" ] ; then - echo "$input_branch"; + if [[ $current_branch == TASK* ]]; then + if [ "$(git ls-remote "https://$ZETTA_REPO_ACCESS_TOKEN@github.com/zetta-genomics/opencga-enterprise.git" "$target_branch" )" ] ; then + echo "$target_branch"; return 0; fi fi # Check if the branch name is "develop" in that case return the same branch name - if [[ "$input_branch" == "develop" ]]; then + if [[ "$target_branch" == "develop" ]]; then echo "develop" return 0 fi # Check if the branch name starts with "release-" and follows the patterns "release-a.x.x" or 
"release-a.b.x" - if [[ "$input_branch" =~ ^release-([0-9]+)\.x\.x$ ]] || [[ "$input_branch" =~ ^release-([0-9]+)\.([0-9]+)\.x$ ]]; then + if [[ "$target_branch" =~ ^release-([0-9]+)\.x\.x$ ]] || [[ "$target_branch" =~ ^release-([0-9]+)\.([0-9]+)\.x$ ]]; then # Extract the MAJOR part of the branch name MAJOR=${BASH_REMATCH[1]} # Calculate the XETABASE_MAJOR by subtracting 1 from MAJOR of opencga @@ -31,7 +32,7 @@ get_xetabase_branch() { return 1 fi # Construct and echo the new branch name - echo "release-$XETABASE_MAJOR.${input_branch#release-$MAJOR.}" + echo "release-$XETABASE_MAJOR.${target_branch#release-$MAJOR.}" return 0 fi @@ -41,10 +42,11 @@ get_xetabase_branch() { } # Check if the script receives exactly one argument -if [ "$#" -ne 1 ]; then - echo "Usage: $0 " +if [ "$#" -ne 2 ]; then + echo "Usage: $0 " exit 1 fi + # Call the function with the input branch name -get_xetabase_branch "$1" +get_xetabase_branch "$1" "$2" From 56c92c2ed4813eacd0173bffb560ff1b8a1d84ff Mon Sep 17 00:00:00 2001 From: JuanfeSanahuja Date: Fri, 14 Feb 2025 17:31:00 +0100 Subject: [PATCH 121/122] Fix pull request approved --- .github/workflows/pull-request-approved.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pull-request-approved.yml b/.github/workflows/pull-request-approved.yml index 5094b5ade5..16dd6b6567 100644 --- a/.github/workflows/pull-request-approved.yml +++ b/.github/workflows/pull-request-approved.yml @@ -25,7 +25,7 @@ jobs: chmod +x ./.github/workflows/scripts/get-xetabase-branch.sh echo "github.event.pull_request.base.ref: ${{ github.event.pull_request.base.ref }}" echo "github.event.pull_request.head.ref: ${{ github.event.pull_request.head.ref }}" - xetabase_branch=$(./.github/workflows/scripts/get-xetabase-branch.sh ${{ github.event.pull_request.base.ref }}) ${{ github.event.pull_request.head.ref }} + xetabase_branch=$(./.github/workflows/scripts/get-xetabase-branch.sh ${{ github.event.pull_request.base.ref }} ${{ 
github.event.pull_request.head.ref }}) echo "__Xetabase ref:__ \"${xetabase_branch}\"" | tee -a ${GITHUB_STEP_SUMMARY} echo "xetabase_branch: ${xetabase_branch}" echo "xetabase_branch=${xetabase_branch}" >> $GITHUB_OUTPUT From 81a31a643c5feb8ae47a51097ef3b30365d6dbee Mon Sep 17 00:00:00 2001 From: JuanfeSanahuja Date: Tue, 18 Feb 2025 14:49:26 +0100 Subject: [PATCH 122/122] Create the initial version two new yml Manual Delete Docker Image and Reusable delete docker for #TASK-7417 --- .github/workflows/manual-delete-docker.yml | 17 +++++++++++ .github/workflows/reusable-delete-docker.yml | 32 ++++++++++++++++++++ 2 files changed, 49 insertions(+) create mode 100644 .github/workflows/manual-delete-docker.yml create mode 100644 .github/workflows/reusable-delete-docker.yml diff --git a/.github/workflows/manual-delete-docker.yml b/.github/workflows/manual-delete-docker.yml new file mode 100644 index 0000000000..da28ac6b95 --- /dev/null +++ b/.github/workflows/manual-delete-docker.yml @@ -0,0 +1,17 @@ +name: Manual Delete Docker Image + +on: + workflow_dispatch: + inputs: + task: + description: "Tag of the Docker image to delete (must start with 'TASK')" + required: true + type: string + +jobs: + call-delete-docker: + name: Call Reusable Delete Docker Workflow + uses: ./.github/workflows/delete-docker.yml + with: + task: ${{ inputs.task }} + secrets: inherit diff --git a/.github/workflows/reusable-delete-docker.yml b/.github/workflows/reusable-delete-docker.yml new file mode 100644 index 0000000000..419ce3dae5 --- /dev/null +++ b/.github/workflows/reusable-delete-docker.yml @@ -0,0 +1,32 @@ +name: Reusable delete docker workflow from DockerHub + +on: + workflow_call: + inputs: + task: + type: string + required: true + secrets: + DOCKER_HUB_USER: + required: true + DOCKER_HUB_PASSWORD: + required: true + +jobs: + delete-docker: + name: Execute delete docker image + runs-on: ubuntu-22.04 + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: '10' + - name: Validate 
task name + if: ${{ !startsWith(inputs.task, 'TASK') }} + run: | + echo "Error: Can't delete ${{ inputs.task }}. Only Docker images related to TASK branches can be deleted" + exit 1 + - name: Delete in Docker Hub + if: ${{ startsWith(inputs.task, 'TASK') }} + run: | + echo "Deleting docker image ${{ inputs.task }}" + python3 ./opencga-app/app/cloud/docker/docker-build.py delete --images base --tag ${{ inputs.task }} --username ${{ secrets.DOCKER_HUB_USER }} --password ${{ secrets.DOCKER_HUB_PASSWORD }} \ No newline at end of file