From 1695a3c7bd92a50181cdbce5602aceb269f17593 Mon Sep 17 00:00:00 2001 From: VarunReddy1111 Date: Tue, 15 Oct 2024 12:55:50 -0500 Subject: [PATCH 001/118] Created all DTO's required for HTPDataSample --- .../jobs/executors/BulkLoadJobExecutor.java | 6 +- ...essionDatasetSampleAnnotationExecutor.java | 56 +++++++++++++++++++ .../model/ingest/dto/fms/BioSampleAgeDTO.java | 6 ++ .../fms/BioSampleGenomicInformationDTO.java | 7 +++ ...pressionDatasetSampleAnnotationFmsDTO.java | 21 +++++++ ...onDatasetSampleAnnotationIngestFmsDTO.java | 8 +++ .../dto/fms/MicroarraySampleDetailsDTO.java | 6 ++ 7 files changed, 109 insertions(+), 1 deletion(-) create mode 100644 src/main/java/org/alliancegenome/curation_api/jobs/executors/HTPExpressionDatasetSampleAnnotationExecutor.java create mode 100644 src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/BioSampleAgeDTO.java create mode 100644 src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/BioSampleGenomicInformationDTO.java create mode 100644 src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/HTPExpressionDatasetSampleAnnotationFmsDTO.java create mode 100644 src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/HTPExpressionDatasetSampleAnnotationIngestFmsDTO.java create mode 100644 src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/MicroarraySampleDetailsDTO.java diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/executors/BulkLoadJobExecutor.java b/src/main/java/org/alliancegenome/curation_api/jobs/executors/BulkLoadJobExecutor.java index 9fe7ab0b3..fc1eec4e0 100644 --- a/src/main/java/org/alliancegenome/curation_api/jobs/executors/BulkLoadJobExecutor.java +++ b/src/main/java/org/alliancegenome/curation_api/jobs/executors/BulkLoadJobExecutor.java @@ -55,11 +55,13 @@ public class BulkLoadJobExecutor { @Inject GeneExpressionExecutor geneExpressionExecutor; @Inject SequenceTargetingReagentExecutor sqtrExecutor; @Inject VariantFmsExecutor 
variantFmsExecutor; + @Inject HTPExpressionDatasetAnnotationExecutor htpExpressionDatasetAnnotationExecutor; + @Inject HTPExpressionDatasetSampleAnnotationExecutor htpExpressionDatasetSampleAnnotationExecutor; + @Inject Gff3ExonExecutor gff3ExonExecutor; @Inject Gff3CDSExecutor gff3CDSExecutor; @Inject Gff3TranscriptExecutor gff3TranscriptExecutor; - @Inject HTPExpressionDatasetAnnotationExecutor htpExpressionDatasetAnnotationExecutor; @Inject ExpressionAtlasExecutor expressionAtlasExecutor; @Inject BiogridOrcExecutor biogridOrcExecutor; @@ -138,6 +140,8 @@ public void process(BulkLoadFileHistory bulkLoadFileHistory, Boolean cleanUp) th expressionAtlasExecutor.execLoad(bulkLoadFileHistory); } else if (bulkLoadFileHistory.getBulkLoad().getBackendBulkLoadType() == BackendBulkLoadType.BIOGRID_ORCS) { biogridOrcExecutor.execLoad(bulkLoadFileHistory); + } else if(bulkLoadFileHistory.getBulkLoad().getBackendBulkLoadType() == BackendBulkLoadType.HTPDATASAMPLE) { + htpExpressionDatasetSampleAnnotationExecutor.execLoad(bulkLoadFileHistory); } else { log.info("Load: " + bulkLoadFileHistory.getBulkLoad().getName() + " for type " + bulkLoadFileHistory.getBulkLoad().getBackendBulkLoadType() + " not implemented"); throw new Exception("Load: " + bulkLoadFileHistory.getBulkLoad().getName() + " for type " + bulkLoadFileHistory.getBulkLoad().getBackendBulkLoadType() + " not implemented"); diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/executors/HTPExpressionDatasetSampleAnnotationExecutor.java b/src/main/java/org/alliancegenome/curation_api/jobs/executors/HTPExpressionDatasetSampleAnnotationExecutor.java new file mode 100644 index 000000000..235a448a9 --- /dev/null +++ b/src/main/java/org/alliancegenome/curation_api/jobs/executors/HTPExpressionDatasetSampleAnnotationExecutor.java @@ -0,0 +1,56 @@ +package org.alliancegenome.curation_api.jobs.executors; + +import java.io.FileInputStream; +import java.util.ArrayList; +import java.util.List; +import 
java.util.zip.GZIPInputStream; + +import org.alliancegenome.curation_api.enums.BackendBulkDataProvider; +import org.alliancegenome.curation_api.interfaces.AGRCurationSchemaVersion; +import org.alliancegenome.curation_api.model.entities.HTPExpressionDatasetAnnotation; +import org.alliancegenome.curation_api.model.entities.bulkloads.BulkFMSLoad; +import org.alliancegenome.curation_api.model.entities.bulkloads.BulkLoadFileHistory; +import org.alliancegenome.curation_api.model.ingest.dto.fms.HTPExpressionDatasetAnnotationIngestFmsDTO; +import org.apache.commons.lang3.StringUtils; + +public class HTPExpressionDatasetSampleAnnotationExecutor extends LoadFileExecutor{ + public void execLoad(BulkLoadFileHistory bulkLoadFileHistory) { + try { + BulkFMSLoad fms = (BulkFMSLoad) bulkLoadFileHistory.getBulkLoad(); + + + + //HTPExpressionDatasetAnnotationIngestFmsDTO htpExpressionDatasetData = mapper.readValue(new GZIPInputStream(new FileInputStream(bulkLoadFileHistory.getBulkLoadFile().getLocalFilePath())), HTPExpressionDatasetAnnotationIngestFmsDTO.class); + //bulkLoadFileHistory.getBulkLoadFile().setRecordCount(htpExpressionDatasetData.getData().size()); + + //AGRCurationSchemaVersion version = HTPExpressionDatasetAnnotation.class.getAnnotation(AGRCurationSchemaVersion.class); + //bulkLoadFileHistory.getBulkLoadFile().setLinkMLSchemaVersion(version.max()); + // if (htpExpressionDatasetData.getMetaData() != null && StringUtils.isNotBlank(htpExpressionDatasetData.getMetaData().getRelease())) { + // bulkLoadFileHistory.getBulkLoadFile().setAllianceMemberReleaseVersion(htpExpressionDatasetData.getMetaData().getRelease()); + // } + + // BackendBulkDataProvider dataProvider = BackendBulkDataProvider.valueOf(fms.getFmsDataSubType()); + // List htpAnnotationsIdsLoaded = new ArrayList<>(); + // List previousIds = htpExpressionDatasetAnnotationService.getAnnotationIdsByDataProvider(dataProvider.name()); + + // bulkLoadFileDAO.merge(bulkLoadFileHistory.getBulkLoadFile()); + + // 
bulkLoadFileHistory.setCount((long) htpExpressionDatasetData.getData().size()); + // updateHistory(bulkLoadFileHistory); + + // boolean success = runLoad(bulkLoadFileHistory, dataProvider, htpExpressionDatasetData.getData(), htpAnnotationsIdsLoaded); + + // if (success) { + // runCleanup(htpExpressionDatasetAnnotationService, bulkLoadFileHistory, dataProvider.name(), previousIds, htpAnnotationsIdsLoaded, fms.getFmsDataType()); + // } + // bulkLoadFileHistory.finishLoad(); + + // updateHistory(bulkLoadFileHistory); + // updateExceptions(bulkLoadFileHistory); + + } catch (Exception e) { + failLoad(bulkLoadFileHistory, e); + e.printStackTrace(); + } + } +} diff --git a/src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/BioSampleAgeDTO.java b/src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/BioSampleAgeDTO.java new file mode 100644 index 000000000..e6948c78f --- /dev/null +++ b/src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/BioSampleAgeDTO.java @@ -0,0 +1,6 @@ +package org.alliancegenome.curation_api.model.ingest.dto.fms; + +public class BioSampleAgeDTO { + private WhenExpressedDTO stage; + private String age; +} diff --git a/src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/BioSampleGenomicInformationDTO.java b/src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/BioSampleGenomicInformationDTO.java new file mode 100644 index 000000000..d8c3f2c24 --- /dev/null +++ b/src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/BioSampleGenomicInformationDTO.java @@ -0,0 +1,7 @@ +package org.alliancegenome.curation_api.model.ingest.dto.fms; + +public class BioSampleGenomicInformationDTO { + private String biosampleId; + private String idType; + private String bioSampleText; +} diff --git a/src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/HTPExpressionDatasetSampleAnnotationFmsDTO.java 
b/src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/HTPExpressionDatasetSampleAnnotationFmsDTO.java new file mode 100644 index 000000000..083b6e9a2 --- /dev/null +++ b/src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/HTPExpressionDatasetSampleAnnotationFmsDTO.java @@ -0,0 +1,21 @@ +package org.alliancegenome.curation_api.model.ingest.dto.fms; + +import java.util.List; + +public class HTPExpressionDatasetSampleAnnotationFmsDTO { + private HTPIdFmsDTO sampleId; + private String sampleTitle; + private String sampleType; + private BioSampleAgeDTO sampleAge; + private List sampleLocations; + private String abundance; + private BioSampleGenomicInformationDTO genomicInformation; + private String taxonId; + private String sex; + private String assayType; + private String sequencingFormat; + private List assemblyVersions; + private String notes; + private List datasetIds; + private MicroarraySampleDetailsDTO microarraySampleDetails; +} diff --git a/src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/HTPExpressionDatasetSampleAnnotationIngestFmsDTO.java b/src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/HTPExpressionDatasetSampleAnnotationIngestFmsDTO.java new file mode 100644 index 000000000..225109fd3 --- /dev/null +++ b/src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/HTPExpressionDatasetSampleAnnotationIngestFmsDTO.java @@ -0,0 +1,8 @@ +package org.alliancegenome.curation_api.model.ingest.dto.fms; + +import java.util.List; + +public class HTPExpressionDatasetSampleAnnotationIngestFmsDTO { + private MetaDataFmsDTO metaData; + private List data; +} diff --git a/src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/MicroarraySampleDetailsDTO.java b/src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/MicroarraySampleDetailsDTO.java new file mode 100644 index 000000000..2288b7061 --- /dev/null +++ 
b/src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/MicroarraySampleDetailsDTO.java @@ -0,0 +1,6 @@ +package org.alliancegenome.curation_api.model.ingest.dto.fms; + +public class MicroarraySampleDetailsDTO { + private String channelId; + private Integer channelNum; +} From 5f0a1aa3861c332eaf36b1b2f3f9a5ae989df3a9 Mon Sep 17 00:00:00 2001 From: VarunReddy1111 Date: Tue, 15 Oct 2024 14:22:32 -0500 Subject: [PATCH 002/118] Added service, validator and DAO for HTPDataSample --- ...PExpressionDatasetSampleAnnotationDAO.java | 11 ++++ ...essionDatasetSampleAnnotationExecutor.java | 62 ++++++++++++------- ...pressionDatasetSampleAnnotationFmsDTO.java | 9 ++- ...onDatasetSampleAnnotationIngestFmsDTO.java | 9 ++- ...ressionDatasetSampleAnnotationService.java | 42 +++++++++++++ ...atasetSampleAnnotationFmsDTOValidator.java | 14 +++++ 6 files changed, 121 insertions(+), 26 deletions(-) create mode 100644 src/main/java/org/alliancegenome/curation_api/dao/HTPExpressionDatasetSampleAnnotationDAO.java create mode 100644 src/main/java/org/alliancegenome/curation_api/services/HTPExpressionDatasetSampleAnnotationService.java create mode 100644 src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/HTPExpressionDatasetSampleAnnotationFmsDTOValidator.java diff --git a/src/main/java/org/alliancegenome/curation_api/dao/HTPExpressionDatasetSampleAnnotationDAO.java b/src/main/java/org/alliancegenome/curation_api/dao/HTPExpressionDatasetSampleAnnotationDAO.java new file mode 100644 index 000000000..06e7b4673 --- /dev/null +++ b/src/main/java/org/alliancegenome/curation_api/dao/HTPExpressionDatasetSampleAnnotationDAO.java @@ -0,0 +1,11 @@ +package org.alliancegenome.curation_api.dao; + +import org.alliancegenome.curation_api.dao.base.BaseSQLDAO; +import org.alliancegenome.curation_api.model.entities.HTPExpressionDatasetSampleAnnotation; + +public class HTPExpressionDatasetSampleAnnotationDAO extends BaseSQLDAO { + + protected 
HTPExpressionDatasetSampleAnnotationDAO() { + super(HTPExpressionDatasetSampleAnnotation.class); + } +} diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/executors/HTPExpressionDatasetSampleAnnotationExecutor.java b/src/main/java/org/alliancegenome/curation_api/jobs/executors/HTPExpressionDatasetSampleAnnotationExecutor.java index 235a448a9..bcecf37b7 100644 --- a/src/main/java/org/alliancegenome/curation_api/jobs/executors/HTPExpressionDatasetSampleAnnotationExecutor.java +++ b/src/main/java/org/alliancegenome/curation_api/jobs/executors/HTPExpressionDatasetSampleAnnotationExecutor.java @@ -5,52 +5,66 @@ import java.util.List; import java.util.zip.GZIPInputStream; +import org.alliancegenome.curation_api.dao.ExternalDataBaseEntityDAO; +import org.alliancegenome.curation_api.dao.HTPExpressionDatasetSampleAnnotationDAO; import org.alliancegenome.curation_api.enums.BackendBulkDataProvider; import org.alliancegenome.curation_api.interfaces.AGRCurationSchemaVersion; -import org.alliancegenome.curation_api.model.entities.HTPExpressionDatasetAnnotation; +import org.alliancegenome.curation_api.model.entities.HTPExpressionDatasetSampleAnnotation; import org.alliancegenome.curation_api.model.entities.bulkloads.BulkFMSLoad; import org.alliancegenome.curation_api.model.entities.bulkloads.BulkLoadFileHistory; -import org.alliancegenome.curation_api.model.ingest.dto.fms.HTPExpressionDatasetAnnotationIngestFmsDTO; +import org.alliancegenome.curation_api.model.ingest.dto.fms.HTPExpressionDatasetSampleAnnotationFmsDTO; +import org.alliancegenome.curation_api.model.ingest.dto.fms.HTPExpressionDatasetSampleAnnotationIngestFmsDTO; +import org.alliancegenome.curation_api.services.ExternalDataBaseEntityService; +import org.alliancegenome.curation_api.services.HTPExpressionDatasetSampleAnnotationService; import org.apache.commons.lang3.StringUtils; +import jakarta.inject.Inject; + public class HTPExpressionDatasetSampleAnnotationExecutor extends LoadFileExecutor{ + + 
@Inject ExternalDataBaseEntityService externalDataBaseEntityService; + @Inject ExternalDataBaseEntityDAO externalDataBaseEntityDAO; + @Inject HTPExpressionDatasetSampleAnnotationService HTPExpressionDatasetSampleAnnotationService; + @Inject HTPExpressionDatasetSampleAnnotationDAO HTPExpressionDatasetSampleAnnotationDAO; + public void execLoad(BulkLoadFileHistory bulkLoadFileHistory) { try { BulkFMSLoad fms = (BulkFMSLoad) bulkLoadFileHistory.getBulkLoad(); - - - //HTPExpressionDatasetAnnotationIngestFmsDTO htpExpressionDatasetData = mapper.readValue(new GZIPInputStream(new FileInputStream(bulkLoadFileHistory.getBulkLoadFile().getLocalFilePath())), HTPExpressionDatasetAnnotationIngestFmsDTO.class); - //bulkLoadFileHistory.getBulkLoadFile().setRecordCount(htpExpressionDatasetData.getData().size()); + HTPExpressionDatasetSampleAnnotationIngestFmsDTO htpExpressionDatasetSampleData = mapper.readValue(new GZIPInputStream(new FileInputStream(bulkLoadFileHistory.getBulkLoadFile().getLocalFilePath())), HTPExpressionDatasetSampleAnnotationIngestFmsDTO.class); + bulkLoadFileHistory.getBulkLoadFile().setRecordCount(htpExpressionDatasetSampleData.getData().size()); - //AGRCurationSchemaVersion version = HTPExpressionDatasetAnnotation.class.getAnnotation(AGRCurationSchemaVersion.class); - //bulkLoadFileHistory.getBulkLoadFile().setLinkMLSchemaVersion(version.max()); - // if (htpExpressionDatasetData.getMetaData() != null && StringUtils.isNotBlank(htpExpressionDatasetData.getMetaData().getRelease())) { - // bulkLoadFileHistory.getBulkLoadFile().setAllianceMemberReleaseVersion(htpExpressionDatasetData.getMetaData().getRelease()); - // } + AGRCurationSchemaVersion version = HTPExpressionDatasetSampleAnnotation.class.getAnnotation(AGRCurationSchemaVersion.class); + bulkLoadFileHistory.getBulkLoadFile().setLinkMLSchemaVersion(version.max()); + if (htpExpressionDatasetSampleData.getMetaData() != null && StringUtils.isNotBlank(htpExpressionDatasetSampleData.getMetaData().getRelease())) 
{ + bulkLoadFileHistory.getBulkLoadFile().setAllianceMemberReleaseVersion(htpExpressionDatasetSampleData.getMetaData().getRelease()); + } - // BackendBulkDataProvider dataProvider = BackendBulkDataProvider.valueOf(fms.getFmsDataSubType()); - // List htpAnnotationsIdsLoaded = new ArrayList<>(); - // List previousIds = htpExpressionDatasetAnnotationService.getAnnotationIdsByDataProvider(dataProvider.name()); + BackendBulkDataProvider dataProvider = BackendBulkDataProvider.valueOf(fms.getFmsDataSubType()); + List htpAnnotationsIdsLoaded = new ArrayList<>(); + List previousIds = HTPExpressionDatasetSampleAnnotationService.getAnnotationIdsByDataProvider(dataProvider.name()); - // bulkLoadFileDAO.merge(bulkLoadFileHistory.getBulkLoadFile()); + bulkLoadFileDAO.merge(bulkLoadFileHistory.getBulkLoadFile()); - // bulkLoadFileHistory.setCount((long) htpExpressionDatasetData.getData().size()); - // updateHistory(bulkLoadFileHistory); + bulkLoadFileHistory.setCount((long) htpExpressionDatasetSampleData.getData().size()); + updateHistory(bulkLoadFileHistory); - // boolean success = runLoad(bulkLoadFileHistory, dataProvider, htpExpressionDatasetData.getData(), htpAnnotationsIdsLoaded); + boolean success = runLoad(bulkLoadFileHistory, dataProvider, htpExpressionDatasetSampleData.getData(), htpAnnotationsIdsLoaded); - // if (success) { - // runCleanup(htpExpressionDatasetAnnotationService, bulkLoadFileHistory, dataProvider.name(), previousIds, htpAnnotationsIdsLoaded, fms.getFmsDataType()); - // } - // bulkLoadFileHistory.finishLoad(); + if (success) { + runCleanup(HTPExpressionDatasetSampleAnnotationService, bulkLoadFileHistory, dataProvider.name(), previousIds, htpAnnotationsIdsLoaded, fms.getFmsDataType()); + } + bulkLoadFileHistory.finishLoad(); - // updateHistory(bulkLoadFileHistory); - // updateExceptions(bulkLoadFileHistory); + updateHistory(bulkLoadFileHistory); + updateExceptions(bulkLoadFileHistory); } catch (Exception e) { failLoad(bulkLoadFileHistory, e); 
e.printStackTrace(); } } + private boolean runLoad(BulkLoadFileHistory history, BackendBulkDataProvider dataProvider, List htpDatasetAnnotations, List htpAnnotationsIdsLoaded) { + return true; + } } diff --git a/src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/HTPExpressionDatasetSampleAnnotationFmsDTO.java b/src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/HTPExpressionDatasetSampleAnnotationFmsDTO.java index 083b6e9a2..4fba6b40e 100644 --- a/src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/HTPExpressionDatasetSampleAnnotationFmsDTO.java +++ b/src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/HTPExpressionDatasetSampleAnnotationFmsDTO.java @@ -2,7 +2,14 @@ import java.util.List; -public class HTPExpressionDatasetSampleAnnotationFmsDTO { +import org.alliancegenome.curation_api.model.ingest.dto.base.BaseDTO; + +import lombok.Data; +import lombok.EqualsAndHashCode; + +@Data +@EqualsAndHashCode(callSuper = true) +public class HTPExpressionDatasetSampleAnnotationFmsDTO extends BaseDTO{ private HTPIdFmsDTO sampleId; private String sampleTitle; private String sampleType; diff --git a/src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/HTPExpressionDatasetSampleAnnotationIngestFmsDTO.java b/src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/HTPExpressionDatasetSampleAnnotationIngestFmsDTO.java index 225109fd3..61a85b7d4 100644 --- a/src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/HTPExpressionDatasetSampleAnnotationIngestFmsDTO.java +++ b/src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/HTPExpressionDatasetSampleAnnotationIngestFmsDTO.java @@ -2,7 +2,14 @@ import java.util.List; -public class HTPExpressionDatasetSampleAnnotationIngestFmsDTO { +import org.alliancegenome.curation_api.model.ingest.dto.base.BaseDTO; + +import lombok.Data; +import lombok.EqualsAndHashCode; + +@Data +@EqualsAndHashCode(callSuper = true) +public class 
HTPExpressionDatasetSampleAnnotationIngestFmsDTO extends BaseDTO{ private MetaDataFmsDTO metaData; private List data; } diff --git a/src/main/java/org/alliancegenome/curation_api/services/HTPExpressionDatasetSampleAnnotationService.java b/src/main/java/org/alliancegenome/curation_api/services/HTPExpressionDatasetSampleAnnotationService.java new file mode 100644 index 000000000..911c662cc --- /dev/null +++ b/src/main/java/org/alliancegenome/curation_api/services/HTPExpressionDatasetSampleAnnotationService.java @@ -0,0 +1,42 @@ +package org.alliancegenome.curation_api.services; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Objects; + +import org.alliancegenome.curation_api.constants.EntityFieldConstants; +import org.alliancegenome.curation_api.dao.HTPExpressionDatasetSampleAnnotationDAO; +import org.alliancegenome.curation_api.enums.BackendBulkDataProvider; +import org.alliancegenome.curation_api.exceptions.ValidationException; +import org.alliancegenome.curation_api.interfaces.crud.BaseUpsertServiceInterface; +import org.alliancegenome.curation_api.model.entities.HTPExpressionDatasetSampleAnnotation; +import org.alliancegenome.curation_api.model.ingest.dto.fms.HTPExpressionDatasetSampleAnnotationFmsDTO; +import org.alliancegenome.curation_api.services.base.BaseEntityCrudService; +import org.alliancegenome.curation_api.services.validation.dto.fms.HTPExpressionDatasetSampleAnnotationFmsDTOValidator; + +import jakarta.annotation.PostConstruct; +import jakarta.inject.Inject; + +public class HTPExpressionDatasetSampleAnnotationService extends BaseEntityCrudService implements BaseUpsertServiceInterface { + @Inject HTPExpressionDatasetSampleAnnotationDAO htpExpressionDatasetSampleAnnotationDAO; + @Inject HTPExpressionDatasetSampleAnnotationFmsDTOValidator htpExpressionDatasetSampleAnnotationFmsDtoValidator; + + @Override + @PostConstruct + protected void init() { + setSQLDao(htpExpressionDatasetSampleAnnotationDAO); + } + + 
public HTPExpressionDatasetSampleAnnotation upsert(HTPExpressionDatasetSampleAnnotationFmsDTO htpExpressionDatasetSampleAnnotationData, BackendBulkDataProvider backendBulkDataProvider) throws ValidationException { + return htpExpressionDatasetSampleAnnotationFmsDtoValidator.validateHTPExpressionDatasetSampleAnnotationFmsDTO(htpExpressionDatasetSampleAnnotationData, backendBulkDataProvider); + } + + public List getAnnotationIdsByDataProvider(String dataProvider) { + Map params = new HashMap<>(); + params.put(EntityFieldConstants.DATA_PROVIDER, dataProvider); + List ids = htpExpressionDatasetSampleAnnotationDAO.findIdsByParams(params); + ids.removeIf(Objects::isNull); + return ids; + } +} diff --git a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/HTPExpressionDatasetSampleAnnotationFmsDTOValidator.java b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/HTPExpressionDatasetSampleAnnotationFmsDTOValidator.java new file mode 100644 index 000000000..7d2280739 --- /dev/null +++ b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/HTPExpressionDatasetSampleAnnotationFmsDTOValidator.java @@ -0,0 +1,14 @@ +package org.alliancegenome.curation_api.services.validation.dto.fms; + +import org.alliancegenome.curation_api.enums.BackendBulkDataProvider; +import org.alliancegenome.curation_api.exceptions.ValidationException; +import org.alliancegenome.curation_api.model.entities.HTPExpressionDatasetSampleAnnotation; +import org.alliancegenome.curation_api.model.ingest.dto.fms.HTPExpressionDatasetSampleAnnotationFmsDTO; + +public class HTPExpressionDatasetSampleAnnotationFmsDTOValidator { + + public HTPExpressionDatasetSampleAnnotation validateHTPExpressionDatasetSampleAnnotationFmsDTO(HTPExpressionDatasetSampleAnnotationFmsDTO dto, BackendBulkDataProvider backendBulkDataProvider) throws ValidationException { + return null; + } + +} From 8fb955cb705e59ed7880eec5c8edfd605a2cde56 Mon Sep 17 00:00:00 2001 From: 
VarunReddy1111 Date: Mon, 28 Oct 2024 22:51:23 -0500 Subject: [PATCH 003/118] Validation of HTPDataSample annotations --- .../constants/VocabularyConstants.java | 3 +- ...PExpressionDatasetSampleAnnotationDAO.java | 3 + ...essionDatasetSampleAnnotationExecutor.java | 49 +++- .../HTPExpressionDatasetSampleAnnotation.java | 2 +- .../model/ingest/dto/fms/BioSampleAgeDTO.java | 9 +- .../fms/BioSampleGenomicInformationDTO.java | 8 +- ...pressionDatasetSampleAnnotationFmsDTO.java | 3 +- .../dto/fms/MicroarraySampleDetailsDTO.java | 9 +- ...ressionDatasetSampleAnnotationService.java | 5 +- ...neExpressionAnnotationFmsDTOValidator.java | 86 +++--- ...atasetSampleAnnotationFmsDTOValidator.java | 249 +++++++++++++++++- ...0.63__htpdatasample_adding_vocab_terms.sql | 3 + 12 files changed, 372 insertions(+), 57 deletions(-) create mode 100644 src/main/resources/db/migration/v0.37.0.63__htpdatasample_adding_vocab_terms.sql diff --git a/src/main/java/org/alliancegenome/curation_api/constants/VocabularyConstants.java b/src/main/java/org/alliancegenome/curation_api/constants/VocabularyConstants.java index db0a0d007..9f20f49fb 100644 --- a/src/main/java/org/alliancegenome/curation_api/constants/VocabularyConstants.java +++ b/src/main/java/org/alliancegenome/curation_api/constants/VocabularyConstants.java @@ -84,7 +84,8 @@ private VocabularyConstants() { public static final String ANATOMICAL_SUBSTRUCTURE_QUALIFIER = "anatomical_subtructure_qualifier"; public static final String CELLULAR_COMPONENT_QUALIFIER = "cellular_component_qualifier"; - public static final String HTP_DATASET_CATEGORY_TAGS = "data_set_category_tags"; + public static final String HTP_DATASET_CATEGORY_TAGS_VOCABULARY = "data_set_category_tags"; public static final String HTP_DATASET_NOTE_TYPE_VOCABULARY_TERM_SET = "htp_expression_dataset_note_type"; public static final String HTP_DATASET_SAMPLE_NOTE_TYPE_VOCABULARY_TERM_SET = "htp_expression_dataset_sample_note_type"; + public static final String 
HTP_DATASET_SAMPLE_SEQUENCE_FORMAT_VOCABULARY = "htp_data_sample_sequencing_format"; } diff --git a/src/main/java/org/alliancegenome/curation_api/dao/HTPExpressionDatasetSampleAnnotationDAO.java b/src/main/java/org/alliancegenome/curation_api/dao/HTPExpressionDatasetSampleAnnotationDAO.java index 06e7b4673..68715e680 100644 --- a/src/main/java/org/alliancegenome/curation_api/dao/HTPExpressionDatasetSampleAnnotationDAO.java +++ b/src/main/java/org/alliancegenome/curation_api/dao/HTPExpressionDatasetSampleAnnotationDAO.java @@ -3,6 +3,9 @@ import org.alliancegenome.curation_api.dao.base.BaseSQLDAO; import org.alliancegenome.curation_api.model.entities.HTPExpressionDatasetSampleAnnotation; +import jakarta.enterprise.context.ApplicationScoped; + +@ApplicationScoped public class HTPExpressionDatasetSampleAnnotationDAO extends BaseSQLDAO { protected HTPExpressionDatasetSampleAnnotationDAO() { diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/executors/HTPExpressionDatasetSampleAnnotationExecutor.java b/src/main/java/org/alliancegenome/curation_api/jobs/executors/HTPExpressionDatasetSampleAnnotationExecutor.java index bcecf37b7..841e60cba 100644 --- a/src/main/java/org/alliancegenome/curation_api/jobs/executors/HTPExpressionDatasetSampleAnnotationExecutor.java +++ b/src/main/java/org/alliancegenome/curation_api/jobs/executors/HTPExpressionDatasetSampleAnnotationExecutor.java @@ -8,6 +8,8 @@ import org.alliancegenome.curation_api.dao.ExternalDataBaseEntityDAO; import org.alliancegenome.curation_api.dao.HTPExpressionDatasetSampleAnnotationDAO; import org.alliancegenome.curation_api.enums.BackendBulkDataProvider; +import org.alliancegenome.curation_api.exceptions.ObjectUpdateException; +import org.alliancegenome.curation_api.exceptions.ObjectUpdateException.ObjectUpdateExceptionData; import org.alliancegenome.curation_api.interfaces.AGRCurationSchemaVersion; import org.alliancegenome.curation_api.model.entities.HTPExpressionDatasetSampleAnnotation; import 
org.alliancegenome.curation_api.model.entities.bulkloads.BulkFMSLoad; @@ -16,16 +18,20 @@ import org.alliancegenome.curation_api.model.ingest.dto.fms.HTPExpressionDatasetSampleAnnotationIngestFmsDTO; import org.alliancegenome.curation_api.services.ExternalDataBaseEntityService; import org.alliancegenome.curation_api.services.HTPExpressionDatasetSampleAnnotationService; +import org.alliancegenome.curation_api.util.ProcessDisplayHelper; import org.apache.commons.lang3.StringUtils; +import io.quarkus.logging.Log; +import jakarta.enterprise.context.ApplicationScoped; import jakarta.inject.Inject; +@ApplicationScoped public class HTPExpressionDatasetSampleAnnotationExecutor extends LoadFileExecutor{ @Inject ExternalDataBaseEntityService externalDataBaseEntityService; @Inject ExternalDataBaseEntityDAO externalDataBaseEntityDAO; - @Inject HTPExpressionDatasetSampleAnnotationService HTPExpressionDatasetSampleAnnotationService; - @Inject HTPExpressionDatasetSampleAnnotationDAO HTPExpressionDatasetSampleAnnotationDAO; + @Inject HTPExpressionDatasetSampleAnnotationService htpExpressionDatasetSampleAnnotationService; + @Inject HTPExpressionDatasetSampleAnnotationDAO htpExpressionDatasetSampleAnnotationDAO; public void execLoad(BulkLoadFileHistory bulkLoadFileHistory) { try { @@ -42,7 +48,7 @@ public void execLoad(BulkLoadFileHistory bulkLoadFileHistory) { BackendBulkDataProvider dataProvider = BackendBulkDataProvider.valueOf(fms.getFmsDataSubType()); List htpAnnotationsIdsLoaded = new ArrayList<>(); - List previousIds = HTPExpressionDatasetSampleAnnotationService.getAnnotationIdsByDataProvider(dataProvider.name()); + List previousIds = htpExpressionDatasetSampleAnnotationService.getAnnotationIdsByDataProvider(dataProvider.name()); bulkLoadFileDAO.merge(bulkLoadFileHistory.getBulkLoadFile()); @@ -52,7 +58,7 @@ public void execLoad(BulkLoadFileHistory bulkLoadFileHistory) { boolean success = runLoad(bulkLoadFileHistory, dataProvider, htpExpressionDatasetSampleData.getData(), 
htpAnnotationsIdsLoaded); if (success) { - runCleanup(HTPExpressionDatasetSampleAnnotationService, bulkLoadFileHistory, dataProvider.name(), previousIds, htpAnnotationsIdsLoaded, fms.getFmsDataType()); + runCleanup(htpExpressionDatasetSampleAnnotationService, bulkLoadFileHistory, dataProvider.name(), previousIds, htpAnnotationsIdsLoaded, fms.getFmsDataType()); } bulkLoadFileHistory.finishLoad(); @@ -64,7 +70,40 @@ public void execLoad(BulkLoadFileHistory bulkLoadFileHistory) { e.printStackTrace(); } } - private boolean runLoad(BulkLoadFileHistory history, BackendBulkDataProvider dataProvider, List htpDatasetAnnotations, List htpAnnotationsIdsLoaded) { + + private boolean runLoad(BulkLoadFileHistory history, BackendBulkDataProvider dataProvider, List htpDatasetSampleAnnotations, List htpAnnotationsIdsLoaded) { + ProcessDisplayHelper ph = new ProcessDisplayHelper(); + ph.addDisplayHandler(loadProcessDisplayService); + ph.startProcess("HTP Expression Dataset Sample Annotation DTO Update for " + dataProvider.name(), htpDatasetSampleAnnotations.size()); + + updateHistory(history); + for (HTPExpressionDatasetSampleAnnotationFmsDTO dto : htpDatasetSampleAnnotations) { + try { + HTPExpressionDatasetSampleAnnotation dbObject = htpExpressionDatasetSampleAnnotationService.upsert(dto, dataProvider); + history.incrementCompleted(); + if (dbObject != null) { + htpAnnotationsIdsLoaded.add(dbObject.getId()); + } + } catch (ObjectUpdateException e) { + history.incrementFailed(); + addException(history, e.getData()); + } catch (Exception e) { + e.printStackTrace(); + history.incrementFailed(); + addException(history, new ObjectUpdateExceptionData(dto, e.getMessage(), e.getStackTrace())); + } + if (history.getErrorRate() > 0.25) { + Log.error("Failure Rate > 25% aborting load"); + updateHistory(history); + updateExceptions(history); + failLoadAboveErrorRateCutoff(history); + return false; + } + ph.progressProcess(); + } + updateHistory(history); + updateExceptions(history); + 
ph.finishProcess(); return true; } } diff --git a/src/main/java/org/alliancegenome/curation_api/model/entities/HTPExpressionDatasetSampleAnnotation.java b/src/main/java/org/alliancegenome/curation_api/model/entities/HTPExpressionDatasetSampleAnnotation.java index 4941d6d84..49cab988a 100644 --- a/src/main/java/org/alliancegenome/curation_api/model/entities/HTPExpressionDatasetSampleAnnotation.java +++ b/src/main/java/org/alliancegenome/curation_api/model/entities/HTPExpressionDatasetSampleAnnotation.java @@ -115,7 +115,7 @@ public class HTPExpressionDatasetSampleAnnotation extends AuditedObject { private VocabularyTerm sequencingFormat; @IndexingDependency(reindexOnUpdate = ReindexOnUpdate.SHALLOW) - @ManyToMany + @ManyToMany(cascade = CascadeType.ALL) @JoinTable(indexes = { @Index(name = "htpdatasample_anatomicalsite_htpdatasample_index", columnList = "htpexpressiondatasetsampleannotation_id"), @Index(name = "htpdatasample_anatomicalsite_samplelocations_index", columnList = "htpexpressionsamplelocations_id") diff --git a/src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/BioSampleAgeDTO.java b/src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/BioSampleAgeDTO.java index e6948c78f..3b191176d 100644 --- a/src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/BioSampleAgeDTO.java +++ b/src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/BioSampleAgeDTO.java @@ -1,6 +1,13 @@ package org.alliancegenome.curation_api.model.ingest.dto.fms; -public class BioSampleAgeDTO { +import org.alliancegenome.curation_api.model.ingest.dto.base.BaseDTO; + +import lombok.Data; +import lombok.EqualsAndHashCode; + +@Data +@EqualsAndHashCode(callSuper = true) +public class BioSampleAgeDTO extends BaseDTO { private WhenExpressedDTO stage; private String age; } diff --git a/src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/BioSampleGenomicInformationDTO.java 
b/src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/BioSampleGenomicInformationDTO.java index d8c3f2c24..6709ba9ae 100644 --- a/src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/BioSampleGenomicInformationDTO.java +++ b/src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/BioSampleGenomicInformationDTO.java @@ -1,6 +1,12 @@ package org.alliancegenome.curation_api.model.ingest.dto.fms; +import org.alliancegenome.curation_api.model.ingest.dto.base.BaseDTO; -public class BioSampleGenomicInformationDTO { +import lombok.Data; +import lombok.EqualsAndHashCode; + +@Data +@EqualsAndHashCode(callSuper = true) +public class BioSampleGenomicInformationDTO extends BaseDTO{ private String biosampleId; private String idType; private String bioSampleText; diff --git a/src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/HTPExpressionDatasetSampleAnnotationFmsDTO.java b/src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/HTPExpressionDatasetSampleAnnotationFmsDTO.java index 4fba6b40e..88a8d883f 100644 --- a/src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/HTPExpressionDatasetSampleAnnotationFmsDTO.java +++ b/src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/HTPExpressionDatasetSampleAnnotationFmsDTO.java @@ -23,6 +23,7 @@ public class HTPExpressionDatasetSampleAnnotationFmsDTO extends BaseDTO{ private String sequencingFormat; private List assemblyVersions; private String notes; - private List datasetIds; + private List datasetIds; private MicroarraySampleDetailsDTO microarraySampleDetails; + private String dateAssigned; } diff --git a/src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/MicroarraySampleDetailsDTO.java b/src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/MicroarraySampleDetailsDTO.java index 2288b7061..d802bde3e 100644 --- a/src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/MicroarraySampleDetailsDTO.java +++ 
b/src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/MicroarraySampleDetailsDTO.java @@ -1,6 +1,13 @@ package org.alliancegenome.curation_api.model.ingest.dto.fms; -public class MicroarraySampleDetailsDTO { +import org.alliancegenome.curation_api.model.ingest.dto.base.BaseDTO; + +import lombok.Data; +import lombok.EqualsAndHashCode; + +@Data +@EqualsAndHashCode(callSuper = true) +public class MicroarraySampleDetailsDTO extends BaseDTO{ private String channelId; private Integer channelNum; } diff --git a/src/main/java/org/alliancegenome/curation_api/services/HTPExpressionDatasetSampleAnnotationService.java b/src/main/java/org/alliancegenome/curation_api/services/HTPExpressionDatasetSampleAnnotationService.java index 911c662cc..31358cdca 100644 --- a/src/main/java/org/alliancegenome/curation_api/services/HTPExpressionDatasetSampleAnnotationService.java +++ b/src/main/java/org/alliancegenome/curation_api/services/HTPExpressionDatasetSampleAnnotationService.java @@ -16,10 +16,13 @@ import org.alliancegenome.curation_api.services.validation.dto.fms.HTPExpressionDatasetSampleAnnotationFmsDTOValidator; import jakarta.annotation.PostConstruct; +import jakarta.enterprise.context.RequestScoped; import jakarta.inject.Inject; +@RequestScoped public class HTPExpressionDatasetSampleAnnotationService extends BaseEntityCrudService implements BaseUpsertServiceInterface { - @Inject HTPExpressionDatasetSampleAnnotationDAO htpExpressionDatasetSampleAnnotationDAO; + + @Inject HTPExpressionDatasetSampleAnnotationDAO htpExpressionDatasetSampleAnnotationDAO; @Inject HTPExpressionDatasetSampleAnnotationFmsDTOValidator htpExpressionDatasetSampleAnnotationFmsDtoValidator; @Override diff --git a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/GeneExpressionAnnotationFmsDTOValidator.java b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/GeneExpressionAnnotationFmsDTOValidator.java index 203bd6d5b..0dadf6213 100644 --- 
a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/GeneExpressionAnnotationFmsDTOValidator.java +++ b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/GeneExpressionAnnotationFmsDTOValidator.java @@ -12,6 +12,8 @@ import org.alliancegenome.curation_api.model.entities.ontology.*; import org.alliancegenome.curation_api.model.ingest.dto.fms.GeneExpressionFmsDTO; import org.alliancegenome.curation_api.model.ingest.dto.fms.UberonSlimTermDTO; +import org.alliancegenome.curation_api.model.ingest.dto.fms.WhenExpressedDTO; +import org.alliancegenome.curation_api.model.ingest.dto.fms.WhereExpressedDTO; import org.alliancegenome.curation_api.response.ObjectResponse; import org.alliancegenome.curation_api.response.SearchResponse; import org.alliancegenome.curation_api.services.DataProviderService; @@ -102,21 +104,21 @@ public GeneExpressionAnnotation validateAnnotation(GeneExpressionFmsDTO geneExpr } } - ObjectResponse anatomicalSiteObjectResponse = validateAnatomicalSite(geneExpressionFmsDTO); + ObjectResponse anatomicalSiteObjectResponse = validateAnatomicalSite(geneExpressionFmsDTO.getWhereExpressed()); if (anatomicalSiteObjectResponse.hasErrors()) { response.addErrorMessage("expressionPattern", anatomicalSiteObjectResponse.errorMessagesString()); } else { geneExpressionAnnotation.setWhereExpressedStatement(geneExpressionFmsDTO.getWhereExpressed().getWhereExpressedStatement()); - AnatomicalSite anatomicalSite = updateAnatomicalSite(anatomicalSiteObjectResponse, geneExpressionAnnotation); + AnatomicalSite anatomicalSite = updateAnatomicalSite(anatomicalSiteObjectResponse, geneExpressionAnnotation.getExpressionPattern().getWhereExpressed()); geneExpressionAnnotation.getExpressionPattern().setWhereExpressed(anatomicalSite); } - ObjectResponse temporalContextObjectResponse = validateTemporalContext(geneExpressionFmsDTO); + ObjectResponse temporalContextObjectResponse = 
validateTemporalContext(geneExpressionFmsDTO.getWhenExpressed()); if (temporalContextObjectResponse.hasErrors()) { response.addErrorMessage("expressionPattern", temporalContextObjectResponse.errorMessagesString()); } else { geneExpressionAnnotation.setWhenExpressedStageName(geneExpressionFmsDTO.getWhenExpressed().getStageName()); - TemporalContext temporalContext = updateTemporalContext(temporalContextObjectResponse, geneExpressionAnnotation); + TemporalContext temporalContext = updateTemporalContext(temporalContextObjectResponse, geneExpressionAnnotation.getExpressionPattern().getWhenExpressed()); geneExpressionAnnotation.getExpressionPattern().setWhenExpressed(temporalContext); } @@ -131,18 +133,18 @@ public GeneExpressionAnnotation validateAnnotation(GeneExpressionFmsDTO geneExpr return geneExpressionAnnotation; } - private ObjectResponse validateTemporalContext(GeneExpressionFmsDTO geneExpressionFmsDTO) { + protected ObjectResponse validateTemporalContext(WhenExpressedDTO whenExpressedDTO) { ObjectResponse response = new ObjectResponse<>(); TemporalContext temporalContext = new TemporalContext(); - if (ObjectUtils.isEmpty(geneExpressionFmsDTO.getWhenExpressed())) { - response.addErrorMessage("whenExpressed - ", ValidationConstants.REQUIRED_MESSAGE + " (" + geneExpressionFmsDTO.getWhenExpressed() + ")"); + if (ObjectUtils.isEmpty(whenExpressedDTO)) { + response.addErrorMessage("whenExpressed - ", ValidationConstants.REQUIRED_MESSAGE + " (" + whenExpressedDTO + ")"); return response; } else { - String stageName = geneExpressionFmsDTO.getWhenExpressed().getStageName(); + String stageName = whenExpressedDTO.getStageName(); if (ObjectUtils.isEmpty(stageName)) { response.addErrorMessage("whenExpressed - whenExpressedStageName", ValidationConstants.REQUIRED_MESSAGE + " (" + stageName + ")"); } - String stageTermId = geneExpressionFmsDTO.getWhenExpressed().getStageTermId(); + String stageTermId = whenExpressedDTO.getStageTermId(); if (!ObjectUtils.isEmpty(stageTermId)) 
{ StageTerm stageTerm = stageTermService.findByCurie(stageTermId); if (stageTerm == null) { @@ -151,9 +153,9 @@ private ObjectResponse validateTemporalContext(GeneExpressionFm temporalContext.setDevelopmentalStageStart(stageTerm); } } - if (!ObjectUtils.isEmpty(geneExpressionFmsDTO.getWhenExpressed().getStageUberonSlimTerm())) { - if (!ObjectUtils.isEmpty(geneExpressionFmsDTO.getWhenExpressed().getStageUberonSlimTerm().getUberonTerm())) { - String stageUberonSlimTermId = geneExpressionFmsDTO.getWhenExpressed().getStageUberonSlimTerm().getUberonTerm(); + if (!ObjectUtils.isEmpty(whenExpressedDTO.getStageUberonSlimTerm())) { + if (!ObjectUtils.isEmpty(whenExpressedDTO.getStageUberonSlimTerm().getUberonTerm())) { + String stageUberonSlimTermId = whenExpressedDTO.getStageUberonSlimTerm().getUberonTerm(); if (!ObjectUtils.isEmpty(stageUberonSlimTermId)) { VocabularyTerm stageUberonSlimTerm = vocabularyTermService.getTermInVocabulary(VocabularyConstants.STAGE_UBERON_SLIM_TERMS, stageUberonSlimTermId).getEntity(); if (stageUberonSlimTerm == null) { @@ -169,47 +171,47 @@ private ObjectResponse validateTemporalContext(GeneExpressionFm return response; } - private ObjectResponse validateAnatomicalSite(GeneExpressionFmsDTO geneExpressionFmsDTO) { + protected ObjectResponse validateAnatomicalSite(WhereExpressedDTO whereExpressedDTO) { ObjectResponse response = new ObjectResponse<>(); AnatomicalSite anatomicalSite = new AnatomicalSite(); - if (ObjectUtils.isEmpty(geneExpressionFmsDTO.getWhereExpressed())) { - response.addErrorMessage("whereExpressed - ", ValidationConstants.REQUIRED_MESSAGE + " (" + geneExpressionFmsDTO.getWhereExpressed() + ")"); + if (ObjectUtils.isEmpty(whereExpressedDTO)) { + response.addErrorMessage("whereExpressed - ", ValidationConstants.REQUIRED_MESSAGE + " (" + whereExpressedDTO + ")"); } else { - if (ObjectUtils.isEmpty(geneExpressionFmsDTO.getWhereExpressed().getWhereExpressedStatement())) { - response.addErrorMessage("whereExpressed - 
whereExpressedStatement", ValidationConstants.REQUIRED_MESSAGE + " (" + geneExpressionFmsDTO.getWhereExpressed().getWhereExpressedStatement() + ")"); + if (ObjectUtils.isEmpty(whereExpressedDTO.getWhereExpressedStatement())) { + response.addErrorMessage("whereExpressed - whereExpressedStatement", ValidationConstants.REQUIRED_MESSAGE + " (" + whereExpressedDTO.getWhereExpressedStatement() + ")"); return response; } - boolean lackAnatomicalStructureTermId = ObjectUtils.isEmpty(geneExpressionFmsDTO.getWhereExpressed().getAnatomicalStructureTermId()); - boolean lackStructureUberonSlimTermIds = ObjectUtils.isEmpty(geneExpressionFmsDTO.getWhereExpressed().getAnatomicalStructureUberonSlimTermIds()); - boolean lackCellularComponentId = ObjectUtils.isEmpty(geneExpressionFmsDTO.getWhereExpressed().getCellularComponentTermId()); + boolean lackAnatomicalStructureTermId = ObjectUtils.isEmpty(whereExpressedDTO.getAnatomicalStructureTermId()); + boolean lackStructureUberonSlimTermIds = ObjectUtils.isEmpty(whereExpressedDTO.getAnatomicalStructureUberonSlimTermIds()); + boolean lackCellularComponentId = ObjectUtils.isEmpty(whereExpressedDTO.getCellularComponentTermId()); if ((lackAnatomicalStructureTermId || lackStructureUberonSlimTermIds) && lackCellularComponentId) { - response.addErrorMessage("whereExpressed - MUST HAVe (anatomicalStructureTermId and anatomicalStructureUberonSlimTermIds) or cellularComponentTermId", ValidationConstants.REQUIRED_MESSAGE + " (" + geneExpressionFmsDTO.getWhereExpressed() + ")"); + response.addErrorMessage("whereExpressed - MUST HAVe (anatomicalStructureTermId and anatomicalStructureUberonSlimTermIds) or cellularComponentTermId", ValidationConstants.REQUIRED_MESSAGE + " (" + whereExpressedDTO + ")"); } if (!lackAnatomicalStructureTermId) { - AnatomicalTerm anatomicalStructureTerm = anatomicalTermService.findByCurie(geneExpressionFmsDTO.getWhereExpressed().getAnatomicalStructureTermId()); + AnatomicalTerm anatomicalStructureTerm = 
anatomicalTermService.findByCurie(whereExpressedDTO.getAnatomicalStructureTermId()); if (anatomicalStructureTerm == null) { - response.addErrorMessage("whereExpressed - anatomicalStructureTermId", ValidationConstants.INVALID_MESSAGE + " (" + geneExpressionFmsDTO.getWhereExpressed().getAnatomicalStructureTermId() + ")"); + response.addErrorMessage("whereExpressed - anatomicalStructureTermId", ValidationConstants.INVALID_MESSAGE + " (" + whereExpressedDTO.getAnatomicalStructureTermId() + ")"); } else { anatomicalSite.setAnatomicalStructure(anatomicalStructureTerm); } } - if (!ObjectUtils.isEmpty(geneExpressionFmsDTO.getWhereExpressed().getAnatomicalSubStructureTermId())) { - AnatomicalTerm anatomicalSubStructureTerm = anatomicalTermService.findByCurie(geneExpressionFmsDTO.getWhereExpressed().getAnatomicalSubStructureTermId()); + if (!ObjectUtils.isEmpty(whereExpressedDTO.getAnatomicalSubStructureTermId())) { + AnatomicalTerm anatomicalSubStructureTerm = anatomicalTermService.findByCurie(whereExpressedDTO.getAnatomicalSubStructureTermId()); if (anatomicalSubStructureTerm == null) { - response.addErrorMessage("whereExpressed - anatomicalSubStructureTermId", ValidationConstants.INVALID_MESSAGE + " (" + geneExpressionFmsDTO.getWhereExpressed().getAnatomicalSubStructureTermId() + ")"); + response.addErrorMessage("whereExpressed - anatomicalSubStructureTermId", ValidationConstants.INVALID_MESSAGE + " (" + whereExpressedDTO.getAnatomicalSubStructureTermId() + ")"); } else { anatomicalSite.setAnatomicalSubstructure(anatomicalSubStructureTerm); } } if (!lackCellularComponentId) { - GOTerm cellularComponent = goTermService.findByCurie(geneExpressionFmsDTO.getWhereExpressed().getCellularComponentTermId()); + GOTerm cellularComponent = goTermService.findByCurie(whereExpressedDTO.getCellularComponentTermId()); if (cellularComponent == null) { - response.addErrorMessage("whereExpressed - cellularComponentTermId", ValidationConstants.INVALID_MESSAGE + " (" + 
geneExpressionFmsDTO.getWhereExpressed().getCellularComponentTermId() + ")"); + response.addErrorMessage("whereExpressed - cellularComponentTermId", ValidationConstants.INVALID_MESSAGE + " (" + whereExpressedDTO.getCellularComponentTermId() + ")"); } else { GOTerm cellularComponentRibbon = goTermService.findSubsetTerm(cellularComponent, "goslim_agr"); if (cellularComponentRibbon == null) { @@ -222,8 +224,8 @@ private ObjectResponse validateAnatomicalSite(GeneExpressionFmsD } } - if (!ObjectUtils.isEmpty(geneExpressionFmsDTO.getWhereExpressed().getAnatomicalStructureQualifierTermId())) { - String anatomicalstructurequalifiertermId = geneExpressionFmsDTO.getWhereExpressed().getAnatomicalStructureQualifierTermId(); + if (!ObjectUtils.isEmpty(whereExpressedDTO.getAnatomicalStructureQualifierTermId())) { + String anatomicalstructurequalifiertermId = whereExpressedDTO.getAnatomicalStructureQualifierTermId(); if (vocabularyTermService.getTermInVocabularyTermSet(VocabularyConstants.ANATOMICAL_STRUCTURE_QUALIFIER, anatomicalstructurequalifiertermId) != null) { OntologyTerm anatomicalStructureQualifierTerm = ontologyTermService.findByCurieOrSecondaryId(anatomicalstructurequalifiertermId); if (anatomicalStructureQualifierTerm == null) { @@ -234,8 +236,8 @@ private ObjectResponse validateAnatomicalSite(GeneExpressionFmsD } } - if (!ObjectUtils.isEmpty(geneExpressionFmsDTO.getWhereExpressed().getAnatomicalSubStructureQualifierTermId())) { - String anatomicalsubstructurequalifierId = geneExpressionFmsDTO.getWhereExpressed().getAnatomicalSubStructureQualifierTermId(); + if (!ObjectUtils.isEmpty(whereExpressedDTO.getAnatomicalSubStructureQualifierTermId())) { + String anatomicalsubstructurequalifierId = whereExpressedDTO.getAnatomicalSubStructureQualifierTermId(); if (vocabularyTermService.getTermInVocabularyTermSet(VocabularyConstants.ANATOMICAL_SUBSTRUCTURE_QUALIFIER, anatomicalsubstructurequalifierId) != null) { OntologyTerm anatomicalSubStructureQualifierTerm = 
ontologyTermService.findByCurieOrSecondaryId(anatomicalsubstructurequalifierId); if (anatomicalSubStructureQualifierTerm == null) { @@ -246,8 +248,8 @@ private ObjectResponse validateAnatomicalSite(GeneExpressionFmsD } } - if (!ObjectUtils.isEmpty(geneExpressionFmsDTO.getWhereExpressed().getCellularComponentQualifierTermId())) { - String cellularComponentQualifierTermId = geneExpressionFmsDTO.getWhereExpressed().getCellularComponentQualifierTermId(); + if (!ObjectUtils.isEmpty(whereExpressedDTO.getCellularComponentQualifierTermId())) { + String cellularComponentQualifierTermId = whereExpressedDTO.getCellularComponentQualifierTermId(); if (vocabularyTermService.getTermInVocabularyTermSet(VocabularyConstants.CELLULAR_COMPONENT_QUALIFIER, cellularComponentQualifierTermId) != null) { OntologyTerm cellularComponentQualifierTerm = ontologyTermService.findByCurieOrSecondaryId(cellularComponentQualifierTermId); if (cellularComponentQualifierTerm == null) { @@ -258,8 +260,8 @@ private ObjectResponse validateAnatomicalSite(GeneExpressionFmsD } } - if (!ObjectUtils.isEmpty(geneExpressionFmsDTO.getWhereExpressed().getAnatomicalStructureUberonSlimTermIds())) { - List anatomicalStructureUberonSlimTermIds = geneExpressionFmsDTO.getWhereExpressed().getAnatomicalStructureUberonSlimTermIds(); + if (!ObjectUtils.isEmpty(whereExpressedDTO.getAnatomicalStructureUberonSlimTermIds())) { + List anatomicalStructureUberonSlimTermIds = whereExpressedDTO.getAnatomicalStructureUberonSlimTermIds(); List uberonTerms = new ArrayList<>(); for (UberonSlimTermDTO uberonSlimTermDTO: anatomicalStructureUberonSlimTermIds) { if (!uberonSlimTermDTO.getUberonTerm().equals("Other")) { @@ -274,8 +276,8 @@ private ObjectResponse validateAnatomicalSite(GeneExpressionFmsD anatomicalSite.setAnatomicalStructureUberonTerms(uberonTerms); } - if (!ObjectUtils.isEmpty(geneExpressionFmsDTO.getWhereExpressed().getAnatomicalSubStructureUberonSlimTermIds())) { - List anatomicalSubStructureUberonSlimTermIds = 
geneExpressionFmsDTO.getWhereExpressed().getAnatomicalSubStructureUberonSlimTermIds(); + if (!ObjectUtils.isEmpty(whereExpressedDTO.getAnatomicalSubStructureUberonSlimTermIds())) { + List anatomicalSubStructureUberonSlimTermIds = whereExpressedDTO.getAnatomicalSubStructureUberonSlimTermIds(); List uberonTerms = new ArrayList<>(); for (UberonSlimTermDTO uberonSlimTermDTO : anatomicalSubStructureUberonSlimTermIds) { if (!uberonSlimTermDTO.getUberonTerm().equals("Other")) { @@ -314,9 +316,9 @@ private ObjectResponse validateEvidence(GeneExpressionFmsDTO geneExpr return response; } - private TemporalContext updateTemporalContext(ObjectResponse temporalContextObjectResponse, GeneExpressionAnnotation geneExpressionAnnotation) { + private TemporalContext updateTemporalContext(ObjectResponse temporalContextObjectResponse, TemporalContext whenExpressed) { TemporalContext temporalContext = temporalContextObjectResponse.getEntity(); - TemporalContext temporalContextDB = geneExpressionAnnotation.getExpressionPattern().getWhenExpressed(); + TemporalContext temporalContextDB = whenExpressed; if (temporalContextDB == null) { temporalContextDB = new TemporalContext(); } @@ -325,9 +327,9 @@ private TemporalContext updateTemporalContext(ObjectResponse te return temporalContextDB; } - private AnatomicalSite updateAnatomicalSite(ObjectResponse anatomicalSiteObjectResponse, GeneExpressionAnnotation geneExpressionAnnotation) { + private AnatomicalSite updateAnatomicalSite(ObjectResponse anatomicalSiteObjectResponse, AnatomicalSite whereExpressed) { AnatomicalSite anatomicalSite = anatomicalSiteObjectResponse.getEntity(); - AnatomicalSite anatomicalSiteDB = geneExpressionAnnotation.getExpressionPattern().getWhereExpressed(); + AnatomicalSite anatomicalSiteDB = whereExpressed; if (anatomicalSiteDB == null) { anatomicalSiteDB = new AnatomicalSite(); } diff --git 
a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/HTPExpressionDatasetSampleAnnotationFmsDTOValidator.java b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/HTPExpressionDatasetSampleAnnotationFmsDTOValidator.java index 7d2280739..77fb04567 100644 --- a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/HTPExpressionDatasetSampleAnnotationFmsDTOValidator.java +++ b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/HTPExpressionDatasetSampleAnnotationFmsDTOValidator.java @@ -1,14 +1,257 @@ package org.alliancegenome.curation_api.services.validation.dto.fms; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.alliancegenome.curation_api.constants.ValidationConstants; +import org.alliancegenome.curation_api.constants.VocabularyConstants; +import org.alliancegenome.curation_api.dao.HTPExpressionDatasetSampleAnnotationDAO; import org.alliancegenome.curation_api.enums.BackendBulkDataProvider; +import org.alliancegenome.curation_api.exceptions.ObjectValidationException; import org.alliancegenome.curation_api.exceptions.ValidationException; +import org.alliancegenome.curation_api.model.entities.AffectedGenomicModel; +import org.alliancegenome.curation_api.model.entities.Allele; +import org.alliancegenome.curation_api.model.entities.AnatomicalSite; +import org.alliancegenome.curation_api.model.entities.BioSampleAge; +import org.alliancegenome.curation_api.model.entities.BioSampleGenomicInformation; +import org.alliancegenome.curation_api.model.entities.ExternalDataBaseEntity; import org.alliancegenome.curation_api.model.entities.HTPExpressionDatasetSampleAnnotation; +import org.alliancegenome.curation_api.model.entities.MicroarraySampleDetails; +import org.alliancegenome.curation_api.model.entities.Note; +import org.alliancegenome.curation_api.model.entities.TemporalContext; +import 
org.alliancegenome.curation_api.model.entities.VocabularyTerm; +import org.alliancegenome.curation_api.model.entities.ontology.MMOTerm; +import org.alliancegenome.curation_api.model.entities.ontology.NCBITaxonTerm; +import org.alliancegenome.curation_api.model.entities.ontology.OBITerm; import org.alliancegenome.curation_api.model.ingest.dto.fms.HTPExpressionDatasetSampleAnnotationFmsDTO; +import org.alliancegenome.curation_api.model.ingest.dto.fms.WhereExpressedDTO; +import org.alliancegenome.curation_api.response.ObjectResponse; +import org.alliancegenome.curation_api.response.SearchResponse; +import org.alliancegenome.curation_api.services.AffectedGenomicModelService; +import org.alliancegenome.curation_api.services.AlleleService; +import org.alliancegenome.curation_api.services.DataProviderService; +import org.alliancegenome.curation_api.services.ExternalDataBaseEntityService; +import org.alliancegenome.curation_api.services.VocabularyTermService; +import org.alliancegenome.curation_api.services.ontology.MmoTermService; +import org.alliancegenome.curation_api.services.ontology.NcbiTaxonTermService; +import org.alliancegenome.curation_api.services.ontology.ObiTermService; +import org.alliancegenome.curation_api.services.ontology.StageTermService; +import org.apache.commons.collections.CollectionUtils; +import org.apache.commons.lang.StringUtils; + +import jakarta.enterprise.context.RequestScoped; +import jakarta.inject.Inject; +import jakarta.transaction.Transactional; +@RequestScoped public class HTPExpressionDatasetSampleAnnotationFmsDTOValidator { - public HTPExpressionDatasetSampleAnnotation validateHTPExpressionDatasetSampleAnnotationFmsDTO(HTPExpressionDatasetSampleAnnotationFmsDTO dto, BackendBulkDataProvider backendBulkDataProvider) throws ValidationException { - return null; - } + @Inject ExternalDataBaseEntityFmsDTOValidator externalDataBaseEntityFmsDtoValidator; + @Inject GeneExpressionAnnotationFmsDTOValidator geneExpressionAnnotationFmsDTOValidator; 
+ @Inject HTPExpressionDatasetSampleAnnotationDAO htpExpressionDatasetSampleAnnotationDAO; + @Inject VocabularyTermService vocabularyTermService; + @Inject ExternalDataBaseEntityService externalDataBaseEntityService; + @Inject DataProviderService dataProviderService; + @Inject ObiTermService obiTermService; + @Inject MmoTermService mmoTermService; + @Inject StageTermService stageTermService; + @Inject AlleleService alleleService; + @Inject AffectedGenomicModelService affectedGenomicModelService; + @Inject NcbiTaxonTermService ncbiTaxonTermService; + + @Transactional + public HTPExpressionDatasetSampleAnnotation validateHTPExpressionDatasetSampleAnnotationFmsDTO(HTPExpressionDatasetSampleAnnotationFmsDTO dto, BackendBulkDataProvider backendBulkDataProvider) throws ValidationException { + ObjectResponse htpSampleAnnotationResponse = new ObjectResponse<>(); + HTPExpressionDatasetSampleAnnotation htpSampleAnnotation; + + if (dto.getSampleId().getPrimaryId() != null) { + String curie = dto.getSampleId().getPrimaryId(); + ExternalDataBaseEntity externalDbEntity = externalDataBaseEntityFmsDtoValidator.validateExternalDataBaseEntityFmsDTO(dto.getSampleId()); + if (externalDbEntity != null) { + Long htpSampleId = externalDbEntity.getId(); + Map params = new HashMap<>(); + params.put("htpExpressionSample.id", htpSampleId); + SearchResponse searchResponse = htpExpressionDatasetSampleAnnotationDAO.findByParams(params); + if (searchResponse == null || searchResponse.getSingleResult() == null) { + htpSampleAnnotation = new HTPExpressionDatasetSampleAnnotation(); + htpSampleAnnotation.setHtpExpressionSample(externalDbEntity); + } else { + htpSampleAnnotation = searchResponse.getSingleResult(); + } + } else { + htpSampleAnnotationResponse.addErrorMessage("SampleId", ValidationConstants.INVALID_MESSAGE + " (" + curie + ")"); + htpSampleAnnotation = new HTPExpressionDatasetSampleAnnotation(); + } + } else { + htpSampleAnnotationResponse.addErrorMessage("SampleId", 
ValidationConstants.REQUIRED_MESSAGE); + htpSampleAnnotation = new HTPExpressionDatasetSampleAnnotation(); + } + + if (StringUtils.isNotEmpty(dto.getSampleTitle())) { + htpSampleAnnotation.setHtpExpressionSampleTitle(dto.getSampleTitle()); + } + + if (StringUtils.isNotEmpty(dto.getAbundance())) { + htpSampleAnnotation.setAbundance(dto.getAbundance()); + } + + if (StringUtils.isNotEmpty(dto.getSampleType())) { + String curie = dto.getSampleType(); + OBITerm obiTerm = obiTermService.findByCurie(curie); + if (obiTerm != null) { + htpSampleAnnotation.setHtpExpressionSampleType(obiTerm); + } else { + htpSampleAnnotationResponse.addErrorMessage("SampleType", ValidationConstants.INVALID_MESSAGE + " (" + curie + ")"); + } + } else { + htpSampleAnnotationResponse.addErrorMessage("SampleType", ValidationConstants.REQUIRED_MESSAGE); + } + + if (dto.getSampleAge() != null) { + ObjectResponse temporalContextObjectResponse = geneExpressionAnnotationFmsDTOValidator.validateTemporalContext(dto.getSampleAge().getStage()); + if (temporalContextObjectResponse.hasErrors()) { + htpSampleAnnotationResponse.addErrorMessage("BioSampleAge", temporalContextObjectResponse.errorMessagesString()); + } else { + if (htpSampleAnnotation.getHtpExpressionSampleAge() == null) { + htpSampleAnnotation.setHtpExpressionSampleAge(new BioSampleAge()); + htpSampleAnnotation.getHtpExpressionSampleAge().setStage(new TemporalContext()); + } + TemporalContext temporalContext = temporalContextObjectResponse.getEntity(); + TemporalContext temporalContextDB = htpSampleAnnotation.getHtpExpressionSampleAge().getStage(); + if (temporalContextDB == null) { + temporalContextDB = new TemporalContext(); + } + htpSampleAnnotation.getHtpExpressionSampleAge().setWhenExpressedStageName(dto.getSampleAge().getStage().getStageName()); + htpSampleAnnotation.getHtpExpressionSampleAge().setStage(temporalContextDB); + 
htpSampleAnnotation.getHtpExpressionSampleAge().getStage().setDevelopmentalStageStart(temporalContext.getDevelopmentalStageStart()); + htpSampleAnnotation.getHtpExpressionSampleAge().getStage().setStageUberonSlimTerms(temporalContext.getStageUberonSlimTerms()); + htpSampleAnnotation.getHtpExpressionSampleAge().setAge(dto.getSampleAge().getAge()); + } + } + + if (CollectionUtils.isNotEmpty(dto.getSampleLocations())) { + List htpSampleLocations = new ArrayList<>(); + for (WhereExpressedDTO whereExpressedDTO : dto.getSampleLocations()) { + ObjectResponse anatomicalSiteObjectResponse = geneExpressionAnnotationFmsDTOValidator.validateAnatomicalSite(whereExpressedDTO); + if (anatomicalSiteObjectResponse.hasErrors()) { + htpSampleAnnotationResponse.addErrorMessage("SampleLocations", anatomicalSiteObjectResponse.errorMessagesString()); + } else { + htpSampleLocations.add(anatomicalSiteObjectResponse.getEntity()); + } + } + htpSampleAnnotation.setHtpExpressionSampleLocations(htpSampleLocations); + } + + if (dto.getGenomicInformation() != null) { + if (htpSampleAnnotation.getGenomicInformation() == null) { + htpSampleAnnotation.setGenomicInformation(new BioSampleGenomicInformation()); + } + if (StringUtils.isNotEmpty(dto.getGenomicInformation().getBiosampleId())) { + String identifierString = dto.getGenomicInformation().getBiosampleId(); + Allele allele = alleleService.findByIdentifierString(identifierString); + if (allele == null) { + AffectedGenomicModel agm = affectedGenomicModelService.findByIdentifierString(identifierString); + if (agm == null) { + htpSampleAnnotationResponse.addErrorMessage("GenomicInformation", ValidationConstants.INVALID_MESSAGE + " (" + identifierString + ")"); + } else { + htpSampleAnnotation.getGenomicInformation().setBioSampleAgm(agm); + VocabularyTerm agmType = vocabularyTermService.getTermInVocabularyTermSet(VocabularyConstants.AGM_SUBTYPE_VOCABULARY, dto.getGenomicInformation().getIdType()).getEntity(); + if (agmType != null) { + 
htpSampleAnnotation.getGenomicInformation().setBioSampleAgmType(agmType); + } + } + } else { + htpSampleAnnotation.getGenomicInformation().setBioSampleAllele(allele); + } + } + } else { + htpSampleAnnotationResponse.addErrorMessage("GenomicInformation", ValidationConstants.REQUIRED_MESSAGE); + } + + if (StringUtils.isNotEmpty(dto.getSex())) { + VocabularyTerm geneticSex = vocabularyTermService.getTermInVocabularyTermSet(VocabularyConstants.GENETIC_SEX_VOCABULARY, dto.getSex()).getEntity(); + if (geneticSex != null) { + htpSampleAnnotation.setGeneticSex(geneticSex); + } + } + + if (StringUtils.isNotEmpty(dto.getAssayType())) { + String curie = dto.getAssayType(); + MMOTerm mmoTerm = mmoTermService.findByCurie(curie); + if (mmoTerm != null) { + htpSampleAnnotation.setExpressionAssayUsed(mmoTerm); + } else { + htpSampleAnnotationResponse.addErrorMessage("AssayType", ValidationConstants.INVALID_MESSAGE + " (" + curie + ")"); + } + } else { + htpSampleAnnotationResponse.addErrorMessage("AssayType", ValidationConstants.REQUIRED_MESSAGE); + } + + if (CollectionUtils.isNotEmpty(dto.getAssemblyVersions())) { + List assemblyVersions = new ArrayList<>(); + for (String assemblyVersion : dto.getAssemblyVersions()) { + assemblyVersions.add(assemblyVersion); + } + htpSampleAnnotation.setAssemblyVersions(assemblyVersions); + } + + if (CollectionUtils.isNotEmpty(dto.getDatasetIds())) { + List datasetIds = new ArrayList<>(); + for (String datasetId : dto.getDatasetIds()) { + ExternalDataBaseEntity externalDbEntity = externalDataBaseEntityService.findByCurie(datasetId); + if (externalDbEntity != null) { + datasetIds.add(externalDbEntity); + } + } + htpSampleAnnotation.setDatasetIds(datasetIds); + } else { + htpSampleAnnotationResponse.addErrorMessage("DatasetIds", ValidationConstants.REQUIRED_MESSAGE); + } + + if (StringUtils.isNotEmpty(dto.getSequencingFormat())) { + VocabularyTerm sequencingFormat = 
vocabularyTermService.getTermInVocabularyTermSet(VocabularyConstants.HTP_DATASET_SAMPLE_SEQUENCE_FORMAT_VOCABULARY, dto.getSequencingFormat()).getEntity(); + if (sequencingFormat != null) { + htpSampleAnnotation.setSequencingFormat(sequencingFormat); + } + } + + if (StringUtils.isNotEmpty(dto.getTaxonId())) { + ObjectResponse taxonResponse = ncbiTaxonTermService.getByCurie(dto.getTaxonId()); + if (backendBulkDataProvider != null && (backendBulkDataProvider.name().equals("RGD") || backendBulkDataProvider.name().equals("HUMAN")) && !taxonResponse.getEntity().getCurie().equals(backendBulkDataProvider.canonicalTaxonCurie)) { + htpSampleAnnotationResponse.addErrorMessage("taxonId", ValidationConstants.INVALID_MESSAGE + " (" + dto.getTaxonId() + ") for " + backendBulkDataProvider.name() + " load"); + } + htpSampleAnnotation.setTaxon(taxonResponse.getEntity()); + } + + if (dto.getMicroarraySampleDetails() != null) { + if (htpSampleAnnotation.getMicroarraySampleDetails() == null) { + htpSampleAnnotation.setMicroarraySampleDetails(new MicroarraySampleDetails()); + } + if (StringUtils.isNotEmpty(dto.getMicroarraySampleDetails().getChannelId())) { + htpSampleAnnotation.getMicroarraySampleDetails().setChannelId(dto.getMicroarraySampleDetails().getChannelId()); + } + if (dto.getMicroarraySampleDetails().getChannelNum() != null) { + htpSampleAnnotation.getMicroarraySampleDetails().setChannelNumber(dto.getMicroarraySampleDetails().getChannelNum()); + } + } + + if (StringUtils.isNotEmpty(dto.getNotes())) { + List relatedNotes = new ArrayList<>(); + Note relatedNote = new Note(); + relatedNote.setFreeText(dto.getNotes()); + relatedNote.setNoteType(vocabularyTermService.getTermInVocabularyTermSet(VocabularyConstants.HTP_DATASET_SAMPLE_NOTE_TYPE_VOCABULARY_TERM_SET, "htp_expression_dataset_sample_note_type").getEntity()); + relatedNotes.add(relatedNote); + htpSampleAnnotation.setRelatedNotes(relatedNotes); + } + + 
htpSampleAnnotation.setDataProvider(dataProviderService.getDefaultDataProvider(backendBulkDataProvider.sourceOrganization)); + + if (htpSampleAnnotationResponse.hasErrors()) { + throw new ObjectValidationException(dto, htpSampleAnnotationResponse.errorMessagesString()); + } + return htpExpressionDatasetSampleAnnotationDAO.persist(htpSampleAnnotation); + } } diff --git a/src/main/resources/db/migration/v0.37.0.63__htpdatasample_adding_vocab_terms.sql b/src/main/resources/db/migration/v0.37.0.63__htpdatasample_adding_vocab_terms.sql new file mode 100644 index 000000000..5665d0c99 --- /dev/null +++ b/src/main/resources/db/migration/v0.37.0.63__htpdatasample_adding_vocab_terms.sql @@ -0,0 +1,3 @@ +INSERT INTO vocabulary (id, name, vocabularylabel) VALUES (nextval('vocabulary_seq'), 'HTP Data Sample Sequencing Format','htp_data_sample_sequencing_format'); +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'single',id FROM vocabulary WHERE vocabularylabel = 'htp_data_sample_sequencing_format'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'paired',id FROM vocabulary WHERE vocabularylabel = 'htp_data_sample_sequencing_format'; From 54acf165816a880fb917398dfdb33a45221cb234 Mon Sep 17 00:00:00 2001 From: VarunReddy1111 Date: Mon, 28 Oct 2024 22:59:34 -0500 Subject: [PATCH 004/118] Resolving Merge conflicts --- ...terms.sql => v0.37.0.65__htpdatasample_adding_vocab_terms.sql} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename src/main/resources/db/migration/{v0.37.0.63__htpdatasample_adding_vocab_terms.sql => v0.37.0.65__htpdatasample_adding_vocab_terms.sql} (100%) diff --git a/src/main/resources/db/migration/v0.37.0.63__htpdatasample_adding_vocab_terms.sql b/src/main/resources/db/migration/v0.37.0.65__htpdatasample_adding_vocab_terms.sql similarity index 100% rename from src/main/resources/db/migration/v0.37.0.63__htpdatasample_adding_vocab_terms.sql rename to 
src/main/resources/db/migration/v0.37.0.65__htpdatasample_adding_vocab_terms.sql From adfcd4f8fed20d9b60aa767b68304465d10a681a Mon Sep 17 00:00:00 2001 From: VarunReddy1111 Date: Mon, 28 Oct 2024 23:10:57 -0500 Subject: [PATCH 005/118] Checkstyle fix --- .../dao/HTPExpressionDatasetSampleAnnotationDAO.java | 4 ++-- .../curation_api/jobs/executors/BulkLoadJobExecutor.java | 2 +- .../HTPExpressionDatasetSampleAnnotationExecutor.java | 8 ++++---- .../ingest/dto/fms/BioSampleGenomicInformationDTO.java | 2 +- .../fms/HTPExpressionDatasetSampleAnnotationFmsDTO.java | 2 +- .../HTPExpressionDatasetSampleAnnotationIngestFmsDTO.java | 4 ++-- .../model/ingest/dto/fms/MicroarraySampleDetailsDTO.java | 2 +- 7 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/main/java/org/alliancegenome/curation_api/dao/HTPExpressionDatasetSampleAnnotationDAO.java b/src/main/java/org/alliancegenome/curation_api/dao/HTPExpressionDatasetSampleAnnotationDAO.java index 68715e680..910d492f8 100644 --- a/src/main/java/org/alliancegenome/curation_api/dao/HTPExpressionDatasetSampleAnnotationDAO.java +++ b/src/main/java/org/alliancegenome/curation_api/dao/HTPExpressionDatasetSampleAnnotationDAO.java @@ -7,8 +7,8 @@ @ApplicationScoped public class HTPExpressionDatasetSampleAnnotationDAO extends BaseSQLDAO { - - protected HTPExpressionDatasetSampleAnnotationDAO() { + + protected HTPExpressionDatasetSampleAnnotationDAO() { super(HTPExpressionDatasetSampleAnnotation.class); } } diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/executors/BulkLoadJobExecutor.java b/src/main/java/org/alliancegenome/curation_api/jobs/executors/BulkLoadJobExecutor.java index fc1eec4e0..ae56ef352 100644 --- a/src/main/java/org/alliancegenome/curation_api/jobs/executors/BulkLoadJobExecutor.java +++ b/src/main/java/org/alliancegenome/curation_api/jobs/executors/BulkLoadJobExecutor.java @@ -140,7 +140,7 @@ public void process(BulkLoadFileHistory bulkLoadFileHistory, Boolean cleanUp) th 
expressionAtlasExecutor.execLoad(bulkLoadFileHistory); } else if (bulkLoadFileHistory.getBulkLoad().getBackendBulkLoadType() == BackendBulkLoadType.BIOGRID_ORCS) { biogridOrcExecutor.execLoad(bulkLoadFileHistory); - } else if(bulkLoadFileHistory.getBulkLoad().getBackendBulkLoadType() == BackendBulkLoadType.HTPDATASAMPLE) { + } else if (bulkLoadFileHistory.getBulkLoad().getBackendBulkLoadType() == BackendBulkLoadType.HTPDATASAMPLE) { htpExpressionDatasetSampleAnnotationExecutor.execLoad(bulkLoadFileHistory); } else { log.info("Load: " + bulkLoadFileHistory.getBulkLoad().getName() + " for type " + bulkLoadFileHistory.getBulkLoad().getBackendBulkLoadType() + " not implemented"); diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/executors/HTPExpressionDatasetSampleAnnotationExecutor.java b/src/main/java/org/alliancegenome/curation_api/jobs/executors/HTPExpressionDatasetSampleAnnotationExecutor.java index 841e60cba..1f4e9c169 100644 --- a/src/main/java/org/alliancegenome/curation_api/jobs/executors/HTPExpressionDatasetSampleAnnotationExecutor.java +++ b/src/main/java/org/alliancegenome/curation_api/jobs/executors/HTPExpressionDatasetSampleAnnotationExecutor.java @@ -26,15 +26,15 @@ import jakarta.inject.Inject; @ApplicationScoped -public class HTPExpressionDatasetSampleAnnotationExecutor extends LoadFileExecutor{ +public class HTPExpressionDatasetSampleAnnotationExecutor extends LoadFileExecutor { @Inject ExternalDataBaseEntityService externalDataBaseEntityService; @Inject ExternalDataBaseEntityDAO externalDataBaseEntityDAO; @Inject HTPExpressionDatasetSampleAnnotationService htpExpressionDatasetSampleAnnotationService; @Inject HTPExpressionDatasetSampleAnnotationDAO htpExpressionDatasetSampleAnnotationDAO; - public void execLoad(BulkLoadFileHistory bulkLoadFileHistory) { - try { + public void execLoad(BulkLoadFileHistory bulkLoadFileHistory) { + try { BulkFMSLoad fms = (BulkFMSLoad) bulkLoadFileHistory.getBulkLoad(); 
HTPExpressionDatasetSampleAnnotationIngestFmsDTO htpExpressionDatasetSampleData = mapper.readValue(new GZIPInputStream(new FileInputStream(bulkLoadFileHistory.getBulkLoadFile().getLocalFilePath())), HTPExpressionDatasetSampleAnnotationIngestFmsDTO.class); @@ -69,7 +69,7 @@ public void execLoad(BulkLoadFileHistory bulkLoadFileHistory) { failLoad(bulkLoadFileHistory, e); e.printStackTrace(); } - } + } private boolean runLoad(BulkLoadFileHistory history, BackendBulkDataProvider dataProvider, List htpDatasetSampleAnnotations, List htpAnnotationsIdsLoaded) { ProcessDisplayHelper ph = new ProcessDisplayHelper(); diff --git a/src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/BioSampleGenomicInformationDTO.java b/src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/BioSampleGenomicInformationDTO.java index 6709ba9ae..953bd0254 100644 --- a/src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/BioSampleGenomicInformationDTO.java +++ b/src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/BioSampleGenomicInformationDTO.java @@ -6,7 +6,7 @@ @Data @EqualsAndHashCode(callSuper = true) -public class BioSampleGenomicInformationDTO extends BaseDTO{ +public class BioSampleGenomicInformationDTO extends BaseDTO { private String biosampleId; private String idType; private String bioSampleText; diff --git a/src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/HTPExpressionDatasetSampleAnnotationFmsDTO.java b/src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/HTPExpressionDatasetSampleAnnotationFmsDTO.java index 88a8d883f..7a1614b63 100644 --- a/src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/HTPExpressionDatasetSampleAnnotationFmsDTO.java +++ b/src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/HTPExpressionDatasetSampleAnnotationFmsDTO.java @@ -9,7 +9,7 @@ @Data @EqualsAndHashCode(callSuper = true) -public class HTPExpressionDatasetSampleAnnotationFmsDTO extends 
BaseDTO{ +public class HTPExpressionDatasetSampleAnnotationFmsDTO extends BaseDTO { private HTPIdFmsDTO sampleId; private String sampleTitle; private String sampleType; diff --git a/src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/HTPExpressionDatasetSampleAnnotationIngestFmsDTO.java b/src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/HTPExpressionDatasetSampleAnnotationIngestFmsDTO.java index 61a85b7d4..b72b7927c 100644 --- a/src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/HTPExpressionDatasetSampleAnnotationIngestFmsDTO.java +++ b/src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/HTPExpressionDatasetSampleAnnotationIngestFmsDTO.java @@ -9,7 +9,7 @@ @Data @EqualsAndHashCode(callSuper = true) -public class HTPExpressionDatasetSampleAnnotationIngestFmsDTO extends BaseDTO{ - private MetaDataFmsDTO metaData; +public class HTPExpressionDatasetSampleAnnotationIngestFmsDTO extends BaseDTO { + private MetaDataFmsDTO metaData; private List data; } diff --git a/src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/MicroarraySampleDetailsDTO.java b/src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/MicroarraySampleDetailsDTO.java index d802bde3e..3767c3a8f 100644 --- a/src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/MicroarraySampleDetailsDTO.java +++ b/src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/MicroarraySampleDetailsDTO.java @@ -7,7 +7,7 @@ @Data @EqualsAndHashCode(callSuper = true) -public class MicroarraySampleDetailsDTO extends BaseDTO{ +public class MicroarraySampleDetailsDTO extends BaseDTO { private String channelId; private Integer channelNum; } From 5e57b31fd6971fbcd710012d3618b48a9045afaf Mon Sep 17 00:00:00 2001 From: VarunReddy1111 Date: Tue, 29 Oct 2024 11:02:08 -0500 Subject: [PATCH 006/118] Renaming dto's to FmsDTO --- ...essionDatasetSampleAnnotationExecutor.java | 38 +------------------ ...pleAgeDTO.java => 
BioSampleAgeFmsDTO.java} | 4 +- ...=> BioSampleGenomicInformationFmsDTO.java} | 2 +- .../ingest/dto/fms/GeneExpressionFmsDTO.java | 4 +- ...pressionDatasetSampleAnnotationFmsDTO.java | 8 ++-- ...ava => MicroarraySampleDetailsFmsDTO.java} | 2 +- ...essedDTO.java => WhenExpressedFmsDTO.java} | 2 +- ...ssedDTO.java => WhereExpressedFmsDTO.java} | 2 +- ...neExpressionAnnotationFmsDTOValidator.java | 8 ++-- ...atasetSampleAnnotationFmsDTOValidator.java | 6 +-- 10 files changed, 20 insertions(+), 56 deletions(-) rename src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/{BioSampleAgeDTO.java => BioSampleAgeFmsDTO.java} (74%) rename src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/{BioSampleGenomicInformationDTO.java => BioSampleGenomicInformationFmsDTO.java} (82%) rename src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/{MicroarraySampleDetailsDTO.java => MicroarraySampleDetailsFmsDTO.java} (82%) rename src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/{WhenExpressedDTO.java => WhenExpressedFmsDTO.java} (86%) rename src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/{WhereExpressedDTO.java => WhereExpressedFmsDTO.java} (93%) diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/executors/HTPExpressionDatasetSampleAnnotationExecutor.java b/src/main/java/org/alliancegenome/curation_api/jobs/executors/HTPExpressionDatasetSampleAnnotationExecutor.java index 1f4e9c169..026c1dc79 100644 --- a/src/main/java/org/alliancegenome/curation_api/jobs/executors/HTPExpressionDatasetSampleAnnotationExecutor.java +++ b/src/main/java/org/alliancegenome/curation_api/jobs/executors/HTPExpressionDatasetSampleAnnotationExecutor.java @@ -55,7 +55,7 @@ public void execLoad(BulkLoadFileHistory bulkLoadFileHistory) { bulkLoadFileHistory.setCount((long) htpExpressionDatasetSampleData.getData().size()); updateHistory(bulkLoadFileHistory); - boolean success = runLoad(bulkLoadFileHistory, dataProvider, 
htpExpressionDatasetSampleData.getData(), htpAnnotationsIdsLoaded); + boolean success = runLoad(htpExpressionDatasetSampleAnnotationService, bulkLoadFileHistory, dataProvider, htpExpressionDatasetSampleData.getData(), htpAnnotationsIdsLoaded); if (success) { runCleanup(htpExpressionDatasetSampleAnnotationService, bulkLoadFileHistory, dataProvider.name(), previousIds, htpAnnotationsIdsLoaded, fms.getFmsDataType()); @@ -70,40 +70,4 @@ public void execLoad(BulkLoadFileHistory bulkLoadFileHistory) { e.printStackTrace(); } } - - private boolean runLoad(BulkLoadFileHistory history, BackendBulkDataProvider dataProvider, List htpDatasetSampleAnnotations, List htpAnnotationsIdsLoaded) { - ProcessDisplayHelper ph = new ProcessDisplayHelper(); - ph.addDisplayHandler(loadProcessDisplayService); - ph.startProcess("HTP Expression Dataset Sample Annotation DTO Update for " + dataProvider.name(), htpDatasetSampleAnnotations.size()); - - updateHistory(history); - for (HTPExpressionDatasetSampleAnnotationFmsDTO dto : htpDatasetSampleAnnotations) { - try { - HTPExpressionDatasetSampleAnnotation dbObject = htpExpressionDatasetSampleAnnotationService.upsert(dto, dataProvider); - history.incrementCompleted(); - if (dbObject != null) { - htpAnnotationsIdsLoaded.add(dbObject.getId()); - } - } catch (ObjectUpdateException e) { - history.incrementFailed(); - addException(history, e.getData()); - } catch (Exception e) { - e.printStackTrace(); - history.incrementFailed(); - addException(history, new ObjectUpdateExceptionData(dto, e.getMessage(), e.getStackTrace())); - } - if (history.getErrorRate() > 0.25) { - Log.error("Failure Rate > 25% aborting load"); - updateHistory(history); - updateExceptions(history); - failLoadAboveErrorRateCutoff(history); - return false; - } - ph.progressProcess(); - } - updateHistory(history); - updateExceptions(history); - ph.finishProcess(); - return true; - } } diff --git a/src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/BioSampleAgeDTO.java 
b/src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/BioSampleAgeFmsDTO.java similarity index 74% rename from src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/BioSampleAgeDTO.java rename to src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/BioSampleAgeFmsDTO.java index 3b191176d..c2e54bb03 100644 --- a/src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/BioSampleAgeDTO.java +++ b/src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/BioSampleAgeFmsDTO.java @@ -7,7 +7,7 @@ @Data @EqualsAndHashCode(callSuper = true) -public class BioSampleAgeDTO extends BaseDTO { - private WhenExpressedDTO stage; +public class BioSampleAgeFmsDTO extends BaseDTO { + private WhenExpressedFmsDTO stage; private String age; } diff --git a/src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/BioSampleGenomicInformationDTO.java b/src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/BioSampleGenomicInformationFmsDTO.java similarity index 82% rename from src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/BioSampleGenomicInformationDTO.java rename to src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/BioSampleGenomicInformationFmsDTO.java index 953bd0254..47e6e4f51 100644 --- a/src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/BioSampleGenomicInformationDTO.java +++ b/src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/BioSampleGenomicInformationFmsDTO.java @@ -6,7 +6,7 @@ @Data @EqualsAndHashCode(callSuper = true) -public class BioSampleGenomicInformationDTO extends BaseDTO { +public class BioSampleGenomicInformationFmsDTO extends BaseDTO { private String biosampleId; private String idType; private String bioSampleText; diff --git a/src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/GeneExpressionFmsDTO.java b/src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/GeneExpressionFmsDTO.java index 
73e5da75f..07ed8a065 100644 --- a/src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/GeneExpressionFmsDTO.java +++ b/src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/GeneExpressionFmsDTO.java @@ -13,7 +13,7 @@ public class GeneExpressionFmsDTO extends BaseDTO { private String assay; private String dateAssigned; private PublicationFmsDTO evidence; - private WhenExpressedDTO whenExpressed; - private WhereExpressedDTO whereExpressed; + private WhenExpressedFmsDTO whenExpressed; + private WhereExpressedFmsDTO whereExpressed; } diff --git a/src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/HTPExpressionDatasetSampleAnnotationFmsDTO.java b/src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/HTPExpressionDatasetSampleAnnotationFmsDTO.java index 7a1614b63..9fe958615 100644 --- a/src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/HTPExpressionDatasetSampleAnnotationFmsDTO.java +++ b/src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/HTPExpressionDatasetSampleAnnotationFmsDTO.java @@ -13,10 +13,10 @@ public class HTPExpressionDatasetSampleAnnotationFmsDTO extends BaseDTO { private HTPIdFmsDTO sampleId; private String sampleTitle; private String sampleType; - private BioSampleAgeDTO sampleAge; - private List sampleLocations; + private BioSampleAgeFmsDTO sampleAge; + private List sampleLocations; private String abundance; - private BioSampleGenomicInformationDTO genomicInformation; + private BioSampleGenomicInformationFmsDTO genomicInformation; private String taxonId; private String sex; private String assayType; @@ -24,6 +24,6 @@ public class HTPExpressionDatasetSampleAnnotationFmsDTO extends BaseDTO { private List assemblyVersions; private String notes; private List datasetIds; - private MicroarraySampleDetailsDTO microarraySampleDetails; + private MicroarraySampleDetailsFmsDTO microarraySampleDetails; private String dateAssigned; } diff --git 
a/src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/MicroarraySampleDetailsDTO.java b/src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/MicroarraySampleDetailsFmsDTO.java similarity index 82% rename from src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/MicroarraySampleDetailsDTO.java rename to src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/MicroarraySampleDetailsFmsDTO.java index 3767c3a8f..29219fb0e 100644 --- a/src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/MicroarraySampleDetailsDTO.java +++ b/src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/MicroarraySampleDetailsFmsDTO.java @@ -7,7 +7,7 @@ @Data @EqualsAndHashCode(callSuper = true) -public class MicroarraySampleDetailsDTO extends BaseDTO { +public class MicroarraySampleDetailsFmsDTO extends BaseDTO { private String channelId; private Integer channelNum; } diff --git a/src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/WhenExpressedDTO.java b/src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/WhenExpressedFmsDTO.java similarity index 86% rename from src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/WhenExpressedDTO.java rename to src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/WhenExpressedFmsDTO.java index 341945676..e5e7fc37f 100644 --- a/src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/WhenExpressedDTO.java +++ b/src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/WhenExpressedFmsDTO.java @@ -6,7 +6,7 @@ @Data @EqualsAndHashCode(callSuper = true) -public class WhenExpressedDTO extends BaseDTO { +public class WhenExpressedFmsDTO extends BaseDTO { private String stageTermId; private String stageName; private UberonSlimTermDTO stageUberonSlimTerm; diff --git a/src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/WhereExpressedDTO.java 
b/src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/WhereExpressedFmsDTO.java similarity index 93% rename from src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/WhereExpressedDTO.java rename to src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/WhereExpressedFmsDTO.java index 92268423a..90c19fdc3 100644 --- a/src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/WhereExpressedDTO.java +++ b/src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/WhereExpressedFmsDTO.java @@ -9,7 +9,7 @@ @Data @EqualsAndHashCode(callSuper = true) -public class WhereExpressedDTO extends BaseDTO { +public class WhereExpressedFmsDTO extends BaseDTO { private String whereExpressedStatement; private String cellularComponentTermId; private String cellularComponentQualifierTermId; diff --git a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/GeneExpressionAnnotationFmsDTOValidator.java b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/GeneExpressionAnnotationFmsDTOValidator.java index 0dadf6213..628280680 100644 --- a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/GeneExpressionAnnotationFmsDTOValidator.java +++ b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/GeneExpressionAnnotationFmsDTOValidator.java @@ -12,8 +12,8 @@ import org.alliancegenome.curation_api.model.entities.ontology.*; import org.alliancegenome.curation_api.model.ingest.dto.fms.GeneExpressionFmsDTO; import org.alliancegenome.curation_api.model.ingest.dto.fms.UberonSlimTermDTO; -import org.alliancegenome.curation_api.model.ingest.dto.fms.WhenExpressedDTO; -import org.alliancegenome.curation_api.model.ingest.dto.fms.WhereExpressedDTO; +import org.alliancegenome.curation_api.model.ingest.dto.fms.WhenExpressedFmsDTO; +import org.alliancegenome.curation_api.model.ingest.dto.fms.WhereExpressedFmsDTO; import 
org.alliancegenome.curation_api.response.ObjectResponse; import org.alliancegenome.curation_api.response.SearchResponse; import org.alliancegenome.curation_api.services.DataProviderService; @@ -133,7 +133,7 @@ public GeneExpressionAnnotation validateAnnotation(GeneExpressionFmsDTO geneExpr return geneExpressionAnnotation; } - protected ObjectResponse validateTemporalContext(WhenExpressedDTO whenExpressedDTO) { + protected ObjectResponse validateTemporalContext(WhenExpressedFmsDTO whenExpressedDTO) { ObjectResponse response = new ObjectResponse<>(); TemporalContext temporalContext = new TemporalContext(); if (ObjectUtils.isEmpty(whenExpressedDTO)) { @@ -171,7 +171,7 @@ protected ObjectResponse validateTemporalContext(WhenExpressedD return response; } - protected ObjectResponse validateAnatomicalSite(WhereExpressedDTO whereExpressedDTO) { + protected ObjectResponse validateAnatomicalSite(WhereExpressedFmsDTO whereExpressedDTO) { ObjectResponse response = new ObjectResponse<>(); AnatomicalSite anatomicalSite = new AnatomicalSite(); if (ObjectUtils.isEmpty(whereExpressedDTO)) { diff --git a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/HTPExpressionDatasetSampleAnnotationFmsDTOValidator.java b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/HTPExpressionDatasetSampleAnnotationFmsDTOValidator.java index 77fb04567..f347ed59e 100644 --- a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/HTPExpressionDatasetSampleAnnotationFmsDTOValidator.java +++ b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/HTPExpressionDatasetSampleAnnotationFmsDTOValidator.java @@ -26,7 +26,7 @@ import org.alliancegenome.curation_api.model.entities.ontology.NCBITaxonTerm; import org.alliancegenome.curation_api.model.entities.ontology.OBITerm; import org.alliancegenome.curation_api.model.ingest.dto.fms.HTPExpressionDatasetSampleAnnotationFmsDTO; -import 
org.alliancegenome.curation_api.model.ingest.dto.fms.WhereExpressedDTO; +import org.alliancegenome.curation_api.model.ingest.dto.fms.WhereExpressedFmsDTO; import org.alliancegenome.curation_api.response.ObjectResponse; import org.alliancegenome.curation_api.response.SearchResponse; import org.alliancegenome.curation_api.services.AffectedGenomicModelService; @@ -66,7 +66,7 @@ public HTPExpressionDatasetSampleAnnotation validateHTPExpressionDatasetSampleAn ObjectResponse htpSampleAnnotationResponse = new ObjectResponse<>(); HTPExpressionDatasetSampleAnnotation htpSampleAnnotation; - if (dto.getSampleId().getPrimaryId() != null) { + if (StringUtils.isNotBlank(dto.getSampleId().getPrimaryId())) { String curie = dto.getSampleId().getPrimaryId(); ExternalDataBaseEntity externalDbEntity = externalDataBaseEntityFmsDtoValidator.validateExternalDataBaseEntityFmsDTO(dto.getSampleId()); if (externalDbEntity != null) { @@ -133,7 +133,7 @@ public HTPExpressionDatasetSampleAnnotation validateHTPExpressionDatasetSampleAn if (CollectionUtils.isNotEmpty(dto.getSampleLocations())) { List htpSampleLocations = new ArrayList<>(); - for (WhereExpressedDTO whereExpressedDTO : dto.getSampleLocations()) { + for (WhereExpressedFmsDTO whereExpressedDTO : dto.getSampleLocations()) { ObjectResponse anatomicalSiteObjectResponse = geneExpressionAnnotationFmsDTOValidator.validateAnatomicalSite(whereExpressedDTO); if (anatomicalSiteObjectResponse.hasErrors()) { htpSampleAnnotationResponse.addErrorMessage("SampleLocations", anatomicalSiteObjectResponse.errorMessagesString()); From 09719b59ff79ae772c26e780762fce4a2ec3b720 Mon Sep 17 00:00:00 2001 From: VarunReddy1111 Date: Tue, 29 Oct 2024 11:04:31 -0500 Subject: [PATCH 007/118] checkstyle fix --- .../HTPExpressionDatasetSampleAnnotationExecutor.java | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/executors/HTPExpressionDatasetSampleAnnotationExecutor.java 
b/src/main/java/org/alliancegenome/curation_api/jobs/executors/HTPExpressionDatasetSampleAnnotationExecutor.java index 026c1dc79..f2c12bf3d 100644 --- a/src/main/java/org/alliancegenome/curation_api/jobs/executors/HTPExpressionDatasetSampleAnnotationExecutor.java +++ b/src/main/java/org/alliancegenome/curation_api/jobs/executors/HTPExpressionDatasetSampleAnnotationExecutor.java @@ -8,20 +8,15 @@ import org.alliancegenome.curation_api.dao.ExternalDataBaseEntityDAO; import org.alliancegenome.curation_api.dao.HTPExpressionDatasetSampleAnnotationDAO; import org.alliancegenome.curation_api.enums.BackendBulkDataProvider; -import org.alliancegenome.curation_api.exceptions.ObjectUpdateException; -import org.alliancegenome.curation_api.exceptions.ObjectUpdateException.ObjectUpdateExceptionData; import org.alliancegenome.curation_api.interfaces.AGRCurationSchemaVersion; import org.alliancegenome.curation_api.model.entities.HTPExpressionDatasetSampleAnnotation; import org.alliancegenome.curation_api.model.entities.bulkloads.BulkFMSLoad; import org.alliancegenome.curation_api.model.entities.bulkloads.BulkLoadFileHistory; -import org.alliancegenome.curation_api.model.ingest.dto.fms.HTPExpressionDatasetSampleAnnotationFmsDTO; import org.alliancegenome.curation_api.model.ingest.dto.fms.HTPExpressionDatasetSampleAnnotationIngestFmsDTO; import org.alliancegenome.curation_api.services.ExternalDataBaseEntityService; import org.alliancegenome.curation_api.services.HTPExpressionDatasetSampleAnnotationService; -import org.alliancegenome.curation_api.util.ProcessDisplayHelper; import org.apache.commons.lang3.StringUtils; -import io.quarkus.logging.Log; import jakarta.enterprise.context.ApplicationScoped; import jakarta.inject.Inject; From 65167925a903b33f31d09624fbcc1931ffaa0cfe Mon Sep 17 00:00:00 2001 From: markquintontulloch Date: Wed, 30 Oct 2024 10:21:39 +0000 Subject: [PATCH 008/118] VEP loading code --- .../cliapp/src/service/DataLoadService.js | 7 +- 
.../constants/VocabularyConstants.java | 5 + ...ictedVariantConsequenceCrudController.java | 40 +++ .../dao/PredictedVariantConsequenceDAO.java | 15 + .../enums/BackendBulkLoadType.java | 2 + ...dictedVariantConsequenceCrudInterface.java | 37 ++ .../jobs/executors/BulkLoadJobExecutor.java | 6 + .../executors/ExpressionAtlasExecutor.java | 24 +- .../jobs/executors/LoadFileExecutor.java | 3 +- .../jobs/executors/VepGeneExecutor.java | 169 +++++++++ .../jobs/executors/VepTranscriptExecutor.java | 62 ++++ .../jobs/util/CsvSchemaBuilder.java | 26 ++ .../entities/PredictedVariantConsequence.java | 167 +++++++++ ...atedVariantGenomicLocationAssociation.java | 28 +- .../model/ingest/dto/fms/VepTxtDTO.java | 29 ++ .../PredictedVariantConsequenceService.java | 102 ++++++ .../dto/fms/VariantFmsDTOValidator.java | 2 +- .../dto/fms/VepGeneFmsDTOValidator.java | 79 +++++ .../dto/fms/VepTranscriptFmsDTOValidator.java | 320 ++++++++++++++++++ ...0.37.0.65__predictedvariantconsequence.sql | 178 ++++++++++ .../curation_api/VepFmsITCase.java | 199 +++++++++++ .../bulk/fms/11_vep/AF_01_all_fields.json | 22 ++ .../ER_01_empty_uploaded_variation.json | 22 ++ .../bulk/fms/11_vep/ER_02_empty_feature.json | 22 ++ .../fms/11_vep/ER_03_empty_consequence.json | 22 ++ .../bulk/fms/11_vep/ER_04_empty_impact.json | 22 ++ .../IV_01_invalid_uploaded_variation.json | 22 ++ .../fms/11_vep/IV_02_invalid_feature.json | 22 ++ .../fms/11_vep/IV_03_invalid_consequence.json | 22 ++ .../11_vep/IV_04_invalid_cdna_position.json | 22 ++ .../11_vep/IV_05_invalid_cds_position.json | 22 ++ .../IV_06_invalid_protein_position.json | 22 ++ .../fms/11_vep/IV_07_invalid_amino_acids.json | 22 ++ .../bulk/fms/11_vep/IV_08_invalid_codons.json | 22 ++ .../bulk/fms/11_vep/IV_09_invalid_impact.json | 22 ++ .../fms/11_vep/IV_10_invalid_polyphen.json | 22 ++ .../bulk/fms/11_vep/IV_11_invalid_sift.json | 22 ++ ...IV_12_invalid_variant_transcript_pair.json | 22 ++ .../11_vep/MR_01_no_uploaded_variation.json | 21 ++ 
.../bulk/fms/11_vep/MR_02_no_feature.json | 21 ++ .../bulk/fms/11_vep/MR_03_no_consequence.json | 21 ++ .../bulk/fms/11_vep/MR_04_no_impact.json | 21 ++ .../bulk/fms/11_vep/UD_01_update.json | 22 ++ 43 files changed, 1960 insertions(+), 20 deletions(-) create mode 100644 src/main/java/org/alliancegenome/curation_api/controllers/crud/PredictedVariantConsequenceCrudController.java create mode 100644 src/main/java/org/alliancegenome/curation_api/dao/PredictedVariantConsequenceDAO.java create mode 100644 src/main/java/org/alliancegenome/curation_api/interfaces/crud/PredictedVariantConsequenceCrudInterface.java create mode 100644 src/main/java/org/alliancegenome/curation_api/jobs/executors/VepGeneExecutor.java create mode 100644 src/main/java/org/alliancegenome/curation_api/jobs/executors/VepTranscriptExecutor.java create mode 100644 src/main/java/org/alliancegenome/curation_api/model/entities/PredictedVariantConsequence.java create mode 100644 src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/VepTxtDTO.java create mode 100644 src/main/java/org/alliancegenome/curation_api/services/PredictedVariantConsequenceService.java create mode 100644 src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/VepGeneFmsDTOValidator.java create mode 100644 src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/VepTranscriptFmsDTOValidator.java create mode 100644 src/main/resources/db/migration/v0.37.0.65__predictedvariantconsequence.sql create mode 100644 src/test/java/org/alliancegenome/curation_api/VepFmsITCase.java create mode 100644 src/test/resources/bulk/fms/11_vep/AF_01_all_fields.json create mode 100644 src/test/resources/bulk/fms/11_vep/ER_01_empty_uploaded_variation.json create mode 100644 src/test/resources/bulk/fms/11_vep/ER_02_empty_feature.json create mode 100644 src/test/resources/bulk/fms/11_vep/ER_03_empty_consequence.json create mode 100644 src/test/resources/bulk/fms/11_vep/ER_04_empty_impact.json create mode 
100644 src/test/resources/bulk/fms/11_vep/IV_01_invalid_uploaded_variation.json create mode 100644 src/test/resources/bulk/fms/11_vep/IV_02_invalid_feature.json create mode 100644 src/test/resources/bulk/fms/11_vep/IV_03_invalid_consequence.json create mode 100644 src/test/resources/bulk/fms/11_vep/IV_04_invalid_cdna_position.json create mode 100644 src/test/resources/bulk/fms/11_vep/IV_05_invalid_cds_position.json create mode 100644 src/test/resources/bulk/fms/11_vep/IV_06_invalid_protein_position.json create mode 100644 src/test/resources/bulk/fms/11_vep/IV_07_invalid_amino_acids.json create mode 100644 src/test/resources/bulk/fms/11_vep/IV_08_invalid_codons.json create mode 100644 src/test/resources/bulk/fms/11_vep/IV_09_invalid_impact.json create mode 100644 src/test/resources/bulk/fms/11_vep/IV_10_invalid_polyphen.json create mode 100644 src/test/resources/bulk/fms/11_vep/IV_11_invalid_sift.json create mode 100644 src/test/resources/bulk/fms/11_vep/IV_12_invalid_variant_transcript_pair.json create mode 100644 src/test/resources/bulk/fms/11_vep/MR_01_no_uploaded_variation.json create mode 100644 src/test/resources/bulk/fms/11_vep/MR_02_no_feature.json create mode 100644 src/test/resources/bulk/fms/11_vep/MR_03_no_consequence.json create mode 100644 src/test/resources/bulk/fms/11_vep/MR_04_no_impact.json create mode 100644 src/test/resources/bulk/fms/11_vep/UD_01_update.json diff --git a/src/main/cliapp/src/service/DataLoadService.js b/src/main/cliapp/src/service/DataLoadService.js index dc60d520d..ebd513c14 100644 --- a/src/main/cliapp/src/service/DataLoadService.js +++ b/src/main/cliapp/src/service/DataLoadService.js @@ -81,8 +81,8 @@ export class DataLoadService extends BaseAuthService { getBackendBulkLoadTypes(loadType) { const bulkLoadTypes = { BulkFMSLoad: [ + 'BIOGRID-ORCS', 'GFF', // This needs to be removed at some point - 'GFF_EXON', 'GFF_CDS', 'GFF_TRANSCRIPT', @@ -95,8 +95,9 @@ export class DataLoadService extends BaseAuthService { 'PHENOTYPE', 
'PARALOGY', 'SEQUENCE_TARGETING_REAGENT', - // 'VARIATION', - 'BIOGRID-ORCS', + 'VARIATION', + 'VEPGENE', + 'VEPTRANSCRIPT', ], BulkURLLoad: [ 'ONTOLOGY', diff --git a/src/main/java/org/alliancegenome/curation_api/constants/VocabularyConstants.java b/src/main/java/org/alliancegenome/curation_api/constants/VocabularyConstants.java index db0a0d007..2ad5e08dd 100644 --- a/src/main/java/org/alliancegenome/curation_api/constants/VocabularyConstants.java +++ b/src/main/java/org/alliancegenome/curation_api/constants/VocabularyConstants.java @@ -87,4 +87,9 @@ private VocabularyConstants() { public static final String HTP_DATASET_CATEGORY_TAGS = "data_set_category_tags"; public static final String HTP_DATASET_NOTE_TYPE_VOCABULARY_TERM_SET = "htp_expression_dataset_note_type"; public static final String HTP_DATASET_SAMPLE_NOTE_TYPE_VOCABULARY_TERM_SET = "htp_expression_dataset_sample_note_type"; + + public static final String VEP_IMPACT_VOCABULARY = "vep_impact"; + public static final String VEP_CONSEQUENCE_VOCABULARY = "vep_consequence"; + public static final String SIFT_PREDICTION_VOCABULARY = "sift_prediction"; + public static final String POLYPHEN_PREDICTION_VOCABULARY = "polyphen_prediction"; } diff --git a/src/main/java/org/alliancegenome/curation_api/controllers/crud/PredictedVariantConsequenceCrudController.java b/src/main/java/org/alliancegenome/curation_api/controllers/crud/PredictedVariantConsequenceCrudController.java new file mode 100644 index 000000000..1a709cce6 --- /dev/null +++ b/src/main/java/org/alliancegenome/curation_api/controllers/crud/PredictedVariantConsequenceCrudController.java @@ -0,0 +1,40 @@ +package org.alliancegenome.curation_api.controllers.crud; + +import java.util.List; + +import org.alliancegenome.curation_api.controllers.base.BaseEntityCrudController; +import org.alliancegenome.curation_api.dao.PredictedVariantConsequenceDAO; +import org.alliancegenome.curation_api.interfaces.crud.PredictedVariantConsequenceCrudInterface; +import 
org.alliancegenome.curation_api.jobs.executors.VepGeneExecutor; +import org.alliancegenome.curation_api.jobs.executors.VepTranscriptExecutor; +import org.alliancegenome.curation_api.model.entities.PredictedVariantConsequence; +import org.alliancegenome.curation_api.model.ingest.dto.fms.VepTxtDTO; +import org.alliancegenome.curation_api.response.APIResponse; +import org.alliancegenome.curation_api.services.PredictedVariantConsequenceService; + +import jakarta.annotation.PostConstruct; +import jakarta.enterprise.context.RequestScoped; +import jakarta.inject.Inject; + +@RequestScoped +public class PredictedVariantConsequenceCrudController extends BaseEntityCrudController + implements PredictedVariantConsequenceCrudInterface { + + @Inject PredictedVariantConsequenceService predictedVariantConsequenceService; + @Inject VepTranscriptExecutor vepTranscriptExecutor; + @Inject VepGeneExecutor vepGeneExecutor; + + @Override + @PostConstruct + protected void init() { + setService(predictedVariantConsequenceService); + } + + public APIResponse updateTranscriptLevelConsequences(String dataProvider, List consequenceData) { + return vepTranscriptExecutor.runLoadApi(predictedVariantConsequenceService, dataProvider, consequenceData); + } + + public APIResponse updateGeneLevelConsequences(String dataProvider, List consequenceData) { + return vepGeneExecutor.runLoadApi(dataProvider, consequenceData); + } +} diff --git a/src/main/java/org/alliancegenome/curation_api/dao/PredictedVariantConsequenceDAO.java b/src/main/java/org/alliancegenome/curation_api/dao/PredictedVariantConsequenceDAO.java new file mode 100644 index 000000000..e602925cf --- /dev/null +++ b/src/main/java/org/alliancegenome/curation_api/dao/PredictedVariantConsequenceDAO.java @@ -0,0 +1,15 @@ +package org.alliancegenome.curation_api.dao; + +import org.alliancegenome.curation_api.dao.base.BaseSQLDAO; +import org.alliancegenome.curation_api.model.entities.PredictedVariantConsequence; + +import 
jakarta.enterprise.context.ApplicationScoped; + +@ApplicationScoped +public class PredictedVariantConsequenceDAO extends BaseSQLDAO { + + protected PredictedVariantConsequenceDAO() { + super(PredictedVariantConsequence.class); + } + +} diff --git a/src/main/java/org/alliancegenome/curation_api/enums/BackendBulkLoadType.java b/src/main/java/org/alliancegenome/curation_api/enums/BackendBulkLoadType.java index 11b17cb8a..c06d1b845 100644 --- a/src/main/java/org/alliancegenome/curation_api/enums/BackendBulkLoadType.java +++ b/src/main/java/org/alliancegenome/curation_api/enums/BackendBulkLoadType.java @@ -19,6 +19,8 @@ public enum BackendBulkLoadType { CONSTRUCT_ASSOCIATION("json"), VARIANT("json"), VARIATION("json"), // FMS variants as opposed to direct submission for VARIANT + VEPTRANSCRIPT("tsv"), + VEPGENE("tsv"), // GFF all from the same file but split out GFF("gff"), // For Database entries diff --git a/src/main/java/org/alliancegenome/curation_api/interfaces/crud/PredictedVariantConsequenceCrudInterface.java b/src/main/java/org/alliancegenome/curation_api/interfaces/crud/PredictedVariantConsequenceCrudInterface.java new file mode 100644 index 000000000..25139fde7 --- /dev/null +++ b/src/main/java/org/alliancegenome/curation_api/interfaces/crud/PredictedVariantConsequenceCrudInterface.java @@ -0,0 +1,37 @@ +package org.alliancegenome.curation_api.interfaces.crud; + +import java.util.List; + +import org.alliancegenome.curation_api.interfaces.base.BaseIdCrudInterface; +import org.alliancegenome.curation_api.model.entities.PredictedVariantConsequence; +import org.alliancegenome.curation_api.model.ingest.dto.fms.VepTxtDTO; +import org.alliancegenome.curation_api.response.APIResponse; +import org.alliancegenome.curation_api.view.View; +import org.eclipse.microprofile.openapi.annotations.tags.Tag; + +import com.fasterxml.jackson.annotation.JsonView; + +import jakarta.ws.rs.Consumes; +import jakarta.ws.rs.POST; +import jakarta.ws.rs.Path; +import 
jakarta.ws.rs.PathParam; +import jakarta.ws.rs.Produces; +import jakarta.ws.rs.core.MediaType; + +@Path("predictedvariantconsequence") +@Tag(name = "CRUD - Predicted Variant Consequence") +@Produces(MediaType.APPLICATION_JSON) +@Consumes(MediaType.APPLICATION_JSON) +public interface PredictedVariantConsequenceCrudInterface extends BaseIdCrudInterface { + + @POST + @Path("/bulk/{dataProvider}/transcriptConsequenceFile") + @JsonView(View.FieldsAndLists.class) + APIResponse updateTranscriptLevelConsequences(@PathParam("dataProvider") String dataProvider, List consequenceData); + + @POST + @Path("/bulk/{dataProvider}/geneConsequenceFile") + @JsonView(View.FieldsAndLists.class) + APIResponse updateGeneLevelConsequences(@PathParam("dataProvider") String dataProvider, List consequenceData); + +} diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/executors/BulkLoadJobExecutor.java b/src/main/java/org/alliancegenome/curation_api/jobs/executors/BulkLoadJobExecutor.java index 9fe7ab0b3..15d34c49c 100644 --- a/src/main/java/org/alliancegenome/curation_api/jobs/executors/BulkLoadJobExecutor.java +++ b/src/main/java/org/alliancegenome/curation_api/jobs/executors/BulkLoadJobExecutor.java @@ -58,6 +58,8 @@ public class BulkLoadJobExecutor { @Inject Gff3ExonExecutor gff3ExonExecutor; @Inject Gff3CDSExecutor gff3CDSExecutor; @Inject Gff3TranscriptExecutor gff3TranscriptExecutor; + @Inject VepTranscriptExecutor vepTranscriptExecutor; + @Inject VepGeneExecutor vepGeneExecutor; @Inject HTPExpressionDatasetAnnotationExecutor htpExpressionDatasetAnnotationExecutor; @Inject ExpressionAtlasExecutor expressionAtlasExecutor; @@ -138,6 +140,10 @@ public void process(BulkLoadFileHistory bulkLoadFileHistory, Boolean cleanUp) th expressionAtlasExecutor.execLoad(bulkLoadFileHistory); } else if (bulkLoadFileHistory.getBulkLoad().getBackendBulkLoadType() == BackendBulkLoadType.BIOGRID_ORCS) { biogridOrcExecutor.execLoad(bulkLoadFileHistory); + } else if 
(bulkLoadFileHistory.getBulkLoad().getBackendBulkLoadType() == BackendBulkLoadType.VEPTRANSCRIPT) { + vepTranscriptExecutor.execLoad(bulkLoadFileHistory); + } else if (bulkLoadFileHistory.getBulkLoad().getBackendBulkLoadType() == BackendBulkLoadType.VEPGENE) { + vepGeneExecutor.execLoad(bulkLoadFileHistory); } else { log.info("Load: " + bulkLoadFileHistory.getBulkLoad().getName() + " for type " + bulkLoadFileHistory.getBulkLoad().getBackendBulkLoadType() + " not implemented"); throw new Exception("Load: " + bulkLoadFileHistory.getBulkLoad().getName() + " for type " + bulkLoadFileHistory.getBulkLoad().getBackendBulkLoadType() + " not implemented"); diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/executors/ExpressionAtlasExecutor.java b/src/main/java/org/alliancegenome/curation_api/jobs/executors/ExpressionAtlasExecutor.java index b8bbb4e25..d5e36f8e9 100644 --- a/src/main/java/org/alliancegenome/curation_api/jobs/executors/ExpressionAtlasExecutor.java +++ b/src/main/java/org/alliancegenome/curation_api/jobs/executors/ExpressionAtlasExecutor.java @@ -1,10 +1,13 @@ package org.alliancegenome.curation_api.jobs.executors; -import com.fasterxml.jackson.dataformat.xml.XmlMapper; -import com.fasterxml.jackson.dataformat.xml.annotation.JacksonXmlElementWrapper; -import jakarta.enterprise.context.ApplicationScoped; -import jakarta.inject.Inject; -import lombok.extern.jbosslog.JBossLog; +import static org.alliancegenome.curation_api.services.DataProviderService.RESOURCE_DESCRIPTOR_PREFIX; + +import java.io.IOException; +import java.net.URL; +import java.util.ArrayList; +import java.util.List; +import java.util.Objects; + import org.alliancegenome.curation_api.model.entities.CrossReference; import org.alliancegenome.curation_api.model.entities.DataProvider; import org.alliancegenome.curation_api.model.entities.Organization; @@ -17,15 +20,12 @@ import org.alliancegenome.curation_api.util.ProcessDisplayHelper; import org.jetbrains.annotations.NotNull; -import 
java.io.IOException; -import java.net.URL; -import java.util.ArrayList; -import java.util.List; -import java.util.Objects; +import com.fasterxml.jackson.dataformat.xml.XmlMapper; +import com.fasterxml.jackson.dataformat.xml.annotation.JacksonXmlElementWrapper; -import static org.alliancegenome.curation_api.services.DataProviderService.RESOURCE_DESCRIPTOR_PREFIX; +import jakarta.enterprise.context.ApplicationScoped; +import jakarta.inject.Inject; -@JBossLog @ApplicationScoped public class ExpressionAtlasExecutor extends LoadFileExecutor { diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/executors/LoadFileExecutor.java b/src/main/java/org/alliancegenome/curation_api/jobs/executors/LoadFileExecutor.java index d7b6b8b52..451177395 100644 --- a/src/main/java/org/alliancegenome/curation_api/jobs/executors/LoadFileExecutor.java +++ b/src/main/java/org/alliancegenome/curation_api/jobs/executors/LoadFileExecutor.java @@ -218,14 +218,13 @@ protected boolean runLoad(BaseUpser idsAdded.add(dbObject.getId()); } } catch (ObjectUpdateException e) { - // e.printStackTrace(); history.incrementFailed(); addException(history, e.getData()); } catch (KnownIssueValidationException e) { Log.debug(e.getMessage()); history.incrementSkipped(); } catch (Exception e) { - // e.printStackTrace(); + e.printStackTrace(); history.incrementFailed(); addException(history, new ObjectUpdateExceptionData(dtoObject, e.getMessage(), e.getStackTrace())); } diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/executors/VepGeneExecutor.java b/src/main/java/org/alliancegenome/curation_api/jobs/executors/VepGeneExecutor.java new file mode 100644 index 000000000..ac4659bc3 --- /dev/null +++ b/src/main/java/org/alliancegenome/curation_api/jobs/executors/VepGeneExecutor.java @@ -0,0 +1,169 @@ +package org.alliancegenome.curation_api.jobs.executors; + +import java.io.FileInputStream; +import java.util.ArrayList; +import java.util.List; +import java.util.stream.Collectors; +import 
java.util.zip.GZIPInputStream; + +import org.alliancegenome.curation_api.dao.PredictedVariantConsequenceDAO; +import org.alliancegenome.curation_api.enums.BackendBulkDataProvider; +import org.alliancegenome.curation_api.exceptions.KnownIssueValidationException; +import org.alliancegenome.curation_api.exceptions.ObjectUpdateException; +import org.alliancegenome.curation_api.exceptions.ObjectUpdateException.ObjectUpdateExceptionData; +import org.alliancegenome.curation_api.jobs.util.CsvSchemaBuilder; +import org.alliancegenome.curation_api.model.entities.bulkloads.BulkFMSLoad; +import org.alliancegenome.curation_api.model.entities.bulkloads.BulkLoadFileHistory; +import org.alliancegenome.curation_api.model.ingest.dto.fms.VepTxtDTO; +import org.alliancegenome.curation_api.response.APIResponse; +import org.alliancegenome.curation_api.response.LoadHistoryResponce; +import org.alliancegenome.curation_api.services.PredictedVariantConsequenceService; +import org.alliancegenome.curation_api.util.ProcessDisplayHelper; +import org.apache.commons.collections.CollectionUtils; +import org.apache.commons.collections4.ListUtils; + +import com.fasterxml.jackson.databind.MappingIterator; +import com.fasterxml.jackson.dataformat.csv.CsvMapper; +import com.fasterxml.jackson.dataformat.csv.CsvParser; +import com.fasterxml.jackson.dataformat.csv.CsvSchema; + +import io.quarkus.logging.Log; +import jakarta.enterprise.context.ApplicationScoped; +import jakarta.inject.Inject; + +@ApplicationScoped +public class VepGeneExecutor extends LoadFileExecutor { + + @Inject PredictedVariantConsequenceDAO predictedVariantConsequenceDAO; + @Inject PredictedVariantConsequenceService predictedVariantConsequenceService; + + public void execLoad(BulkLoadFileHistory bulkLoadFileHistory) { + try { + + CsvSchema vepTxtSchema = CsvSchemaBuilder.vepTxtSchema(); + CsvMapper csvMapper = new CsvMapper(); + MappingIterator it = 
csvMapper.enable(CsvParser.Feature.INSERT_NULLS_FOR_MISSING_COLUMNS).readerFor(VepTxtDTO.class).with(vepTxtSchema).readValues(new GZIPInputStream(new FileInputStream(bulkLoadFileHistory.getBulkLoadFile().getLocalFilePath()))); + List vepData = it.readAll(); + + + BulkFMSLoad fmsLoad = (BulkFMSLoad) bulkLoadFileHistory.getBulkLoad(); + BackendBulkDataProvider dataProvider = BackendBulkDataProvider.valueOf(fmsLoad.getFmsDataSubType()); + + List consequenceIdsLoaded = new ArrayList<>(); + List consequenceIdsBefore = predictedVariantConsequenceService.getGeneLevelIdsByDataProvider(dataProvider); + + bulkLoadFileHistory.setCount(vepData.size()); + updateHistory(bulkLoadFileHistory); + + boolean success = runLoad(bulkLoadFileHistory, dataProvider, vepData, consequenceIdsLoaded); + if (success) { + runCleanup(predictedVariantConsequenceService, bulkLoadFileHistory, dataProvider.name(), consequenceIdsBefore, consequenceIdsLoaded, "gene-level predicted variant consequences"); + } + bulkLoadFileHistory.finishLoad(); + updateHistory(bulkLoadFileHistory); + updateExceptions(bulkLoadFileHistory); + + } catch (Exception e) { + failLoad(bulkLoadFileHistory, e); + e.printStackTrace(); + } + } + + protected boolean runLoad(BulkLoadFileHistory history, BackendBulkDataProvider dataProvider, List objectList, List idsUpdated) { + ProcessDisplayHelper ph = new ProcessDisplayHelper(); + ph.addDisplayHandler(loadProcessDisplayService); + if (CollectionUtils.isNotEmpty(objectList)) { + String loadMessage = objectList.get(0).getClass().getSimpleName() + " update"; + if (dataProvider != null) { + loadMessage = loadMessage + " for " + dataProvider.name(); + } + ph.startProcess(loadMessage, objectList.size()); + + updateHistory(history); + for (VepTxtDTO dtoObject : objectList) { + try { + Long idUpdated = predictedVariantConsequenceService.updateGeneLevelConsequence(dtoObject); + history.incrementCompleted(); + if (idsUpdated != null) { + idsUpdated.add(idUpdated); + } + } catch 
(ObjectUpdateException e) { + history.incrementFailed(); + addException(history, e.getData()); + } catch (KnownIssueValidationException e) { + Log.debug(e.getMessage()); + history.incrementSkipped(); + } catch (Exception e) { + e.printStackTrace(); + history.incrementFailed(); + addException(history, new ObjectUpdateExceptionData(dtoObject, e.getMessage(), e.getStackTrace())); + } + if (history.getErrorRate() > 0.25) { + Log.error("Failure Rate > 25% aborting load"); + updateHistory(history); + updateExceptions(history); + failLoadAboveErrorRateCutoff(history); + return false; + } + ph.progressProcess(); + } + updateHistory(history); + updateExceptions(history); + ph.finishProcess(); + } + return true; + } + + protected void runCleanup(BulkLoadFileHistory history, String dataProviderName, List annotationIdsBefore, List annotationIdsAfter, String loadTypeString, Boolean deprecate) { + Log.debug("runLoad: After: " + dataProviderName + " " + annotationIdsAfter.size()); + + List distinctAfter = annotationIdsAfter.stream().distinct().collect(Collectors.toList()); + Log.debug("runLoad: Distinct: " + dataProviderName + " " + distinctAfter.size()); + + List idsToReset = ListUtils.subtract(annotationIdsBefore, distinctAfter); + Log.debug("runLoad: Reset: " + dataProviderName + " " + idsToReset.size()); + + String countType = loadTypeString + " reset"; + + long existingResets = history.getCount(countType).getTotal() == null ? 
0 : history.getCount(countType).getTotal(); + history.setCount(countType, idsToReset.size() + existingResets); + + String loadDescription = dataProviderName + " " + loadTypeString + " bulk load (" + history.getBulkLoadFile().getMd5Sum() + ")"; + + ProcessDisplayHelper ph = new ProcessDisplayHelper(10000); + ph.startProcess("Deletion/deprecation of: " + dataProviderName + " " + loadTypeString, idsToReset.size()); + + for (Long id : idsToReset) { + try { + predictedVariantConsequenceService.resetGeneLevelConsequence(id, loadDescription); + history.incrementCompleted(countType); + } catch (Exception e) { + history.incrementFailed(countType); + addException(history, new ObjectUpdateExceptionData("{ \"id\": " + id + "}", e.getMessage(), e.getStackTrace())); + } + if (history.getErrorRate(countType) > 0.25) { + Log.error(countType + " failure rate > 25% aborting load"); + failLoadAboveErrorRateCutoff(history); + break; + } + ph.progressProcess(); + } + updateHistory(history); + updateExceptions(history); + ph.finishProcess(); + } + + public APIResponse runLoadApi(String dataProviderName, List consequenceData) { + List idsLoaded = new ArrayList<>(); + BulkLoadFileHistory history = new BulkLoadFileHistory(consequenceData.size()); + history = bulkLoadFileHistoryDAO.persist(history); + BackendBulkDataProvider dataProvider = null; + if (dataProviderName != null) { + dataProvider = BackendBulkDataProvider.valueOf(dataProviderName); + } + runLoad(history, dataProvider, consequenceData, idsLoaded); + history.finishLoad(); + return new LoadHistoryResponce(history); + } + +} diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/executors/VepTranscriptExecutor.java b/src/main/java/org/alliancegenome/curation_api/jobs/executors/VepTranscriptExecutor.java new file mode 100644 index 000000000..dfa877ad4 --- /dev/null +++ b/src/main/java/org/alliancegenome/curation_api/jobs/executors/VepTranscriptExecutor.java @@ -0,0 +1,62 @@ +package 
org.alliancegenome.curation_api.jobs.executors; + +import java.io.FileInputStream; +import java.util.ArrayList; +import java.util.List; +import java.util.zip.GZIPInputStream; + +import org.alliancegenome.curation_api.dao.PredictedVariantConsequenceDAO; +import org.alliancegenome.curation_api.enums.BackendBulkDataProvider; +import org.alliancegenome.curation_api.jobs.util.CsvSchemaBuilder; +import org.alliancegenome.curation_api.model.entities.bulkloads.BulkFMSLoad; +import org.alliancegenome.curation_api.model.entities.bulkloads.BulkLoadFileHistory; +import org.alliancegenome.curation_api.model.ingest.dto.fms.VepTxtDTO; +import org.alliancegenome.curation_api.services.PredictedVariantConsequenceService; + +import com.fasterxml.jackson.databind.MappingIterator; +import com.fasterxml.jackson.dataformat.csv.CsvMapper; +import com.fasterxml.jackson.dataformat.csv.CsvParser; +import com.fasterxml.jackson.dataformat.csv.CsvSchema; + +import jakarta.enterprise.context.ApplicationScoped; +import jakarta.inject.Inject; + +@ApplicationScoped +public class VepTranscriptExecutor extends LoadFileExecutor { + + @Inject PredictedVariantConsequenceDAO predictedVariantConsequenceDAO; + @Inject PredictedVariantConsequenceService predictedVariantConsequenceService; + + public void execLoad(BulkLoadFileHistory bulkLoadFileHistory) { + try { + + CsvSchema vepTxtSchema = CsvSchemaBuilder.vepTxtSchema(); + CsvMapper csvMapper = new CsvMapper(); + MappingIterator it = csvMapper.enable(CsvParser.Feature.INSERT_NULLS_FOR_MISSING_COLUMNS).readerFor(VepTxtDTO.class).with(vepTxtSchema).readValues(new GZIPInputStream(new FileInputStream(bulkLoadFileHistory.getBulkLoadFile().getLocalFilePath()))); + List vepData = it.readAll(); + + + BulkFMSLoad fmsLoad = (BulkFMSLoad) bulkLoadFileHistory.getBulkLoad(); + BackendBulkDataProvider dataProvider = BackendBulkDataProvider.valueOf(fmsLoad.getFmsDataSubType()); + + List consequenceIdsLoaded = new ArrayList<>(); + List consequenceIdsBefore = 
predictedVariantConsequenceService.getIdsByDataProvider(dataProvider); + + bulkLoadFileHistory.setCount(vepData.size()); + updateHistory(bulkLoadFileHistory); + + boolean success = runLoad(predictedVariantConsequenceService, bulkLoadFileHistory, dataProvider, vepData, consequenceIdsLoaded); + if (success) { + runCleanup(predictedVariantConsequenceService, bulkLoadFileHistory, dataProvider.name(), consequenceIdsBefore, consequenceIdsLoaded, "predicted variant consequences"); + } + bulkLoadFileHistory.finishLoad(); + updateHistory(bulkLoadFileHistory); + updateExceptions(bulkLoadFileHistory); + + } catch (Exception e) { + failLoad(bulkLoadFileHistory, e); + e.printStackTrace(); + } + } + +} diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/util/CsvSchemaBuilder.java b/src/main/java/org/alliancegenome/curation_api/jobs/util/CsvSchemaBuilder.java index 28c4bfca0..8320586b8 100644 --- a/src/main/java/org/alliancegenome/curation_api/jobs/util/CsvSchemaBuilder.java +++ b/src/main/java/org/alliancegenome/curation_api/jobs/util/CsvSchemaBuilder.java @@ -105,4 +105,30 @@ public static CsvSchema gff3Schema() { return schema; } + + public static CsvSchema vepTxtSchema() { + CsvSchema schema = CsvSchema.builder() + .setColumnSeparator('\t') + .setArrayElementSeparator(";") + .setAllowComments(true) + .setNullValue("-") + .disableQuoteChar() + .addColumn("uploadedVariation") + .addColumn("location") + .addColumn("allele") + .addColumn("gene") + .addColumn("feature") + .addColumn("featureType") + .addColumn("consequence") + .addColumn("cdnaPosition") + .addColumn("cdsPosition") + .addColumn("proteinPosition") + .addColumn("aminoAcids") + .addColumn("codons") + .addColumn("existingVariation") + .addColumn("extra") + .build(); + + return schema; + } } diff --git a/src/main/java/org/alliancegenome/curation_api/model/entities/PredictedVariantConsequence.java b/src/main/java/org/alliancegenome/curation_api/model/entities/PredictedVariantConsequence.java new file mode 
100644 index 000000000..9e7677194 --- /dev/null +++ b/src/main/java/org/alliancegenome/curation_api/model/entities/PredictedVariantConsequence.java @@ -0,0 +1,167 @@ +package org.alliancegenome.curation_api.model.entities; + +import java.util.List; + +import org.alliancegenome.curation_api.constants.LinkMLSchemaConstants; +import org.alliancegenome.curation_api.interfaces.AGRCurationSchemaVersion; +import org.alliancegenome.curation_api.model.bridges.BooleanValueBridge; +import org.alliancegenome.curation_api.model.entities.associations.variantAssociations.CuratedVariantGenomicLocationAssociation; +import org.alliancegenome.curation_api.model.entities.base.AuditedObject; +import org.alliancegenome.curation_api.model.entities.ontology.SOTerm; +import org.alliancegenome.curation_api.view.View; +import org.eclipse.microprofile.openapi.annotations.media.Schema; +import org.hibernate.search.engine.backend.types.Aggregable; +import org.hibernate.search.engine.backend.types.Projectable; +import org.hibernate.search.engine.backend.types.Searchable; +import org.hibernate.search.engine.backend.types.Sortable; +import org.hibernate.search.mapper.pojo.automaticindexing.ReindexOnUpdate; +import org.hibernate.search.mapper.pojo.bridge.mapping.annotation.ValueBridgeRef; +import org.hibernate.search.mapper.pojo.mapping.definition.annotation.FullTextField; +import org.hibernate.search.mapper.pojo.mapping.definition.annotation.GenericField; +import org.hibernate.search.mapper.pojo.mapping.definition.annotation.IndexedEmbedded; +import org.hibernate.search.mapper.pojo.mapping.definition.annotation.IndexingDependency; +import org.hibernate.search.mapper.pojo.mapping.definition.annotation.KeywordField; + +import com.fasterxml.jackson.annotation.JsonBackReference; +import com.fasterxml.jackson.annotation.JsonView; + +import jakarta.persistence.Column; +import jakarta.persistence.Entity; +import jakarta.persistence.Index; +import jakarta.persistence.JoinTable; +import 
jakarta.persistence.ManyToMany; +import jakarta.persistence.ManyToOne; +import jakarta.persistence.Table; +import lombok.Data; +import lombok.EqualsAndHashCode; +import lombok.ToString; + +@Entity +@Data +@EqualsAndHashCode(onlyExplicitlyIncluded = true, callSuper = true) +@ToString(callSuper = true) +@AGRCurationSchemaVersion(min = "2.7.0", max = LinkMLSchemaConstants.LATEST_RELEASE, dependencies = { AuditedObject.class }) +@Schema(name = "PredictedVariantConsequence", description = "POJO representing VEP predicted variant consequence results") +@Table(indexes = { + @Index(name = "predictedvariantconsequence_varianttranscript_index", columnList = "varianttranscript_id"), + @Index(name = "predictedvariantconsequence_vepimpact_index", columnList = "vepimpact_id"), + @Index(name = "predictedvariantconsequence_polyphenprediction_index", columnList = "polyphenprediction_id"), + @Index(name = "predictedvariantconsequence_siftprediction_index", columnList = "siftprediction_id"), + @Index(name = "predictedvariantconsequence_createdby_index", columnList = "createdby_id"), + @Index(name = "predictedvariantconsequence_updatedby_index", columnList = "updatedby_id") +}) +public class PredictedVariantConsequence extends AuditedObject { + + @ManyToOne + @JsonBackReference + private CuratedVariantGenomicLocationAssociation variantGenomicLocation; + + @IndexedEmbedded(includePaths = {"name", "name_keyword", "curie", "curie_keyword", "modEntityId", "modEntityId_keyword", "modInternalId", "modInternalId_keyword"}) + @IndexingDependency(reindexOnUpdate = ReindexOnUpdate.SHALLOW) + @ManyToOne + @JsonView({ View.FieldsOnly.class }) + private Transcript variantTranscript; + + @IndexedEmbedded(includePaths = {"name", "name_keyword"}) + @IndexingDependency(reindexOnUpdate = ReindexOnUpdate.SHALLOW) + @ManyToOne + @JsonView({ View.FieldsOnly.class }) + private VocabularyTerm vepImpact; + + @IndexedEmbedded(includePaths = {"curie", "name", "secondaryIdentifiers", "synonyms.name", 
"namespace", + "curie_keyword", "name_keyword", "secondaryIdentifiers_keyword", "synonyms.name_keyword", "namespace_keyword" }) + @IndexingDependency(reindexOnUpdate = ReindexOnUpdate.SHALLOW) + @ManyToMany + @JoinTable(indexes = { + @Index(name = "predictedvariantconsequence_ontologyterm_pvc_index", columnList = "predictedvariantconsequence_id"), + @Index(name = "predictedvariantconsequence_ontologyterm_vc_index", columnList = "vepconsequences_id") + }) + @JsonView({ View.FieldsAndLists.class, View.VariantView.class }) + private List vepConsequences; + + @IndexedEmbedded(includePaths = {"name", "name_keyword"}) + @IndexingDependency(reindexOnUpdate = ReindexOnUpdate.SHALLOW) + @ManyToOne + @JsonView({ View.FieldsOnly.class }) + private VocabularyTerm polyphenPrediction; + + @GenericField(projectable = Projectable.YES, sortable = Sortable.YES) + @JsonView({ View.FieldsOnly.class }) + private Float polyphenScore; + + @IndexedEmbedded(includePaths = {"name", "name_keyword"}) + @IndexingDependency(reindexOnUpdate = ReindexOnUpdate.SHALLOW) + @ManyToOne + @JsonView({ View.FieldsOnly.class }) + private VocabularyTerm siftPrediction; + + @GenericField(projectable = Projectable.YES, sortable = Sortable.YES) + @JsonView({ View.FieldsOnly.class }) + private Float siftScore; + + @FullTextField(analyzer = "autocompleteAnalyzer", searchAnalyzer = "autocompleteSearchAnalyzer") + @KeywordField(name = "aminoAcidReference_keyword", aggregable = Aggregable.YES, sortable = Sortable.YES, searchable = Searchable.YES, normalizer = "sortNormalizer") + @JsonView({ View.FieldsOnly.class }) + @Column(columnDefinition = "TEXT") + private String aminoAcidReference; + + @FullTextField(analyzer = "autocompleteAnalyzer", searchAnalyzer = "autocompleteSearchAnalyzer") + @KeywordField(name = "aminoAcidVariant_keyword", aggregable = Aggregable.YES, sortable = Sortable.YES, searchable = Searchable.YES, normalizer = "sortNormalizer") + @JsonView({ View.FieldsOnly.class }) + @Column(columnDefinition 
= "TEXT") + private String aminoAcidVariant; + + @FullTextField(analyzer = "autocompleteAnalyzer", searchAnalyzer = "autocompleteSearchAnalyzer") + @KeywordField(name = "codonReference_keyword", aggregable = Aggregable.YES, sortable = Sortable.YES, searchable = Searchable.YES, normalizer = "sortNormalizer") + @JsonView({ View.FieldsOnly.class }) + @Column(columnDefinition = "TEXT") + private String codonReference; + + @FullTextField(analyzer = "autocompleteAnalyzer", searchAnalyzer = "autocompleteSearchAnalyzer") + @KeywordField(name = "codonVariant_keyword", aggregable = Aggregable.YES, sortable = Sortable.YES, searchable = Searchable.YES, normalizer = "sortNormalizer") + @JsonView({ View.FieldsOnly.class }) + @Column(columnDefinition = "TEXT") + private String codonVariant; + + @GenericField(projectable = Projectable.YES, sortable = Sortable.YES) + @JsonView({ View.FieldsOnly.class }) + private Integer calculatedCdnaStart; + + @GenericField(projectable = Projectable.YES, sortable = Sortable.YES) + @JsonView({ View.FieldsOnly.class }) + private Integer calculatedCdnaEnd; + + @GenericField(projectable = Projectable.YES, sortable = Sortable.YES) + @JsonView({ View.FieldsOnly.class }) + private Integer calculatedCdsStart; + + @GenericField(projectable = Projectable.YES, sortable = Sortable.YES) + @JsonView({ View.FieldsOnly.class }) + private Integer calculatedCdsEnd; + + @GenericField(projectable = Projectable.YES, sortable = Sortable.YES) + @JsonView({ View.FieldsOnly.class }) + private Integer calculatedProteinStart; + + @GenericField(projectable = Projectable.YES, sortable = Sortable.YES) + @JsonView({ View.FieldsOnly.class }) + private Integer calculatedProteinEnd; + + @FullTextField(analyzer = "autocompleteAnalyzer", searchAnalyzer = "autocompleteSearchAnalyzer") + @KeywordField(name = "hgvsProteinNomenclature_keyword", aggregable = Aggregable.YES, sortable = Sortable.YES, searchable = Searchable.YES, normalizer = "sortNormalizer") + @JsonView({ 
View.FieldsOnly.class }) + @Column(columnDefinition = "TEXT") + private String hgvsProteinNomenclature; + + @FullTextField(analyzer = "autocompleteAnalyzer", searchAnalyzer = "autocompleteSearchAnalyzer") + @KeywordField(name = "hgvsCodingNomenclature_keyword", aggregable = Aggregable.YES, sortable = Sortable.YES, searchable = Searchable.YES, normalizer = "sortNormalizer") + @JsonView({ View.FieldsOnly.class }) + @Column(columnDefinition = "TEXT") + private String hgvsCodingNomenclature; + + @FullTextField(analyzer = "autocompleteAnalyzer", searchAnalyzer = "autocompleteSearchAnalyzer", valueBridge = @ValueBridgeRef(type = BooleanValueBridge.class)) + @KeywordField(name = "geneLevelConsequence_keyword", aggregable = Aggregable.YES, sortable = Sortable.YES, searchable = Searchable.YES, valueBridge = @ValueBridgeRef(type = BooleanValueBridge.class)) + @JsonView({ View.FieldsOnly.class }) + @Column(columnDefinition = "boolean default false", nullable = false) + private Boolean geneLevelConsequence = false; +} diff --git a/src/main/java/org/alliancegenome/curation_api/model/entities/associations/variantAssociations/CuratedVariantGenomicLocationAssociation.java b/src/main/java/org/alliancegenome/curation_api/model/entities/associations/variantAssociations/CuratedVariantGenomicLocationAssociation.java index 82508a178..23051135b 100644 --- a/src/main/java/org/alliancegenome/curation_api/model/entities/associations/variantAssociations/CuratedVariantGenomicLocationAssociation.java +++ b/src/main/java/org/alliancegenome/curation_api/model/entities/associations/variantAssociations/CuratedVariantGenomicLocationAssociation.java @@ -1,11 +1,22 @@ package org.alliancegenome.curation_api.model.entities.associations.variantAssociations; +import java.util.List; + import org.alliancegenome.curation_api.constants.LinkMLSchemaConstants; import org.alliancegenome.curation_api.interfaces.AGRCurationSchemaVersion; +import 
org.alliancegenome.curation_api.model.entities.PredictedVariantConsequence; +import org.alliancegenome.curation_api.view.View; +import org.alliancegenome.curation_api.view.View.VariantView; import org.eclipse.microprofile.openapi.annotations.media.Schema; +import org.hibernate.search.mapper.pojo.mapping.definition.annotation.IndexedEmbedded; + +import com.fasterxml.jackson.annotation.JsonManagedReference; +import com.fasterxml.jackson.annotation.JsonView; +import jakarta.persistence.CascadeType; import jakarta.persistence.Entity; import jakarta.persistence.Index; +import jakarta.persistence.OneToMany; import jakarta.persistence.Table; import lombok.Data; import lombok.EqualsAndHashCode; @@ -14,7 +25,7 @@ @Entity @Data @EqualsAndHashCode(onlyExplicitlyIncluded = true, callSuper = true) -@ToString(callSuper = true) +@ToString(exclude = "predictedVariantConsequences", callSuper = true) @AGRCurationSchemaVersion(min = "2.4.0", max = LinkMLSchemaConstants.LATEST_RELEASE, dependencies = { VariantGenomicLocationAssociation.class }) @Schema(name = "CuratedVariantGenomicLocationAssociation", description = "POJO representing an association between a variant and a curated genomic location") @@ -35,5 +46,18 @@ ) public class CuratedVariantGenomicLocationAssociation extends VariantGenomicLocationAssociation { - + + @IndexedEmbedded( + includePaths = { + "variantTranscript.name", "variantTranscript.modEntityId", + "variantTranscript.modInternalId", "variantTranscript.curie", + "vepConsequence.name", "variantTranscript.name_keyword", + "variantTranscript.modEntityId_keyword", "variantTranscript.modInternalId_keyword", + "variantTranscript.curie_keyword", "vepConsequence.name_keyword", + } + ) + @OneToMany(mappedBy = "variantGenomicLocation", cascade = CascadeType.ALL, orphanRemoval = true) + @JsonManagedReference + @JsonView({ View.FieldsAndLists.class, VariantView.class }) + private List predictedVariantConsequences; } diff --git 
a/src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/VepTxtDTO.java b/src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/VepTxtDTO.java
new file mode 100644
index 000000000..a25fe8eac
--- /dev/null
+++ b/src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/VepTxtDTO.java
@@ -0,0 +1,29 @@
+package org.alliancegenome.curation_api.model.ingest.dto.fms;
+
+import java.util.List;
+
+import org.alliancegenome.curation_api.model.ingest.dto.base.BaseDTO;
+
+import lombok.Data;
+import lombok.EqualsAndHashCode;
+
+@Data
+@EqualsAndHashCode(callSuper = true)
+public class VepTxtDTO extends BaseDTO {
+
+	private String uploadedVariation;
+	private String location;
+	private String allele;
+	private String gene;
+	private String feature;
+	private String featureType;
+	private String consequence;
+	private String cdnaPosition;
+	private String cdsPosition;
+	private String proteinPosition;
+	private String aminoAcids;
+	private String codons;
+	private String existingVariation;
+	private List<String> extra; // KEY=VALUE entries; split on '=' by the transcript-level validator
+
+}
diff --git a/src/main/java/org/alliancegenome/curation_api/services/PredictedVariantConsequenceService.java b/src/main/java/org/alliancegenome/curation_api/services/PredictedVariantConsequenceService.java
new file mode 100644
index 000000000..58e461235
--- /dev/null
+++ b/src/main/java/org/alliancegenome/curation_api/services/PredictedVariantConsequenceService.java
@@ -0,0 +1,125 @@
+package org.alliancegenome.curation_api.services;
+
+import java.time.OffsetDateTime;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+
+import org.alliancegenome.curation_api.constants.EntityFieldConstants;
+import org.alliancegenome.curation_api.dao.PredictedVariantConsequenceDAO;
+import org.alliancegenome.curation_api.enums.BackendBulkDataProvider;
+import org.alliancegenome.curation_api.exceptions.ApiErrorException;
+import org.alliancegenome.curation_api.exceptions.ValidationException;
+import org.alliancegenome.curation_api.interfaces.crud.BaseUpsertServiceInterface;
+import org.alliancegenome.curation_api.model.entities.Person;
+import org.alliancegenome.curation_api.model.entities.PredictedVariantConsequence;
+import org.alliancegenome.curation_api.model.ingest.dto.fms.VepTxtDTO;
+import org.alliancegenome.curation_api.response.ObjectResponse;
+import org.alliancegenome.curation_api.services.base.BaseEntityCrudService;
+import org.alliancegenome.curation_api.services.validation.dto.fms.VepGeneFmsDTOValidator;
+import org.alliancegenome.curation_api.services.validation.dto.fms.VepTranscriptFmsDTOValidator;
+import org.apache.commons.lang3.StringUtils;
+
+import io.quarkus.logging.Log;
+import jakarta.annotation.PostConstruct;
+import jakarta.enterprise.context.RequestScoped;
+import jakarta.inject.Inject;
+import jakarta.transaction.Transactional;
+
+/**
+ * Service for {@link PredictedVariantConsequence} records loaded from VEP text output
+ * ({@link VepTxtDTO}): id lookups per data provider, upsert, and gene-level flag maintenance.
+ */
+@RequestScoped
+public class PredictedVariantConsequenceService extends BaseEntityCrudService<PredictedVariantConsequence, PredictedVariantConsequenceDAO> implements BaseUpsertServiceInterface<PredictedVariantConsequence, VepTxtDTO> {
+
+	@Inject PredictedVariantConsequenceDAO predictedVariantConsequenceDAO;
+	@Inject VepTranscriptFmsDTOValidator vepTranscriptFmsDtoValidator;
+	@Inject VepGeneFmsDTOValidator vepGeneFmsDtoValidator;
+	@Inject PersonService personService;
+
+	@Override
+	@PostConstruct
+	protected void init() {
+		setSQLDao(predictedVariantConsequenceDAO);
+	}
+
+	/**
+	 * Finds the ids of the consequences whose transcript belongs to the given data provider.
+	 * For RGD the provider's canonical taxon is added as a second filter. Null ids returned
+	 * by the DAO are removed from the result.
+	 */
+	public List<Long> getIdsByDataProvider(BackendBulkDataProvider dataProvider) {
+		Map<String, Object> params = new HashMap<>();
+		params.put("variantTranscript." + EntityFieldConstants.DATA_PROVIDER, dataProvider.sourceOrganization);
+		if (StringUtils.equals(dataProvider.sourceOrganization, "RGD")) {
+			params.put("variantTranscript." + EntityFieldConstants.TAXON, dataProvider.canonicalTaxonCurie);
+		}
+		List<Long> ids = predictedVariantConsequenceDAO.findIdsByParams(params);
+		ids.removeIf(Objects::isNull);
+		return ids;
+	}
+
+	/**
+	 * Same lookup as {@link #getIdsByDataProvider(BackendBulkDataProvider)}, restricted to
+	 * consequences whose {@code geneLevelConsequence} flag is true.
+	 */
+	public List<Long> getGeneLevelIdsByDataProvider(BackendBulkDataProvider dataProvider) {
+		Map<String, Object> params = new HashMap<>();
+		params.put("variantTranscript." + EntityFieldConstants.DATA_PROVIDER, dataProvider.sourceOrganization);
+		if (StringUtils.equals(dataProvider.sourceOrganization, "RGD")) {
+			params.put("variantTranscript." + EntityFieldConstants.TAXON, dataProvider.canonicalTaxonCurie);
+		}
+		params.put("geneLevelConsequence", true);
+		List<Long> ids = predictedVariantConsequenceDAO.findIdsByParams(params);
+		ids.removeIf(Objects::isNull);
+		return ids;
+	}
+
+	/** Delegates to the transcript-level VEP validator, which creates or updates the consequence. */
+	@Override
+	@Transactional
+	public PredictedVariantConsequence upsert(VepTxtDTO dto, BackendBulkDataProvider dataProvider)
+		throws ValidationException {
+		return vepTranscriptFmsDtoValidator.validateTranscriptLevelConsequence(dto, dataProvider);
+	}
+
+	/** Delegates to the gene-level VEP validator and returns the id of the flagged consequence. */
+	@Transactional
+	public Long updateGeneLevelConsequence(VepTxtDTO dto) throws ValidationException {
+		return vepGeneFmsDtoValidator.validateGeneLevelConsequence(dto);
+	}
+
+	/**
+	 * Clears the gene-level flag of the consequence with the given id, if it is currently set.
+	 *
+	 * @param id            id of the consequence to reset
+	 * @param requestSource unique id of the updating person, used when no authenticated person id is available
+	 * @return the consequence (persisted again if it was changed), or {@code null} if the id is unknown
+	 */
+	@Transactional
+	public PredictedVariantConsequence resetGeneLevelConsequence(Long id, String requestSource) {
+		PredictedVariantConsequence pvc = predictedVariantConsequenceDAO.find(id);
+
+		if (pvc == null) {
+			String errorMessage = "Could not find PredictedVariantConsequence with id: " + id;
+			Log.error(errorMessage);
+			return null;
+		}
+
+		if (Boolean.TRUE.equals(pvc.getGeneLevelConsequence())) {
+			// "Reset" must clear the flag; writing true here left every record flagged forever.
+			pvc.setGeneLevelConsequence(false);
+			if (authenticatedPerson.getUniqueId() != null) {
+				requestSource = authenticatedPerson.getUniqueId();
+			}
+			Person updatedBy = personService.fetchByUniqueIdOrCreate(requestSource);
+			pvc.setUpdatedBy(updatedBy);
+			pvc.setDateUpdated(OffsetDateTime.now());
+			return predictedVariantConsequenceDAO.persist(pvc);
+		}
+
+		return pvc;
+	}
+}
diff --git
a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/VariantFmsDTOValidator.java b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/VariantFmsDTOValidator.java
index 5cbd0a331..bb5f85282 100644
--- a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/VariantFmsDTOValidator.java
+++ b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/VariantFmsDTOValidator.java
@@ -110,7 +110,7 @@ public Long validateVariant(VariantFmsDTO dto, List idsAdded, BackendBulkD
 		}
 
 		String hgvs = HgvsIdentifierHelper.getHgvsIdentifier(dto);
-		String modInternalId = DigestUtils.md5Hex(hgvs);
+		String modInternalId = hgvs == null ? null : DigestUtils.md5Hex(hgvs);
 
 		if (StringUtils.isNotBlank(hgvs) && !variantResponse.hasErrors()) {
 			SearchResponse searchResponse = variantDAO.findByField("modInternalId", modInternalId);
diff --git a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/VepGeneFmsDTOValidator.java b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/VepGeneFmsDTOValidator.java
new file mode 100644
index 000000000..aa9e5893b
--- /dev/null
+++ b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/VepGeneFmsDTOValidator.java
@@ -0,0 +1,94 @@
+package org.alliancegenome.curation_api.services.validation.dto.fms;
+
+import java.util.Objects;
+
+import org.alliancegenome.curation_api.constants.ValidationConstants;
+import org.alliancegenome.curation_api.dao.PredictedVariantConsequenceDAO;
+import org.alliancegenome.curation_api.dao.associations.variantAssociations.CuratedVariantGenomicLocationAssociationDAO;
+import org.alliancegenome.curation_api.exceptions.ObjectValidationException;
+import org.alliancegenome.curation_api.exceptions.ValidationException;
+import org.alliancegenome.curation_api.model.entities.PredictedVariantConsequence;
+import org.alliancegenome.curation_api.model.entities.Transcript;
+import org.alliancegenome.curation_api.model.entities.associations.variantAssociations.CuratedVariantGenomicLocationAssociation;
+import org.alliancegenome.curation_api.model.ingest.dto.fms.VepTxtDTO;
+import org.alliancegenome.curation_api.response.ObjectResponse;
+import org.alliancegenome.curation_api.response.SearchResponse;
+import org.alliancegenome.curation_api.services.TranscriptService;
+import org.alliancegenome.curation_api.services.associations.variantAssociations.CuratedVariantGenomicLocationAssociationService;
+import org.apache.commons.collections.CollectionUtils;
+import org.apache.commons.lang3.StringUtils;
+
+import jakarta.enterprise.context.RequestScoped;
+import jakarta.inject.Inject;
+
+/**
+ * Flags an existing {@link PredictedVariantConsequence} as gene-level, based on the
+ * variant / transcript pair named in a {@link VepTxtDTO}.
+ */
+@RequestScoped
+public class VepGeneFmsDTOValidator {
+
+	@Inject PredictedVariantConsequenceDAO predictedVariantConsequenceDAO;
+	@Inject CuratedVariantGenomicLocationAssociationService cvglaService;
+	@Inject TranscriptService transcriptService;
+
+	/**
+	 * Looks up the variant location (by HGVS) and the transcript named in the DTO, finds the
+	 * consequence that already links the two and flags it as gene-level.
+	 *
+	 * @param dto VEP record; {@code uploadedVariation} and {@code feature} identify the consequence
+	 * @return id of the persisted consequence
+	 * @throws ValidationException if a field is blank or unresolvable, or no consequence links the pair
+	 */
+	public Long validateGeneLevelConsequence(VepTxtDTO dto) throws ValidationException {
+		ObjectResponse<PredictedVariantConsequence> response = new ObjectResponse<>();
+		PredictedVariantConsequence predictedVariantConsequence = null;
+
+		CuratedVariantGenomicLocationAssociation variantLocation = null;
+		if (StringUtils.isBlank(dto.getUploadedVariation())) {
+			response.addErrorMessage("uploadedVariant", ValidationConstants.REQUIRED_MESSAGE);
+		} else {
+			SearchResponse<CuratedVariantGenomicLocationAssociation> cvglaResponse = cvglaService.findByField("hgvs", dto.getUploadedVariation());
+			if (cvglaResponse != null && cvglaResponse.getSingleResult() != null) {
+				variantLocation = cvglaResponse.getSingleResult();
+			} else {
+				response.addErrorMessage("uploadedVariant", ValidationConstants.INVALID_MESSAGE + " (" + dto.getUploadedVariation() + ")");
+			}
+		}
+
+		Transcript transcript = null;
+		if (StringUtils.isBlank(dto.getFeature())) {
+			response.addErrorMessage("feature", ValidationConstants.REQUIRED_MESSAGE);
+		} else {
+			transcript = transcriptService.getByIdentifier(dto.getFeature()).getEntity();
+			if (transcript == null) {
+				response.addErrorMessage("feature", ValidationConstants.INVALID_MESSAGE + " (" + dto.getFeature() + ")");
+			}
+		}
+
+		if (variantLocation != null && CollectionUtils.isNotEmpty(variantLocation.getPredictedVariantConsequences()) && transcript != null) {
+			for (PredictedVariantConsequence existingPvc : variantLocation.getPredictedVariantConsequences()) {
+				// Compare ids by value; == on boxed ids compares object references.
+				if (existingPvc.getVariantTranscript() != null && Objects.equals(transcript.getId(), existingPvc.getVariantTranscript().getId())) {
+					predictedVariantConsequence = existingPvc;
+					break;
+				}
+			}
+		}
+
+		if (predictedVariantConsequence == null) {
+			response.addErrorMessage("uploadedVariant / feature", ValidationConstants.INVALID_MESSAGE + " ("
+				+ dto.getUploadedVariation() + " / " + dto.getFeature() + ")");
+		} else {
+			predictedVariantConsequence.setGeneLevelConsequence(true);
+		}
+
+		if (response.hasErrors()) {
+			throw new ObjectValidationException(dto, response.errorMessagesString());
+		}
+
+		predictedVariantConsequence = predictedVariantConsequenceDAO.persist(predictedVariantConsequence);
+
+		return predictedVariantConsequence.getId();
+	}
+}
diff --git a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/VepTranscriptFmsDTOValidator.java b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/VepTranscriptFmsDTOValidator.java
new file mode 100644
index 000000000..d2cfa4a6a
--- /dev/null
+++ b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/VepTranscriptFmsDTOValidator.java
@@ -0,0 +1,320 @@
+package org.alliancegenome.curation_api.services.validation.dto.fms;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.alliancegenome.curation_api.constants.ValidationConstants;
+import org.alliancegenome.curation_api.constants.VocabularyConstants;
+import
org.alliancegenome.curation_api.dao.PredictedVariantConsequenceDAO;
+import org.alliancegenome.curation_api.dao.associations.variantAssociations.CuratedVariantGenomicLocationAssociationDAO;
+import org.alliancegenome.curation_api.enums.BackendBulkDataProvider;
+import org.alliancegenome.curation_api.exceptions.ObjectValidationException;
+import org.alliancegenome.curation_api.exceptions.ValidationException;
+import org.alliancegenome.curation_api.model.entities.PredictedVariantConsequence;
+import org.alliancegenome.curation_api.model.entities.Transcript;
+import org.alliancegenome.curation_api.model.entities.VocabularyTerm;
+import org.alliancegenome.curation_api.model.entities.associations.variantAssociations.CuratedVariantGenomicLocationAssociation;
+import org.alliancegenome.curation_api.model.entities.ontology.SOTerm;
+import org.alliancegenome.curation_api.model.ingest.dto.fms.VepTxtDTO;
+import org.alliancegenome.curation_api.response.ObjectResponse;
+import org.alliancegenome.curation_api.response.SearchResponse;
+import org.alliancegenome.curation_api.services.TranscriptService;
+import org.alliancegenome.curation_api.services.VocabularyTermService;
+import org.alliancegenome.curation_api.services.associations.variantAssociations.CuratedVariantGenomicLocationAssociationService;
+import org.alliancegenome.curation_api.services.ontology.SoTermService;
+import org.apache.commons.collections.CollectionUtils;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.commons.lang3.tuple.ImmutablePair;
+import org.apache.commons.lang3.tuple.Pair;
+
+import jakarta.enterprise.context.RequestScoped;
+import jakarta.inject.Inject;
+
+/**
+ * Builds or updates a {@link PredictedVariantConsequence} from one transcript-level VEP record.
+ */
+@RequestScoped
+public class VepTranscriptFmsDTOValidator {
+
+	@Inject PredictedVariantConsequenceDAO predictedVariantConsequenceDAO;
+	@Inject CuratedVariantGenomicLocationAssociationDAO cvglaDAO;
+	@Inject CuratedVariantGenomicLocationAssociationService cvglaService;
+	@Inject TranscriptService transcriptService;
+	@Inject VocabularyTermService vocabularyTermService;
+	@Inject SoTermService soTermService;
+
+	private static final Pattern PATHOGENICITY_PREDICTION_RESULT = Pattern.compile("^([\\w]+)\\(([\\d\\.]+)\\)$");
+	private static final Pattern POSITION_STRING = Pattern.compile("^[\\d\\?\\-]+$");
+
+	/**
+	 * Validates the DTO, then creates the consequence for its variant location / transcript pair or
+	 * updates the existing one. Field errors are collected and thrown as one ObjectValidationException.
+	 */
+	public PredictedVariantConsequence validateTranscriptLevelConsequence(VepTxtDTO dto, BackendBulkDataProvider dataProvider) throws ValidationException {
+		ObjectResponse<PredictedVariantConsequence> response = new ObjectResponse<>();
+		PredictedVariantConsequence predictedVariantConsequence = new PredictedVariantConsequence();
+
+		CuratedVariantGenomicLocationAssociation variantLocation = null;
+		if (StringUtils.isBlank(dto.getUploadedVariation())) {
+			response.addErrorMessage("uploadedVariant", ValidationConstants.REQUIRED_MESSAGE);
+		} else {
+			SearchResponse<CuratedVariantGenomicLocationAssociation> cvglaResponse = cvglaService.findByField("hgvs", dto.getUploadedVariation());
+			if (cvglaResponse != null && cvglaResponse.getSingleResult() != null) {
+				variantLocation = cvglaResponse.getSingleResult();
+			} else {
+				response.addErrorMessage("uploadedVariant", ValidationConstants.INVALID_MESSAGE + " (" + dto.getUploadedVariation() + ")");
+			}
+		}
+
+		Transcript transcript = null;
+		if (StringUtils.isBlank(dto.getFeature())) {
+			response.addErrorMessage("feature", ValidationConstants.REQUIRED_MESSAGE);
+		} else {
+			transcript = transcriptService.getByIdentifier(dto.getFeature()).getEntity();
+			if (transcript == null) {
+				response.addErrorMessage("feature", ValidationConstants.INVALID_MESSAGE + " (" + dto.getFeature() + ")");
+			}
+		}
+
+		boolean isUpdate = false;
+		if (variantLocation != null && CollectionUtils.isNotEmpty(variantLocation.getPredictedVariantConsequences()) && transcript != null) {
+			for (PredictedVariantConsequence existingPvc : variantLocation.getPredictedVariantConsequences()) {
+				// Compare ids by value; == on boxed ids compares references, so updates were never detected.
+				if (existingPvc.getVariantTranscript() != null && Objects.equals(transcript.getId(), existingPvc.getVariantTranscript().getId())) {
+					predictedVariantConsequence = existingPvc;
+					isUpdate = true;
+					break;
+				}
+			}
+		}
+
+		predictedVariantConsequence.setVariantGenomicLocation(variantLocation);
+		predictedVariantConsequence.setVariantTranscript(transcript);
+
+		Map<String, String> attributes = getExtraAttributes(dto);
+
+		VocabularyTerm vepImpact = null;
+		if (!attributes.containsKey("IMPACT")) {
+			response.addErrorMessage("extra - IMPACT", ValidationConstants.REQUIRED_MESSAGE);
+		} else {
+			vepImpact = vocabularyTermService.getTermInVocabulary(VocabularyConstants.VEP_IMPACT_VOCABULARY, attributes.get("IMPACT")).getEntity();
+			if (vepImpact == null) {
+				response.addErrorMessage("extra - IMPACT", ValidationConstants.INVALID_MESSAGE + " (" + attributes.get("IMPACT") + ")");
+			}
+		}
+		predictedVariantConsequence.setVepImpact(vepImpact);
+
+		List<SOTerm> vepConsequences = null;
+		if (StringUtils.isBlank(dto.getConsequence())) {
+			response.addErrorMessage("consequence", ValidationConstants.REQUIRED_MESSAGE);
+		} else {
+			vepConsequences = new ArrayList<>();
+			for (String consequence : dto.getConsequence().split(",")) {
+				SearchResponse<SOTerm> soTermResponse = soTermService.findByField("name", consequence);
+				SOTerm vepConsequence = null;
+				if (soTermResponse != null && soTermResponse.getSingleResult() != null
+					&& vocabularyTermService.getTermInVocabulary(VocabularyConstants.VEP_CONSEQUENCE_VOCABULARY, consequence).getEntity() != null) {
+					vepConsequence = soTermResponse.getSingleResult();
+				}
+				if (vepConsequence == null) {
+					response.addErrorMessage("consequence", ValidationConstants.INVALID_MESSAGE + " (" + consequence + ")");
+					break;
+				} else {
+					vepConsequences.add(vepConsequence);
+				}
+			}
+		}
+		predictedVariantConsequence.setVepConsequences(vepConsequences);
+
+		// Optional attributes: Map.get yields null when the key is absent.
+		predictedVariantConsequence.setHgvsCodingNomenclature(attributes.get("HGVSc"));
+		predictedVariantConsequence.setHgvsProteinNomenclature(attributes.get("HGVSp"));
+
+		String referenceCodon = null;
+		String variantCodon = null;
+		if (StringUtils.isNotBlank(dto.getCodons())) {
+			String[] refVarCodons = dto.getCodons().split("/");
+			if (refVarCodons.length != 2) {
+				response.addErrorMessage("codons", ValidationConstants.INVALID_MESSAGE + " (" + dto.getCodons() + ")");
+			} else {
+				referenceCodon = refVarCodons[0];
+				variantCodon = refVarCodons[1];
+			}
+		}
+		predictedVariantConsequence.setCodonReference(referenceCodon);
+		predictedVariantConsequence.setCodonVariant(variantCodon);
+
+		String referenceAminoAcids = null;
+		String variantAminoAcids = null;
+		if (StringUtils.isNotBlank(dto.getAminoAcids())) {
+			String[] refVarAminoAcids = dto.getAminoAcids().split("/");
+			if (refVarAminoAcids.length != 2) {
+				response.addErrorMessage("aminoAcids", ValidationConstants.INVALID_MESSAGE + " (" + dto.getAminoAcids() + ")");
+			} else {
+				referenceAminoAcids = refVarAminoAcids[0];
+				variantAminoAcids = refVarAminoAcids[1];
+			}
+		}
+		predictedVariantConsequence.setAminoAcidReference(referenceAminoAcids);
+		predictedVariantConsequence.setAminoAcidVariant(variantAminoAcids);
+
+		VocabularyTerm polyphenPrediction = null;
+		Float polyphenScore = null;
+		if (attributes.containsKey("PolyPhen")) {
+			Pair<VocabularyTerm, Float> polyphenResult = parsePathogenicityPredictionScore(attributes.get("PolyPhen"), VocabularyConstants.POLYPHEN_PREDICTION_VOCABULARY);
+			if (polyphenResult == null) {
+				response.addErrorMessage("extra - PolyPhen", ValidationConstants.INVALID_MESSAGE + " (" + attributes.get("PolyPhen") + ")");
+			} else {
+				polyphenPrediction = polyphenResult.getLeft();
+				polyphenScore = polyphenResult.getRight();
+			}
+		}
+		predictedVariantConsequence.setPolyphenPrediction(polyphenPrediction);
+		predictedVariantConsequence.setPolyphenScore(polyphenScore);
+
+		VocabularyTerm siftPrediction = null;
+		Float siftScore = null;
+		if (attributes.containsKey("SIFT")) {
+			Pair<VocabularyTerm, Float> siftResult = parsePathogenicityPredictionScore(attributes.get("SIFT"), VocabularyConstants.SIFT_PREDICTION_VOCABULARY);
+			if (siftResult == null) {
+				response.addErrorMessage("extra - SIFT", ValidationConstants.INVALID_MESSAGE + " (" + attributes.get("SIFT") + ")");
+			} else {
+				siftPrediction = siftResult.getLeft();
+				siftScore = siftResult.getRight();
+			}
+		}
+		predictedVariantConsequence.setSiftPrediction(siftPrediction);
+		predictedVariantConsequence.setSiftScore(siftScore);
+
+		Integer cdnaStart = null;
+		Integer cdnaEnd = null;
+		if (StringUtils.isNotBlank(dto.getCdnaPosition())) {
+			Pair<Integer, Integer> cdnaStartEnd = parseStartEnd(dto.getCdnaPosition());
+			if (cdnaStartEnd == null) {
+				response.addErrorMessage("cdnaPosition", ValidationConstants.INVALID_MESSAGE + " (" + dto.getCdnaPosition() + ")");
+			} else {
+				cdnaStart = cdnaStartEnd.getLeft();
+				cdnaEnd = cdnaStartEnd.getRight();
+			}
+		}
+		predictedVariantConsequence.setCalculatedCdnaStart(cdnaStart);
+		predictedVariantConsequence.setCalculatedCdnaEnd(cdnaEnd);
+
+		Integer cdsStart = null;
+		Integer cdsEnd = null;
+		if (StringUtils.isNotBlank(dto.getCdsPosition())) {
+			Pair<Integer, Integer> cdsStartEnd = parseStartEnd(dto.getCdsPosition());
+			if (cdsStartEnd == null) {
+				response.addErrorMessage("cdsPosition", ValidationConstants.INVALID_MESSAGE + " (" + dto.getCdsPosition() + ")");
+			} else {
+				cdsStart = cdsStartEnd.getLeft();
+				cdsEnd = cdsStartEnd.getRight();
+			}
+		}
+		predictedVariantConsequence.setCalculatedCdsStart(cdsStart);
+		predictedVariantConsequence.setCalculatedCdsEnd(cdsEnd);
+
+		Integer proteinStart = null;
+		Integer proteinEnd = null;
+		if (StringUtils.isNotBlank(dto.getProteinPosition())) {
+			Pair<Integer, Integer> proteinStartEnd = parseStartEnd(dto.getProteinPosition());
+			if (proteinStartEnd == null) {
+				response.addErrorMessage("proteinPosition", ValidationConstants.INVALID_MESSAGE + " (" + dto.getProteinPosition() + ")");
+			} else {
+				proteinStart = proteinStartEnd.getLeft();
+				proteinEnd = proteinStartEnd.getRight();
+			}
+		}
+		predictedVariantConsequence.setCalculatedProteinStart(proteinStart);
+		predictedVariantConsequence.setCalculatedProteinEnd(proteinEnd);
+
+		if (response.hasErrors()) {
+			throw new ObjectValidationException(dto, response.errorMessagesString());
+		}
+
+		predictedVariantConsequence = predictedVariantConsequenceDAO.persist(predictedVariantConsequence);
+
+		if (!isUpdate) {
+			if (variantLocation.getPredictedVariantConsequences() == null) {
+				variantLocation.setPredictedVariantConsequences(new ArrayList<>());
+			}
+			variantLocation.getPredictedVariantConsequences().add(predictedVariantConsequence);
+		}
+		cvglaDAO.persist(variantLocation);
+
+		return predictedVariantConsequence;
+	}
+
+	/** Collects the KEY=VALUE entries of the DTO's extra list into a map; entries without a value are skipped. */
+	private Map<String, String> getExtraAttributes(VepTxtDTO dto) {
+		Map<String, String> attributes = new HashMap<>();
+		if (CollectionUtils.isNotEmpty(dto.getExtra())) {
+			for (String keyValue : dto.getExtra()) {
+				// Split on the first '=' only, so a value that itself contains '=' is kept intact.
+				String[] parts = keyValue.split("=", 2);
+				if (parts.length == 2 && !parts[1].isEmpty()) {
+					attributes.put(parts[0], parts[1]);
+				}
+			}
+		}
+		return attributes;
+	}
+
+	/** Parses "prediction(score)"; returns null if malformed, not in the vocabulary, or the score is no float. */
+	private Pair<VocabularyTerm, Float> parsePathogenicityPredictionScore(String result, String vocabularyName) {
+		if (StringUtils.isBlank(result)) {
+			return null;
+		}
+
+		Matcher matcher = PATHOGENICITY_PREDICTION_RESULT.matcher(result);
+		if (!matcher.find()) {
+			return null;
+		}
+
+		VocabularyTerm consequence = vocabularyTermService.getTermInVocabulary(vocabularyName, matcher.group(1)).getEntity();
+		if (consequence == null) {
+			return null;
+		}
+
+		try {
+			return new ImmutablePair<>(consequence, Float.parseFloat(matcher.group(2)));
+		} catch (NumberFormatException e) {
+			// The pattern admits strings such as "1.2.3"; report them as invalid instead of throwing.
+			return null;
+		}
+	}
+
+	/**
+	 * Parses "N", "N-M", "?-M", "N-?" or "?" into a (start, end) pair; "?" means unknown and becomes null.
+	 * Returns null when the text is not a well-formed position.
+	 */
+	private Pair<Integer, Integer> parseStartEnd(String position) {
+		if (!POSITION_STRING.matcher(position).find()) {
+			return null;
+		}
+
+		// Limit -1 keeps empty parts, so "-", "5-" and "-5" are rejected below instead of being misread.
+		String[] positions = position.split("-", -1);
+		if (positions.length > 2) {
+			return null;
+		}
+
+		try {
+			Integer start = parsePositionPart(positions[0]);
+			Integer end = positions.length == 1 ? start : parsePositionPart(positions[1]);
+			return new ImmutablePair<>(start, end);
+		} catch (NumberFormatException e) {
+			return null;
+		}
+	}
+
+	/** Converts one side of a position: "?" becomes null, anything else must parse as an integer. */
+	private Integer parsePositionPart(String part) {
+		return Objects.equals("?", part) ? null : Integer.valueOf(part);
+	}
+}
diff --git a/src/main/resources/db/migration/v0.37.0.65__predictedvariantconsequence.sql b/src/main/resources/db/migration/v0.37.0.65__predictedvariantconsequence.sql
new file mode 100644
index 000000000..42eadf3ef
--- /dev/null
+++ b/src/main/resources/db/migration/v0.37.0.65__predictedvariantconsequence.sql
@@ -0,0 +1,178 @@
+CREATE TABLE predictedvariantconsequence (
+	id bigint PRIMARY KEY,
+	datecreated timestamp(6) with time zone,
+	dateupdated timestamp(6) with time zone,
+	dbdatecreated timestamp(6) with time zone,
+	dbdateupdated timestamp(6) with time zone,
+	internal boolean DEFAULT false NOT NULL,
+	obsolete boolean DEFAULT false NOT NULL,
+	createdby_id bigint,
+	updatedby_id bigint,
+	variantgenomiclocation_id bigint,
+	varianttranscript_id bigint,
+	vepimpact_id bigint,
+	polyphenprediction_id bigint,
+	polyphenscore real,
+	siftprediction_id bigint,
+	siftscore real,
+	aminoacidreference text,
+	aminoacidvariant text,
+	codonreference text,
+	codonvariant text,
+	calculatedcdnastart integer,
+	calculatedcdnaend integer,
+	calculatedcdsstart integer,
+	calculatedcdsend integer,
+	calculatedproteinstart integer,
+	calculatedproteinend integer,
+	hgvsproteinnomenclature text,
+	hgvscodingnomenclature text,
+	genelevelconsequence boolean DEFAULT false NOT NULL
+);
+
+CREATE SEQUENCE predictedvariantconsequence_seq
+	START WITH 1
+	INCREMENT BY 50
+	NO MINVALUE
+	NO MAXVALUE
+	CACHE 1;
+
+CREATE INDEX predictedvariantconsequence_varianttranscript_index ON predictedvariantconsequence USING btree (varianttranscript_id);
+CREATE INDEX predictedvariantconsequence_vepimpact_index ON predictedvariantconsequence USING btree (vepimpact_id);
+CREATE INDEX predictedvariantconsequence_polyphenprediction_index ON
predictedvariantconsequence USING btree (polyphenprediction_id); +CREATE INDEX predictedvariantconsequence_siftprediction_index ON predictedvariantconsequence USING btree (siftprediction_id); +CREATE INDEX predictedvariantconsequence_createdby_index ON predictedvariantconsequence USING btree (createdby_id); +CREATE INDEX predictedvariantconsequence_updatedby_index ON predictedvariantconsequence USING btree (updatedby_id); + +ALTER TABLE ONLY predictedvariantconsequence ADD CONSTRAINT predictedvariantconsequence_createdby_id_fk FOREIGN KEY (createdby_id) REFERENCES person(id); +ALTER TABLE ONLY predictedvariantconsequence ADD CONSTRAINT predictedvariantconsequence_updatedby_id_fk FOREIGN KEY (updatedby_id) REFERENCES person(id); +ALTER TABLE ONLY predictedvariantconsequence ADD CONSTRAINT predictedvariantconsequence_variantgenomiclocation_id_fk FOREIGN KEY (variantgenomiclocation_id) REFERENCES curatedvariantgenomiclocation(id); +ALTER TABLE ONLY predictedvariantconsequence ADD CONSTRAINT predictedvariantconsequence_varianttranscript_id_fk FOREIGN KEY (varianttranscript_id) REFERENCES transcript(id); +ALTER TABLE ONLY predictedvariantconsequence ADD CONSTRAINT predictedvariantconsequence_vepimpact_id_fk FOREIGN KEY (vepimpact_id) REFERENCES vocabularyterm(id); +ALTER TABLE ONLY predictedvariantconsequence ADD CONSTRAINT predictedvariantconsequence_polyphenprediction_id_fk FOREIGN KEY (polyphenprediction_id) REFERENCES vocabularyterm(id); +ALTER TABLE ONLY predictedvariantconsequence ADD CONSTRAINT predictedvariantconsequence_polyphenscore_id_fk FOREIGN KEY (siftprediction_id) REFERENCES vocabularyterm(id); + +CREATE TABLE predictedvariantconsequence_ontologyterm ( + predictedvariantconsequence_id bigint, + vepconsequences_id bigint +); + +CREATE INDEX predictedvariantconsequence_ontologyterm_pvc_index ON predictedvariantconsequence_ontologyterm USING btree (predictedvariantconsequence_id); +CREATE INDEX predictedvariantconsequence_ontologyterm_vc_index ON 
predictedvariantconsequence_ontologyterm USING btree (vepconsequences_id); + +ALTER TABLE ONLY predictedvariantconsequence_ontologyterm ADD CONSTRAINT predictedvariantconsequence_ontologyterm_pvc_id_fk FOREIGN KEY (predictedvariantconsequence_id) REFERENCES predictedvariantconsequence (id); +ALTER TABLE ONLY predictedvariantconsequence_ontologyterm ADD CONSTRAINT predictedvariantconsequence_ontologyterm_vc_id_fk FOREIGN KEY (vepconsequences_id) REFERENCES ontologyterm (id); + +INSERT INTO bulkloadgroup (id, name) VALUES (nextval('bulkloadgroup_seq'), 'File Management System (FMS) VEP Transcript Loads'); +INSERT INTO bulkload (id, backendbulkloadtype, name, bulkloadstatus, group_id) + SELECT nextval('bulkload_seq'), 'VEPTRANSCRIPT', 'FB VEP Transcript Load', 'STOPPED', id FROM bulkloadgroup WHERE name = 'File Management System (FMS) VEP Transcript Loads'; +INSERT INTO bulkload (id, backendbulkloadtype, name, bulkloadstatus, group_id) + SELECT nextval('bulkload_seq'), 'VEPTRANSCRIPT', 'MGI VEP Transcript Load', 'STOPPED', id FROM bulkloadgroup WHERE name = 'File Management System (FMS) VEP Transcript Loads'; +INSERT INTO bulkload (id, backendbulkloadtype, name, bulkloadstatus, group_id) + SELECT nextval('bulkload_seq'), 'VEPTRANSCRIPT', 'RGD VEP Transcript Load', 'STOPPED', id FROM bulkloadgroup WHERE name = 'File Management System (FMS) VEP Transcript Loads'; +INSERT INTO bulkload (id, backendbulkloadtype, name, bulkloadstatus, group_id) + SELECT nextval('bulkload_seq'), 'VEPTRANSCRIPT', 'WB VEP Transcript Load', 'STOPPED', id FROM bulkloadgroup WHERE name = 'File Management System (FMS) VEP Transcript Loads'; +INSERT INTO bulkload (id, backendbulkloadtype, name, bulkloadstatus, group_id) + SELECT nextval('bulkload_seq'), 'VEPTRANSCRIPT', 'ZFIN VEP Transcript Load', 'STOPPED', id FROM bulkloadgroup WHERE name = 'File Management System (FMS) VEP Transcript Loads'; +INSERT INTO bulkscheduledload (id, cronschedule, scheduleactive) + SELECT id, '0 0 20 ? 
* SUN-THU', false FROM bulkload WHERE backendbulkloadtype = 'VEPTRANSCRIPT'; +INSERT INTO bulkfmsload (id, fmsdatatype, fmsdatasubtype) + SELECT id, 'VEPTRANSCRIPT', 'FB' FROM bulkload WHERE name = 'FB VEP Transcript Load'; +INSERT INTO bulkfmsload (id, fmsdatatype, fmsdatasubtype) + SELECT id, 'VEPTRANSCRIPT', 'MGI' FROM bulkload WHERE name = 'MGI VEP Transcript Load'; +INSERT INTO bulkfmsload (id, fmsdatatype, fmsdatasubtype) + SELECT id, 'VEPTRANSCRIPT', 'RGD' FROM bulkload WHERE name = 'RGD VEP Transcript Load'; +INSERT INTO bulkfmsload (id, fmsdatatype, fmsdatasubtype) + SELECT id, 'VEPTRANSCRIPT', 'WB' FROM bulkload WHERE name = 'WB VEP Transcript Load'; +INSERT INTO bulkfmsload (id, fmsdatatype, fmsdatasubtype) + SELECT id, 'VEPTRANSCRIPT', 'ZFIN' FROM bulkload WHERE name = 'ZFIN VEP Transcript Load'; + +INSERT INTO bulkloadgroup (id, name) VALUES (nextval('bulkloadgroup_seq'), 'File Management System (FMS) VEP Gene Loads'); +INSERT INTO bulkload (id, backendbulkloadtype, name, bulkloadstatus, group_id) + SELECT nextval('bulkload_seq'), 'VEPGENE', 'FB VEP Gene Load', 'STOPPED', id FROM bulkloadgroup WHERE name = 'File Management System (FMS) VEP Gene Loads'; +INSERT INTO bulkload (id, backendbulkloadtype, name, bulkloadstatus, group_id) + SELECT nextval('bulkload_seq'), 'VEPGENE', 'MGI VEP Gene Load', 'STOPPED', id FROM bulkloadgroup WHERE name = 'File Management System (FMS) VEP Gene Loads'; +INSERT INTO bulkload (id, backendbulkloadtype, name, bulkloadstatus, group_id) + SELECT nextval('bulkload_seq'), 'VEPGENE', 'RGD VEP Gene Load', 'STOPPED', id FROM bulkloadgroup WHERE name = 'File Management System (FMS) VEP Gene Loads'; +INSERT INTO bulkload (id, backendbulkloadtype, name, bulkloadstatus, group_id) + SELECT nextval('bulkload_seq'), 'VEPGENE', 'WB VEP Gene Load', 'STOPPED', id FROM bulkloadgroup WHERE name = 'File Management System (FMS) VEP Gene Loads'; +INSERT INTO bulkload (id, backendbulkloadtype, name, bulkloadstatus, group_id) + SELECT 
nextval('bulkload_seq'), 'VEPGENE', 'ZFIN VEP Gene Load', 'STOPPED', id FROM bulkloadgroup WHERE name = 'File Management System (FMS) VEP Gene Loads'; +INSERT INTO bulkscheduledload (id, cronschedule, scheduleactive) + SELECT id, '0 0 23 ? * SUN-THU', false FROM bulkload WHERE backendbulkloadtype = 'VEPGENE'; +INSERT INTO bulkfmsload (id, fmsdatatype, fmsdatasubtype) + SELECT id, 'VEPGENE', 'FB' FROM bulkload WHERE name = 'FB VEP Gene Load'; +INSERT INTO bulkfmsload (id, fmsdatatype, fmsdatasubtype) + SELECT id, 'VEPGENE', 'MGI' FROM bulkload WHERE name = 'MGI VEP Gene Load'; +INSERT INTO bulkfmsload (id, fmsdatatype, fmsdatasubtype) + SELECT id, 'VEPGENE', 'RGD' FROM bulkload WHERE name = 'RGD VEP Gene Load'; +INSERT INTO bulkfmsload (id, fmsdatatype, fmsdatasubtype) + SELECT id, 'VEPGENE', 'WB' FROM bulkload WHERE name = 'WB VEP Gene Load'; +INSERT INTO bulkfmsload (id, fmsdatatype, fmsdatasubtype) + SELECT id, 'VEPGENE', 'ZFIN' FROM bulkload WHERE name = 'ZFIN VEP Gene Load'; + +INSERT INTO vocabulary (id, name, vocabularydescription, vocabularylabel) + VALUES (nextval('vocabulary_seq'), 'SIFT Prediction', 'SIFT prediction of impact of missense variation', 'sift_prediction'); +INSERT INTO vocabulary (id, name, vocabularydescription, vocabularylabel) + VALUES (nextval('vocabulary_seq'), 'PolyPhen-2 Prediction', 'Polyphen-2 prediction of impact of missense variation', 'polyphen_prediction'); +INSERT INTO vocabulary (id, name, vocabularydescription, vocabularylabel) + VALUES (nextval('vocabulary_seq'), 'VEP Impact', 'Ensembl VEP predicted impact rating of variant', 'vep_impact'); +INSERT INTO vocabulary (id, name, vocabularydescription, vocabularylabel) + VALUES (nextval('vocabulary_seq'), 'VEP Consequence', 'Names of SOTerms used to report predicted consequence of variant by Ensembl VEP', 'vep_consequence'); + +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'deleterious', id FROM vocabulary WHERE vocabularylabel = 
'sift_prediction'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'deleterious_low_confidence', id FROM vocabulary WHERE vocabularylabel = 'sift_prediction'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'tolerated', id FROM vocabulary WHERE vocabularylabel = 'sift_prediction'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'tolerated_low_confidence', id FROM vocabulary WHERE vocabularylabel = 'sift_prediction'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'no_prediction', id FROM vocabulary WHERE vocabularylabel = 'sift_prediction'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'benign', id FROM vocabulary WHERE vocabularylabel = 'polyphen_prediction'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'possibly_damaging', id FROM vocabulary WHERE vocabularylabel = 'polyphen_prediction'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'probably_damaging', id FROM vocabulary WHERE vocabularylabel = 'polyphen_prediction'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'unknown', id FROM vocabulary WHERE vocabularylabel = 'polyphen_prediction'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'no_prediction', id FROM vocabulary WHERE vocabularylabel = 'polyphen_prediction'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'HIGH', id FROM vocabulary WHERE vocabularylabel = 'vep_impact'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'MODERATE', id FROM vocabulary WHERE vocabularylabel = 'vep_impact'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT 
nextval('vocabularyterm_seq'), 'LOW', id FROM vocabulary WHERE vocabularylabel = 'vep_impact'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'MODIFIER', id FROM vocabulary WHERE vocabularylabel = 'vep_impact'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'transcript_ablation', id FROM vocabulary WHERE vocabularylabel = 'vep_consequence'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'splice_acceptor_variant', id FROM vocabulary WHERE vocabularylabel = 'vep_consequence'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'splice_donor_variant', id FROM vocabulary WHERE vocabularylabel = 'vep_consequence'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'stop_gained', id FROM vocabulary WHERE vocabularylabel = 'vep_consequence'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'frameshift_variant', id FROM vocabulary WHERE vocabularylabel = 'vep_consequence'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'stop_lost', id FROM vocabulary WHERE vocabularylabel = 'vep_consequence'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'start_lost', id FROM vocabulary WHERE vocabularylabel = 'vep_consequence'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'transcript_amplification', id FROM vocabulary WHERE vocabularylabel = 'vep_consequence'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'feature_elongation', id FROM vocabulary WHERE vocabularylabel = 'vep_consequence'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'feature_truncation', id FROM vocabulary WHERE vocabularylabel = 
'vep_consequence'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'inframe_insertion', id FROM vocabulary WHERE vocabularylabel = 'vep_consequence'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'inframe_deletion', id FROM vocabulary WHERE vocabularylabel = 'vep_consequence'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'missense_variant', id FROM vocabulary WHERE vocabularylabel = 'vep_consequence'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'protein_altering_variant', id FROM vocabulary WHERE vocabularylabel = 'vep_consequence'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'splice_donor_5th_base_variant', id FROM vocabulary WHERE vocabularylabel = 'vep_consequence'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'splice_region_variant', id FROM vocabulary WHERE vocabularylabel = 'vep_consequence'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'splice_donor_region_variant', id FROM vocabulary WHERE vocabularylabel = 'vep_consequence'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'splice_polypyrimidine_tract_variant', id FROM vocabulary WHERE vocabularylabel = 'vep_consequence'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'incomplete_terminal_codon_variant', id FROM vocabulary WHERE vocabularylabel = 'vep_consequence'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'start_retained_variant', id FROM vocabulary WHERE vocabularylabel = 'vep_consequence'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'stop_retained_variant', id FROM vocabulary WHERE vocabularylabel = 
'vep_consequence'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'synonymous_variant', id FROM vocabulary WHERE vocabularylabel = 'vep_consequence'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'coding_sequence_variant', id FROM vocabulary WHERE vocabularylabel = 'vep_consequence'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'mature_miRNA_variant', id FROM vocabulary WHERE vocabularylabel = 'vep_consequence'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), '5_prime_UTR_variant', id FROM vocabulary WHERE vocabularylabel = 'vep_consequence'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), '3_prime_UTR_variant', id FROM vocabulary WHERE vocabularylabel = 'vep_consequence'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'non_coding_transcript_exon_variant', id FROM vocabulary WHERE vocabularylabel = 'vep_consequence'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'intron_variant', id FROM vocabulary WHERE vocabularylabel = 'vep_consequence'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'NMD_transcript_variant', id FROM vocabulary WHERE vocabularylabel = 'vep_consequence'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'non_coding_transcript_variant', id FROM vocabulary WHERE vocabularylabel = 'vep_consequence'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'coding_transcript_variant', id FROM vocabulary WHERE vocabularylabel = 'vep_consequence'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'upstream_gene_variant', id FROM vocabulary WHERE vocabularylabel = 
'vep_consequence'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'downstream_gene_variant', id FROM vocabulary WHERE vocabularylabel = 'vep_consequence'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'TFBS_ablation', id FROM vocabulary WHERE vocabularylabel = 'vep_consequence'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'TFBS_amplification', id FROM vocabulary WHERE vocabularylabel = 'vep_consequence'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'TF_binding_site_variant', id FROM vocabulary WHERE vocabularylabel = 'vep_consequence'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'regulatory_region_ablation', id FROM vocabulary WHERE vocabularylabel = 'vep_consequence'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'regulatory_region_amplification', id FROM vocabulary WHERE vocabularylabel = 'vep_consequence'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'regulatory_region_variant', id FROM vocabulary WHERE vocabularylabel = 'vep_consequence'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'intergenic_variant', id FROM vocabulary WHERE vocabularylabel = 'vep_consequence'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'sequence_variant', id FROM vocabulary WHERE vocabularylabel = 'vep_consequence'; + diff --git a/src/test/java/org/alliancegenome/curation_api/VepFmsITCase.java b/src/test/java/org/alliancegenome/curation_api/VepFmsITCase.java new file mode 100644 index 000000000..f971cfebf --- /dev/null +++ b/src/test/java/org/alliancegenome/curation_api/VepFmsITCase.java @@ -0,0 +1,199 @@ +package org.alliancegenome.curation_api; + +import static 
org.hamcrest.Matchers.hasSize; +import static org.hamcrest.Matchers.is; +import static org.hamcrest.Matchers.not; +import static org.hamcrest.Matchers.hasKey; + +import org.alliancegenome.curation_api.base.BaseITCase; +import org.alliancegenome.curation_api.resources.TestContainerResource; +import org.apache.commons.codec.digest.DigestUtils; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.MethodOrderer; +import org.junit.jupiter.api.Order; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.TestInstance; +import org.junit.jupiter.api.TestMethodOrder; + +import io.quarkus.test.common.QuarkusTestResource; +import io.quarkus.test.junit.QuarkusIntegrationTest; +import io.restassured.RestAssured; +import io.restassured.config.HttpClientConfig; +import io.restassured.config.RestAssuredConfig; + +@QuarkusIntegrationTest +@QuarkusTestResource(TestContainerResource.Initializer.class) +@TestMethodOrder(MethodOrderer.OrderAnnotation.class) +@TestInstance(TestInstance.Lifecycle.PER_CLASS) +@DisplayName("611 - VEP data bulk upload - FMS") +@Order(611) +public class VepFmsITCase extends BaseITCase { + + // These tests require: GeneBulkUploadITCase and VocabularyTermITCase + + @BeforeEach + public void init() { + RestAssured.config = RestAssuredConfig.config() + .httpClient(HttpClientConfig.httpClientConfig() + .setParam("http.socket.timeout", 100000) + .setParam("http.connection.timeout", 100000)); + } + + private final String vepFmsTestFilePath = "src/test/resources/bulk/fms/11_vep/"; + private final String vepTranscriptFmsBulkPostEndpoint = "/api/predictedvariantconsequence/bulk/WB/transcriptConsequenceFile"; + private final String vepGeneFmsBulkPostEndpoint = "/api/predictedvariantconsequence/bulk/WB/geneConsequenceFile"; + private final String variantHgvs = "NC_003279.8:g.1A>T"; + private final String variantId = DigestUtils.md5Hex(variantHgvs); + private final String variantGetEndpoint = 
"/api/variant/"; + + private void loadRequiredEntities() throws Exception { + createSoTerm("SO:0001574", "splice_acceptor_variant", false); + createSoTerm("SO:0001630", "splice_donor_5th_base_variant", false); + } + + @Test + @Order(1) + public void vepTranscriptFmsBulkUpload() throws Exception { + loadRequiredEntities(); + + checkSuccessfulBulkLoad(vepTranscriptFmsBulkPostEndpoint, vepFmsTestFilePath + "AF_01_all_fields.json"); + + RestAssured.given(). + when(). + get(variantGetEndpoint + variantId). + then(). + statusCode(200). + body("entity.modInternalId", is(variantId)). + body("entity.curatedVariantGenomicLocations[0].hgvs", is("NC_003279.8:g.1A>T")). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences", hasSize(1)). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].variantTranscript.modInternalId", is("WB:Y74C9A.2a.1")). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].vepImpact.name", is("MODERATE")). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].vepConsequences", hasSize(1)). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].vepConsequences[0].name", is("splice_acceptor_variant")). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].polyphenPrediction.name", is("probably_damaging")). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].polyphenScore", is(0.993F)). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].siftPrediction.name", is("tolerated")). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].siftScore", is(0F)). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].aminoAcidReference", is("T")). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].aminoAcidVariant", is("I")). 
+ body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].codonReference", is("aCc")). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].codonVariant", is("aTc")). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].calculatedCdnaStart", is(3)). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].calculatedCdnaEnd", is(800)). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].calculatedCdsStart", is(1)). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].calculatedCdsEnd", is(600)). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].calculatedProteinStart", is(246)). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0]", not(hasKey("calculatedProteinEnd"))). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].hgvsProteinNomenclature", is("WB:CE49439:p.Thr10Ile")). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].hgvsCodingNomenclature", is("WB:Y74C9A.2a.1:c.29T>I")). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].geneLevelConsequence", is(false)); + } + + @Test + @Order(2) + public void vepGeneBulkUpload() throws Exception { + checkSuccessfulBulkLoad(vepGeneFmsBulkPostEndpoint, vepFmsTestFilePath + "AF_01_all_fields.json"); + + RestAssured.given(). + when(). + get(variantGetEndpoint + variantId). + then(). + statusCode(200). + body("entity.modInternalId", is(variantId)). + body("entity.curatedVariantGenomicLocations[0].hgvs", is("NC_003279.8:g.1A>T")). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].variantTranscript.modInternalId", is("WB:Y74C9A.2a.1")). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].vepImpact.name", is("MODERATE")). 
+ body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].vepConsequences", hasSize(1)). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].vepConsequences[0].name", is("splice_acceptor_variant")). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].polyphenPrediction.name", is("probably_damaging")). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].polyphenScore", is(0.993F)). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].siftPrediction.name", is("tolerated")). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].siftScore", is(0F)). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].aminoAcidReference", is("T")). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].aminoAcidVariant", is("I")). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].codonReference", is("aCc")). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].codonVariant", is("aTc")). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].calculatedCdnaStart", is(3)). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].calculatedCdnaEnd", is(800)). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].calculatedCdsStart", is(1)). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].calculatedCdsEnd", is(600)). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].calculatedProteinStart", is(246)). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0]", not(hasKey("calculatedProteinEnd"))). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].hgvsProteinNomenclature", is("WB:CE49439:p.Thr10Ile")). 
+ body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].hgvsCodingNomenclature", is("WB:Y74C9A.2a.1:c.29T>I")). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].geneLevelConsequence", is(true)); + } + + @Test + @Order(3) + public void vepTranscriptMissingRequiredFields() throws Exception { + checkFailedBulkLoad(vepTranscriptFmsBulkPostEndpoint, vepFmsTestFilePath + "MR_01_no_uploaded_variation.json"); + checkFailedBulkLoad(vepTranscriptFmsBulkPostEndpoint, vepFmsTestFilePath + "MR_02_no_feature.json"); + checkFailedBulkLoad(vepTranscriptFmsBulkPostEndpoint, vepFmsTestFilePath + "MR_03_no_consequence.json"); + checkFailedBulkLoad(vepTranscriptFmsBulkPostEndpoint, vepFmsTestFilePath + "MR_04_no_impact.json"); + } + + @Test + @Order(4) + public void vepTranscriptEmptyRequiredFields() throws Exception { + checkFailedBulkLoad(vepTranscriptFmsBulkPostEndpoint, vepFmsTestFilePath + "ER_01_empty_uploaded_variation.json"); + checkFailedBulkLoad(vepTranscriptFmsBulkPostEndpoint, vepFmsTestFilePath + "ER_02_empty_feature.json"); + checkFailedBulkLoad(vepTranscriptFmsBulkPostEndpoint, vepFmsTestFilePath + "ER_03_empty_consequence.json"); + checkFailedBulkLoad(vepTranscriptFmsBulkPostEndpoint, vepFmsTestFilePath + "ER_04_empty_impact.json"); + } + + @Test + @Order(5) + public void vepTranscriptInvalidFields() throws Exception { + checkFailedBulkLoad(vepTranscriptFmsBulkPostEndpoint, vepFmsTestFilePath + "IV_01_invalid_uploaded_variation.json"); + checkFailedBulkLoad(vepTranscriptFmsBulkPostEndpoint, vepFmsTestFilePath + "IV_02_invalid_feature.json"); + checkFailedBulkLoad(vepTranscriptFmsBulkPostEndpoint, vepFmsTestFilePath + "IV_03_invalid_consequence.json"); + checkFailedBulkLoad(vepTranscriptFmsBulkPostEndpoint, vepFmsTestFilePath + "IV_04_invalid_cdna_position.json"); + checkFailedBulkLoad(vepTranscriptFmsBulkPostEndpoint, vepFmsTestFilePath + "IV_05_invalid_cds_position.json"); + 
checkFailedBulkLoad(vepTranscriptFmsBulkPostEndpoint, vepFmsTestFilePath + "IV_06_invalid_protein_position.json"); + checkFailedBulkLoad(vepTranscriptFmsBulkPostEndpoint, vepFmsTestFilePath + "IV_07_invalid_amino_acids.json"); + checkFailedBulkLoad(vepTranscriptFmsBulkPostEndpoint, vepFmsTestFilePath + "IV_08_invalid_codons.json"); + checkFailedBulkLoad(vepTranscriptFmsBulkPostEndpoint, vepFmsTestFilePath + "IV_09_invalid_impact.json"); + checkFailedBulkLoad(vepTranscriptFmsBulkPostEndpoint, vepFmsTestFilePath + "IV_10_invalid_polyphen.json"); + checkFailedBulkLoad(vepTranscriptFmsBulkPostEndpoint, vepFmsTestFilePath + "IV_11_invalid_sift.json"); + checkFailedBulkLoad(vepGeneFmsBulkPostEndpoint, vepFmsTestFilePath + "IV_12_invalid_variant_transcript_pair.json"); + } + + @Test + @Order(6) + public void vepTranscriptUpdate() throws Exception { + checkSuccessfulBulkLoad(vepTranscriptFmsBulkPostEndpoint, vepFmsTestFilePath + "UD_01_update.json"); + + RestAssured.given(). + when(). + get(variantGetEndpoint + variantId). + then(). + statusCode(200). + body("entity.modInternalId", is(variantId)). + body("entity.curatedVariantGenomicLocations[0].hgvs", is("NC_003279.8:g.1A>T")). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences", hasSize(1)). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].variantTranscript.modInternalId", is("WB:Y74C9A.2a.1")). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].vepImpact.name", is("MODIFIER")). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].vepConsequences", hasSize(1)). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].vepConsequences[0].name", is("splice_donor_5th_base_variant")). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].polyphenPrediction.name", is("possibly_damaging")). 
+ body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].polyphenScore", is(0.8F)). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].siftPrediction.name", is("deleterious_low_confidence")). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].siftScore", is(0.767F)). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].aminoAcidReference", is("M")). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].aminoAcidVariant", is("N")). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].codonReference", is("aCt")). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].codonVariant", is("aTt")). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].calculatedCdnaStart", is(2)). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].calculatedCdnaEnd", is(900)). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].calculatedCdsStart", is(3)). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].calculatedCdsEnd", is(500)). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].calculatedProteinStart", is(247)). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].calculatedProteinEnd", is(250)). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].hgvsProteinNomenclature", is("WB:CE49439:p.Met10Neo")). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].hgvsCodingNomenclature", is("WB:Y74C9A.2a.1:c.29M>N")). 
+ body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].geneLevelConsequence", is(true)); + } + +} diff --git a/src/test/resources/bulk/fms/11_vep/AF_01_all_fields.json b/src/test/resources/bulk/fms/11_vep/AF_01_all_fields.json new file mode 100644 index 000000000..f68f032b4 --- /dev/null +++ b/src/test/resources/bulk/fms/11_vep/AF_01_all_fields.json @@ -0,0 +1,22 @@ +[ + { + "uploadedVariation": "NC_003279.8:g.1A>T", + "location": "I:1-1000", + "feature": "WB:Y74C9A.2a.1", + "featureType": "Transcript", + "consequence": "splice_acceptor_variant", + "cdnaPosition": "3-800", + "cdsPosition": "1-600", + "proteinPosition": "246-?", + "aminoAcids": "T/I", + "codons": "aCc/aTc", + "extra": [ + "IMPACT=MODERATE", + "HGVSc=WB:Y74C9A.2a.1:c.29T>I", + "HGVSp=WB:CE49439:p.Thr10Ile", + "PolyPhen=probably_damaging(0.993)", + "SIFT=tolerated(0)" + ] + } +] + diff --git a/src/test/resources/bulk/fms/11_vep/ER_01_empty_uploaded_variation.json b/src/test/resources/bulk/fms/11_vep/ER_01_empty_uploaded_variation.json new file mode 100644 index 000000000..a50d2f5bf --- /dev/null +++ b/src/test/resources/bulk/fms/11_vep/ER_01_empty_uploaded_variation.json @@ -0,0 +1,22 @@ +[ + { + "uploadedVariation": "", + "location": "I:1-1000", + "feature": "WB:Y74C9A.2a.1", + "featureType": "Transcript", + "consequence": "splice_acceptor_variant", + "cdnaPosition": "3-800", + "cdsPosition": "1-600", + "proteinPosition": "246-?", + "aminoAcids": "T/I", + "codons": "aCc/aTc", + "extra": [ + "IMPACT=MODERATE", + "HGVSc=WB:Y74C9A.2a.1:c.29T>I", + "HGVSp=WB:CE49439:p.Thr10Ile", + "PolyPhen=probably_damaging(0.993)", + "SIFT=tolerated(0)" + ] + } +] + diff --git a/src/test/resources/bulk/fms/11_vep/ER_02_empty_feature.json b/src/test/resources/bulk/fms/11_vep/ER_02_empty_feature.json new file mode 100644 index 000000000..623ee5311 --- /dev/null +++ b/src/test/resources/bulk/fms/11_vep/ER_02_empty_feature.json @@ -0,0 +1,22 @@ +[ + { + "uploadedVariation": "NC_003279.8:g.1A>T", + 
"location": "I:1-1000", + "feature": "", + "featureType": "Transcript", + "consequence": "splice_acceptor_variant", + "cdnaPosition": "3-800", + "cdsPosition": "1-600", + "proteinPosition": "246-?", + "aminoAcids": "T/I", + "codons": "aCc/aTc", + "extra": [ + "IMPACT=MODERATE", + "HGVSc=WB:Y74C9A.2a.1:c.29T>I", + "HGVSp=WB:CE49439:p.Thr10Ile", + "PolyPhen=probably_damaging(0.993)", + "SIFT=tolerated(0)" + ] + } +] + diff --git a/src/test/resources/bulk/fms/11_vep/ER_03_empty_consequence.json b/src/test/resources/bulk/fms/11_vep/ER_03_empty_consequence.json new file mode 100644 index 000000000..56ff42450 --- /dev/null +++ b/src/test/resources/bulk/fms/11_vep/ER_03_empty_consequence.json @@ -0,0 +1,22 @@ +[ + { + "uploadedVariation": "NC_003279.8:g.1A>T", + "location": "I:1-1000", + "feature": "WB:Y74C9A.2a.1", + "featureType": "Transcript", + "consequence": "", + "cdnaPosition": "3-800", + "cdsPosition": "1-600", + "proteinPosition": "246-?", + "aminoAcids": "T/I", + "codons": "aCc/aTc", + "extra": [ + "IMPACT=MODERATE", + "HGVSc=WB:Y74C9A.2a.1:c.29T>I", + "HGVSp=WB:CE49439:p.Thr10Ile", + "PolyPhen=probably_damaging(0.993)", + "SIFT=tolerated(0)" + ] + } +] + diff --git a/src/test/resources/bulk/fms/11_vep/ER_04_empty_impact.json b/src/test/resources/bulk/fms/11_vep/ER_04_empty_impact.json new file mode 100644 index 000000000..5a4ff658b --- /dev/null +++ b/src/test/resources/bulk/fms/11_vep/ER_04_empty_impact.json @@ -0,0 +1,22 @@ +[ + { + "uploadedVariation": "NC_003279.8:g.1A>T", + "location": "I:1-1000", + "feature": "WB:Y74C9A.2a.1", + "featureType": "Transcript", + "consequence": "splice_acceptor_variant", + "cdnaPosition": "3-800", + "cdsPosition": "1-600", + "proteinPosition": "246-?", + "aminoAcids": "T/I", + "codons": "aCc/aTc", + "extra": [ + "IMPACT=", + "HGVSc=WB:Y74C9A.2a.1:c.29T>I", + "HGVSp=WB:CE49439:p.Thr10Ile", + "PolyPhen=probably_damaging(0.993)", + "SIFT=tolerated(0)" + ] + } +] + diff --git 
a/src/test/resources/bulk/fms/11_vep/IV_01_invalid_uploaded_variation.json b/src/test/resources/bulk/fms/11_vep/IV_01_invalid_uploaded_variation.json new file mode 100644 index 000000000..7bb5766d4 --- /dev/null +++ b/src/test/resources/bulk/fms/11_vep/IV_01_invalid_uploaded_variation.json @@ -0,0 +1,22 @@ +[ + { + "uploadedVariation": "Invalid", + "location": "I:1-1000", + "feature": "WB:Y74C9A.2a.1", + "featureType": "Transcript", + "consequence": "splice_acceptor_variant", + "cdnaPosition": "3-800", + "cdsPosition": "1-600", + "proteinPosition": "246-?", + "aminoAcids": "T/I", + "codons": "aCc/aTc", + "extra": [ + "IMPACT=MODERATE", + "HGVSc=WB:Y74C9A.2a.1:c.29T>I", + "HGVSp=WB:CE49439:p.Thr10Ile", + "PolyPhen=probably_damaging(0.993)", + "SIFT=tolerated(0)" + ] + } +] + diff --git a/src/test/resources/bulk/fms/11_vep/IV_02_invalid_feature.json b/src/test/resources/bulk/fms/11_vep/IV_02_invalid_feature.json new file mode 100644 index 000000000..010eb35b1 --- /dev/null +++ b/src/test/resources/bulk/fms/11_vep/IV_02_invalid_feature.json @@ -0,0 +1,22 @@ +[ + { + "uploadedVariation": "NC_003279.8:g.1A>T", + "location": "I:1-1000", + "feature": "WB:Invalid", + "featureType": "Transcript", + "consequence": "splice_acceptor_variant", + "cdnaPosition": "3-800", + "cdsPosition": "1-600", + "proteinPosition": "246-?", + "aminoAcids": "T/I", + "codons": "aCc/aTc", + "extra": [ + "IMPACT=MODERATE", + "HGVSc=WB:Y74C9A.2a.1:c.29T>I", + "HGVSp=WB:CE49439:p.Thr10Ile", + "PolyPhen=probably_damaging(0.993)", + "SIFT=tolerated(0)" + ] + } +] + diff --git a/src/test/resources/bulk/fms/11_vep/IV_03_invalid_consequence.json b/src/test/resources/bulk/fms/11_vep/IV_03_invalid_consequence.json new file mode 100644 index 000000000..1984520a5 --- /dev/null +++ b/src/test/resources/bulk/fms/11_vep/IV_03_invalid_consequence.json @@ -0,0 +1,22 @@ +[ + { + "uploadedVariation": "NC_003279.8:g.1A>T", + "location": "I:1-1000", + "feature": "WB:Y74C9A.2a.1", + "featureType": "Transcript", + 
"consequence": "invalid", + "cdnaPosition": "3-800", + "cdsPosition": "1-600", + "proteinPosition": "246-?", + "aminoAcids": "T/I", + "codons": "aCc/aTc", + "extra": [ + "IMPACT=MODERATE", + "HGVSc=WB:Y74C9A.2a.1:c.29T>I", + "HGVSp=WB:CE49439:p.Thr10Ile", + "PolyPhen=probably_damaging(0.993)", + "SIFT=tolerated(0)" + ] + } +] + diff --git a/src/test/resources/bulk/fms/11_vep/IV_04_invalid_cdna_position.json b/src/test/resources/bulk/fms/11_vep/IV_04_invalid_cdna_position.json new file mode 100644 index 000000000..e8b70e196 --- /dev/null +++ b/src/test/resources/bulk/fms/11_vep/IV_04_invalid_cdna_position.json @@ -0,0 +1,22 @@ +[ + { + "uploadedVariation": "NC_003279.8:g.1A>T", + "location": "I:1-1000", + "feature": "WB:Y74C9A.2a.1", + "featureType": "Transcript", + "consequence": "splice_acceptor_variant", + "cdnaPosition": "3 - 800", + "cdsPosition": "1-600", + "proteinPosition": "246-?", + "aminoAcids": "T/I", + "codons": "aCc/aTc", + "extra": [ + "IMPACT=MODERATE", + "HGVSc=WB:Y74C9A.2a.1:c.29T>I", + "HGVSp=WB:CE49439:p.Thr10Ile", + "PolyPhen=probably_damaging(0.993)", + "SIFT=tolerated(0)" + ] + } +] + diff --git a/src/test/resources/bulk/fms/11_vep/IV_05_invalid_cds_position.json b/src/test/resources/bulk/fms/11_vep/IV_05_invalid_cds_position.json new file mode 100644 index 000000000..697fdc926 --- /dev/null +++ b/src/test/resources/bulk/fms/11_vep/IV_05_invalid_cds_position.json @@ -0,0 +1,22 @@ +[ + { + "uploadedVariation": "NC_003279.8:g.1A>T", + "location": "I:1-1000", + "feature": "WB:Y74C9A.2a.1", + "featureType": "Transcript", + "consequence": "splice_acceptor_variant", + "cdnaPosition": "3-800", + "cdsPosition": "1 to 600", + "proteinPosition": "246-?", + "aminoAcids": "T/I", + "codons": "aCc/aTc", + "extra": [ + "IMPACT=MODERATE", + "HGVSc=WB:Y74C9A.2a.1:c.29T>I", + "HGVSp=WB:CE49439:p.Thr10Ile", + "PolyPhen=probably_damaging(0.993)", + "SIFT=tolerated(0)" + ] + } +] + diff --git a/src/test/resources/bulk/fms/11_vep/IV_06_invalid_protein_position.json 
b/src/test/resources/bulk/fms/11_vep/IV_06_invalid_protein_position.json new file mode 100644 index 000000000..fbd4db078 --- /dev/null +++ b/src/test/resources/bulk/fms/11_vep/IV_06_invalid_protein_position.json @@ -0,0 +1,22 @@ +[ + { + "uploadedVariation": "NC_003279.8:g.1A>T", + "location": "I:1-1000", + "feature": "WB:Y74C9A.2a.1", + "featureType": "Transcript", + "consequence": "splice_acceptor_variant", + "cdnaPosition": "3-800", + "cdsPosition": "1-600", + "proteinPosition": "unknown", + "aminoAcids": "T/I", + "codons": "aCc/aTc", + "extra": [ + "IMPACT=MODERATE", + "HGVSc=WB:Y74C9A.2a.1:c.29T>I", + "HGVSp=WB:CE49439:p.Thr10Ile", + "PolyPhen=probably_damaging(0.993)", + "SIFT=tolerated(0)" + ] + } +] + diff --git a/src/test/resources/bulk/fms/11_vep/IV_07_invalid_amino_acids.json b/src/test/resources/bulk/fms/11_vep/IV_07_invalid_amino_acids.json new file mode 100644 index 000000000..bfa4a2564 --- /dev/null +++ b/src/test/resources/bulk/fms/11_vep/IV_07_invalid_amino_acids.json @@ -0,0 +1,22 @@ +[ + { + "uploadedVariation": "NC_003279.8:g.1A>T", + "location": "I:1-1000", + "feature": "WB:Y74C9A.2a.1", + "featureType": "Transcript", + "consequence": "splice_acceptor_variant", + "cdnaPosition": "3-800", + "cdsPosition": "1-600", + "proteinPosition": "246-?", + "aminoAcids": "TI", + "codons": "aCc/aTc", + "extra": [ + "IMPACT=MODERATE", + "HGVSc=WB:Y74C9A.2a.1:c.29T>I", + "HGVSp=WB:CE49439:p.Thr10Ile", + "PolyPhen=probably_damaging(0.993)", + "SIFT=tolerated(0)" + ] + } +] + diff --git a/src/test/resources/bulk/fms/11_vep/IV_08_invalid_codons.json b/src/test/resources/bulk/fms/11_vep/IV_08_invalid_codons.json new file mode 100644 index 000000000..c80c0bdd4 --- /dev/null +++ b/src/test/resources/bulk/fms/11_vep/IV_08_invalid_codons.json @@ -0,0 +1,22 @@ +[ + { + "uploadedVariation": "NC_003279.8:g.1A>T", + "location": "I:1-1000", + "feature": "WB:Y74C9A.2a.1", + "featureType": "Transcript", + "consequence": "splice_acceptor_variant", + "cdnaPosition": "3-800", + 
"cdsPosition": "1-600", + "proteinPosition": "246-?", + "aminoAcids": "T/I", + "codons": "aCcaTc", + "extra": [ + "IMPACT=MODERATE", + "HGVSc=WB:Y74C9A.2a.1:c.29T>I", + "HGVSp=WB:CE49439:p.Thr10Ile", + "PolyPhen=probably_damaging(0.993)", + "SIFT=tolerated(0)" + ] + } +] + diff --git a/src/test/resources/bulk/fms/11_vep/IV_09_invalid_impact.json b/src/test/resources/bulk/fms/11_vep/IV_09_invalid_impact.json new file mode 100644 index 000000000..854d16908 --- /dev/null +++ b/src/test/resources/bulk/fms/11_vep/IV_09_invalid_impact.json @@ -0,0 +1,22 @@ +[ + { + "uploadedVariation": "NC_003279.8:g.1A>T", + "location": "I:1-1000", + "feature": "WB:Y74C9A.2a.1", + "featureType": "Transcript", + "consequence": "splice_acceptor_variant", + "cdnaPosition": "3-800", + "cdsPosition": "1-600", + "proteinPosition": "246-?", + "aminoAcids": "T/I", + "codons": "aCc/aTc", + "extra": [ + "IMPACT=INVALID", + "HGVSc=WB:Y74C9A.2a.1:c.29T>I", + "HGVSp=WB:CE49439:p.Thr10Ile", + "PolyPhen=probably_damaging(0.993)", + "SIFT=tolerated(0)" + ] + } +] + diff --git a/src/test/resources/bulk/fms/11_vep/IV_10_invalid_polyphen.json b/src/test/resources/bulk/fms/11_vep/IV_10_invalid_polyphen.json new file mode 100644 index 000000000..76638b5ed --- /dev/null +++ b/src/test/resources/bulk/fms/11_vep/IV_10_invalid_polyphen.json @@ -0,0 +1,22 @@ +[ + { + "uploadedVariation": "NC_003279.8:g.1A>T", + "location": "I:1-1000", + "feature": "WB:Y74C9A.2a.1", + "featureType": "Transcript", + "consequence": "splice_acceptor_variant", + "cdnaPosition": "3-800", + "cdsPosition": "1-600", + "proteinPosition": "246-?", + "aminoAcids": "T/I", + "codons": "aCc/aTc", + "extra": [ + "IMPACT=MODERATE", + "HGVSc=WB:Y74C9A.2a.1:c.29T>I", + "HGVSp=WB:CE49439:p.Thr10Ile", + "PolyPhen=invalid(0.993)", + "SIFT=tolerated(0)" + ] + } +] + diff --git a/src/test/resources/bulk/fms/11_vep/IV_11_invalid_sift.json b/src/test/resources/bulk/fms/11_vep/IV_11_invalid_sift.json new file mode 100644 index 000000000..d61d901d9 --- 
/dev/null +++ b/src/test/resources/bulk/fms/11_vep/IV_11_invalid_sift.json @@ -0,0 +1,22 @@ +[ + { + "uploadedVariation": "NC_003279.8:g.1A>T", + "location": "I:1-1000", + "feature": "WB:Y74C9A.2a.1", + "featureType": "Transcript", + "consequence": "splice_acceptor_variant", + "cdnaPosition": "3-800", + "cdsPosition": "1-600", + "proteinPosition": "246-?", + "aminoAcids": "T/I", + "codons": "aCc/aTc", + "extra": [ + "IMPACT=MODERATE", + "HGVSc=WB:Y74C9A.2a.1:c.29T>I", + "HGVSp=WB:CE49439:p.Thr10Ile", + "PolyPhen=probably_damaging(0.993)", + "SIFT=invalid(0)" + ] + } +] + diff --git a/src/test/resources/bulk/fms/11_vep/IV_12_invalid_variant_transcript_pair.json b/src/test/resources/bulk/fms/11_vep/IV_12_invalid_variant_transcript_pair.json new file mode 100644 index 000000000..6728068eb --- /dev/null +++ b/src/test/resources/bulk/fms/11_vep/IV_12_invalid_variant_transcript_pair.json @@ -0,0 +1,22 @@ +[ + { + "uploadedVariation": "VARIANTTEST:Variant0001", + "location": "I:1-1000", + "feature": "WB:Y74C9A.2a.1", + "featureType": "Transcript", + "consequence": "splice_acceptor_variant", + "cdnaPosition": "3-800", + "cdsPosition": "1-600", + "proteinPosition": "246-?", + "aminoAcids": "T/I", + "codons": "aCc/aTc", + "extra": [ + "IMPACT=MODERATE", + "HGVSc=WB:Y74C9A.2a.1:c.29T>I", + "HGVSp=WB:CE49439:p.Thr10Ile", + "PolyPhen=probably_damaging(0.993)", + "SIFT=tolerated(0)" + ] + } +] + diff --git a/src/test/resources/bulk/fms/11_vep/MR_01_no_uploaded_variation.json b/src/test/resources/bulk/fms/11_vep/MR_01_no_uploaded_variation.json new file mode 100644 index 000000000..20b35b4f2 --- /dev/null +++ b/src/test/resources/bulk/fms/11_vep/MR_01_no_uploaded_variation.json @@ -0,0 +1,21 @@ +[ + { + "location": "I:1-1000", + "feature": "WB:Y74C9A.2a.1", + "featureType": "Transcript", + "consequence": "splice_acceptor_variant", + "cdnaPosition": "3-800", + "cdsPosition": "1-600", + "proteinPosition": "246-?", + "aminoAcids": "T/I", + "codons": "aCc/aTc", + "extra": [ + 
"IMPACT=MODERATE", + "HGVSc=WB:Y74C9A.2a.1:c.29T>I", + "HGVSp=WB:CE49439:p.Thr10Ile", + "PolyPhen=probably_damaging(0.993)", + "SIFT=tolerated(0)" + ] + } +] + diff --git a/src/test/resources/bulk/fms/11_vep/MR_02_no_feature.json b/src/test/resources/bulk/fms/11_vep/MR_02_no_feature.json new file mode 100644 index 000000000..6bc3d351f --- /dev/null +++ b/src/test/resources/bulk/fms/11_vep/MR_02_no_feature.json @@ -0,0 +1,21 @@ +[ + { + "uploadedVariation": "NC_003279.8:g.1A>T", + "location": "I:1-1000", + "featureType": "Transcript", + "consequence": "splice_acceptor_variant", + "cdnaPosition": "3-800", + "cdsPosition": "1-600", + "proteinPosition": "246-?", + "aminoAcids": "T/I", + "codons": "aCc/aTc", + "extra": [ + "IMPACT=MODERATE", + "HGVSc=WB:Y74C9A.2a.1:c.29T>I", + "HGVSp=WB:CE49439:p.Thr10Ile", + "PolyPhen=probably_damaging(0.993)", + "SIFT=tolerated(0)" + ] + } +] + diff --git a/src/test/resources/bulk/fms/11_vep/MR_03_no_consequence.json b/src/test/resources/bulk/fms/11_vep/MR_03_no_consequence.json new file mode 100644 index 000000000..512fd10b1 --- /dev/null +++ b/src/test/resources/bulk/fms/11_vep/MR_03_no_consequence.json @@ -0,0 +1,21 @@ +[ + { + "uploadedVariation": "NC_003279.8:g.1A>T", + "location": "I:1-1000", + "feature": "WB:Y74C9A.2a.1", + "featureType": "Transcript", + "cdnaPosition": "3-800", + "cdsPosition": "1-600", + "proteinPosition": "246-?", + "aminoAcids": "T/I", + "codons": "aCc/aTc", + "extra": [ + "IMPACT=MODERATE", + "HGVSc=WB:Y74C9A.2a.1:c.29T>I", + "HGVSp=WB:CE49439:p.Thr10Ile", + "PolyPhen=probably_damaging(0.993)", + "SIFT=tolerated(0)" + ] + } +] + diff --git a/src/test/resources/bulk/fms/11_vep/MR_04_no_impact.json b/src/test/resources/bulk/fms/11_vep/MR_04_no_impact.json new file mode 100644 index 000000000..89430906d --- /dev/null +++ b/src/test/resources/bulk/fms/11_vep/MR_04_no_impact.json @@ -0,0 +1,21 @@ +[ + { + "uploadedVariation": "NC_003279.8:g.1A>T", + "location": "I:1-1000", + "feature": "WB:Y74C9A.2a.1", + 
"featureType": "Transcript", + "consequence": "splice_acceptor_variant", + "cdnaPosition": "3-800", + "cdsPosition": "1-600", + "proteinPosition": "246-?", + "aminoAcids": "T/I", + "codons": "aCc/aTc", + "extra": [ + "HGVSc=WB:Y74C9A.2a.1:c.29T>I", + "HGVSp=WB:CE49439:p.Thr10Ile", + "PolyPhen=probably_damaging(0.993)", + "SIFT=tolerated(0)" + ] + } +] + diff --git a/src/test/resources/bulk/fms/11_vep/UD_01_update.json b/src/test/resources/bulk/fms/11_vep/UD_01_update.json new file mode 100644 index 000000000..1fb48758c --- /dev/null +++ b/src/test/resources/bulk/fms/11_vep/UD_01_update.json @@ -0,0 +1,22 @@ +[ + { + "uploadedVariation": "NC_003279.8:g.1A>T", + "location": "I:2-1000", + "feature": "WB:Y74C9A.2a.1", + "featureType": "Transcript", + "consequence": "splice_donor_5th_base_variant", + "cdnaPosition": "2-900", + "cdsPosition": "3-500", + "proteinPosition": "247-250", + "aminoAcids": "M/N", + "codons": "aCt/aTt", + "extra": [ + "IMPACT=MODIFIER", + "HGVSc=WB:Y74C9A.2a.1:c.29M>N", + "HGVSp=WB:CE49439:p.Met10Neo", + "PolyPhen=possibly_damaging(0.8)", + "SIFT=deleterious_low_confidence(0.767)" + ] + } +] + From 5f21273401078b5cafa768c2eba22988c0d88627 Mon Sep 17 00:00:00 2001 From: markquintontulloch Date: Wed, 30 Oct 2024 10:23:25 +0000 Subject: [PATCH 009/118] Remove unused import --- .../services/validation/dto/fms/VepGeneFmsDTOValidator.java | 1 - 1 file changed, 1 deletion(-) diff --git a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/VepGeneFmsDTOValidator.java b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/VepGeneFmsDTOValidator.java index aa9e5893b..fa97aa8bc 100644 --- a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/VepGeneFmsDTOValidator.java +++ b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/VepGeneFmsDTOValidator.java @@ -2,7 +2,6 @@ import org.alliancegenome.curation_api.constants.ValidationConstants; import 
org.alliancegenome.curation_api.dao.PredictedVariantConsequenceDAO; -import org.alliancegenome.curation_api.dao.associations.variantAssociations.CuratedVariantGenomicLocationAssociationDAO; import org.alliancegenome.curation_api.exceptions.ObjectValidationException; import org.alliancegenome.curation_api.exceptions.ValidationException; import org.alliancegenome.curation_api.model.entities.PredictedVariantConsequence; From 4280be286807a358f97726c4a9c36028f8671f0e Mon Sep 17 00:00:00 2001 From: markquintontulloch Date: Wed, 30 Oct 2024 10:26:21 +0000 Subject: [PATCH 010/118] Remove more unused imports --- .../services/PredictedVariantConsequenceService.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/main/java/org/alliancegenome/curation_api/services/PredictedVariantConsequenceService.java b/src/main/java/org/alliancegenome/curation_api/services/PredictedVariantConsequenceService.java index 58e461235..684a29752 100644 --- a/src/main/java/org/alliancegenome/curation_api/services/PredictedVariantConsequenceService.java +++ b/src/main/java/org/alliancegenome/curation_api/services/PredictedVariantConsequenceService.java @@ -9,13 +9,11 @@ import org.alliancegenome.curation_api.constants.EntityFieldConstants; import org.alliancegenome.curation_api.dao.PredictedVariantConsequenceDAO; import org.alliancegenome.curation_api.enums.BackendBulkDataProvider; -import org.alliancegenome.curation_api.exceptions.ApiErrorException; import org.alliancegenome.curation_api.exceptions.ValidationException; import org.alliancegenome.curation_api.interfaces.crud.BaseUpsertServiceInterface; import org.alliancegenome.curation_api.model.entities.Person; import org.alliancegenome.curation_api.model.entities.PredictedVariantConsequence; import org.alliancegenome.curation_api.model.ingest.dto.fms.VepTxtDTO; -import org.alliancegenome.curation_api.response.ObjectResponse; import org.alliancegenome.curation_api.services.base.BaseEntityCrudService; import 
org.alliancegenome.curation_api.services.validation.dto.fms.VepGeneFmsDTOValidator; import org.alliancegenome.curation_api.services.validation.dto.fms.VepTranscriptFmsDTOValidator; From 420ae0a14bb6b6eae5bf4b7a27c898603e65cb47 Mon Sep 17 00:00:00 2001 From: VarunReddy1111 Date: Thu, 31 Oct 2024 11:48:29 -0500 Subject: [PATCH 011/118] Changes in validation of HTPDataset Sample --- ...DatasetSampleAnnotationCrudController.java | 35 +++++++ .../curation_api/dao/AnatomicalSiteDAO.java | 13 +++ ...nDatasetSampleAnnotationCrudInterface.java | 29 ++++++ .../model/entities/BioSampleAge.java | 2 +- ...ressionDatasetSampleAnnotationService.java | 3 +- ...neExpressionAnnotationFmsDTOValidator.java | 2 +- ...atasetSampleAnnotationFmsDTOValidator.java | 91 ++++++++++++------- 7 files changed, 139 insertions(+), 36 deletions(-) create mode 100644 src/main/java/org/alliancegenome/curation_api/controllers/crud/HTPExpressionDatasetSampleAnnotationCrudController.java create mode 100644 src/main/java/org/alliancegenome/curation_api/dao/AnatomicalSiteDAO.java create mode 100644 src/main/java/org/alliancegenome/curation_api/interfaces/crud/HTPExpressionDatasetSampleAnnotationCrudInterface.java diff --git a/src/main/java/org/alliancegenome/curation_api/controllers/crud/HTPExpressionDatasetSampleAnnotationCrudController.java b/src/main/java/org/alliancegenome/curation_api/controllers/crud/HTPExpressionDatasetSampleAnnotationCrudController.java new file mode 100644 index 000000000..a75bdefba --- /dev/null +++ b/src/main/java/org/alliancegenome/curation_api/controllers/crud/HTPExpressionDatasetSampleAnnotationCrudController.java @@ -0,0 +1,35 @@ +package org.alliancegenome.curation_api.controllers.crud; + +import org.alliancegenome.curation_api.controllers.base.BaseEntityCrudController; +import org.alliancegenome.curation_api.dao.HTPExpressionDatasetSampleAnnotationDAO; +import org.alliancegenome.curation_api.interfaces.crud.HTPExpressionDatasetSampleAnnotationCrudInterface; +import 
org.alliancegenome.curation_api.jobs.executors.HTPExpressionDatasetSampleAnnotationExecutor; +import org.alliancegenome.curation_api.model.entities.HTPExpressionDatasetSampleAnnotation; +import org.alliancegenome.curation_api.model.ingest.dto.fms.HTPExpressionDatasetSampleAnnotationIngestFmsDTO; +import org.alliancegenome.curation_api.response.APIResponse; +import org.alliancegenome.curation_api.services.HTPExpressionDatasetSampleAnnotationService; + +import jakarta.annotation.PostConstruct; +import jakarta.enterprise.context.RequestScoped; +import jakarta.inject.Inject; + +@RequestScoped +public class HTPExpressionDatasetSampleAnnotationCrudController extends BaseEntityCrudController implements HTPExpressionDatasetSampleAnnotationCrudInterface { + + @Inject + HTPExpressionDatasetSampleAnnotationService htpExpressionDatasetSampleAnnotationService; + @Inject + HTPExpressionDatasetSampleAnnotationExecutor htpExpressionDatasetSampleAnnotationExecutor; + + @Override + @PostConstruct + public void init() { + setService(htpExpressionDatasetSampleAnnotationService); + } + + @Override + public APIResponse updateHTPExpressionDatasetSampleAnnotation(String dataProvider, HTPExpressionDatasetSampleAnnotationIngestFmsDTO htpDatasetSampleData) { + return htpExpressionDatasetSampleAnnotationExecutor.runLoadApi(htpExpressionDatasetSampleAnnotationService, dataProvider, htpDatasetSampleData.getData()); + } + +} \ No newline at end of file diff --git a/src/main/java/org/alliancegenome/curation_api/dao/AnatomicalSiteDAO.java b/src/main/java/org/alliancegenome/curation_api/dao/AnatomicalSiteDAO.java new file mode 100644 index 000000000..1147a7532 --- /dev/null +++ b/src/main/java/org/alliancegenome/curation_api/dao/AnatomicalSiteDAO.java @@ -0,0 +1,13 @@ +package org.alliancegenome.curation_api.dao; + +import org.alliancegenome.curation_api.dao.base.BaseSQLDAO; +import org.alliancegenome.curation_api.model.entities.AnatomicalSite; + +import 
jakarta.enterprise.context.ApplicationScoped; + +@ApplicationScoped +public class AnatomicalSiteDAO extends BaseSQLDAO { + protected AnatomicalSiteDAO() { + super(AnatomicalSite.class); + } +} diff --git a/src/main/java/org/alliancegenome/curation_api/interfaces/crud/HTPExpressionDatasetSampleAnnotationCrudInterface.java b/src/main/java/org/alliancegenome/curation_api/interfaces/crud/HTPExpressionDatasetSampleAnnotationCrudInterface.java new file mode 100644 index 000000000..1b4187ec1 --- /dev/null +++ b/src/main/java/org/alliancegenome/curation_api/interfaces/crud/HTPExpressionDatasetSampleAnnotationCrudInterface.java @@ -0,0 +1,29 @@ +package org.alliancegenome.curation_api.interfaces.crud; + +import org.alliancegenome.curation_api.interfaces.base.BaseIdCrudInterface; +import org.alliancegenome.curation_api.model.entities.HTPExpressionDatasetSampleAnnotation; +import org.alliancegenome.curation_api.model.ingest.dto.fms.HTPExpressionDatasetSampleAnnotationIngestFmsDTO; +import org.alliancegenome.curation_api.response.APIResponse; +import org.alliancegenome.curation_api.view.View; +import org.eclipse.microprofile.openapi.annotations.tags.Tag; + +import com.fasterxml.jackson.annotation.JsonView; + +import jakarta.ws.rs.Consumes; +import jakarta.ws.rs.POST; +import jakarta.ws.rs.Path; +import jakarta.ws.rs.PathParam; +import jakarta.ws.rs.Produces; +import jakarta.ws.rs.core.MediaType; + +@Path("/htpexpressiondatasetsampleannotation") +@Tag(name = "CRUD - HTP Expression Dataset Sample Annotation") +@Produces(MediaType.APPLICATION_JSON) +@Consumes(MediaType.APPLICATION_JSON) +public interface HTPExpressionDatasetSampleAnnotationCrudInterface extends BaseIdCrudInterface { + + @POST + @Path("/bulk/{dataProvider}/htpexpressiondatasetsampleannotationfile") + @JsonView(View.FieldsAndLists.class) + APIResponse updateHTPExpressionDatasetSampleAnnotation(@PathParam("dataProvider") String dataProvider, HTPExpressionDatasetSampleAnnotationIngestFmsDTO htpDatasetSampleData); +} 
\ No newline at end of file diff --git a/src/main/java/org/alliancegenome/curation_api/model/entities/BioSampleAge.java b/src/main/java/org/alliancegenome/curation_api/model/entities/BioSampleAge.java index d359e8e83..ff37f7e7e 100644 --- a/src/main/java/org/alliancegenome/curation_api/model/entities/BioSampleAge.java +++ b/src/main/java/org/alliancegenome/curation_api/model/entities/BioSampleAge.java @@ -34,7 +34,7 @@ public class BioSampleAge extends AuditedObject { @IndexedEmbedded(includePaths = {"age", "age_keyword"}) @IndexingDependency(reindexOnUpdate = ReindexOnUpdate.SHALLOW) - @ManyToOne + @ManyToOne(cascade = CascadeType.ALL) @JsonView({ View.FieldsOnly.class }) private TemporalContext stage; diff --git a/src/main/java/org/alliancegenome/curation_api/services/HTPExpressionDatasetSampleAnnotationService.java b/src/main/java/org/alliancegenome/curation_api/services/HTPExpressionDatasetSampleAnnotationService.java index 31358cdca..6b0097203 100644 --- a/src/main/java/org/alliancegenome/curation_api/services/HTPExpressionDatasetSampleAnnotationService.java +++ b/src/main/java/org/alliancegenome/curation_api/services/HTPExpressionDatasetSampleAnnotationService.java @@ -18,6 +18,7 @@ import jakarta.annotation.PostConstruct; import jakarta.enterprise.context.RequestScoped; import jakarta.inject.Inject; +import jakarta.transaction.Transactional; @RequestScoped public class HTPExpressionDatasetSampleAnnotationService extends BaseEntityCrudService implements BaseUpsertServiceInterface { @@ -30,7 +31,7 @@ public class HTPExpressionDatasetSampleAnnotationService extends BaseEntityCrudS protected void init() { setSQLDao(htpExpressionDatasetSampleAnnotationDAO); } - + @Transactional public HTPExpressionDatasetSampleAnnotation upsert(HTPExpressionDatasetSampleAnnotationFmsDTO htpExpressionDatasetSampleAnnotationData, BackendBulkDataProvider backendBulkDataProvider) throws ValidationException { return 
htpExpressionDatasetSampleAnnotationFmsDtoValidator.validateHTPExpressionDatasetSampleAnnotationFmsDTO(htpExpressionDatasetSampleAnnotationData, backendBulkDataProvider); } diff --git a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/GeneExpressionAnnotationFmsDTOValidator.java b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/GeneExpressionAnnotationFmsDTOValidator.java index 628280680..392106309 100644 --- a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/GeneExpressionAnnotationFmsDTOValidator.java +++ b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/GeneExpressionAnnotationFmsDTOValidator.java @@ -316,7 +316,7 @@ private ObjectResponse validateEvidence(GeneExpressionFmsDTO geneExpr return response; } - private TemporalContext updateTemporalContext(ObjectResponse temporalContextObjectResponse, TemporalContext whenExpressed) { + protected TemporalContext updateTemporalContext(ObjectResponse temporalContextObjectResponse, TemporalContext whenExpressed) { TemporalContext temporalContext = temporalContextObjectResponse.getEntity(); TemporalContext temporalContextDB = whenExpressed; if (temporalContextDB == null) { diff --git a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/HTPExpressionDatasetSampleAnnotationFmsDTOValidator.java b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/HTPExpressionDatasetSampleAnnotationFmsDTOValidator.java index f347ed59e..9b051443e 100644 --- a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/HTPExpressionDatasetSampleAnnotationFmsDTOValidator.java +++ b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/HTPExpressionDatasetSampleAnnotationFmsDTOValidator.java @@ -7,6 +7,7 @@ import org.alliancegenome.curation_api.constants.ValidationConstants; import org.alliancegenome.curation_api.constants.VocabularyConstants; +import 
org.alliancegenome.curation_api.dao.AnatomicalSiteDAO; import org.alliancegenome.curation_api.dao.HTPExpressionDatasetSampleAnnotationDAO; import org.alliancegenome.curation_api.enums.BackendBulkDataProvider; import org.alliancegenome.curation_api.exceptions.ObjectValidationException; @@ -25,6 +26,7 @@ import org.alliancegenome.curation_api.model.entities.ontology.MMOTerm; import org.alliancegenome.curation_api.model.entities.ontology.NCBITaxonTerm; import org.alliancegenome.curation_api.model.entities.ontology.OBITerm; +import org.alliancegenome.curation_api.model.ingest.dto.fms.BioSampleGenomicInformationFmsDTO; import org.alliancegenome.curation_api.model.ingest.dto.fms.HTPExpressionDatasetSampleAnnotationFmsDTO; import org.alliancegenome.curation_api.model.ingest.dto.fms.WhereExpressedFmsDTO; import org.alliancegenome.curation_api.response.ObjectResponse; @@ -37,10 +39,8 @@ import org.alliancegenome.curation_api.services.ontology.MmoTermService; import org.alliancegenome.curation_api.services.ontology.NcbiTaxonTermService; import org.alliancegenome.curation_api.services.ontology.ObiTermService; -import org.alliancegenome.curation_api.services.ontology.StageTermService; import org.apache.commons.collections.CollectionUtils; import org.apache.commons.lang.StringUtils; - import jakarta.enterprise.context.RequestScoped; import jakarta.inject.Inject; import jakarta.transaction.Transactional; @@ -56,10 +56,10 @@ public class HTPExpressionDatasetSampleAnnotationFmsDTOValidator { @Inject DataProviderService dataProviderService; @Inject ObiTermService obiTermService; @Inject MmoTermService mmoTermService; - @Inject StageTermService stageTermService; @Inject AlleleService alleleService; @Inject AffectedGenomicModelService affectedGenomicModelService; @Inject NcbiTaxonTermService ncbiTaxonTermService; + @Inject AnatomicalSiteDAO anatomicalSiteDAO; @Transactional public HTPExpressionDatasetSampleAnnotation 
validateHTPExpressionDatasetSampleAnnotationFmsDTO(HTPExpressionDatasetSampleAnnotationFmsDTO dto, BackendBulkDataProvider backendBulkDataProvider) throws ValidationException { @@ -110,28 +110,27 @@ public HTPExpressionDatasetSampleAnnotation validateHTPExpressionDatasetSampleAn } if (dto.getSampleAge() != null) { + if (htpSampleAnnotation.getHtpExpressionSampleAge() == null) { + htpSampleAnnotation.setHtpExpressionSampleAge(new BioSampleAge()); + } ObjectResponse temporalContextObjectResponse = geneExpressionAnnotationFmsDTOValidator.validateTemporalContext(dto.getSampleAge().getStage()); if (temporalContextObjectResponse.hasErrors()) { - htpSampleAnnotationResponse.addErrorMessage("BioSampleAge", temporalContextObjectResponse.errorMessagesString()); + htpSampleAnnotationResponse.addErrorMessage("Sample Age - Stage", temporalContextObjectResponse.errorMessagesString()); } else { - if (htpSampleAnnotation.getHtpExpressionSampleAge() == null) { - htpSampleAnnotation.setHtpExpressionSampleAge(new BioSampleAge()); - htpSampleAnnotation.getHtpExpressionSampleAge().setStage(new TemporalContext()); - } - TemporalContext temporalContext = temporalContextObjectResponse.getEntity(); - TemporalContext temporalContextDB = htpSampleAnnotation.getHtpExpressionSampleAge().getStage(); - if (temporalContextDB == null) { - temporalContextDB = new TemporalContext(); - } - htpSampleAnnotation.getHtpExpressionSampleAge().setWhenExpressedStageName(dto.getSampleAge().getStage().getStageName()); - htpSampleAnnotation.getHtpExpressionSampleAge().setStage(temporalContextDB); - htpSampleAnnotation.getHtpExpressionSampleAge().getStage().setDevelopmentalStageStart(temporalContext.getDevelopmentalStageStart()); - htpSampleAnnotation.getHtpExpressionSampleAge().getStage().setStageUberonSlimTerms(temporalContext.getStageUberonSlimTerms()); + TemporalContext temporalContext = geneExpressionAnnotationFmsDTOValidator.updateTemporalContext(temporalContextObjectResponse, 
htpSampleAnnotation.getHtpExpressionSampleAge().getStage()); htpSampleAnnotation.getHtpExpressionSampleAge().setAge(dto.getSampleAge().getAge()); + htpSampleAnnotation.getHtpExpressionSampleAge().setWhenExpressedStageName(dto.getSampleAge().getStage().getStageName()); + htpSampleAnnotation.getHtpExpressionSampleAge().setStage(temporalContext); } } + List idsToRemove = new ArrayList<>(); if (CollectionUtils.isNotEmpty(dto.getSampleLocations())) { + if(CollectionUtils.isNotEmpty(htpSampleAnnotation.getHtpExpressionSampleLocations())) { + for (AnatomicalSite anatomicalSite : htpSampleAnnotation.getHtpExpressionSampleLocations()) { + idsToRemove.add(anatomicalSite.getId()); + } + } List htpSampleLocations = new ArrayList<>(); for (WhereExpressedFmsDTO whereExpressedDTO : dto.getSampleLocations()) { ObjectResponse anatomicalSiteObjectResponse = geneExpressionAnnotationFmsDTOValidator.validateAnatomicalSite(whereExpressedDTO); @@ -147,23 +146,24 @@ public HTPExpressionDatasetSampleAnnotation validateHTPExpressionDatasetSampleAn if (dto.getGenomicInformation() != null) { if (htpSampleAnnotation.getGenomicInformation() == null) { htpSampleAnnotation.setGenomicInformation(new BioSampleGenomicInformation()); - } - if (StringUtils.isNotEmpty(dto.getGenomicInformation().getBiosampleId())) { - String identifierString = dto.getGenomicInformation().getBiosampleId(); - Allele allele = alleleService.findByIdentifierString(identifierString); - if (allele == null) { - AffectedGenomicModel agm = affectedGenomicModelService.findByIdentifierString(identifierString); - if (agm == null) { - htpSampleAnnotationResponse.addErrorMessage("GenomicInformation", ValidationConstants.INVALID_MESSAGE + " (" + identifierString + ")"); - } else { - htpSampleAnnotation.getGenomicInformation().setBioSampleAgm(agm); - VocabularyTerm agmType = vocabularyTermService.getTermInVocabularyTermSet(VocabularyConstants.AGM_SUBTYPE_VOCABULARY, dto.getGenomicInformation().getIdType()).getEntity(); - if (agmType != 
null) { - htpSampleAnnotation.getGenomicInformation().setBioSampleAgmType(agmType); - } + if (StringUtils.isNotEmpty(dto.getGenomicInformation().getBiosampleId())) { + validateGenomicInformation(dto.getGenomicInformation(), htpSampleAnnotation, htpSampleAnnotationResponse); + } else { + htpSampleAnnotationResponse.addErrorMessage("GenomicInformation - BioSampleId", ValidationConstants.REQUIRED_MESSAGE); + } + } else { + if (StringUtils.isNotEmpty(dto.getGenomicInformation().getBiosampleId())) { + String identifierString = null; + if (htpSampleAnnotation.getGenomicInformation().getBioSampleAgm() != null) { + identifierString = htpSampleAnnotation.getGenomicInformation().getBioSampleAgm().getIdentifier(); + } else if (htpSampleAnnotation.getGenomicInformation().getBioSampleAllele() != null) { + identifierString = htpSampleAnnotation.getGenomicInformation().getBioSampleAllele().getIdentifier(); + } + if (!identifierString.equals(dto.getGenomicInformation().getBiosampleId())) { + validateGenomicInformation(dto.getGenomicInformation(), htpSampleAnnotation, htpSampleAnnotationResponse); } } else { - htpSampleAnnotation.getGenomicInformation().setBioSampleAllele(allele); + htpSampleAnnotationResponse.addErrorMessage("GenomicInformation - BioSampleId", ValidationConstants.REQUIRED_MESSAGE); } } } else { @@ -252,6 +252,31 @@ public HTPExpressionDatasetSampleAnnotation validateHTPExpressionDatasetSampleAn throw new ObjectValidationException(dto, htpSampleAnnotationResponse.errorMessagesString()); } - return htpExpressionDatasetSampleAnnotationDAO.persist(htpSampleAnnotation); + HTPExpressionDatasetSampleAnnotation htp = htpExpressionDatasetSampleAnnotationDAO.persist(htpSampleAnnotation); + for (Long id : idsToRemove) { + anatomicalSiteDAO.remove(id); + } + return htp; + } + + protected void validateGenomicInformation(BioSampleGenomicInformationFmsDTO dto, HTPExpressionDatasetSampleAnnotation htpSampleAnnotation, ObjectResponse htpSampleAnnotationResponse) { + if 
(StringUtils.isNotEmpty(dto.getBiosampleId())) { + String identifierString = dto.getBiosampleId(); + Allele allele = alleleService.findByIdentifierString(identifierString); + if (allele != null) { + htpSampleAnnotation.getGenomicInformation().setBioSampleAllele(allele); + } else { + AffectedGenomicModel agm = affectedGenomicModelService.findByIdentifierString(identifierString); + if (agm == null) { + htpSampleAnnotationResponse.addErrorMessage("GenomicInformation - BioSampleId", ValidationConstants.INVALID_MESSAGE + " (" + identifierString + ")"); + } else { + htpSampleAnnotation.getGenomicInformation().setBioSampleAgm(agm); + VocabularyTerm agmType = vocabularyTermService.getTermInVocabularyTermSet(VocabularyConstants.AGM_SUBTYPE_VOCABULARY, dto.getIdType()).getEntity(); + if (agmType != null) { + htpSampleAnnotation.getGenomicInformation().setBioSampleAgmType(agmType); + } + } + } + } } } From 118f52ee38cd8217dbf4c7ff543c3ee05a4564a4 Mon Sep 17 00:00:00 2001 From: VarunReddy1111 Date: Thu, 31 Oct 2024 11:52:26 -0500 Subject: [PATCH 012/118] Checkstyle fix --- .../HTPExpressionDatasetSampleAnnotationFmsDTOValidator.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/HTPExpressionDatasetSampleAnnotationFmsDTOValidator.java b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/HTPExpressionDatasetSampleAnnotationFmsDTOValidator.java index 9b051443e..aa9bee2d5 100644 --- a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/HTPExpressionDatasetSampleAnnotationFmsDTOValidator.java +++ b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/HTPExpressionDatasetSampleAnnotationFmsDTOValidator.java @@ -126,7 +126,7 @@ public HTPExpressionDatasetSampleAnnotation validateHTPExpressionDatasetSampleAn List idsToRemove = new ArrayList<>(); if (CollectionUtils.isNotEmpty(dto.getSampleLocations())) { - 
if(CollectionUtils.isNotEmpty(htpSampleAnnotation.getHtpExpressionSampleLocations())) { + if (CollectionUtils.isNotEmpty(htpSampleAnnotation.getHtpExpressionSampleLocations())) { for (AnatomicalSite anatomicalSite : htpSampleAnnotation.getHtpExpressionSampleLocations()) { idsToRemove.add(anatomicalSite.getId()); } From 9d1ca42d46129fe2aedd3d0738fe6edd6722c3aa Mon Sep 17 00:00:00 2001 From: VarunReddy1111 Date: Thu, 31 Oct 2024 15:39:22 -0500 Subject: [PATCH 013/118] Changes to validation --- ...atasetSampleAnnotationFmsDTOValidator.java | 50 +++++++++++-------- 1 file changed, 30 insertions(+), 20 deletions(-) diff --git a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/HTPExpressionDatasetSampleAnnotationFmsDTOValidator.java b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/HTPExpressionDatasetSampleAnnotationFmsDTOValidator.java index aa9bee2d5..9401e9c69 100644 --- a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/HTPExpressionDatasetSampleAnnotationFmsDTOValidator.java +++ b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/HTPExpressionDatasetSampleAnnotationFmsDTOValidator.java @@ -66,6 +66,12 @@ public HTPExpressionDatasetSampleAnnotation validateHTPExpressionDatasetSampleAn ObjectResponse htpSampleAnnotationResponse = new ObjectResponse<>(); HTPExpressionDatasetSampleAnnotation htpSampleAnnotation; + Boolean sampleExists = ((dto.getSampleId() != null && StringUtils.isNotEmpty(dto.getSampleId().getPrimaryId())) || StringUtils.isNotEmpty(dto.getSampleTitle())); + + if(!sampleExists) { + htpSampleAnnotationResponse.addErrorMessage("SampleId or Sample Title", ValidationConstants.REQUIRED_MESSAGE); + } + if (StringUtils.isNotBlank(dto.getSampleId().getPrimaryId())) { String curie = dto.getSampleId().getPrimaryId(); ExternalDataBaseEntity externalDbEntity = externalDataBaseEntityFmsDtoValidator.validateExternalDataBaseEntityFmsDTO(dto.getSampleId()); @@ -85,7 
+91,6 @@ public HTPExpressionDatasetSampleAnnotation validateHTPExpressionDatasetSampleAn htpSampleAnnotation = new HTPExpressionDatasetSampleAnnotation(); } } else { - htpSampleAnnotationResponse.addErrorMessage("SampleId", ValidationConstants.REQUIRED_MESSAGE); htpSampleAnnotation = new HTPExpressionDatasetSampleAnnotation(); } @@ -144,30 +149,35 @@ public HTPExpressionDatasetSampleAnnotation validateHTPExpressionDatasetSampleAn } if (dto.getGenomicInformation() != null) { - if (htpSampleAnnotation.getGenomicInformation() == null) { - htpSampleAnnotation.setGenomicInformation(new BioSampleGenomicInformation()); - if (StringUtils.isNotEmpty(dto.getGenomicInformation().getBiosampleId())) { - validateGenomicInformation(dto.getGenomicInformation(), htpSampleAnnotation, htpSampleAnnotationResponse); - } else { - htpSampleAnnotationResponse.addErrorMessage("GenomicInformation - BioSampleId", ValidationConstants.REQUIRED_MESSAGE); - } - } else { - if (StringUtils.isNotEmpty(dto.getGenomicInformation().getBiosampleId())) { - String identifierString = null; - if (htpSampleAnnotation.getGenomicInformation().getBioSampleAgm() != null) { - identifierString = htpSampleAnnotation.getGenomicInformation().getBioSampleAgm().getIdentifier(); - } else if (htpSampleAnnotation.getGenomicInformation().getBioSampleAllele() != null) { - identifierString = htpSampleAnnotation.getGenomicInformation().getBioSampleAllele().getIdentifier(); - } - if (!identifierString.equals(dto.getGenomicInformation().getBiosampleId())) { + Boolean genomicInformationExists = (StringUtils.isNotEmpty(dto.getGenomicInformation().getBioSampleText()) == true || StringUtils.isNotEmpty(dto.getGenomicInformation().getBiosampleId()) == true); + if (genomicInformationExists) { + if (htpSampleAnnotation.getGenomicInformation() == null) { + htpSampleAnnotation.setGenomicInformation(new BioSampleGenomicInformation()); + if (StringUtils.isNotEmpty(dto.getGenomicInformation().getBiosampleId())) { 
validateGenomicInformation(dto.getGenomicInformation(), htpSampleAnnotation, htpSampleAnnotationResponse); } + if (StringUtils.isNotEmpty(dto.getGenomicInformation().getBioSampleText())) { + htpSampleAnnotation.getGenomicInformation().setBioSampleText(dto.getGenomicInformation().getBioSampleText()); + } } else { - htpSampleAnnotationResponse.addErrorMessage("GenomicInformation - BioSampleId", ValidationConstants.REQUIRED_MESSAGE); + if (StringUtils.isNotEmpty(dto.getGenomicInformation().getBiosampleId())) { + String identifierString = null; + if (htpSampleAnnotation.getGenomicInformation().getBioSampleAgm() != null) { + identifierString = htpSampleAnnotation.getGenomicInformation().getBioSampleAgm().getIdentifier(); + } else if (htpSampleAnnotation.getGenomicInformation().getBioSampleAllele() != null) { + identifierString = htpSampleAnnotation.getGenomicInformation().getBioSampleAllele().getIdentifier(); + } + if (!identifierString.equals(dto.getGenomicInformation().getBiosampleId())) { + validateGenomicInformation(dto.getGenomicInformation(), htpSampleAnnotation, htpSampleAnnotationResponse); + } + if (StringUtils.isNotEmpty(dto.getGenomicInformation().getBioSampleText())) { + htpSampleAnnotation.getGenomicInformation().setBioSampleText(dto.getGenomicInformation().getBioSampleText()); + } + } } + } else { + htpSampleAnnotationResponse.addErrorMessage("GenomicInformation - BioSampleId or BioSampleText", ValidationConstants.REQUIRED_MESSAGE); } - } else { - htpSampleAnnotationResponse.addErrorMessage("GenomicInformation", ValidationConstants.REQUIRED_MESSAGE); } if (StringUtils.isNotEmpty(dto.getSex())) { From 940a3c74361a4388d8671acc67bfec1f21b97c20 Mon Sep 17 00:00:00 2001 From: VarunReddy1111 Date: Thu, 31 Oct 2024 15:43:18 -0500 Subject: [PATCH 014/118] Checkstyle fix --- ...HTPExpressionDatasetSampleAnnotationFmsDTOValidator.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git 
a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/HTPExpressionDatasetSampleAnnotationFmsDTOValidator.java b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/HTPExpressionDatasetSampleAnnotationFmsDTOValidator.java index 9401e9c69..20276c6c4 100644 --- a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/HTPExpressionDatasetSampleAnnotationFmsDTOValidator.java +++ b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/HTPExpressionDatasetSampleAnnotationFmsDTOValidator.java @@ -66,9 +66,9 @@ public HTPExpressionDatasetSampleAnnotation validateHTPExpressionDatasetSampleAn ObjectResponse htpSampleAnnotationResponse = new ObjectResponse<>(); HTPExpressionDatasetSampleAnnotation htpSampleAnnotation; - Boolean sampleExists = ((dto.getSampleId() != null && StringUtils.isNotEmpty(dto.getSampleId().getPrimaryId())) || StringUtils.isNotEmpty(dto.getSampleTitle())); + Boolean sampleExists = (dto.getSampleId() != null && StringUtils.isNotEmpty(dto.getSampleId().getPrimaryId()) || StringUtils.isNotEmpty(dto.getSampleTitle())); - if(!sampleExists) { + if (!sampleExists) { htpSampleAnnotationResponse.addErrorMessage("SampleId or Sample Title", ValidationConstants.REQUIRED_MESSAGE); } @@ -149,7 +149,7 @@ public HTPExpressionDatasetSampleAnnotation validateHTPExpressionDatasetSampleAn } if (dto.getGenomicInformation() != null) { - Boolean genomicInformationExists = (StringUtils.isNotEmpty(dto.getGenomicInformation().getBioSampleText()) == true || StringUtils.isNotEmpty(dto.getGenomicInformation().getBiosampleId()) == true); + Boolean genomicInformationExists = (StringUtils.isNotEmpty(dto.getGenomicInformation().getBioSampleText()) == true) || (StringUtils.isNotEmpty(dto.getGenomicInformation().getBiosampleId()) == true); if (genomicInformationExists) { if (htpSampleAnnotation.getGenomicInformation() == null) { htpSampleAnnotation.setGenomicInformation(new BioSampleGenomicInformation()); 
From ebe7822b02a93874fe8508010e2542ef62920b75 Mon Sep 17 00:00:00 2001 From: VarunReddy1111 Date: Thu, 31 Oct 2024 15:52:31 -0500 Subject: [PATCH 015/118] checkstyle fix --- .../HTPExpressionDatasetSampleAnnotationFmsDTOValidator.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/HTPExpressionDatasetSampleAnnotationFmsDTOValidator.java b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/HTPExpressionDatasetSampleAnnotationFmsDTOValidator.java index 20276c6c4..39f5c839d 100644 --- a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/HTPExpressionDatasetSampleAnnotationFmsDTOValidator.java +++ b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/HTPExpressionDatasetSampleAnnotationFmsDTOValidator.java @@ -66,7 +66,7 @@ public HTPExpressionDatasetSampleAnnotation validateHTPExpressionDatasetSampleAn ObjectResponse htpSampleAnnotationResponse = new ObjectResponse<>(); HTPExpressionDatasetSampleAnnotation htpSampleAnnotation; - Boolean sampleExists = (dto.getSampleId() != null && StringUtils.isNotEmpty(dto.getSampleId().getPrimaryId()) || StringUtils.isNotEmpty(dto.getSampleTitle())); + Boolean sampleExists = (dto.getSampleId() != null && StringUtils.isNotEmpty(dto.getSampleId().getPrimaryId())) || StringUtils.isNotEmpty(dto.getSampleTitle()); if (!sampleExists) { htpSampleAnnotationResponse.addErrorMessage("SampleId or Sample Title", ValidationConstants.REQUIRED_MESSAGE); @@ -149,7 +149,7 @@ public HTPExpressionDatasetSampleAnnotation validateHTPExpressionDatasetSampleAn } if (dto.getGenomicInformation() != null) { - Boolean genomicInformationExists = (StringUtils.isNotEmpty(dto.getGenomicInformation().getBioSampleText()) == true) || (StringUtils.isNotEmpty(dto.getGenomicInformation().getBiosampleId()) == true); + Boolean genomicInformationExists = 
StringUtils.isNotEmpty(dto.getGenomicInformation().getBioSampleText()) || StringUtils.isNotEmpty(dto.getGenomicInformation().getBiosampleId()); if (genomicInformationExists) { if (htpSampleAnnotation.getGenomicInformation() == null) { htpSampleAnnotation.setGenomicInformation(new BioSampleGenomicInformation()); From 27781dd83f62b3f7e2465dd6ee68b07600f1595a Mon Sep 17 00:00:00 2001 From: VarunReddy1111 Date: Thu, 31 Oct 2024 15:56:47 -0500 Subject: [PATCH 016/118] checkstyle fix --- .../HTPExpressionDatasetSampleAnnotationFmsDTOValidator.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/HTPExpressionDatasetSampleAnnotationFmsDTOValidator.java b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/HTPExpressionDatasetSampleAnnotationFmsDTOValidator.java index 39f5c839d..66948ff41 100644 --- a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/HTPExpressionDatasetSampleAnnotationFmsDTOValidator.java +++ b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/HTPExpressionDatasetSampleAnnotationFmsDTOValidator.java @@ -66,7 +66,7 @@ public HTPExpressionDatasetSampleAnnotation validateHTPExpressionDatasetSampleAn ObjectResponse htpSampleAnnotationResponse = new ObjectResponse<>(); HTPExpressionDatasetSampleAnnotation htpSampleAnnotation; - Boolean sampleExists = (dto.getSampleId() != null && StringUtils.isNotEmpty(dto.getSampleId().getPrimaryId())) || StringUtils.isNotEmpty(dto.getSampleTitle()); + Boolean sampleExists = dto.getSampleId() != null && StringUtils.isNotEmpty(dto.getSampleId().getPrimaryId()) || StringUtils.isNotEmpty(dto.getSampleTitle()); if (!sampleExists) { htpSampleAnnotationResponse.addErrorMessage("SampleId or Sample Title", ValidationConstants.REQUIRED_MESSAGE); From dcfa9d98bc3fbbae819c8fa768ae5ecf65214d55 Mon Sep 17 00:00:00 2001 From: VarunReddy1111 Date: Fri, 1 Nov 2024 11:19:42 -0400 
Subject: [PATCH 017/118] MGI load fix --- .../HTPExpressionDatasetSampleAnnotationFmsDTOValidator.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/HTPExpressionDatasetSampleAnnotationFmsDTOValidator.java b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/HTPExpressionDatasetSampleAnnotationFmsDTOValidator.java index 66948ff41..b441b5798 100644 --- a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/HTPExpressionDatasetSampleAnnotationFmsDTOValidator.java +++ b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/HTPExpressionDatasetSampleAnnotationFmsDTOValidator.java @@ -72,7 +72,7 @@ public HTPExpressionDatasetSampleAnnotation validateHTPExpressionDatasetSampleAn htpSampleAnnotationResponse.addErrorMessage("SampleId or Sample Title", ValidationConstants.REQUIRED_MESSAGE); } - if (StringUtils.isNotBlank(dto.getSampleId().getPrimaryId())) { + if (dto.getSampleId() != null && StringUtils.isNotBlank(dto.getSampleId().getPrimaryId())) { String curie = dto.getSampleId().getPrimaryId(); ExternalDataBaseEntity externalDbEntity = externalDataBaseEntityFmsDtoValidator.validateExternalDataBaseEntityFmsDTO(dto.getSampleId()); if (externalDbEntity != null) { From 3d2d7fd41b83c37e2d637af78b8ec33b907c2a6c Mon Sep 17 00:00:00 2001 From: markquintontulloch Date: Mon, 4 Nov 2024 15:15:19 +0000 Subject: [PATCH 018/118] Add transcript_id to Transcript --- .../constants/ValidationConstants.java | 1 + .../curation_api/model/entities/Transcript.java | 13 ++++++++++++- .../GeneExpressionAnnotationUniqueIdHelper.java | 14 +++++++++----- .../validation/dto/Gff3DtoValidator.java | 6 ++++++ .../AlleleGeneAssociationDTOValidator.java | 2 +- ...uctGenomicEntityAssociationDTOValidator.java | 10 ++++++++-- .../dto/fms/ParalogyFmsDTOValidator.java | 2 +- .../dto/fms/VepGeneFmsDTOValidator.java | 8 ++++++-- 
.../dto/fms/VepTranscriptFmsDTOValidator.java | 17 ++++++++++++----- .../v0.37.0.65__predictedvariantconsequence.sql | 4 ++++ .../curation_api/Gff3BulkUploadITCase.java | 10 ++++++++++ .../fms/08_gff_data/ER_01_empty_seq_id.json | 3 ++- .../fms/08_gff_data/ER_02_empty_strand.json | 3 ++- .../ER_03_empty_transcript_parent.json | 3 ++- .../08_gff_data/ER_06_empty_transcript_id.json | 17 +++++++++++++++++ .../bulk/fms/08_gff_data/GFF_01_transcript.json | 3 ++- .../fms/08_gff_data/IV_01_invalid_strand.json | 3 ++- .../fms/08_gff_data/IV_02_invalid_phase.json | 3 ++- .../IV_03_invalid_transcript_parent.json | 3 ++- .../bulk/fms/08_gff_data/MR_01_no_seq_id.json | 3 ++- .../bulk/fms/08_gff_data/MR_02_no_start.json | 3 ++- .../bulk/fms/08_gff_data/MR_03_no_end.json | 3 ++- .../bulk/fms/08_gff_data/MR_04_no_strand.json | 3 ++- .../08_gff_data/MR_05_no_transcript_parent.json | 3 ++- .../fms/08_gff_data/MR_08_no_transcript_id.json | 16 ++++++++++++++++ .../08_gff_data/UD_01_update_transcript.json | 3 ++- 26 files changed, 129 insertions(+), 30 deletions(-) create mode 100644 src/test/resources/bulk/fms/08_gff_data/ER_06_empty_transcript_id.json create mode 100644 src/test/resources/bulk/fms/08_gff_data/MR_08_no_transcript_id.json diff --git a/src/main/java/org/alliancegenome/curation_api/constants/ValidationConstants.java b/src/main/java/org/alliancegenome/curation_api/constants/ValidationConstants.java index 0e879c431..2ca7244d2 100644 --- a/src/main/java/org/alliancegenome/curation_api/constants/ValidationConstants.java +++ b/src/main/java/org/alliancegenome/curation_api/constants/ValidationConstants.java @@ -15,5 +15,6 @@ private ValidationConstants() { public static final String DUPLICATE_MESSAGE = "Duplicate entries found"; public static final String DUPLICATE_RELATION_PREFIX = "Entries found with same relation field - "; public static final String UNRECOGNIZED_MESSAGE = "Unrecognized entry"; // To be used instead of INVALID_MESSAGE when entry to be skipped instead of 
failed + public static final String AMBIGUOUS_MESSAGE = "Could not be unambiguously resolved"; } \ No newline at end of file diff --git a/src/main/java/org/alliancegenome/curation_api/model/entities/Transcript.java b/src/main/java/org/alliancegenome/curation_api/model/entities/Transcript.java index 46254b04e..0617af6f8 100644 --- a/src/main/java/org/alliancegenome/curation_api/model/entities/Transcript.java +++ b/src/main/java/org/alliancegenome/curation_api/model/entities/Transcript.java @@ -11,9 +11,14 @@ import org.alliancegenome.curation_api.model.entities.ontology.SOTerm; import org.alliancegenome.curation_api.view.View; import org.eclipse.microprofile.openapi.annotations.media.Schema; +import org.hibernate.search.engine.backend.types.Aggregable; +import org.hibernate.search.engine.backend.types.Searchable; +import org.hibernate.search.engine.backend.types.Sortable; import org.hibernate.search.mapper.pojo.automaticindexing.ReindexOnUpdate; +import org.hibernate.search.mapper.pojo.mapping.definition.annotation.FullTextField; import org.hibernate.search.mapper.pojo.mapping.definition.annotation.IndexedEmbedded; import org.hibernate.search.mapper.pojo.mapping.definition.annotation.IndexingDependency; +import org.hibernate.search.mapper.pojo.mapping.definition.annotation.KeywordField; import com.fasterxml.jackson.annotation.JsonView; @@ -33,12 +38,18 @@ @EqualsAndHashCode(onlyExplicitlyIncluded = true, callSuper = true) @ToString(exclude = { "transcriptGenomicLocationAssociations", "transcriptGeneAssociations", "transcriptCodingSequenceAssociations", "transcriptExonAssociations" }, callSuper = true) @Schema(name = "Transcript", description = "POJO that represents the Transcript") -@AGRCurationSchemaVersion(min = "2.4.0", max = LinkMLSchemaConstants.LATEST_RELEASE, dependencies = { GenomicEntity.class }) +@AGRCurationSchemaVersion(min = "2.8.1", max = LinkMLSchemaConstants.LATEST_RELEASE, dependencies = { GenomicEntity.class }) @Table(indexes = { + @Index(name = 
"transcript_transcriptId_index", columnList = "transcriptId"), @Index(name = "transcript_transcriptType_index", columnList = "transcriptType_id") }) public class Transcript extends GenomicEntity { + @FullTextField(analyzer = "autocompleteAnalyzer", searchAnalyzer = "autocompleteSearchAnalyzer") + @KeywordField(name = "transcriptId_keyword", aggregable = Aggregable.YES, sortable = Sortable.YES, searchable = Searchable.YES, normalizer = "sortNormalizer") + @JsonView({ View.FieldsOnly.class }) + private String transcriptId; + @JsonView({ View.FieldsOnly.class }) private String name; diff --git a/src/main/java/org/alliancegenome/curation_api/services/helpers/annotations/GeneExpressionAnnotationUniqueIdHelper.java b/src/main/java/org/alliancegenome/curation_api/services/helpers/annotations/GeneExpressionAnnotationUniqueIdHelper.java index 25174699f..e04ca58a8 100644 --- a/src/main/java/org/alliancegenome/curation_api/services/helpers/annotations/GeneExpressionAnnotationUniqueIdHelper.java +++ b/src/main/java/org/alliancegenome/curation_api/services/helpers/annotations/GeneExpressionAnnotationUniqueIdHelper.java @@ -14,11 +14,15 @@ public String generateUniqueId(GeneExpressionFmsDTO geneExpressionFmsDTO, String uniqueIdGeneratorHelper.add(geneExpressionFmsDTO.getAssay()); uniqueIdGeneratorHelper.add(geneExpressionFmsDTO.getGeneId()); uniqueIdGeneratorHelper.add(referenceCurie); - uniqueIdGeneratorHelper.add(geneExpressionFmsDTO.getWhenExpressed().getStageTermId()); - uniqueIdGeneratorHelper.add(geneExpressionFmsDTO.getWhenExpressed().getStageName()); - uniqueIdGeneratorHelper.add(geneExpressionFmsDTO.getWhereExpressed().getWhereExpressedStatement()); - uniqueIdGeneratorHelper.add(geneExpressionFmsDTO.getWhereExpressed().getAnatomicalStructureTermId()); - uniqueIdGeneratorHelper.add(geneExpressionFmsDTO.getWhereExpressed().getCellularComponentTermId()); + if (geneExpressionFmsDTO.getWhenExpressed() != null) { + 
uniqueIdGeneratorHelper.add(geneExpressionFmsDTO.getWhenExpressed().getStageTermId()); + uniqueIdGeneratorHelper.add(geneExpressionFmsDTO.getWhenExpressed().getStageName()); + } + if (geneExpressionFmsDTO.getWhereExpressed() != null) { + uniqueIdGeneratorHelper.add(geneExpressionFmsDTO.getWhereExpressed().getWhereExpressedStatement()); + uniqueIdGeneratorHelper.add(geneExpressionFmsDTO.getWhereExpressed().getAnatomicalStructureTermId()); + uniqueIdGeneratorHelper.add(geneExpressionFmsDTO.getWhereExpressed().getCellularComponentTermId()); + } return uniqueIdGeneratorHelper.getUniqueId(); } } diff --git a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/Gff3DtoValidator.java b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/Gff3DtoValidator.java index 430cec249..fa1322c7d 100644 --- a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/Gff3DtoValidator.java +++ b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/Gff3DtoValidator.java @@ -171,6 +171,12 @@ public void validateTranscriptEntry(Gff3DTO dto, Map attributes, transcriptResponse.addErrorMessage("attributes - ID", ValidationConstants.REQUIRED_MESSAGE); } + if (attributes.containsKey("transcript_id")) { + transcript.setTranscriptId(attributes.get("transcript_id")); + } else { + transcriptResponse.addErrorMessage("attributes - transcript_id", ValidationConstants.REQUIRED_MESSAGE); + } + if (transcriptResponse.hasErrors()) { throw new ObjectValidationException(dto, transcriptResponse.errorMessagesString()); } diff --git a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/associations/alleleAssociations/AlleleGeneAssociationDTOValidator.java b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/associations/alleleAssociations/AlleleGeneAssociationDTOValidator.java index bfd87c332..6351aa369 100644 --- 
a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/associations/alleleAssociations/AlleleGeneAssociationDTOValidator.java +++ b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/associations/alleleAssociations/AlleleGeneAssociationDTOValidator.java @@ -57,7 +57,7 @@ public AlleleGeneAssociation validateAlleleGeneAssociationDTO(AlleleGeneAssociat } AlleleGeneAssociation association = null; - if (subjectIds != null && subjectIds.size() == 1 && objectIds != null || objectIds.size() == 1 && StringUtils.isNotBlank(dto.getRelationName())) { + if (subjectIds != null && subjectIds.size() == 1 && objectIds != null && objectIds.size() == 1 && StringUtils.isNotBlank(dto.getRelationName())) { HashMap params = new HashMap<>(); params.put("alleleAssociationSubject.id", subjectIds.get(0)); diff --git a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/associations/constructAssociations/ConstructGenomicEntityAssociationDTOValidator.java b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/associations/constructAssociations/ConstructGenomicEntityAssociationDTOValidator.java index 62f269466..d5b8a7c31 100644 --- a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/associations/constructAssociations/ConstructGenomicEntityAssociationDTOValidator.java +++ b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/associations/constructAssociations/ConstructGenomicEntityAssociationDTOValidator.java @@ -47,7 +47,10 @@ public ConstructGenomicEntityAssociation validateConstructGenomicEntityAssociati Construct construct = null; if (StringUtils.isNotBlank(dto.getConstructIdentifier())) { - construct = constructService.getShallowEntity(constructService.getIdByModID(dto.getConstructIdentifier())); + Long constructId = constructService.getIdByModID(dto.getConstructIdentifier()); + if (constructId != null) { + construct = constructService.getShallowEntity(constructId); + } if 
(construct == null) { assocResponse.addErrorMessage("construct_identifier", ValidationConstants.INVALID_MESSAGE); } else { @@ -63,7 +66,10 @@ public ConstructGenomicEntityAssociation validateConstructGenomicEntityAssociati if (StringUtils.isBlank(dto.getGenomicEntityIdentifier())) { assocResponse.addErrorMessage("genomic_entity_identifier", ValidationConstants.REQUIRED_MESSAGE); } else { - genomicEntity = genomicEntityService.getShallowEntity(genomicEntityService.getIdByModID(dto.getGenomicEntityIdentifier())); + Long genomicEntityId = genomicEntityService.getIdByModID(dto.getGenomicEntityIdentifier()); + if (genomicEntityId != null) { + genomicEntity = genomicEntityService.getShallowEntity(genomicEntityId); + } if (genomicEntity == null) { assocResponse.addErrorMessage("genomic_entity_identifier", ValidationConstants.INVALID_MESSAGE + " (" + dto.getGenomicEntityIdentifier() + ")"); } diff --git a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/ParalogyFmsDTOValidator.java b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/ParalogyFmsDTOValidator.java index c0c6060f3..bc0c8ac39 100644 --- a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/ParalogyFmsDTOValidator.java +++ b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/ParalogyFmsDTOValidator.java @@ -75,7 +75,7 @@ public GeneToGeneParalogy validateParalogyFmsDTO(ParalogyFmsDTO dto) throws Vali if (subjectGene == null) { paralogyResponse.addErrorMessage("gene1", ValidationConstants.INVALID_MESSAGE + " (" + subjectGeneIdentifier + ")"); } else { - if (!sameGenus(speciesTaxon, subjectGene.getTaxon())) { + if (speciesTaxon != null && !sameGenus(speciesTaxon, subjectGene.getTaxon())) { paralogyResponse.addErrorMessage("Species", ValidationConstants.INVALID_MESSAGE + " (" + dto.getSpecies() + ") for gene " + subjectGene.getCurie()); } } diff --git 
a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/VepGeneFmsDTOValidator.java b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/VepGeneFmsDTOValidator.java index fa97aa8bc..e888215d9 100644 --- a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/VepGeneFmsDTOValidator.java +++ b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/VepGeneFmsDTOValidator.java @@ -45,9 +45,13 @@ public Long validateGeneLevelConsequence(VepTxtDTO dto) throws ValidationExcepti if (StringUtils.isBlank(dto.getFeature())) { response.addErrorMessage("feature", ValidationConstants.REQUIRED_MESSAGE); } else { - transcript = transcriptService.getByIdentifier(dto.getFeature()).getEntity(); - if (transcript == null) { + SearchResponse searchResponse = transcriptService.findByField("transcriptId", dto.getFeature()); + if (searchResponse == null || searchResponse.getSingleResult() == null) { response.addErrorMessage("feature", ValidationConstants.INVALID_MESSAGE + " (" + dto.getFeature() + ")"); + } else if (searchResponse.getReturnedRecords() > 1) { + response.addErrorMessage("feature", ValidationConstants.AMBIGUOUS_MESSAGE + " (" + dto.getFeature() + ")"); + } else { + transcript = searchResponse.getSingleResult(); } } diff --git a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/VepTranscriptFmsDTOValidator.java b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/VepTranscriptFmsDTOValidator.java index d2cfa4a6a..7031411bb 100644 --- a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/VepTranscriptFmsDTOValidator.java +++ b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/VepTranscriptFmsDTOValidator.java @@ -68,9 +68,13 @@ public PredictedVariantConsequence validateTranscriptLevelConsequence(VepTxtDTO if (StringUtils.isBlank(dto.getFeature())) { response.addErrorMessage("feature", 
ValidationConstants.REQUIRED_MESSAGE); } else { - transcript = transcriptService.getByIdentifier(dto.getFeature()).getEntity(); - if (transcript == null) { + SearchResponse searchResponse = transcriptService.findByField("transcriptId", dto.getFeature()); + if (searchResponse == null || searchResponse.getSingleResult() == null) { response.addErrorMessage("feature", ValidationConstants.INVALID_MESSAGE + " (" + dto.getFeature() + ")"); + } else if (searchResponse.getReturnedRecords() > 1) { + response.addErrorMessage("feature", ValidationConstants.AMBIGUOUS_MESSAGE + " (" + dto.getFeature() + ")"); + } else { + transcript = searchResponse.getSingleResult(); } } @@ -139,11 +143,14 @@ public PredictedVariantConsequence validateTranscriptLevelConsequence(VepTxtDTO String variantCodon = null; if (StringUtils.isNotBlank(dto.getCodons())) { String[] refVarCodons = dto.getCodons().split("/"); - if (refVarCodons.length != 2) { - response.addErrorMessage("codons", ValidationConstants.INVALID_MESSAGE + " (" + dto.getCodons() + ")"); - } else { + if (refVarCodons.length == 1 && dto.getConsequence().contains("synonymous_variant")) { + referenceCodon = dto.getCodons(); + variantCodon = dto.getCodons(); + } else if (refVarCodons.length == 2) { referenceCodon = refVarCodons[0]; variantCodon = refVarCodons[1]; + } else { + response.addErrorMessage("codons", ValidationConstants.INVALID_MESSAGE + " (" + dto.getCodons() + ")"); } } predictedVariantConsequence.setCodonReference(referenceCodon); diff --git a/src/main/resources/db/migration/v0.37.0.65__predictedvariantconsequence.sql b/src/main/resources/db/migration/v0.37.0.65__predictedvariantconsequence.sql index 42eadf3ef..9e9ae8400 100644 --- a/src/main/resources/db/migration/v0.37.0.65__predictedvariantconsequence.sql +++ b/src/main/resources/db/migration/v0.37.0.65__predictedvariantconsequence.sql @@ -1,3 +1,7 @@ +ALTER TABLE transcript ADD COLUMN transcriptid VARCHAR(255); + +CREATE INDEX transcript_transcriptid_index ON transcript 
USING btree (transcriptid); + CREATE TABLE predictedvariantconsequence ( id bigint PRIMARY KEY, datecreated timestamp(6) with time zone, diff --git a/src/test/java/org/alliancegenome/curation_api/Gff3BulkUploadITCase.java b/src/test/java/org/alliancegenome/curation_api/Gff3BulkUploadITCase.java index 13f5ae5a9..aec218326 100644 --- a/src/test/java/org/alliancegenome/curation_api/Gff3BulkUploadITCase.java +++ b/src/test/java/org/alliancegenome/curation_api/Gff3BulkUploadITCase.java @@ -78,6 +78,7 @@ public void gff3DataBulkUploadTranscriptEntity() throws Exception { statusCode(200). body("entity.modInternalId", is(transcriptId)). body("entity.name", is("Y74C9A.2a.1")). + body("entity.transcriptId", is("WB:Y74C9A.2a.1")). body("entity.taxon.curie", is("NCBITaxon:6239")). body("entity.dataProvider.sourceOrganization.abbreviation", is("WB")). body("entity.transcriptType.curie", is("SO:0000234")). @@ -192,6 +193,7 @@ public void gff3DataBulkUploadUpdateTranscriptEntity() throws Exception { statusCode(200). body("entity.modInternalId", is(transcriptId)). body("entity.name", is("Y74C9A.2a.1")). + body("entity.transcriptId", is("RefSeq:Y74C9A.2a.1")). body("entity.taxon.curie", is("NCBITaxon:6239")). body("entity.dataProvider.sourceOrganization.abbreviation", is("WB")). body("entity.transcriptType.curie", is("SO:0001035")). 
@@ -226,6 +228,10 @@ public void gff3DataBulkUploadMissingRequiredFields() throws Exception { checkBulkLoadRecordCounts(transcriptBulkPostEndpoint, gffDataTestFilePath + "MR_05_no_transcript_parent.json", params); checkBulkLoadRecordCounts(exonBulkPostEndpoint, gffDataTestFilePath + "MR_06_no_exon_parent.json", params); checkBulkLoadRecordCounts(cdsBulkPostEndpoint, gffDataTestFilePath + "MR_07_no_cds_parent.json", params); + + params.put("Entities", createCountParams(1, 1, 0, 0)); + params.put("Locations", createCountParams(1, 1, 0, 0)); + checkBulkLoadRecordCounts(transcriptBulkPostEndpoint, gffDataTestFilePath + "MR_08_no_transcript_id.json", params); } @Test @@ -245,6 +251,10 @@ public void gff3DataBulkUploadEmptyRequiredFields() throws Exception { checkBulkLoadRecordCounts(transcriptBulkPostEndpoint, gffDataTestFilePath + "ER_03_empty_transcript_parent.json", params); checkBulkLoadRecordCounts(exonBulkPostEndpoint, gffDataTestFilePath + "ER_04_empty_exon_parent.json", params); checkBulkLoadRecordCounts(cdsBulkPostEndpoint, gffDataTestFilePath + "ER_05_empty_cds_parent.json", params); + + params.put("Entities", createCountParams(1, 1, 0, 0)); + params.put("Locations", createCountParams(1, 1, 0, 0)); + checkBulkLoadRecordCounts(transcriptBulkPostEndpoint, gffDataTestFilePath + "ER_06_empty_transcript_id.json", params); } @Test diff --git a/src/test/resources/bulk/fms/08_gff_data/ER_01_empty_seq_id.json b/src/test/resources/bulk/fms/08_gff_data/ER_01_empty_seq_id.json index a691479e6..8cf67b6a4 100644 --- a/src/test/resources/bulk/fms/08_gff_data/ER_01_empty_seq_id.json +++ b/src/test/resources/bulk/fms/08_gff_data/ER_01_empty_seq_id.json @@ -10,7 +10,8 @@ "attributes": [ "ID=Transcript:Y74C9A.2a.1", "Parent=Gene:WBGene00022276", - "Name=Y74C9A.2a.1" + "Name=Y74C9A.2a.1", + "transcript_id=WB:Y74C9A.2a.1" ] } ] diff --git a/src/test/resources/bulk/fms/08_gff_data/ER_02_empty_strand.json b/src/test/resources/bulk/fms/08_gff_data/ER_02_empty_strand.json index 
32a9ac0ec..495bfdb3c 100644 --- a/src/test/resources/bulk/fms/08_gff_data/ER_02_empty_strand.json +++ b/src/test/resources/bulk/fms/08_gff_data/ER_02_empty_strand.json @@ -10,7 +10,8 @@ "attributes": [ "ID=Transcript:Y74C9A.2a.1", "Parent=Gene:WBGene00022276", - "Name=Y74C9A.2a.1" + "Name=Y74C9A.2a.1", + "transcript_id=WB:Y74C9A.2a.1" ] } ] diff --git a/src/test/resources/bulk/fms/08_gff_data/ER_03_empty_transcript_parent.json b/src/test/resources/bulk/fms/08_gff_data/ER_03_empty_transcript_parent.json index 3e262c924..de0dc15de 100644 --- a/src/test/resources/bulk/fms/08_gff_data/ER_03_empty_transcript_parent.json +++ b/src/test/resources/bulk/fms/08_gff_data/ER_03_empty_transcript_parent.json @@ -10,7 +10,8 @@ "attributes": [ "ID=Transcript:Y74C9A.2a.1", "Parent=", - "Name=Y74C9A.2a.1" + "Name=Y74C9A.2a.1", + "transcript_id=WB:Y74C9A.2a.1" ] } ] diff --git a/src/test/resources/bulk/fms/08_gff_data/ER_06_empty_transcript_id.json b/src/test/resources/bulk/fms/08_gff_data/ER_06_empty_transcript_id.json new file mode 100644 index 000000000..484c28d44 --- /dev/null +++ b/src/test/resources/bulk/fms/08_gff_data/ER_06_empty_transcript_id.json @@ -0,0 +1,17 @@ +[ + { + "seqId": "I", + "source": "WormBase", + "type": "mRNA", + "start": 1, + "end": 1000, + "strand": "+", + "phase": 0, + "attributes": [ + "ID=Transcript:Y74C9A.2b.1", + "Parent=Gene:WBGene00022276", + "Name=Y74C9A.2b.1", + "transcript_id=" + ] + } +] diff --git a/src/test/resources/bulk/fms/08_gff_data/GFF_01_transcript.json b/src/test/resources/bulk/fms/08_gff_data/GFF_01_transcript.json index ef30adafe..e4157e0f5 100644 --- a/src/test/resources/bulk/fms/08_gff_data/GFF_01_transcript.json +++ b/src/test/resources/bulk/fms/08_gff_data/GFF_01_transcript.json @@ -10,7 +10,8 @@ "attributes": [ "ID=Transcript:Y74C9A.2a.1", "Parent=Gene:WBGene00022276", - "Name=Y74C9A.2a.1" + "Name=Y74C9A.2a.1", + "transcript_id=WB:Y74C9A.2a.1" ] } ] diff --git a/src/test/resources/bulk/fms/08_gff_data/IV_01_invalid_strand.json 
b/src/test/resources/bulk/fms/08_gff_data/IV_01_invalid_strand.json index 920bde5f5..9d832b705 100644 --- a/src/test/resources/bulk/fms/08_gff_data/IV_01_invalid_strand.json +++ b/src/test/resources/bulk/fms/08_gff_data/IV_01_invalid_strand.json @@ -10,7 +10,8 @@ "attributes": [ "ID=Transcript:Y74C9A.2a.1", "Parent=Gene:WBGene00022276", - "Name=Y74C9A.2a.1" + "Name=Y74C9A.2a.1", + "transcript_id=WB:Y74C9A.2a.1" ] } ] diff --git a/src/test/resources/bulk/fms/08_gff_data/IV_02_invalid_phase.json b/src/test/resources/bulk/fms/08_gff_data/IV_02_invalid_phase.json index e6f751326..788af87dd 100644 --- a/src/test/resources/bulk/fms/08_gff_data/IV_02_invalid_phase.json +++ b/src/test/resources/bulk/fms/08_gff_data/IV_02_invalid_phase.json @@ -10,7 +10,8 @@ "attributes": [ "ID=Transcript:Y74C9A.2a.1", "Parent=Gene:WBGene00022276", - "Name=Y74C9A.2a.1" + "Name=Y74C9A.2a.1", + "transcript_id=WB:Y74C9A.2a.1" ] } ] diff --git a/src/test/resources/bulk/fms/08_gff_data/IV_03_invalid_transcript_parent.json b/src/test/resources/bulk/fms/08_gff_data/IV_03_invalid_transcript_parent.json index 8d524dd08..71503f4fd 100644 --- a/src/test/resources/bulk/fms/08_gff_data/IV_03_invalid_transcript_parent.json +++ b/src/test/resources/bulk/fms/08_gff_data/IV_03_invalid_transcript_parent.json @@ -10,7 +10,8 @@ "attributes": [ "ID=Transcript:Y74C9A.2a.1", "Parent=Gene:Invalid", - "Name=Y74C9A.2a.1" + "Name=Y74C9A.2a.1", + "transcript_id=WB:Y74C9A.2a.1" ] } ] diff --git a/src/test/resources/bulk/fms/08_gff_data/MR_01_no_seq_id.json b/src/test/resources/bulk/fms/08_gff_data/MR_01_no_seq_id.json index e9651e61e..554c08d40 100644 --- a/src/test/resources/bulk/fms/08_gff_data/MR_01_no_seq_id.json +++ b/src/test/resources/bulk/fms/08_gff_data/MR_01_no_seq_id.json @@ -9,7 +9,8 @@ "attributes": [ "ID=Transcript:Y74C9A.2a.1", "Parent=Gene:WBGene00022276", - "Name=Y74C9A.2a.1" + "Name=Y74C9A.2a.1", + "transcript_id=WB:Y74C9A.2a.1" ] } ] diff --git 
a/src/test/resources/bulk/fms/08_gff_data/MR_02_no_start.json b/src/test/resources/bulk/fms/08_gff_data/MR_02_no_start.json index 0997e4e33..6065ed59b 100644 --- a/src/test/resources/bulk/fms/08_gff_data/MR_02_no_start.json +++ b/src/test/resources/bulk/fms/08_gff_data/MR_02_no_start.json @@ -9,7 +9,8 @@ "attributes": [ "ID=Transcript:Y74C9A.2a.1", "Parent=Gene:WBGene00022276", - "Name=Y74C9A.2a.1" + "Name=Y74C9A.2a.1", + "transcript_id=WB:Y74C9A.2a.1" ] } ] diff --git a/src/test/resources/bulk/fms/08_gff_data/MR_03_no_end.json b/src/test/resources/bulk/fms/08_gff_data/MR_03_no_end.json index 7284de1d8..fe1480334 100644 --- a/src/test/resources/bulk/fms/08_gff_data/MR_03_no_end.json +++ b/src/test/resources/bulk/fms/08_gff_data/MR_03_no_end.json @@ -9,7 +9,8 @@ "attributes": [ "ID=Transcript:Y74C9A.2a.1", "Parent=Gene:WBGene00022276", - "Name=Y74C9A.2a.1" + "Name=Y74C9A.2a.1", + "transcript_id=WB:Y74C9A.2a.1" ] } ] diff --git a/src/test/resources/bulk/fms/08_gff_data/MR_04_no_strand.json b/src/test/resources/bulk/fms/08_gff_data/MR_04_no_strand.json index 3a6cdcbd6..d2162706f 100644 --- a/src/test/resources/bulk/fms/08_gff_data/MR_04_no_strand.json +++ b/src/test/resources/bulk/fms/08_gff_data/MR_04_no_strand.json @@ -9,7 +9,8 @@ "attributes": [ "ID=Transcript:Y74C9A.2a.1", "Parent=Gene:WBGene00022276", - "Name=Y74C9A.2a.1" + "Name=Y74C9A.2a.1", + "transcript_id=WB:Y74C9A.2a.1" ] } ] diff --git a/src/test/resources/bulk/fms/08_gff_data/MR_05_no_transcript_parent.json b/src/test/resources/bulk/fms/08_gff_data/MR_05_no_transcript_parent.json index 123f5e739..1a76034db 100644 --- a/src/test/resources/bulk/fms/08_gff_data/MR_05_no_transcript_parent.json +++ b/src/test/resources/bulk/fms/08_gff_data/MR_05_no_transcript_parent.json @@ -9,7 +9,8 @@ "phase": 0, "attributes": [ "ID=Transcript:Y74C9A.2a.1", - "Name=Y74C9A.2a.1" + "Name=Y74C9A.2a.1", + "transcript_id=WB:Y74C9A.2a.1" ] } ] diff --git a/src/test/resources/bulk/fms/08_gff_data/MR_08_no_transcript_id.json 
b/src/test/resources/bulk/fms/08_gff_data/MR_08_no_transcript_id.json new file mode 100644 index 000000000..b7b2ec3f8 --- /dev/null +++ b/src/test/resources/bulk/fms/08_gff_data/MR_08_no_transcript_id.json @@ -0,0 +1,16 @@ +[ + { + "seqId": "I", + "source": "WormBase", + "type": "mRNA", + "start": 1, + "end": 1000, + "strand": "+", + "phase": 0, + "attributes": [ + "ID=Transcript:Y74C9A.2b.1", + "Parent=Gene:WBGene00022276", + "Name=Y74C9A.2b.1" + ] + } +] diff --git a/src/test/resources/bulk/fms/08_gff_data/UD_01_update_transcript.json b/src/test/resources/bulk/fms/08_gff_data/UD_01_update_transcript.json index ed9d9e98e..227ff22ce 100644 --- a/src/test/resources/bulk/fms/08_gff_data/UD_01_update_transcript.json +++ b/src/test/resources/bulk/fms/08_gff_data/UD_01_update_transcript.json @@ -10,7 +10,8 @@ "attributes": [ "ID=Transcript:Y74C9A.2a.1", "Parent=Gene:WBGene00022276", - "Name=Y74C9A.2a.1" + "Name=Y74C9A.2a.1", + "transcript_id=RefSeq:Y74C9A.2a.1" ] } ] From 7c16d43f7d590aa881170d2b6a803685faa82432 Mon Sep 17 00:00:00 2001 From: markquintontulloch Date: Mon, 4 Nov 2024 22:24:51 +0000 Subject: [PATCH 019/118] Tweaks and fixes --- .../curation_api/constants/Gff3Constants.java | 2 +- .../helpers/variants/HgvsIdentifierHelper.java | 13 +++++++++++-- .../validation/dto/Gff3DtoValidator.java | 4 +++- .../dto/fms/VariantFmsDTOValidator.java | 10 ++++++++-- .../dto/fms/VepTranscriptFmsDTOValidator.java | 10 +++++++--- .../curation_api/Gff3BulkUploadITCase.java | 8 -------- .../08_gff_data/ER_06_empty_transcript_id.json | 17 ----------------- .../fms/08_gff_data/MR_08_no_transcript_id.json | 16 ---------------- 8 files changed, 30 insertions(+), 50 deletions(-) delete mode 100644 src/test/resources/bulk/fms/08_gff_data/ER_06_empty_transcript_id.json delete mode 100644 src/test/resources/bulk/fms/08_gff_data/MR_08_no_transcript_id.json diff --git a/src/main/java/org/alliancegenome/curation_api/constants/Gff3Constants.java 
b/src/main/java/org/alliancegenome/curation_api/constants/Gff3Constants.java index 9ecf6976c..b613cf23b 100644 --- a/src/main/java/org/alliancegenome/curation_api/constants/Gff3Constants.java +++ b/src/main/java/org/alliancegenome/curation_api/constants/Gff3Constants.java @@ -12,7 +12,7 @@ private Gff3Constants() { "mRNA", "ncRNA", "piRNA", "lincRNA", "miRNA", "pre_miRNA", "snoRNA", "lncRNA", "tRNA", "snRNA", "rRNA", "antisense_RNA", "C_gene_segment", "V_gene_segment", "pseudogene_attribute", "pseudogenic_transcript", "lnc_RNA", "nc_primary_transcript", - "circular_ncRNA" + "circular_ncRNA", "transcript" ); public static final List STRANDS = List.of("+", "-"); diff --git a/src/main/java/org/alliancegenome/curation_api/services/helpers/variants/HgvsIdentifierHelper.java b/src/main/java/org/alliancegenome/curation_api/services/helpers/variants/HgvsIdentifierHelper.java index 331712250..389ce0d88 100644 --- a/src/main/java/org/alliancegenome/curation_api/services/helpers/variants/HgvsIdentifierHelper.java +++ b/src/main/java/org/alliancegenome/curation_api/services/helpers/variants/HgvsIdentifierHelper.java @@ -1,5 +1,7 @@ package org.alliancegenome.curation_api.services.helpers.variants; +import java.util.Objects; + import org.alliancegenome.curation_api.model.ingest.dto.fms.VariantFmsDTO; import org.apache.commons.lang3.StringUtils; @@ -15,9 +17,16 @@ public static String getHgvsIdentifier(VariantFmsDTO dto) { if (dto.getEnd() != null) { end = Integer.toString(dto.getEnd()); } + + String refSeq = ""; + if (StringUtils.isNotBlank(dto.getGenomicReferenceSequence()) && !Objects.equals(dto.getGenomicReferenceSequence(), "N/A")) { + refSeq = dto.getGenomicReferenceSequence(); + } - String varSeq = StringUtils.isBlank(dto.getGenomicVariantSequence()) ? "" : dto.getGenomicVariantSequence(); - String refSeq = StringUtils.isBlank(dto.getGenomicReferenceSequence()) ? 
"" : dto.getGenomicReferenceSequence(); + String varSeq = ""; + if (StringUtils.isNotBlank(dto.getGenomicVariantSequence()) && !Objects.equals(dto.getGenomicVariantSequence(), "N/A")) { + varSeq = dto.getGenomicVariantSequence(); + } String chrAccession = ""; if (StringUtils.isNotBlank(dto.getSequenceOfReferenceAccessionNumber())) { diff --git a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/Gff3DtoValidator.java b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/Gff3DtoValidator.java index fa1322c7d..289cbedf6 100644 --- a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/Gff3DtoValidator.java +++ b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/Gff3DtoValidator.java @@ -164,6 +164,8 @@ public void validateTranscriptEntry(Gff3DTO dto, Map attributes, if (attributes.containsKey("Name")) { transcript.setName(attributes.get("Name")); + } else { + transcript.setName(null); } ObjectResponse transcriptResponse = validateGenomicEntity(transcript, dto, attributes, dataProvider); @@ -174,7 +176,7 @@ public void validateTranscriptEntry(Gff3DTO dto, Map attributes, if (attributes.containsKey("transcript_id")) { transcript.setTranscriptId(attributes.get("transcript_id")); } else { - transcriptResponse.addErrorMessage("attributes - transcript_id", ValidationConstants.REQUIRED_MESSAGE); + transcript.setTranscriptId(null); } if (transcriptResponse.hasErrors()) { diff --git a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/VariantFmsDTOValidator.java b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/VariantFmsDTOValidator.java index bb5f85282..5ae14da84 100644 --- a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/VariantFmsDTOValidator.java +++ b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/VariantFmsDTOValidator.java @@ -256,11 +256,17 @@ public void 
validateCuratedVariantGenomicLocationAssociation(VariantFmsDTO dto, association.setStart(dto.getStart()); association.setEnd(dto.getEnd()); association.setRelation(vocabularyTermService.getTermInVocabulary(VocabularyConstants.LOCATION_ASSOCIATION_RELATION_VOCABULARY, "located_on").getEntity()); - if (StringUtils.isNotBlank(dto.getGenomicReferenceSequence())) { + + if (StringUtils.isNotBlank(dto.getGenomicReferenceSequence()) && !Objects.equals(dto.getGenomicReferenceSequence(), "N/A")) { association.setReferenceSequence(dto.getGenomicReferenceSequence()); + } else { + association.setReferenceSequence(null); } - if (StringUtils.isNotBlank(dto.getGenomicVariantSequence())) { + + if (StringUtils.isNotBlank(dto.getGenomicVariantSequence()) && !Objects.equals(dto.getGenomicVariantSequence(), "N/A")) { association.setVariantSequence(dto.getGenomicVariantSequence()); + } else { + association.setVariantSequence(null); } if (variantId == null) { diff --git a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/VepTranscriptFmsDTOValidator.java b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/VepTranscriptFmsDTOValidator.java index 7031411bb..20d72f016 100644 --- a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/VepTranscriptFmsDTOValidator.java +++ b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/VepTranscriptFmsDTOValidator.java @@ -11,6 +11,7 @@ import org.alliancegenome.curation_api.constants.ValidationConstants; import org.alliancegenome.curation_api.constants.VocabularyConstants; import org.alliancegenome.curation_api.dao.PredictedVariantConsequenceDAO; +import org.alliancegenome.curation_api.dao.TranscriptDAO; import org.alliancegenome.curation_api.dao.associations.variantAssociations.CuratedVariantGenomicLocationAssociationDAO; import org.alliancegenome.curation_api.enums.BackendBulkDataProvider; import 
org.alliancegenome.curation_api.exceptions.ObjectValidationException; @@ -23,7 +24,6 @@ import org.alliancegenome.curation_api.model.ingest.dto.fms.VepTxtDTO; import org.alliancegenome.curation_api.response.ObjectResponse; import org.alliancegenome.curation_api.response.SearchResponse; -import org.alliancegenome.curation_api.services.TranscriptService; import org.alliancegenome.curation_api.services.VocabularyTermService; import org.alliancegenome.curation_api.services.associations.variantAssociations.CuratedVariantGenomicLocationAssociationService; import org.alliancegenome.curation_api.services.ontology.SoTermService; @@ -41,7 +41,7 @@ public class VepTranscriptFmsDTOValidator { @Inject PredictedVariantConsequenceDAO predictedVariantConsequenceDAO; @Inject CuratedVariantGenomicLocationAssociationDAO cvglaDAO; @Inject CuratedVariantGenomicLocationAssociationService cvglaService; - @Inject TranscriptService transcriptService; + @Inject TranscriptDAO transcriptDAO; @Inject VocabularyTermService vocabularyTermService; @Inject SoTermService soTermService; @@ -68,7 +68,11 @@ public PredictedVariantConsequence validateTranscriptLevelConsequence(VepTxtDTO if (StringUtils.isBlank(dto.getFeature())) { response.addErrorMessage("feature", ValidationConstants.REQUIRED_MESSAGE); } else { - SearchResponse searchResponse = transcriptService.findByField("transcriptId", dto.getFeature()); + HashMap params = new HashMap<>(); + params.put("transcriptId", dto.getFeature()); + params.put("obsolete", false); + + SearchResponse searchResponse = transcriptDAO.findByParams(params); if (searchResponse == null || searchResponse.getSingleResult() == null) { response.addErrorMessage("feature", ValidationConstants.INVALID_MESSAGE + " (" + dto.getFeature() + ")"); } else if (searchResponse.getReturnedRecords() > 1) { diff --git a/src/test/java/org/alliancegenome/curation_api/Gff3BulkUploadITCase.java b/src/test/java/org/alliancegenome/curation_api/Gff3BulkUploadITCase.java index 
aec218326..205203b3c 100644 --- a/src/test/java/org/alliancegenome/curation_api/Gff3BulkUploadITCase.java +++ b/src/test/java/org/alliancegenome/curation_api/Gff3BulkUploadITCase.java @@ -228,10 +228,6 @@ public void gff3DataBulkUploadMissingRequiredFields() throws Exception { checkBulkLoadRecordCounts(transcriptBulkPostEndpoint, gffDataTestFilePath + "MR_05_no_transcript_parent.json", params); checkBulkLoadRecordCounts(exonBulkPostEndpoint, gffDataTestFilePath + "MR_06_no_exon_parent.json", params); checkBulkLoadRecordCounts(cdsBulkPostEndpoint, gffDataTestFilePath + "MR_07_no_cds_parent.json", params); - - params.put("Entities", createCountParams(1, 1, 0, 0)); - params.put("Locations", createCountParams(1, 1, 0, 0)); - checkBulkLoadRecordCounts(transcriptBulkPostEndpoint, gffDataTestFilePath + "MR_08_no_transcript_id.json", params); } @Test @@ -251,10 +247,6 @@ public void gff3DataBulkUploadEmptyRequiredFields() throws Exception { checkBulkLoadRecordCounts(transcriptBulkPostEndpoint, gffDataTestFilePath + "ER_03_empty_transcript_parent.json", params); checkBulkLoadRecordCounts(exonBulkPostEndpoint, gffDataTestFilePath + "ER_04_empty_exon_parent.json", params); checkBulkLoadRecordCounts(cdsBulkPostEndpoint, gffDataTestFilePath + "ER_05_empty_cds_parent.json", params); - - params.put("Entities", createCountParams(1, 1, 0, 0)); - params.put("Locations", createCountParams(1, 1, 0, 0)); - checkBulkLoadRecordCounts(transcriptBulkPostEndpoint, gffDataTestFilePath + "ER_06_empty_transcript_id.json", params); } @Test diff --git a/src/test/resources/bulk/fms/08_gff_data/ER_06_empty_transcript_id.json b/src/test/resources/bulk/fms/08_gff_data/ER_06_empty_transcript_id.json deleted file mode 100644 index 484c28d44..000000000 --- a/src/test/resources/bulk/fms/08_gff_data/ER_06_empty_transcript_id.json +++ /dev/null @@ -1,17 +0,0 @@ -[ - { - "seqId": "I", - "source": "WormBase", - "type": "mRNA", - "start": 1, - "end": 1000, - "strand": "+", - "phase": 0, - "attributes": [ - 
"ID=Transcript:Y74C9A.2b.1", - "Parent=Gene:WBGene00022276", - "Name=Y74C9A.2b.1", - "transcript_id=" - ] - } -] diff --git a/src/test/resources/bulk/fms/08_gff_data/MR_08_no_transcript_id.json b/src/test/resources/bulk/fms/08_gff_data/MR_08_no_transcript_id.json deleted file mode 100644 index b7b2ec3f8..000000000 --- a/src/test/resources/bulk/fms/08_gff_data/MR_08_no_transcript_id.json +++ /dev/null @@ -1,16 +0,0 @@ -[ - { - "seqId": "I", - "source": "WormBase", - "type": "mRNA", - "start": 1, - "end": 1000, - "strand": "+", - "phase": 0, - "attributes": [ - "ID=Transcript:Y74C9A.2b.1", - "Parent=Gene:WBGene00022276", - "Name=Y74C9A.2b.1" - ] - } -] From 1ea5cc8c073b584a8fd1f31aa2bc2e0136a387d3 Mon Sep 17 00:00:00 2001 From: markquintontulloch Date: Mon, 4 Nov 2024 22:39:11 +0000 Subject: [PATCH 020/118] Update migration version --- ...consequence.sql => v0.38.0.1__predictedvariantconsequence.sql} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename src/main/resources/db/migration/{v0.37.0.65__predictedvariantconsequence.sql => v0.38.0.1__predictedvariantconsequence.sql} (100%) diff --git a/src/main/resources/db/migration/v0.37.0.65__predictedvariantconsequence.sql b/src/main/resources/db/migration/v0.38.0.1__predictedvariantconsequence.sql similarity index 100% rename from src/main/resources/db/migration/v0.37.0.65__predictedvariantconsequence.sql rename to src/main/resources/db/migration/v0.38.0.1__predictedvariantconsequence.sql From 7a421784282735ad847c3078874a2873f01f903b Mon Sep 17 00:00:00 2001 From: markquintontulloch Date: Mon, 4 Nov 2024 22:51:27 +0000 Subject: [PATCH 021/118] Checkstyle fixes --- .../CuratedVariantGenomicLocationAssociation.java | 3 ++- .../services/PredictedVariantConsequenceService.java | 2 +- .../validation/dto/fms/VepTranscriptFmsDTOValidator.java | 6 +++--- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git 
a/src/main/java/org/alliancegenome/curation_api/model/entities/associations/variantAssociations/CuratedVariantGenomicLocationAssociation.java b/src/main/java/org/alliancegenome/curation_api/model/entities/associations/variantAssociations/CuratedVariantGenomicLocationAssociation.java index 23051135b..f62005b21 100644 --- a/src/main/java/org/alliancegenome/curation_api/model/entities/associations/variantAssociations/CuratedVariantGenomicLocationAssociation.java +++ b/src/main/java/org/alliancegenome/curation_api/model/entities/associations/variantAssociations/CuratedVariantGenomicLocationAssociation.java @@ -51,9 +51,10 @@ public class CuratedVariantGenomicLocationAssociation extends VariantGenomicLoca includePaths = { "variantTranscript.name", "variantTranscript.modEntityId", "variantTranscript.modInternalId", "variantTranscript.curie", - "vepConsequence.name", "variantTranscript.name_keyword", + "vepConsequence.name", "variantTranscript.name_keyword", "variantTranscript.modEntityId_keyword", "variantTranscript.modInternalId_keyword", "variantTranscript.curie_keyword", "vepConsequence.name_keyword", + "variantTranscript.transcriptId", "variantTranscript.transcriptId_keyword" } ) @OneToMany(mappedBy = "variantGenomicLocation", cascade = CascadeType.ALL, orphanRemoval = true) diff --git a/src/main/java/org/alliancegenome/curation_api/services/PredictedVariantConsequenceService.java b/src/main/java/org/alliancegenome/curation_api/services/PredictedVariantConsequenceService.java index 684a29752..e1b241bb6 100644 --- a/src/main/java/org/alliancegenome/curation_api/services/PredictedVariantConsequenceService.java +++ b/src/main/java/org/alliancegenome/curation_api/services/PredictedVariantConsequenceService.java @@ -93,7 +93,7 @@ public PredictedVariantConsequence resetGeneLevelConsequence(Long id, String req pvc.setUpdatedBy(updatedBy); pvc.setDateUpdated(OffsetDateTime.now()); return predictedVariantConsequenceDAO.persist(pvc); - } + } return pvc; } diff --git 
a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/VepTranscriptFmsDTOValidator.java b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/VepTranscriptFmsDTOValidator.java index 20d72f016..9a965af8d 100644 --- a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/VepTranscriptFmsDTOValidator.java +++ b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/VepTranscriptFmsDTOValidator.java @@ -153,7 +153,7 @@ public PredictedVariantConsequence validateTranscriptLevelConsequence(VepTxtDTO } else if (refVarCodons.length == 2) { referenceCodon = refVarCodons[0]; variantCodon = refVarCodons[1]; - } else { + } else { response.addErrorMessage("codons", ValidationConstants.INVALID_MESSAGE + " (" + dto.getCodons() + ")"); } } @@ -300,7 +300,7 @@ private Pair parsePathogenicityPredictionScore(String res private Pair parseStartEnd(String position) { Matcher matcher = POSITION_STRING.matcher(position); - if(!matcher.find()) { + if (!matcher.find()) { return null; } @@ -312,7 +312,7 @@ private Pair parseStartEnd(String position) { return null; } - if(positions.length == 1) { + if (positions.length == 1) { start = Integer.parseInt(position); end = start; } else { From 9f43e5df0be01522b1a5d3679525e7cfa3478715 Mon Sep 17 00:00:00 2001 From: markquintontulloch Date: Tue, 5 Nov 2024 09:27:54 +0000 Subject: [PATCH 022/118] Index updates --- .../model/entities/PredictedVariantConsequence.java | 5 ++++- .../db/migration/v0.38.0.1__predictedvariantconsequence.sql | 3 +++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/src/main/java/org/alliancegenome/curation_api/model/entities/PredictedVariantConsequence.java b/src/main/java/org/alliancegenome/curation_api/model/entities/PredictedVariantConsequence.java index 9e7677194..7920ede56 100644 --- a/src/main/java/org/alliancegenome/curation_api/model/entities/PredictedVariantConsequence.java +++ 
b/src/main/java/org/alliancegenome/curation_api/model/entities/PredictedVariantConsequence.java @@ -48,7 +48,10 @@ @Index(name = "predictedvariantconsequence_polyphenprediction_index", columnList = "polyphenprediction_id"), @Index(name = "predictedvariantconsequence_siftprediction_index", columnList = "siftprediction_id"), @Index(name = "predictedvariantconsequence_createdby_index", columnList = "createdby_id"), - @Index(name = "predictedvariantconsequence_updatedby_index", columnList = "updatedby_id") + @Index(name = "predictedvariantconsequence_updatedby_index", columnList = "updatedby_id"), + @Index(name = "predictedvariantconsequence_hgvsproteinnomenclature_index", columnList = "hgvsProteinNomenclature"), + @Index(name = "predictedvariantconsequence_hgvscodingnomenclature_index", columnList = "hgvsCodingNomenclature"), + @Index(name = "predictedvariantconsequence_variantgenomiclocation_index", columnList = "variantGenomicLocation_id") }) public class PredictedVariantConsequence extends AuditedObject { diff --git a/src/main/resources/db/migration/v0.38.0.1__predictedvariantconsequence.sql b/src/main/resources/db/migration/v0.38.0.1__predictedvariantconsequence.sql index 9e9ae8400..0bda5e377 100644 --- a/src/main/resources/db/migration/v0.38.0.1__predictedvariantconsequence.sql +++ b/src/main/resources/db/migration/v0.38.0.1__predictedvariantconsequence.sql @@ -47,6 +47,9 @@ CREATE INDEX predictedvariantconsequence_polyphenprediction_index ON predictedva CREATE INDEX predictedvariantconsequence_siftprediction_index ON predictedvariantconsequence USING btree (siftprediction_id); CREATE INDEX predictedvariantconsequence_createdby_index ON predictedvariantconsequence USING btree (createdby_id); CREATE INDEX predictedvariantconsequence_updatedby_index ON predictedvariantconsequence USING btree (updatedby_id); +CREATE INDEX predictedvariantconsequence_hgvsproteinnomenclature_index ON predictedvariantconsequence USING btree (hgvsProteinNomenclature); +CREATE INDEX 
predictedvariantconsequence_hgvscodingnomenclature_index ON predictedvariantconsequence USING btree (hgvsCodingNomenclature); +CREATE INDEX predictedvariantconsequence_variantgenomiclocation_index ON predictedvariantconsequence USING btree (variantGenomicLocation_id); ALTER TABLE ONLY predictedvariantconsequence ADD CONSTRAINT predictedvariantconsequence_createdby_id_fk FOREIGN KEY (createdby_id) REFERENCES person(id); ALTER TABLE ONLY predictedvariantconsequence ADD CONSTRAINT predictedvariantconsequence_updatedby_id_fk FOREIGN KEY (updatedby_id) REFERENCES person(id); From 6eb9751448cf50a51544f985140cbf92d2ecf8ba Mon Sep 17 00:00:00 2001 From: markquintontulloch Date: Wed, 6 Nov 2024 01:42:50 +0000 Subject: [PATCH 023/118] Handle amino acids for synonymous variants --- .../validation/dto/fms/VepTranscriptFmsDTOValidator.java | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/VepTranscriptFmsDTOValidator.java b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/VepTranscriptFmsDTOValidator.java index 9a965af8d..cd5ed1453 100644 --- a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/VepTranscriptFmsDTOValidator.java +++ b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/VepTranscriptFmsDTOValidator.java @@ -164,11 +164,14 @@ public PredictedVariantConsequence validateTranscriptLevelConsequence(VepTxtDTO String variantAminoAcids = null; if (StringUtils.isNotBlank(dto.getAminoAcids())) { String[] refVarAminoAcids = dto.getAminoAcids().split("/"); - if (refVarAminoAcids.length != 2) { - response.addErrorMessage("aminoAcids", ValidationConstants.INVALID_MESSAGE + " (" + dto.getAminoAcids() + ")"); - } else { + if (refVarAminoAcids.length == 1 && dto.getConsequence().contains("synonymous_variant")) { + referenceAminoAcids = refVarAminoAcids[0]; + variantAminoAcids = refVarAminoAcids[0]; + } else if 
(refVarAminoAcids.length == 2) { referenceAminoAcids = refVarAminoAcids[0]; variantAminoAcids = refVarAminoAcids[1]; + } else { + response.addErrorMessage("aminoAcids", ValidationConstants.INVALID_MESSAGE + " (" + dto.getAminoAcids() + ")"); } } predictedVariantConsequence.setAminoAcidReference(referenceAminoAcids); From 79b78e085f22c7349d116ed56c4d8d379185fd95 Mon Sep 17 00:00:00 2001 From: markquintontulloch Date: Wed, 6 Nov 2024 12:37:05 +0000 Subject: [PATCH 024/118] Skip VEP results for intergenic variants --- .../validation/dto/fms/VepGeneFmsDTOValidator.java | 4 ++++ .../dto/fms/VepTranscriptFmsDTOValidator.java | 4 ++++ .../org/alliancegenome/curation_api/VepFmsITCase.java | 7 +++++++ .../bulk/fms/11_vep/US_01_unsupported_intergenic.json | 11 +++++++++++ 4 files changed, 26 insertions(+) create mode 100644 src/test/resources/bulk/fms/11_vep/US_01_unsupported_intergenic.json diff --git a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/VepGeneFmsDTOValidator.java b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/VepGeneFmsDTOValidator.java index e888215d9..b163a08e9 100644 --- a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/VepGeneFmsDTOValidator.java +++ b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/VepGeneFmsDTOValidator.java @@ -2,6 +2,7 @@ import org.alliancegenome.curation_api.constants.ValidationConstants; import org.alliancegenome.curation_api.dao.PredictedVariantConsequenceDAO; +import org.alliancegenome.curation_api.exceptions.KnownIssueValidationException; import org.alliancegenome.curation_api.exceptions.ObjectValidationException; import org.alliancegenome.curation_api.exceptions.ValidationException; import org.alliancegenome.curation_api.model.entities.PredictedVariantConsequence; @@ -43,6 +44,9 @@ public Long validateGeneLevelConsequence(VepTxtDTO dto) throws ValidationExcepti Transcript transcript = null; if 
(StringUtils.isBlank(dto.getFeature())) { + if (dto.getConsequence().contains("intergenic_variant")) { + throw new KnownIssueValidationException("Intergenic variant consequences not currently supported"); + } response.addErrorMessage("feature", ValidationConstants.REQUIRED_MESSAGE); } else { SearchResponse searchResponse = transcriptService.findByField("transcriptId", dto.getFeature()); diff --git a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/VepTranscriptFmsDTOValidator.java b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/VepTranscriptFmsDTOValidator.java index cd5ed1453..6f240183b 100644 --- a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/VepTranscriptFmsDTOValidator.java +++ b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/VepTranscriptFmsDTOValidator.java @@ -14,6 +14,7 @@ import org.alliancegenome.curation_api.dao.TranscriptDAO; import org.alliancegenome.curation_api.dao.associations.variantAssociations.CuratedVariantGenomicLocationAssociationDAO; import org.alliancegenome.curation_api.enums.BackendBulkDataProvider; +import org.alliancegenome.curation_api.exceptions.KnownIssueValidationException; import org.alliancegenome.curation_api.exceptions.ObjectValidationException; import org.alliancegenome.curation_api.exceptions.ValidationException; import org.alliancegenome.curation_api.model.entities.PredictedVariantConsequence; @@ -66,6 +67,9 @@ public PredictedVariantConsequence validateTranscriptLevelConsequence(VepTxtDTO Transcript transcript = null; if (StringUtils.isBlank(dto.getFeature())) { + if (dto.getConsequence().contains("intergenic_variant")) { + throw new KnownIssueValidationException("Intergenic variant consequences not currently supported"); + } response.addErrorMessage("feature", ValidationConstants.REQUIRED_MESSAGE); } else { HashMap params = new HashMap<>(); diff --git a/src/test/java/org/alliancegenome/curation_api/VepFmsITCase.java 
b/src/test/java/org/alliancegenome/curation_api/VepFmsITCase.java index f971cfebf..e68759c39 100644 --- a/src/test/java/org/alliancegenome/curation_api/VepFmsITCase.java +++ b/src/test/java/org/alliancegenome/curation_api/VepFmsITCase.java @@ -196,4 +196,11 @@ public void vepTranscriptUpdate() throws Exception { body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].geneLevelConsequence", is(true)); } + @Test + @Order(7) + public void vepSkipIntergenic() throws Exception { + checkSkippedBulkLoad(vepTranscriptFmsBulkPostEndpoint, vepFmsTestFilePath + "US_01_unsupported_intergenic.json"); + checkSkippedBulkLoad(vepGeneFmsBulkPostEndpoint, vepFmsTestFilePath + "US_01_unsupported_intergenic.json"); + } + } diff --git a/src/test/resources/bulk/fms/11_vep/US_01_unsupported_intergenic.json b/src/test/resources/bulk/fms/11_vep/US_01_unsupported_intergenic.json new file mode 100644 index 000000000..0738f2893 --- /dev/null +++ b/src/test/resources/bulk/fms/11_vep/US_01_unsupported_intergenic.json @@ -0,0 +1,11 @@ +[ + { + "uploadedVariation": "NC_003279.8:g.1A>T", + "location": "I:1-1000", + "consequence": "intergenic_variant", + "extra": [ + "IMPACT=LOW" + ] + } +] + From 8de39cad2242296909a1dc28c7e5abdc57ef3898 Mon Sep 17 00:00:00 2001 From: markquintontulloch Date: Wed, 6 Nov 2024 15:20:21 +0000 Subject: [PATCH 025/118] Handle ambiguous codon change --- .../validation/dto/fms/VepTranscriptFmsDTOValidator.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/VepTranscriptFmsDTOValidator.java b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/VepTranscriptFmsDTOValidator.java index 6f240183b..3f82a0a9c 100644 --- a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/VepTranscriptFmsDTOValidator.java +++ b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/VepTranscriptFmsDTOValidator.java @@ 
-168,7 +168,7 @@ public PredictedVariantConsequence validateTranscriptLevelConsequence(VepTxtDTO String variantAminoAcids = null; if (StringUtils.isNotBlank(dto.getAminoAcids())) { String[] refVarAminoAcids = dto.getAminoAcids().split("/"); - if (refVarAminoAcids.length == 1 && dto.getConsequence().contains("synonymous_variant")) { + if (refVarAminoAcids.length == 1 && (Objects.equals(dto.getAminoAcids(), "X") || dto.getConsequence().contains("synonymous_variant"))) { referenceAminoAcids = refVarAminoAcids[0]; variantAminoAcids = refVarAminoAcids[0]; } else if (refVarAminoAcids.length == 2) { From 0010c238800a589ca03312bd11274703b45ad0cd Mon Sep 17 00:00:00 2001 From: Christian Pich Date: Wed, 6 Nov 2024 17:35:27 +0100 Subject: [PATCH 026/118] SCRUM-4190 GAF load (#1691) * SCRUM-4190 GAF load * refactor to harmonize the Java model with the linkML model * remove unused imports * rename flyway file * add Schemaversion annotation * rename flyway file * refactor * remove unused imports * add columns to GeneOntologyAnnotation * add columns to GeneOntologyAnnotation * add columns to GeneOntologyAnnotation * add columns to GeneOntologyAnnotation table * rename sequence * SCRUM-4190 rename service and dao class * cleanup runcleanup methods * SCRUM-4190 refactoring, adding indexes to id columns. 
* remove unneccessary semicolon * SCRUM-4190 refactor according to PR review * refactor * remove unused import * consolidate return lines --- .../cliapp/src/service/DataLoadService.js | 1 + .../dao/GeneOntologyAnnotationDAO.java | 59 +++++ .../curation_api/dao/ontology/GoTermDAO.java | 21 +- .../enums/BackendBulkLoadType.java | 1 + .../GeneOntologyAnnotationCrudInterface.java | 17 ++ .../jobs/executors/BulkLoadJobExecutor.java | 4 + .../GeneOntologyAnnotationExecutor.java | 112 +++++++++ .../entities/GeneOntologyAnnotation.java | 27 +++ .../ingest/dto/GeneOntologyAnnotationDTO.java | 19 ++ .../GeneOntologyAnnotationService.java | 137 +++++++++++ .../db/migration/v0.38.0.2__gaf-load.sql | 222 ++++++++++++++++++ 11 files changed, 619 insertions(+), 1 deletion(-) create mode 100644 src/main/java/org/alliancegenome/curation_api/dao/GeneOntologyAnnotationDAO.java create mode 100644 src/main/java/org/alliancegenome/curation_api/interfaces/crud/GeneOntologyAnnotationCrudInterface.java create mode 100644 src/main/java/org/alliancegenome/curation_api/jobs/executors/GeneOntologyAnnotationExecutor.java create mode 100644 src/main/java/org/alliancegenome/curation_api/model/entities/GeneOntologyAnnotation.java create mode 100644 src/main/java/org/alliancegenome/curation_api/model/ingest/dto/GeneOntologyAnnotationDTO.java create mode 100644 src/main/java/org/alliancegenome/curation_api/services/GeneOntologyAnnotationService.java create mode 100644 src/main/resources/db/migration/v0.38.0.2__gaf-load.sql diff --git a/src/main/cliapp/src/service/DataLoadService.js b/src/main/cliapp/src/service/DataLoadService.js index ebd513c14..3b7a120f9 100644 --- a/src/main/cliapp/src/service/DataLoadService.js +++ b/src/main/cliapp/src/service/DataLoadService.js @@ -107,6 +107,7 @@ export class DataLoadService extends BaseAuthService { 'DISEASE_ANNOTATION', 'RESOURCE_DESCRIPTOR', 'EXPRESSION_ATLAS', + 'GAF', ], BulkManualLoad: [ 'FULL_INGEST', diff --git 
a/src/main/java/org/alliancegenome/curation_api/dao/GeneOntologyAnnotationDAO.java b/src/main/java/org/alliancegenome/curation_api/dao/GeneOntologyAnnotationDAO.java new file mode 100644 index 000000000..455352f1b --- /dev/null +++ b/src/main/java/org/alliancegenome/curation_api/dao/GeneOntologyAnnotationDAO.java @@ -0,0 +1,59 @@ +package org.alliancegenome.curation_api.dao; + +import jakarta.enterprise.context.ApplicationScoped; +import jakarta.persistence.Query; +import org.alliancegenome.curation_api.dao.base.BaseSQLDAO; +import org.alliancegenome.curation_api.model.entities.GeneOntologyAnnotation; +import org.alliancegenome.curation_api.model.entities.Organization; +import org.alliancegenome.curation_api.model.ingest.dto.GeneOntologyAnnotationDTO; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +@ApplicationScoped +public class GeneOntologyAnnotationDAO extends BaseSQLDAO { + + protected GeneOntologyAnnotationDAO() { + super(GeneOntologyAnnotation.class); + } + + public GeneOntologyAnnotation persistGeneGoAssociation(GeneOntologyAnnotation gaf) { + String sql = """ + insert into GeneOntologyAnnotation (id, singlegene_id,goterm_id) + VALUES (nextval('GeneOntologyAnnotation_SEQ'), :geneID, :goID) + """; + Query query = entityManager.createNativeQuery(sql); + query.setParameter("goID", gaf.getGoTerm().getId()); + query.setParameter("geneID", gaf.getSingleGene().getId()); + query.executeUpdate(); + + sql = "select currval('GeneOntologyAnnotation_SEQ')"; + Object object = entityManager.createNativeQuery(sql).getSingleResult(); + gaf.setId((Long) object); + return gaf; + } + + public Map getAllGafIdsPerProvider(Organization sourceOrganization) { + Query query = entityManager.createNativeQuery(""" + select gga.id, be.modentityid, ot.curie + from GeneOntologyAnnotation as gga , BiologicalEntity as be, ontologyterm as ot, + species as spec + where gga.singlegene_id = be.id + and be.taxon_id = spec.taxon_id + and spec.displayname = 
:speciesName + and gga.goterm_id = ot.id + """); + query.setParameter("speciesName", sourceOrganization.getAbbreviation()); + List result = query.getResultList(); + Map map = new HashMap<>(); + result.forEach(object -> { + GeneOntologyAnnotationDTO dto = new GeneOntologyAnnotationDTO(); + dto.setGeneIdentifier((String) object[1]); + dto.setGoTermCurie((String) object[2]); + map.put((Long) object[0], dto); + }); + return map; + } + +} diff --git a/src/main/java/org/alliancegenome/curation_api/dao/ontology/GoTermDAO.java b/src/main/java/org/alliancegenome/curation_api/dao/ontology/GoTermDAO.java index 2dc7e19ec..9b8d9ab98 100644 --- a/src/main/java/org/alliancegenome/curation_api/dao/ontology/GoTermDAO.java +++ b/src/main/java/org/alliancegenome/curation_api/dao/ontology/GoTermDAO.java @@ -1,9 +1,13 @@ package org.alliancegenome.curation_api.dao.ontology; +import jakarta.enterprise.context.ApplicationScoped; +import jakarta.persistence.Query; import org.alliancegenome.curation_api.dao.base.BaseSQLDAO; import org.alliancegenome.curation_api.model.entities.ontology.GOTerm; -import jakarta.enterprise.context.ApplicationScoped; +import java.util.HashMap; +import java.util.List; +import java.util.Map; @ApplicationScoped public class GoTermDAO extends BaseSQLDAO { @@ -12,4 +16,19 @@ protected GoTermDAO() { super(GOTerm.class); } + public Map getAllGOIds() { + String sql = """ + select id, curie + from ontologyterm + where ontologytermtype = :type + """; + Query query = entityManager.createNativeQuery(sql); + query.setParameter("type", "GOTerm"); + List objects = query.getResultList(); + Map ensemblGeneMap = new HashMap<>(); + objects.forEach(object -> { + ensemblGeneMap.put((String) object[1], (Long) object[0]); + }); + return ensemblGeneMap; + } } diff --git a/src/main/java/org/alliancegenome/curation_api/enums/BackendBulkLoadType.java b/src/main/java/org/alliancegenome/curation_api/enums/BackendBulkLoadType.java index c06d1b845..ba9ea290c 100644 --- 
a/src/main/java/org/alliancegenome/curation_api/enums/BackendBulkLoadType.java +++ b/src/main/java/org/alliancegenome/curation_api/enums/BackendBulkLoadType.java @@ -31,6 +31,7 @@ public enum BackendBulkLoadType { INTERACTION_MOL("tsv"), EXPRESSION_ATLAS("tsv"), + GAF("tsv"), INTERACTION_GEN("tsv"), BIOGRID_ORCS("tsv"), PARALOGY("json"), diff --git a/src/main/java/org/alliancegenome/curation_api/interfaces/crud/GeneOntologyAnnotationCrudInterface.java b/src/main/java/org/alliancegenome/curation_api/interfaces/crud/GeneOntologyAnnotationCrudInterface.java new file mode 100644 index 000000000..53d81e7e4 --- /dev/null +++ b/src/main/java/org/alliancegenome/curation_api/interfaces/crud/GeneOntologyAnnotationCrudInterface.java @@ -0,0 +1,17 @@ +package org.alliancegenome.curation_api.interfaces.crud; + +import jakarta.ws.rs.Consumes; +import jakarta.ws.rs.Path; +import jakarta.ws.rs.Produces; +import jakarta.ws.rs.core.MediaType; +import org.alliancegenome.curation_api.interfaces.base.crud.BaseCreateControllerInterface; +import org.alliancegenome.curation_api.model.entities.CrossReference; +import org.eclipse.microprofile.openapi.annotations.tags.Tag; + +@Path("/gaf") +@Tag(name = "CRUD - GAF") +@Produces(MediaType.APPLICATION_JSON) +@Consumes(MediaType.APPLICATION_JSON) +public interface GeneOntologyAnnotationCrudInterface extends BaseCreateControllerInterface { + +} diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/executors/BulkLoadJobExecutor.java b/src/main/java/org/alliancegenome/curation_api/jobs/executors/BulkLoadJobExecutor.java index ceb7e70e5..a9174d253 100644 --- a/src/main/java/org/alliancegenome/curation_api/jobs/executors/BulkLoadJobExecutor.java +++ b/src/main/java/org/alliancegenome/curation_api/jobs/executors/BulkLoadJobExecutor.java @@ -65,6 +65,8 @@ public class BulkLoadJobExecutor { @Inject VepGeneExecutor vepGeneExecutor; @Inject ExpressionAtlasExecutor expressionAtlasExecutor; + @Inject + GeneOntologyAnnotationExecutor gafExecutor; 
@Inject BiogridOrcExecutor biogridOrcExecutor; @@ -148,6 +150,8 @@ public void process(BulkLoadFileHistory bulkLoadFileHistory, Boolean cleanUp) th vepGeneExecutor.execLoad(bulkLoadFileHistory); } else if (bulkLoadFileHistory.getBulkLoad().getBackendBulkLoadType() == BackendBulkLoadType.HTPDATASAMPLE) { htpExpressionDatasetSampleAnnotationExecutor.execLoad(bulkLoadFileHistory); + } else if (bulkLoadFileHistory.getBulkLoad().getBackendBulkLoadType() == BackendBulkLoadType.GAF) { + gafExecutor.execLoad(bulkLoadFileHistory); } else { log.info("Load: " + bulkLoadFileHistory.getBulkLoad().getName() + " for type " + bulkLoadFileHistory.getBulkLoad().getBackendBulkLoadType() + " not implemented"); throw new Exception("Load: " + bulkLoadFileHistory.getBulkLoad().getName() + " for type " + bulkLoadFileHistory.getBulkLoad().getBackendBulkLoadType() + " not implemented"); diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/executors/GeneOntologyAnnotationExecutor.java b/src/main/java/org/alliancegenome/curation_api/jobs/executors/GeneOntologyAnnotationExecutor.java new file mode 100644 index 000000000..9e1737611 --- /dev/null +++ b/src/main/java/org/alliancegenome/curation_api/jobs/executors/GeneOntologyAnnotationExecutor.java @@ -0,0 +1,112 @@ +package org.alliancegenome.curation_api.jobs.executors; + +import jakarta.enterprise.context.ApplicationScoped; +import jakarta.inject.Inject; +import lombok.extern.jbosslog.JBossLog; +import org.alliancegenome.curation_api.exceptions.ObjectUpdateException; +import org.alliancegenome.curation_api.model.entities.GeneOntologyAnnotation; +import org.alliancegenome.curation_api.model.entities.Organization; +import org.alliancegenome.curation_api.model.entities.bulkloads.BulkLoadFileHistory; +import org.alliancegenome.curation_api.model.entities.bulkloads.BulkURLLoad; +import org.alliancegenome.curation_api.model.ingest.dto.GeneOntologyAnnotationDTO; +import 
org.alliancegenome.curation_api.services.GeneOntologyAnnotationService; +import org.alliancegenome.curation_api.services.OrganizationService; +import org.alliancegenome.curation_api.util.ProcessDisplayHelper; +import org.apache.commons.lang3.StringUtils; + +import java.io.BufferedReader; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStreamReader; +import java.util.*; +import java.util.stream.Stream; +import java.util.zip.GZIPInputStream; + +@JBossLog +@ApplicationScoped +public class GeneOntologyAnnotationExecutor extends LoadFileExecutor { + + @Inject + GeneOntologyAnnotationService service; + @Inject + OrganizationService organizationService; + + public void execLoad(BulkLoadFileHistory bulkLoadFileHistory) throws IOException { + + String url = ((BulkURLLoad) bulkLoadFileHistory.getBulkLoad()).getBulkloadUrl(); + + String[] tok = url.split("/"); + String orgAbbrev = tok[tok.length - 1].toUpperCase(); + String abbr = orgAbbrev.split("\\.")[0]; + Organization organization = organizationService.getByAbbr(abbr).getEntity(); + + // curie, List + Map> uiMap = new HashMap<>(); + Set orgIDs = new HashSet<>(); + GZIPInputStream stream = new GZIPInputStream(new FileInputStream(bulkLoadFileHistory.getBulkLoadFile().getLocalFilePath())); + try (BufferedReader br = new BufferedReader(new InputStreamReader(stream))) { + Stream lines = br.lines(); + + // Process each line + lines.filter(s -> !s.startsWith("!") && StringUtils.isNotEmpty(s)).forEach(s -> { + String[] token = s.split("\t"); + String orgID = token[0]; + orgIDs.add(orgID); + String modID = token[1]; + String goID = token[4]; + if (abbr.equals(orgID)) { + List goIDs = uiMap.computeIfAbsent(modID, list -> new ArrayList<>()); + goIDs.add(goID); + } + }); + + } catch (IOException e) { + e.printStackTrace(); + } + + String name = bulkLoadFileHistory.getBulkLoad().getName(); + + Map gafMap = service.getGafMap(organization); + List gafIdsBefore = new 
ArrayList<>(gafMap.keySet().stream().toList()); + gafIdsBefore.removeIf(Objects::isNull); + + List geneGoIdsLoaded = new ArrayList<>(); + ProcessDisplayHelper ph = new ProcessDisplayHelper(); + ph.addDisplayHandler(loadProcessDisplayService); + List dtos = uiMap.entrySet() + .stream() + .map(entry -> entry.getValue().stream().map(goID -> { + GeneOntologyAnnotationDTO dto = new GeneOntologyAnnotationDTO(); + dto.setGeneIdentifier(abbr + ":" + entry.getKey()); + dto.setGoTermCurie(goID); + return dto; + }).toList()).flatMap(Collection::stream).toList(); + + ph.startProcess(name, dtos.size()); + for (GeneOntologyAnnotationDTO modID : dtos) { + Long geneID = service.getGeneID(modID, abbr); + if (geneID != null) { + GeneOntologyAnnotation newGaf = service.insert(modID, abbr).getEntity(); + if (newGaf != null) { + geneGoIdsLoaded.add(newGaf.getId()); + bulkLoadFileHistory.incrementCompleted(); + } else { + bulkLoadFileHistory.incrementSkipped(); + } + } else { + addException(bulkLoadFileHistory, new ObjectUpdateException.ObjectUpdateExceptionData(modID, "Could not find gene " + modID.getGeneIdentifier(), null)); + bulkLoadFileHistory.incrementFailed(); + } + ph.progressProcess(); + } + bulkLoadFileHistory.setTotalCount(dtos.size()); + runCleanup(service, bulkLoadFileHistory, abbr, gafIdsBefore, geneGoIdsLoaded, "GAF Load"); + ph.finishProcess(); + updateHistory(bulkLoadFileHistory); + + bulkLoadFileHistory.finishLoad(); + updateHistory(bulkLoadFileHistory); + updateExceptions(bulkLoadFileHistory); + } + +} diff --git a/src/main/java/org/alliancegenome/curation_api/model/entities/GeneOntologyAnnotation.java b/src/main/java/org/alliancegenome/curation_api/model/entities/GeneOntologyAnnotation.java new file mode 100644 index 000000000..2751d1bbd --- /dev/null +++ b/src/main/java/org/alliancegenome/curation_api/model/entities/GeneOntologyAnnotation.java @@ -0,0 +1,27 @@ +package org.alliancegenome.curation_api.model.entities; + +import 
com.fasterxml.jackson.annotation.JsonTypeName; +import jakarta.persistence.Entity; +import jakarta.persistence.ManyToOne; +import lombok.Data; +import lombok.EqualsAndHashCode; +import org.alliancegenome.curation_api.constants.LinkMLSchemaConstants; +import org.alliancegenome.curation_api.interfaces.AGRCurationSchemaVersion; +import org.alliancegenome.curation_api.model.entities.base.AuditedObject; +import org.alliancegenome.curation_api.model.entities.ontology.GOTerm; +import org.eclipse.microprofile.openapi.annotations.media.Schema; + +@Entity +@Data +@EqualsAndHashCode +@Schema(name = "Gene_Disease_Annotation", description = "Annotation class representing a gene disease annotation") +@JsonTypeName("GeneOntologyAnnotation") +@AGRCurationSchemaVersion(min = "2.8.0", max = LinkMLSchemaConstants.LATEST_RELEASE) +public class GeneOntologyAnnotation extends AuditedObject { + + @ManyToOne + private GOTerm goTerm; + @ManyToOne + private Gene singleGene; + +} diff --git a/src/main/java/org/alliancegenome/curation_api/model/ingest/dto/GeneOntologyAnnotationDTO.java b/src/main/java/org/alliancegenome/curation_api/model/ingest/dto/GeneOntologyAnnotationDTO.java new file mode 100644 index 000000000..56c21d54d --- /dev/null +++ b/src/main/java/org/alliancegenome/curation_api/model/ingest/dto/GeneOntologyAnnotationDTO.java @@ -0,0 +1,19 @@ +package org.alliancegenome.curation_api.model.ingest.dto; + +import lombok.Data; +import lombok.EqualsAndHashCode; +import org.alliancegenome.curation_api.constants.LinkMLSchemaConstants; +import org.alliancegenome.curation_api.interfaces.AGRCurationSchemaVersion; +import org.alliancegenome.curation_api.model.entities.Annotation; + +@Data +@EqualsAndHashCode(callSuper = false) +@AGRCurationSchemaVersion(min = "2.8.0", max = LinkMLSchemaConstants.LATEST_RELEASE, dependencies = {Annotation.class}) +public class GeneOntologyAnnotationDTO { + + private String geneIdentifier; + + private String goTermCurie; + + +} diff --git 
a/src/main/java/org/alliancegenome/curation_api/services/GeneOntologyAnnotationService.java b/src/main/java/org/alliancegenome/curation_api/services/GeneOntologyAnnotationService.java new file mode 100644 index 000000000..90ff6d504 --- /dev/null +++ b/src/main/java/org/alliancegenome/curation_api/services/GeneOntologyAnnotationService.java @@ -0,0 +1,137 @@ +package org.alliancegenome.curation_api.services; + +import jakarta.annotation.PostConstruct; +import jakarta.enterprise.context.RequestScoped; +import jakarta.inject.Inject; +import jakarta.transaction.Transactional; +import org.alliancegenome.curation_api.auth.AuthenticatedUser; +import org.alliancegenome.curation_api.dao.GeneDAO; +import org.alliancegenome.curation_api.dao.GeneOntologyAnnotationDAO; +import org.alliancegenome.curation_api.dao.SpeciesDAO; +import org.alliancegenome.curation_api.dao.ontology.GoTermDAO; +import org.alliancegenome.curation_api.model.entities.*; +import org.alliancegenome.curation_api.model.entities.ontology.GOTerm; +import org.alliancegenome.curation_api.model.ingest.dto.GeneOntologyAnnotationDTO; +import org.alliancegenome.curation_api.response.ObjectResponse; +import org.alliancegenome.curation_api.services.base.BaseEntityCrudService; +import org.alliancegenome.curation_api.services.validation.DataProviderValidator; + +import java.util.HashMap; +import java.util.Map; + +@RequestScoped +public class GeneOntologyAnnotationService extends BaseEntityCrudService { + + private Species species; + public static final String RESOURCE_DESCRIPTOR_PREFIX = "ENSEMBL"; + public static final String RESOURCE_DESCRIPTOR_PAGE_NAME = "default"; + // + Map accessionGeneMap = new HashMap<>(); + Map goTermMap = new HashMap<>(); + HashMap dataProviderMap = new HashMap<>(); + private Map gafMap = new HashMap<>(); + + + + @Inject + @AuthenticatedUser + protected Person authenticatedPerson; + @Inject + GeneOntologyAnnotationDAO gafDAO; + @Inject + GeneDAO geneDAO; + @Inject + SpeciesDAO speciesDAO; + 
@Inject + GoTermDAO goTermDAO; + @Inject + DataProviderValidator dataProviderValidator; + + @Override + @PostConstruct + protected void init() { + setSQLDao(gafDAO); + } + + @Transactional + public ObjectResponse insert(GeneOntologyAnnotationDTO uiEntity, String orgAbbreviation) { + // if record exists skip over it + if (gafMap.values().stream().anyMatch(gafDTO -> gafDTO.equals(uiEntity))) { + for (Map.Entry entry : gafMap.entrySet()) { + if (entry.getValue().equals(uiEntity)) { + GeneOntologyAnnotation gaf = new GeneOntologyAnnotation(); + gaf.setId(entry.getKey()); + ObjectResponse objectObjectResponse = new ObjectResponse<>(); + objectObjectResponse.setEntity(gaf); + return objectObjectResponse; + } + } + } + // convert curies into IDs + + Long geneID = getGeneID(uiEntity, orgAbbreviation); + GeneOntologyAnnotation gaf = new GeneOntologyAnnotation(); + Gene gene = new Gene(); + gene.setId(geneID); + gaf.setSingleGene(gene); + Long goID = getGOID(uiEntity); + GOTerm term = new GOTerm(); + term.setId(goID); + gaf.setGoTerm(term); + GeneOntologyAnnotation gafNew = gafDAO.persistGeneGoAssociation(gaf); + addNewRecordToMap(gafNew, uiEntity); + return new ObjectResponse<>(gafNew); + } + + private void addNewRecordToMap(GeneOntologyAnnotation gafNew, GeneOntologyAnnotationDTO uiEntity) { + GeneOntologyAnnotationDTO dto = new GeneOntologyAnnotationDTO(); + dto.setGeneIdentifier(uiEntity.getGeneIdentifier()); + dto.setGoTermCurie(uiEntity.getGoTermCurie()); + gafMap.put(gafNew.getId(), dto); + } + + public Long getGeneID(GeneOntologyAnnotationDTO uiEntity, String orgAbbreviation) { + if (accessionGeneMap.isEmpty()) { + accessionGeneMap = geneDAO.getAllGeneIdsPerSpecies(getSpecies(orgAbbreviation)); + } + Long geneID = accessionGeneMap.get(uiEntity.getGeneIdentifier()); + return geneID; + } + + private Long getGOID(GeneOntologyAnnotationDTO uiEntity) { + if (goTermMap.isEmpty()) { + goTermMap = goTermDAO.getAllGOIds(); + } + Long goID = 
goTermMap.get(uiEntity.getGoTermCurie()); + return goID; + } + + private Species getSpecies(String orgAbbreviation) { + if (species != null) { + return species; + } + Map map = new HashMap<>(); + map.put("displayName", orgAbbreviation); + species = speciesDAO.findByParams(map).getSingleResult(); + return species; + } + + public ObjectResponse validate(DataProvider uiEntity) { + return dataProviderValidator.validateDataProvider(uiEntity, null, true); + } + + + public Map getGafMap(Organization organization) { + if (gafMap.size() > 0) { + return gafMap; + } + gafMap = gafDAO.getAllGafIdsPerProvider(organization); + return gafMap; + } + + @Transactional + public GeneOntologyAnnotation deprecateOrDelete(Long id, Boolean throwApiError, String requestSource, Boolean deprecate) { + return gafDAO.remove(id); + } + +} diff --git a/src/main/resources/db/migration/v0.38.0.2__gaf-load.sql b/src/main/resources/db/migration/v0.38.0.2__gaf-load.sql new file mode 100644 index 000000000..3f8d1ba2e --- /dev/null +++ b/src/main/resources/db/migration/v0.38.0.2__gaf-load.sql @@ -0,0 +1,222 @@ +insert into bulkloadgroup (id, name, internal, obsolete, dbdatecreated) +values (nextval('bulkloadgroup_seq'), 'GAF Load', false, false, now()); + +insert into bulkload (id, backendbulkloadtype, name, internal, obsolete, group_id, dbdatecreated, bulkloadstatus) +select nextval('bulkload_seq'), + 'GAF', + 'ZFIN GAF', + false, + false, + id, + now(), + 'STOPPED' +from bulkloadgroup +where name = 'GAF Load'; + +insert into bulkload (id, backendbulkloadtype, name, internal, obsolete, group_id, dbdatecreated, bulkloadstatus) +select nextval('bulkload_seq'), + 'GAF', + 'SGD GAF', + false, + false, + id, + now(), + 'STOPPED' +from bulkloadgroup +where name = 'GAF Load'; + +insert into bulkload (id, backendbulkloadtype, name, internal, obsolete, group_id, dbdatecreated, bulkloadstatus) +select nextval('bulkload_seq'), + 'GAF', + 'WB GAF', + false, + false, + id, + now(), + 'STOPPED' +from bulkloadgroup 
+where name = 'GAF Load'; + +insert into bulkload (id, backendbulkloadtype, name, internal, obsolete, group_id, dbdatecreated, bulkloadstatus) +select nextval('bulkload_seq'), + 'GAF', + 'MGI GAF', + false, + false, + id, + now(), + 'STOPPED' +from bulkloadgroup +where name = 'GAF Load'; + +insert into bulkload (id, backendbulkloadtype, name, internal, obsolete, group_id, dbdatecreated, bulkloadstatus) +select nextval('bulkload_seq'), + 'GAF', + 'FB GAF', + false, + false, + id, + now(), + 'STOPPED' +from bulkloadgroup +where name = 'GAF Load'; + +insert into bulkload (id, backendbulkloadtype, name, internal, obsolete, group_id, dbdatecreated, bulkloadstatus) +select nextval('bulkload_seq'), + 'GAF', + 'RGD GAF', + false, + false, + id, + now(), + 'STOPPED' +from bulkloadgroup +where name = 'GAF Load'; + +insert into bulkload (id, backendbulkloadtype, name, internal, obsolete, group_id, dbdatecreated, bulkloadstatus) +select nextval('bulkload_seq'), + 'GAF', + 'HUMAN GAF', + false, + false, + id, + now(), + 'STOPPED' +from bulkloadgroup +where name = 'GAF Load'; + +insert into bulkload (id, backendbulkloadtype, name, internal, obsolete, group_id, dbdatecreated, bulkloadstatus) +select nextval('bulkload_seq'), + 'GAF', + 'XB GAF', + false, + false, + id, + now(), + 'STOPPED' +from bulkloadgroup +where name = 'GAF Load'; + +INSERT INTO bulkscheduledload (id, cronschedule, scheduleactive) +SELECT id, '0 0 22 ? * SUN-THU', true +FROM bulkload +WHERE name = 'ZFIN GAF'; + +INSERT INTO bulkscheduledload (id, cronschedule, scheduleactive) +SELECT id, '0 0 22 ? * SUN-THU', true +FROM bulkload +WHERE name = 'SGD GAF'; + +INSERT INTO bulkscheduledload (id, cronschedule, scheduleactive) +SELECT id, '0 0 22 ? * SUN-THU', true +FROM bulkload +WHERE name = 'MGI GAF'; + +INSERT INTO bulkscheduledload (id, cronschedule, scheduleactive) +SELECT id, '0 0 22 ? 
* SUN-THU', true +FROM bulkload +WHERE name = 'WB GAF'; + +INSERT INTO bulkscheduledload (id, cronschedule, scheduleactive) +SELECT id, '0 0 22 ? * SUN-THU', true +FROM bulkload +WHERE name = 'FB GAF'; + +INSERT INTO bulkscheduledload (id, cronschedule, scheduleactive) +SELECT id, '0 0 22 ? * SUN-THU', true +FROM bulkload +WHERE name = 'RGD GAF'; + +INSERT INTO bulkscheduledload (id, cronschedule, scheduleactive) +SELECT id, '0 0 22 ? * SUN-THU', true +FROM bulkload +WHERE name = 'HUMAN GAF'; + +INSERT INTO bulkscheduledload (id, cronschedule, scheduleactive) +SELECT id, '0 0 22 ? * SUN-THU', true +FROM bulkload +WHERE name = 'XB GAF'; + +insert into bulkurlload (id, bulkloadurl) +select id, 'http://current.geneontology.org/annotations/zfin.gaf.gz' +from bulkload +where name = 'ZFIN GAF'; + +insert into bulkurlload (id, bulkloadurl) +select id, 'http://current.geneontology.org/annotations/sgd.gaf.gz' +from bulkload +where name = 'SGD GAF'; + +insert into bulkurlload (id, bulkloadurl) +select id, 'http://current.geneontology.org/annotations/wb.gaf.gz' +from bulkload +where name = 'WB GAF'; + +insert into bulkurlload (id, bulkloadurl) +select id, 'http://current.geneontology.org/annotations/mgi.gaf.gz' +from bulkload +where name = 'MGI GAF'; + +insert into bulkurlload (id, bulkloadurl) +select id, 'http://current.geneontology.org/annotations/fb.gaf.gz' +from bulkload +where name = 'FB GAF'; + +insert into bulkurlload (id, bulkloadurl) +select id, 'http://current.geneontology.org/annotations/rgd.gaf.gz' +from bulkload +where name = 'RGD GAF'; + +insert into bulkurlload (id, bulkloadurl) +select id, 'https://www.ebi.ac.uk/gxa/species/Homo_sapiens/sitemap.xml?allEntries=true' +from bulkload +where name = 'HUMAN GAF'; + +insert into bulkurlload (id, bulkloadurl) +select id, 'http://current.geneontology.org/annotations/xenbase.gaf.gz' +from bulkload +where name = 'XB GAF'; + +create table GeneOntologyAnnotation +( + id bigint PRIMARY KEY, + singlegene_id bigint, + 
goterm_id bigint, + dateCreated timestamp default now(), + dateUpdated timestamp, + dbDateUpdated timestamp, + dbDateCreated timestamp, + createdBy_id bigint, + updatedBy_id bigint, + internal boolean DEFAULT false, + obsolete boolean DEFAULT false +); + +CREATE SEQUENCE public.GeneOntologyAnnotation_SEQ START WITH 1 INCREMENT BY 50 NO MINVALUE NO MAXVALUE CACHE 1; + +ALTER TABLE GeneOntologyAnnotation + ADD CONSTRAINT GeneOntologyAnnotation_gene_fk + FOREIGN KEY (singlegene_id) REFERENCES biologicalentity (id); + +ALTER TABLE GeneOntologyAnnotation + ADD CONSTRAINT GeneOntologyAnnotation_goterm_fk + FOREIGN KEY (goterm_id) REFERENCES ontologyterm (id); + +ALTER TABLE GeneOntologyAnnotation + ADD CONSTRAINT GeneOntologyAnnotation_createdBy_fk + FOREIGN KEY (createdBy_id) REFERENCES person (id); + +ALTER TABLE GeneOntologyAnnotation + ADD CONSTRAINT GeneOntologyAnnotation_updatedBy_fk + FOREIGN KEY (updatedBy_id) REFERENCES person (id); + +ALTER TABLE GeneOntologyAnnotation + ADD UNIQUE (singlegene_id, goterm_id); + +CREATE INDEX GeneOntologyAnnotation_createdby_index ON GeneOntologyAnnotation USING btree (createdby_id); + +CREATE INDEX GeneOntologyAnnotation_updatedby_index ON GeneOntologyAnnotation USING btree (updatedBy_id); + +CREATE INDEX GeneOntologyAnnotation_singlegeneId_index ON GeneOntologyAnnotation USING btree (singlegene_id); + +CREATE INDEX GeneOntologyAnnotation_gotermId_index ON GeneOntologyAnnotation USING btree (goterm_id); From 836e111d89c782551c209087b81d9c5abe3dd9c3 Mon Sep 17 00:00:00 2001 From: markquintontulloch Date: Wed, 6 Nov 2024 17:51:30 +0000 Subject: [PATCH 027/118] Handle whitespace an lower case ref/alt sequences --- .../services/helpers/variants/HgvsIdentifierHelper.java | 4 ++-- .../services/validation/dto/fms/VariantFmsDTOValidator.java | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/main/java/org/alliancegenome/curation_api/services/helpers/variants/HgvsIdentifierHelper.java 
b/src/main/java/org/alliancegenome/curation_api/services/helpers/variants/HgvsIdentifierHelper.java index 389ce0d88..bc2f04e05 100644 --- a/src/main/java/org/alliancegenome/curation_api/services/helpers/variants/HgvsIdentifierHelper.java +++ b/src/main/java/org/alliancegenome/curation_api/services/helpers/variants/HgvsIdentifierHelper.java @@ -20,12 +20,12 @@ public static String getHgvsIdentifier(VariantFmsDTO dto) { String refSeq = ""; if (StringUtils.isNotBlank(dto.getGenomicReferenceSequence()) && !Objects.equals(dto.getGenomicReferenceSequence(), "N/A")) { - refSeq = dto.getGenomicReferenceSequence(); + refSeq = StringUtils.deleteWhitespace(dto.getGenomicReferenceSequence().toUpperCase()); } String varSeq = ""; if (StringUtils.isNotBlank(dto.getGenomicVariantSequence()) && !Objects.equals(dto.getGenomicVariantSequence(), "N/A")) { - varSeq = dto.getGenomicVariantSequence(); + varSeq = StringUtils.deleteWhitespace(dto.getGenomicVariantSequence().toUpperCase()); } String chrAccession = ""; diff --git a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/VariantFmsDTOValidator.java b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/VariantFmsDTOValidator.java index 5ae14da84..d8594b880 100644 --- a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/VariantFmsDTOValidator.java +++ b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/VariantFmsDTOValidator.java @@ -258,13 +258,13 @@ public void validateCuratedVariantGenomicLocationAssociation(VariantFmsDTO dto, association.setRelation(vocabularyTermService.getTermInVocabulary(VocabularyConstants.LOCATION_ASSOCIATION_RELATION_VOCABULARY, "located_on").getEntity()); if (StringUtils.isNotBlank(dto.getGenomicReferenceSequence()) && !Objects.equals(dto.getGenomicReferenceSequence(), "N/A")) { - association.setReferenceSequence(dto.getGenomicReferenceSequence()); + 
association.setReferenceSequence(StringUtils.deleteWhitespace(dto.getGenomicReferenceSequence())); } else { association.setReferenceSequence(null); } if (StringUtils.isNotBlank(dto.getGenomicVariantSequence()) && !Objects.equals(dto.getGenomicVariantSequence(), "N/A")) { - association.setVariantSequence(dto.getGenomicVariantSequence()); + association.setVariantSequence(StringUtils.deleteWhitespace(dto.getGenomicVariantSequence())); } else { association.setVariantSequence(null); } From fc3ad79e297a708ca1d647471db4491c5617f181 Mon Sep 17 00:00:00 2001 From: markquintontulloch Date: Wed, 6 Nov 2024 17:52:21 +0000 Subject: [PATCH 028/118] Bump LinkML version --- .../curation_api/constants/LinkMLSchemaConstants.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/alliancegenome/curation_api/constants/LinkMLSchemaConstants.java b/src/main/java/org/alliancegenome/curation_api/constants/LinkMLSchemaConstants.java index 4c54350c0..92cb9891d 100644 --- a/src/main/java/org/alliancegenome/curation_api/constants/LinkMLSchemaConstants.java +++ b/src/main/java/org/alliancegenome/curation_api/constants/LinkMLSchemaConstants.java @@ -5,7 +5,7 @@ public class LinkMLSchemaConstants { private LinkMLSchemaConstants() { // Hidden from view, as it is a utility class } - public static final String LATEST_RELEASE = "2.8.0"; + public static final String LATEST_RELEASE = "2.8.1"; public static final String MIN_ONTOLOGY_RELEASE = "1.2.4"; public static final String MAX_ONTOLOGY_RELEASE = LATEST_RELEASE; From b8b7595c0fe55e94fa4f7fea437a9acec7b08bca Mon Sep 17 00:00:00 2001 From: markquintontulloch Date: Wed, 6 Nov 2024 17:58:32 +0000 Subject: [PATCH 029/118] Relax amino acid criteria --- .../validation/dto/fms/VepTranscriptFmsDTOValidator.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/VepTranscriptFmsDTOValidator.java 
b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/VepTranscriptFmsDTOValidator.java index 3f82a0a9c..523d0272d 100644 --- a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/VepTranscriptFmsDTOValidator.java +++ b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/VepTranscriptFmsDTOValidator.java @@ -168,7 +168,7 @@ public PredictedVariantConsequence validateTranscriptLevelConsequence(VepTxtDTO String variantAminoAcids = null; if (StringUtils.isNotBlank(dto.getAminoAcids())) { String[] refVarAminoAcids = dto.getAminoAcids().split("/"); - if (refVarAminoAcids.length == 1 && (Objects.equals(dto.getAminoAcids(), "X") || dto.getConsequence().contains("synonymous_variant"))) { + if (refVarAminoAcids.length == 1) { referenceAminoAcids = refVarAminoAcids[0]; variantAminoAcids = refVarAminoAcids[0]; } else if (refVarAminoAcids.length == 2) { From 6590a75885b5589b353fff4df1a9db05b782f741 Mon Sep 17 00:00:00 2001 From: markquintontulloch Date: Wed, 6 Nov 2024 18:12:14 +0000 Subject: [PATCH 030/118] Update test file --- .../resources/bulk/fms/11_vep/IV_07_invalid_amino_acids.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/resources/bulk/fms/11_vep/IV_07_invalid_amino_acids.json b/src/test/resources/bulk/fms/11_vep/IV_07_invalid_amino_acids.json index bfa4a2564..a430c671d 100644 --- a/src/test/resources/bulk/fms/11_vep/IV_07_invalid_amino_acids.json +++ b/src/test/resources/bulk/fms/11_vep/IV_07_invalid_amino_acids.json @@ -8,7 +8,7 @@ "cdnaPosition": "3-800", "cdsPosition": "1-600", "proteinPosition": "246-?", - "aminoAcids": "TI", + "aminoAcids": "T/I/A", "codons": "aCc/aTc", "extra": [ "IMPACT=MODERATE", From 1c91bf1dcf13ff2233bad016cccc9e044d99943e Mon Sep 17 00:00:00 2001 From: markquintontulloch Date: Thu, 7 Nov 2024 13:35:52 +0000 Subject: [PATCH 031/118] GeneGenomicLocationAssociation loading --- .../dataLoadsPage/DataLoadsComponent.js | 2 +- 
.../cliapp/src/service/DataLoadService.js | 3 +- .../constants/EntityFieldConstants.java | 5 + .../curation_api/constants/Gff3Constants.java | 5 + ...omicLocationAssociationCrudController.java | 36 +++++ .../GeneGenomicLocationAssociationDAO.java | 15 ++ .../enums/BackendBulkLoadType.java | 1 + ...nomicLocationAssociationCrudInterface.java | 32 +++++ .../jobs/executors/BulkLoadJobExecutor.java | 4 + .../jobs/executors/gff/Gff3GeneExecutor.java | 130 ++++++++++++++++++ .../curation_api/model/entities/Gene.java | 16 ++- .../GeneGenomicLocationAssociation.java | 82 +++++++++++ .../curation_api/services/Gff3Service.java | 45 +++++- ...GeneGenomicLocationAssociationService.java | 124 +++++++++++++++++ .../helpers/gff3/Gff3AttributesHelper.java | 15 ++ .../validation/dto/Gff3DtoValidator.java | 30 ++++ ...0.3__gene_genomic_location_association.sql | 108 +++++++++++++++ .../curation_api/Gff3BulkUploadITCase.java | 32 ++++- .../bulk/fms/08_gff_data/GFF_04_gene.json | 15 ++ 19 files changed, 693 insertions(+), 7 deletions(-) create mode 100644 src/main/java/org/alliancegenome/curation_api/controllers/crud/geneAssociations/GeneGenomicLocationAssociationCrudController.java create mode 100644 src/main/java/org/alliancegenome/curation_api/dao/associations/geneAssociations/GeneGenomicLocationAssociationDAO.java create mode 100644 src/main/java/org/alliancegenome/curation_api/interfaces/crud/geneAssociations/GeneGenomicLocationAssociationCrudInterface.java create mode 100644 src/main/java/org/alliancegenome/curation_api/jobs/executors/gff/Gff3GeneExecutor.java create mode 100644 src/main/java/org/alliancegenome/curation_api/model/entities/associations/geneAssociations/GeneGenomicLocationAssociation.java create mode 100644 src/main/java/org/alliancegenome/curation_api/services/associations/geneAssociations/GeneGenomicLocationAssociationService.java create mode 100644 src/main/resources/db/migration/v0.38.0.3__gene_genomic_location_association.sql create mode 100644 
src/test/resources/bulk/fms/08_gff_data/GFF_04_gene.json diff --git a/src/main/cliapp/src/containers/dataLoadsPage/DataLoadsComponent.js b/src/main/cliapp/src/containers/dataLoadsPage/DataLoadsComponent.js index 936baa078..960960ce3 100644 --- a/src/main/cliapp/src/containers/dataLoadsPage/DataLoadsComponent.js +++ b/src/main/cliapp/src/containers/dataLoadsPage/DataLoadsComponent.js @@ -708,7 +708,7 @@ export const DataLoadsComponent = () => { }; const exemptTypes = (loadType) => { - return loadType === 'GFF_EXON' || loadType === 'GFF_TRANSCRIPT' || loadType === 'GFF_CDS'; + return loadType === 'GFF_EXON' || loadType === 'GFF_TRANSCRIPT' || loadType === 'GFF_CDS' || loadType === 'GFF_GENE'; }; const fileWithinSchemaRange = (fileVersion, loadType) => { diff --git a/src/main/cliapp/src/service/DataLoadService.js b/src/main/cliapp/src/service/DataLoadService.js index 3b7a120f9..d150ad6d6 100644 --- a/src/main/cliapp/src/service/DataLoadService.js +++ b/src/main/cliapp/src/service/DataLoadService.js @@ -83,8 +83,9 @@ export class DataLoadService extends BaseAuthService { BulkFMSLoad: [ 'BIOGRID-ORCS', 'GFF', // This needs to be removed at some point - 'GFF_EXON', 'GFF_CDS', + 'GFF_EXON', + 'GFF_GENE', 'GFF_TRANSCRIPT', 'HTPDATASET', 'HTPDATASAMPLE', diff --git a/src/main/java/org/alliancegenome/curation_api/constants/EntityFieldConstants.java b/src/main/java/org/alliancegenome/curation_api/constants/EntityFieldConstants.java index 7eb0a89cd..49c1cc358 100644 --- a/src/main/java/org/alliancegenome/curation_api/constants/EntityFieldConstants.java +++ b/src/main/java/org/alliancegenome/curation_api/constants/EntityFieldConstants.java @@ -19,6 +19,7 @@ private EntityFieldConstants() { public static final String CODING_SEQUENCE_ASSOCIATION_SUBJECT = "codingSequenceAssociationSubject"; public static final String CONSTRUCT_ASSOCIATION_SUBJECT = "constructAssociationSubject"; public static final String EXON_ASSOCIATION_SUBJECT = "exonAssociationSubject"; + public static final 
String GENE_ASSOCIATION_SUBJECT = "geneAssociationSubject"; public static final String VARIANT_ASSOCIATION_SUBJECT = "variantAssociationSubject"; public static final String SQTR_ASSOCIATION_SUBJECT = "sequenceTargetingReagentAssociationSubject"; public static final String TRANSCRIPT_ASSOCIATION_SUBJECT = "transcriptAssociationSubject"; @@ -31,23 +32,27 @@ private EntityFieldConstants() { public static final String CODING_SEQUENCE_ASSOCIATION_SUBJECT_DATA_PROVIDER = CODING_SEQUENCE_ASSOCIATION_SUBJECT + "." + DATA_PROVIDER; public static final String CONSTRUCT_ASSOCIATION_SUBJECT_DATA_PROVIDER = CONSTRUCT_ASSOCIATION_SUBJECT + "." + DATA_PROVIDER; public static final String EXON_ASSOCIATION_SUBJECT_DATA_PROVIDER = EXON_ASSOCIATION_SUBJECT + "." + DATA_PROVIDER; + public static final String GENE_ASSOCIATION_SUBJECT_DATA_PROVIDER = GENE_ASSOCIATION_SUBJECT + "." + DATA_PROVIDER; public static final String VARIANT_ASSOCIATION_SUBJECT_DATA_PROVIDER = VARIANT_ASSOCIATION_SUBJECT + "." + DATA_PROVIDER; public static final String SQTR_ASSOCIATION_SUBJECT_DATA_PROVIDER = SQTR_ASSOCIATION_SUBJECT + "." + DATA_PROVIDER; public static final String TRANSCRIPT_ASSOCIATION_SUBJECT_DATA_PROVIDER = TRANSCRIPT_ASSOCIATION_SUBJECT + "." + DATA_PROVIDER; public static final String CODING_SEQUENCE_ASSOCIATION_SUBJECT_TAXON = CODING_SEQUENCE_ASSOCIATION_SUBJECT + "." + TAXON; public static final String EXON_ASSOCIATION_SUBJECT_TAXON = EXON_ASSOCIATION_SUBJECT + "." + TAXON; + public static final String GENE_ASSOCIATION_SUBJECT_TAXON = GENE_ASSOCIATION_SUBJECT + "." + TAXON; public static final String TRANSCRIPT_ASSOCIATION_SUBJECT_TAXON = TRANSCRIPT_ASSOCIATION_SUBJECT + "." + TAXON; public static final String VARIANT_ASSOCIATION_SUBJECT_TAXON = VARIANT_ASSOCIATION_SUBJECT + "." 
+ TAXON; public static final String GENOMIC_LOCATION_ASSOCIATION_OBJECT = "GenomicLocationAssociationObject"; public static final String CODING_SEQUENCE_GENOMIC_LOCATION_ASSOCIATION_OBJECT = "codingSequence" + GENOMIC_LOCATION_ASSOCIATION_OBJECT; public static final String EXON_GENOMIC_LOCATION_ASSOCIATION_OBJECT = "exon" + GENOMIC_LOCATION_ASSOCIATION_OBJECT; + public static final String GENE_GENOMIC_LOCATION_ASSOCIATION_OBJECT = "gene" + GENOMIC_LOCATION_ASSOCIATION_OBJECT; public static final String TRANSCRIPT_GENOMIC_LOCATION_ASSOCIATION_OBJECT = "transcript" + GENOMIC_LOCATION_ASSOCIATION_OBJECT; public static final String VARIANT_GENOMIC_LOCATION_ASSOCIATION_OBJECT = "variant" + GENOMIC_LOCATION_ASSOCIATION_OBJECT; public static final String CODING_SEQUENCE_GENOMIC_LOCATION_ASSOCIATION_OBJECT_ASSEMBLY = "codingSequence" + GENOMIC_LOCATION_ASSOCIATION_OBJECT + "." + ASSEMBLY; public static final String EXON_GENOMIC_LOCATION_ASSOCIATION_OBJECT_ASSEMBLY = "exon" + GENOMIC_LOCATION_ASSOCIATION_OBJECT + "." + ASSEMBLY; public static final String TRANSCRIPT_GENOMIC_LOCATION_ASSOCIATION_OBJECT_ASSEMBLY = "transcript" + GENOMIC_LOCATION_ASSOCIATION_OBJECT + "." + ASSEMBLY; + public static final String GENE_GENOMIC_LOCATION_ASSOCIATION_OBJECT_ASSEMBLY = "gene" + GENOMIC_LOCATION_ASSOCIATION_OBJECT + "." + ASSEMBLY; public static final String SUBJECT_GENE_DATA_PROVIDER = "subjectGene." + DATA_PROVIDER; public static final String SUBJECT_GENE_TAXON = "subjectGene." 
+ TAXON; diff --git a/src/main/java/org/alliancegenome/curation_api/constants/Gff3Constants.java b/src/main/java/org/alliancegenome/curation_api/constants/Gff3Constants.java index b613cf23b..06acbcd48 100644 --- a/src/main/java/org/alliancegenome/curation_api/constants/Gff3Constants.java +++ b/src/main/java/org/alliancegenome/curation_api/constants/Gff3Constants.java @@ -8,6 +8,11 @@ private Gff3Constants() { throw new UnsupportedOperationException("This is a utility class and cannot be instantiated"); } + public static final List GENE_TYPES = List.of( + "gene", "pseudogene", "lincRNA_gene", "lncRNA_gene", "ncRNA_gene", "rRNA_gene", + "snRNA_gene", "snoRNA_gene", "tRNA_gene", "telomerase_RNA_gene", "transposable_element_gene" + ); + public static final List TRANSCRIPT_TYPES = List.of( "mRNA", "ncRNA", "piRNA", "lincRNA", "miRNA", "pre_miRNA", "snoRNA", "lncRNA", "tRNA", "snRNA", "rRNA", "antisense_RNA", "C_gene_segment", "V_gene_segment", diff --git a/src/main/java/org/alliancegenome/curation_api/controllers/crud/geneAssociations/GeneGenomicLocationAssociationCrudController.java b/src/main/java/org/alliancegenome/curation_api/controllers/crud/geneAssociations/GeneGenomicLocationAssociationCrudController.java new file mode 100644 index 000000000..92c175e7f --- /dev/null +++ b/src/main/java/org/alliancegenome/curation_api/controllers/crud/geneAssociations/GeneGenomicLocationAssociationCrudController.java @@ -0,0 +1,36 @@ +package org.alliancegenome.curation_api.controllers.crud.geneAssociations; + +import java.util.List; + +import org.alliancegenome.curation_api.controllers.base.BaseEntityCrudController; +import org.alliancegenome.curation_api.dao.associations.geneAssociations.GeneGenomicLocationAssociationDAO; +import org.alliancegenome.curation_api.interfaces.crud.geneAssociations.GeneGenomicLocationAssociationCrudInterface; +import org.alliancegenome.curation_api.jobs.executors.gff.Gff3GeneExecutor; +import 
org.alliancegenome.curation_api.model.entities.associations.geneAssociations.GeneGenomicLocationAssociation; +import org.alliancegenome.curation_api.model.ingest.dto.fms.Gff3DTO; +import org.alliancegenome.curation_api.response.APIResponse; +import org.alliancegenome.curation_api.response.LoadHistoryResponce; +import org.alliancegenome.curation_api.services.associations.geneAssociations.GeneGenomicLocationAssociationService; + +import jakarta.annotation.PostConstruct; +import jakarta.enterprise.context.RequestScoped; +import jakarta.inject.Inject; + +@RequestScoped +public class GeneGenomicLocationAssociationCrudController extends BaseEntityCrudController implements GeneGenomicLocationAssociationCrudInterface { + + @Inject GeneGenomicLocationAssociationService geneGenomicLocationService; + + @Inject Gff3GeneExecutor gff3GeneExecutor; + + @Override + @PostConstruct + protected void init() { + setService(geneGenomicLocationService); + } + + public APIResponse updateGeneLocations(String dataProvider, String assembly, List gffData) { + LoadHistoryResponce resp = (LoadHistoryResponce) gff3GeneExecutor.runLoadApi(dataProvider, assembly, gffData); + return new LoadHistoryResponce(resp.getHistory()); + } +} diff --git a/src/main/java/org/alliancegenome/curation_api/dao/associations/geneAssociations/GeneGenomicLocationAssociationDAO.java b/src/main/java/org/alliancegenome/curation_api/dao/associations/geneAssociations/GeneGenomicLocationAssociationDAO.java new file mode 100644 index 000000000..53c731649 --- /dev/null +++ b/src/main/java/org/alliancegenome/curation_api/dao/associations/geneAssociations/GeneGenomicLocationAssociationDAO.java @@ -0,0 +1,15 @@ +package org.alliancegenome.curation_api.dao.associations.geneAssociations; + +import org.alliancegenome.curation_api.dao.base.BaseSQLDAO; +import org.alliancegenome.curation_api.model.entities.associations.geneAssociations.GeneGenomicLocationAssociation; + +import jakarta.enterprise.context.ApplicationScoped; + 
+@ApplicationScoped +public class GeneGenomicLocationAssociationDAO extends BaseSQLDAO { + + protected GeneGenomicLocationAssociationDAO() { + super(GeneGenomicLocationAssociation.class); + } + +} diff --git a/src/main/java/org/alliancegenome/curation_api/enums/BackendBulkLoadType.java b/src/main/java/org/alliancegenome/curation_api/enums/BackendBulkLoadType.java index ba9ea290c..76d49d794 100644 --- a/src/main/java/org/alliancegenome/curation_api/enums/BackendBulkLoadType.java +++ b/src/main/java/org/alliancegenome/curation_api/enums/BackendBulkLoadType.java @@ -28,6 +28,7 @@ public enum BackendBulkLoadType { GFF_EXON("gff"), GFF_CDS("gff"), GFF_TRANSCRIPT("gff"), + GFF_GENE("gff"), INTERACTION_MOL("tsv"), EXPRESSION_ATLAS("tsv"), diff --git a/src/main/java/org/alliancegenome/curation_api/interfaces/crud/geneAssociations/GeneGenomicLocationAssociationCrudInterface.java b/src/main/java/org/alliancegenome/curation_api/interfaces/crud/geneAssociations/GeneGenomicLocationAssociationCrudInterface.java new file mode 100644 index 000000000..6b8a65d5e --- /dev/null +++ b/src/main/java/org/alliancegenome/curation_api/interfaces/crud/geneAssociations/GeneGenomicLocationAssociationCrudInterface.java @@ -0,0 +1,32 @@ +package org.alliancegenome.curation_api.interfaces.crud.geneAssociations; + +import java.util.List; + +import org.alliancegenome.curation_api.interfaces.base.BaseIdCrudInterface; +import org.alliancegenome.curation_api.model.entities.associations.geneAssociations.GeneGenomicLocationAssociation; +import org.alliancegenome.curation_api.model.ingest.dto.fms.Gff3DTO; +import org.alliancegenome.curation_api.response.APIResponse; +import org.alliancegenome.curation_api.view.View; +import org.eclipse.microprofile.openapi.annotations.tags.Tag; + +import com.fasterxml.jackson.annotation.JsonView; + +import jakarta.ws.rs.Consumes; +import jakarta.ws.rs.POST; +import jakarta.ws.rs.Path; +import jakarta.ws.rs.PathParam; +import jakarta.ws.rs.Produces; +import 
jakarta.ws.rs.core.MediaType; + +@Path("/genegenomiclocation") +@Tag(name = "CRUD - GeneGenomicLocationAssociation") +@Produces(MediaType.APPLICATION_JSON) +@Consumes(MediaType.APPLICATION_JSON) +public interface GeneGenomicLocationAssociationCrudInterface extends BaseIdCrudInterface { + + @POST + @Path("/bulk/{dataProvider}_{assemblyName}/geneLocations") + @JsonView(View.FieldsAndLists.class) + APIResponse updateGeneLocations(@PathParam("dataProvider") String dataProvider, @PathParam("assemblyName") String assemblyName, List gff3Data); + +} diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/executors/BulkLoadJobExecutor.java b/src/main/java/org/alliancegenome/curation_api/jobs/executors/BulkLoadJobExecutor.java index a9174d253..fe45f99d5 100644 --- a/src/main/java/org/alliancegenome/curation_api/jobs/executors/BulkLoadJobExecutor.java +++ b/src/main/java/org/alliancegenome/curation_api/jobs/executors/BulkLoadJobExecutor.java @@ -21,6 +21,7 @@ import org.alliancegenome.curation_api.jobs.executors.associations.constructAssociations.ConstructGenomicEntityAssociationExecutor; import org.alliancegenome.curation_api.jobs.executors.gff.Gff3CDSExecutor; import org.alliancegenome.curation_api.jobs.executors.gff.Gff3ExonExecutor; +import org.alliancegenome.curation_api.jobs.executors.gff.Gff3GeneExecutor; import org.alliancegenome.curation_api.jobs.executors.gff.Gff3TranscriptExecutor; import org.alliancegenome.curation_api.model.entities.bulkloads.BulkLoadFileHistory; @@ -60,6 +61,7 @@ public class BulkLoadJobExecutor { @Inject Gff3ExonExecutor gff3ExonExecutor; @Inject Gff3CDSExecutor gff3CDSExecutor; + @Inject Gff3GeneExecutor gff3GeneExecutor; @Inject Gff3TranscriptExecutor gff3TranscriptExecutor; @Inject VepTranscriptExecutor vepTranscriptExecutor; @Inject VepGeneExecutor vepGeneExecutor; @@ -138,6 +140,8 @@ public void process(BulkLoadFileHistory bulkLoadFileHistory, Boolean cleanUp) th gff3CDSExecutor.execLoad(bulkLoadFileHistory); } else if 
(bulkLoadFileHistory.getBulkLoad().getBackendBulkLoadType() == BackendBulkLoadType.GFF_TRANSCRIPT) { gff3TranscriptExecutor.execLoad(bulkLoadFileHistory); + } else if (bulkLoadFileHistory.getBulkLoad().getBackendBulkLoadType() == BackendBulkLoadType.GFF_GENE) { + gff3GeneExecutor.execLoad(bulkLoadFileHistory); } else if (bulkLoadFileHistory.getBulkLoad().getBackendBulkLoadType() == BackendBulkLoadType.HTPDATASET) { htpExpressionDatasetAnnotationExecutor.execLoad(bulkLoadFileHistory); } else if (bulkLoadFileHistory.getBulkLoad().getBackendBulkLoadType() == BackendBulkLoadType.EXPRESSION_ATLAS) { diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/executors/gff/Gff3GeneExecutor.java b/src/main/java/org/alliancegenome/curation_api/jobs/executors/gff/Gff3GeneExecutor.java new file mode 100644 index 000000000..fb00c1192 --- /dev/null +++ b/src/main/java/org/alliancegenome/curation_api/jobs/executors/gff/Gff3GeneExecutor.java @@ -0,0 +1,130 @@ +package org.alliancegenome.curation_api.jobs.executors.gff; + +import java.io.FileInputStream; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.zip.GZIPInputStream; + +import org.alliancegenome.curation_api.enums.BackendBulkDataProvider; +import org.alliancegenome.curation_api.exceptions.ObjectUpdateException; +import org.alliancegenome.curation_api.exceptions.ObjectUpdateException.ObjectUpdateExceptionData; +import org.alliancegenome.curation_api.jobs.util.CsvSchemaBuilder; +import org.alliancegenome.curation_api.model.entities.bulkloads.BulkFMSLoad; +import org.alliancegenome.curation_api.model.entities.bulkloads.BulkLoadFileHistory; +import org.alliancegenome.curation_api.model.ingest.dto.fms.Gff3DTO; +import org.alliancegenome.curation_api.response.APIResponse; +import org.alliancegenome.curation_api.response.LoadHistoryResponce; +import org.alliancegenome.curation_api.services.associations.geneAssociations.GeneGenomicLocationAssociationService; +import 
org.alliancegenome.curation_api.services.helpers.gff3.Gff3AttributesHelper; +import org.alliancegenome.curation_api.services.validation.dto.Gff3DtoValidator; +import org.alliancegenome.curation_api.util.ProcessDisplayHelper; +import org.apache.commons.lang3.tuple.ImmutablePair; + +import com.fasterxml.jackson.databind.MappingIterator; +import com.fasterxml.jackson.dataformat.csv.CsvMapper; +import com.fasterxml.jackson.dataformat.csv.CsvParser; +import com.fasterxml.jackson.dataformat.csv.CsvSchema; + +import jakarta.enterprise.context.ApplicationScoped; +import jakarta.inject.Inject; + +@ApplicationScoped +public class Gff3GeneExecutor extends Gff3Executor { + + @Inject GeneGenomicLocationAssociationService geneLocationService; + @Inject Gff3DtoValidator gff3DtoValidator; + + public void execLoad(BulkLoadFileHistory bulkLoadFileHistory) { + try { + + CsvSchema gff3Schema = CsvSchemaBuilder.gff3Schema(); + CsvMapper csvMapper = new CsvMapper(); + MappingIterator it = csvMapper.enable(CsvParser.Feature.INSERT_NULLS_FOR_MISSING_COLUMNS).readerFor(Gff3DTO.class).with(gff3Schema).readValues(new GZIPInputStream(new FileInputStream(bulkLoadFileHistory.getBulkLoadFile().getLocalFilePath()))); + List gffData = it.readAll(); + List gffHeaderData = new ArrayList<>(); + for (Gff3DTO gffLine : gffData) { + if (gffLine.getSeqId().startsWith("#")) { + gffHeaderData.add(gffLine.getSeqId()); + } else { + break; + } + } + gffData.subList(0, gffHeaderData.size()).clear(); + + BulkFMSLoad fmsLoad = (BulkFMSLoad) bulkLoadFileHistory.getBulkLoad(); + BackendBulkDataProvider dataProvider = BackendBulkDataProvider.valueOf(fmsLoad.getFmsDataSubType()); + + List>> preProcessedGeneGffData = Gff3AttributesHelper.getGeneGffData(gffData, dataProvider); + + gffData.clear(); + + List locationIdsAdded = new ArrayList<>(); + String assemblyId = loadGenomeAssemblyFromGFF(gffHeaderData); + + boolean success = runLoad(bulkLoadFileHistory, gffHeaderData, preProcessedGeneGffData, locationIdsAdded, 
dataProvider, assemblyId); + + if (success) { + runCleanup(geneLocationService, bulkLoadFileHistory, dataProvider.name(), geneLocationService.getIdsByDataProvider(dataProvider), locationIdsAdded, "GFF gene genomic location association"); + } + bulkLoadFileHistory.finishLoad(); + updateHistory(bulkLoadFileHistory); + updateExceptions(bulkLoadFileHistory); + + } catch (Exception e) { + e.printStackTrace(); + } + } + + private boolean runLoad( + BulkLoadFileHistory history, + List gffHeaderData, + List>> gffData, + List locationIdsAdded, + BackendBulkDataProvider dataProvider, + String assemblyId) { + + ProcessDisplayHelper ph = new ProcessDisplayHelper(); + ph.addDisplayHandler(loadProcessDisplayService); + ph.startProcess("GFF Gene update for " + dataProvider.name(), gffData.size()); + + String countType = "Locations"; + history.setCount(countType, gffData.size()); + updateHistory(history); + + for (ImmutablePair> gff3EntryPair : gffData) { + if (assemblyId != null) { + countType = "Locations"; + try { + gff3Service.loadGeneLocationAssociations(gff3EntryPair, locationIdsAdded, dataProvider, assemblyId); + history.incrementCompleted(countType); + } catch (ObjectUpdateException e) { + history.incrementFailed(countType); + addException(history, e.getData()); + } catch (Exception e) { + e.printStackTrace(); + history.incrementFailed(countType); + addException(history, new ObjectUpdateExceptionData(gff3EntryPair.getKey(), e.getMessage(), e.getStackTrace())); + } + } + ph.progressProcess(); + } + updateHistory(history); + ph.finishProcess(); + return true; + } + + public APIResponse runLoadApi(String dataProviderName, String assemblyName, List gffData) { + List idsAdded = new ArrayList<>(); + BackendBulkDataProvider dataProvider = BackendBulkDataProvider.valueOf(dataProviderName); + List>> preProcessedGeneGffData = Gff3AttributesHelper.getGeneGffData(gffData, dataProvider); + BulkLoadFileHistory history = new BulkLoadFileHistory(); + history = 
bulkLoadFileHistoryDAO.persist(history); + runLoad(history, null, preProcessedGeneGffData, idsAdded, dataProvider, assemblyName); + history.finishLoad(); + + return new LoadHistoryResponce(history); + } + + +} diff --git a/src/main/java/org/alliancegenome/curation_api/model/entities/Gene.java b/src/main/java/org/alliancegenome/curation_api/model/entities/Gene.java index a2ed18954..cdeaa6496 100644 --- a/src/main/java/org/alliancegenome/curation_api/model/entities/Gene.java +++ b/src/main/java/org/alliancegenome/curation_api/model/entities/Gene.java @@ -5,6 +5,7 @@ import org.alliancegenome.curation_api.constants.LinkMLSchemaConstants; import org.alliancegenome.curation_api.interfaces.AGRCurationSchemaVersion; import org.alliancegenome.curation_api.model.entities.associations.alleleAssociations.AlleleGeneAssociation; +import org.alliancegenome.curation_api.model.entities.associations.geneAssociations.GeneGenomicLocationAssociation; import org.alliancegenome.curation_api.model.entities.associations.sequenceTargetingReagentAssociations.SequenceTargetingReagentGeneAssociation; import org.alliancegenome.curation_api.model.entities.associations.transcriptAssociations.TranscriptGeneAssociation; import org.alliancegenome.curation_api.model.entities.ontology.SOTerm; @@ -38,7 +39,8 @@ @Entity @Data @EqualsAndHashCode(onlyExplicitlyIncluded = true, callSuper = true) -@ToString(exclude = { "geneDiseaseAnnotations", "geneGeneAssociations", "geneSymbol", "geneFullName", "geneSystematicName", "geneSynonyms", "geneSecondaryIds", "alleleGeneAssociations", "sequenceTargetingReagentGeneAssociations", "transcriptGeneAssociations" }, callSuper = true) +@ToString(exclude = { "geneDiseaseAnnotations", "geneGeneAssociations", "geneSymbol", "geneFullName", "geneSystematicName", "geneSynonyms", "geneSecondaryIds", + "geneGenomicLocationAssociations", "alleleGeneAssociations", "sequenceTargetingReagentGeneAssociations", "transcriptGeneAssociations" }, callSuper = true) @Schema(name = "Gene", 
description = "POJO that represents the Gene") @AGRCurationSchemaVersion(min = "1.5.0", max = LinkMLSchemaConstants.LATEST_RELEASE, dependencies = { GenomicEntity.class }, partial = true) @Table(indexes = { @Index(name = "gene_genetype_index", columnList = "geneType_id") }) @@ -105,4 +107,16 @@ public class Gene extends GenomicEntity { @OneToMany(mappedBy = "transcriptGeneAssociationObject", cascade = CascadeType.ALL, orphanRemoval = true) @JsonView({ View.FieldsAndLists.class, View.GeneDetailView.class }) private List transcriptGeneAssociations; + + @IndexedEmbedded( + includePaths = { + "geneGenomicLocationAssociationObject.curie", "geneGenomicLocationAssociationObject.curie_keyword", + "geneGenomicLocationAssociationObject.modEntityId", "geneGenomicLocationAssociationObject.modEntityId_keyword", + "geneGenomicLocationAssociationObject.modInternalId", "geneGenomicLocationAssociationObject.modInternalId_keyword", + "start", "end" + } + ) + @OneToMany(mappedBy = "geneAssociationSubject", cascade = CascadeType.ALL, orphanRemoval = true) + @JsonView({ View.FieldsAndLists.class, View.GeneDetailView.class }) + private List geneGenomicLocationAssociations; } diff --git a/src/main/java/org/alliancegenome/curation_api/model/entities/associations/geneAssociations/GeneGenomicLocationAssociation.java b/src/main/java/org/alliancegenome/curation_api/model/entities/associations/geneAssociations/GeneGenomicLocationAssociation.java new file mode 100644 index 000000000..c829b5059 --- /dev/null +++ b/src/main/java/org/alliancegenome/curation_api/model/entities/associations/geneAssociations/GeneGenomicLocationAssociation.java @@ -0,0 +1,82 @@ +package org.alliancegenome.curation_api.model.entities.associations.geneAssociations; + +import org.alliancegenome.curation_api.constants.LinkMLSchemaConstants; +import org.alliancegenome.curation_api.interfaces.AGRCurationSchemaVersion; +import org.alliancegenome.curation_api.model.entities.AssemblyComponent; +import 
org.alliancegenome.curation_api.model.entities.Gene; +import org.alliancegenome.curation_api.model.entities.LocationAssociation; +import org.alliancegenome.curation_api.view.View; +import org.eclipse.microprofile.openapi.annotations.media.Schema; +import org.hibernate.annotations.Fetch; +import org.hibernate.annotations.FetchMode; +import org.hibernate.search.engine.backend.types.Aggregable; +import org.hibernate.search.engine.backend.types.Searchable; +import org.hibernate.search.engine.backend.types.Sortable; +import org.hibernate.search.mapper.pojo.automaticindexing.ReindexOnUpdate; +import org.hibernate.search.mapper.pojo.mapping.definition.annotation.FullTextField; +import org.hibernate.search.mapper.pojo.mapping.definition.annotation.IndexedEmbedded; +import org.hibernate.search.mapper.pojo.mapping.definition.annotation.IndexingDependency; +import org.hibernate.search.mapper.pojo.mapping.definition.annotation.KeywordField; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import com.fasterxml.jackson.annotation.JsonView; + +import jakarta.persistence.Column; +import jakarta.persistence.Entity; +import jakarta.persistence.Index; +import jakarta.persistence.ManyToOne; +import jakarta.persistence.Table; +import lombok.Data; +import lombok.EqualsAndHashCode; +import lombok.ToString; + +@Entity +@Data +@EqualsAndHashCode(onlyExplicitlyIncluded = true, callSuper = true) +@ToString(callSuper = true) +@AGRCurationSchemaVersion(min = "2.4.0", max = LinkMLSchemaConstants.LATEST_RELEASE, dependencies = { LocationAssociation.class }) +@Schema(name = "GeneGenomicLocationAssociation", description = "POJO representing an association between a gene and a genomic location") + +@Table(indexes = { + @Index(name = "geneGenomicLocationAssociation_internal_index", columnList = "internal"), + @Index(name = "geneGenomicLocationAssociation_obsolete_index", columnList = "obsolete"), + @Index(name = "geneGenomicLocationAssociation_strand_index", columnList = "strand"), 
+ @Index(name = "geneGenomicLocationAssociation_createdBy_index", columnList = "createdBy_id"), + @Index(name = "geneGenomicLocationAssociation_updatedBy_index", columnList = "updatedBy_id"), + @Index(name = "geneGenomicLocationAssociation_relation_index", columnList = "relation_id"), + @Index(name = "geneGenomicLocationAssociation_subject_index", columnList = "geneassociationsubject_id"), + @Index(name = "geneGenomicLocationAssociation_object_index", columnList = "genegenomiclocationassociationobject_id") +}) + +public class GeneGenomicLocationAssociation extends LocationAssociation { + + @IndexedEmbedded(includePaths = {"curie", "geneSymbol.displayText", "geneSymbol.formatText", "geneFullName.displayText", "geneFullName.formatText", + "curie_keyword", "geneSymbol.displayText_keyword", "geneSymbol.formatText_keyword", "geneFullName.displayText_keyword", "geneFullName.formatText_keyword", + "modEntityId", "modEntityId_keyword", "modInternalId", "modInternalId_keyword"}) + @IndexingDependency(reindexOnUpdate = ReindexOnUpdate.SHALLOW) + @ManyToOne + @JsonIgnoreProperties({ + "geneGenomicLocationAssociations", + "sequenceTargetingReagentGeneAssociations", + "transcriptGeneAssociations", + "alleleGeneAssociations" + }) + @JsonView({ View.FieldsOnly.class }) + private Gene geneAssociationSubject; + + @IndexedEmbedded(includePaths = { + "curie", "curie_keyword", "modEntityId", "modEntityId_keyword", + "modInternalId", "modInternalId_keyword", "name", "name_keyword" + }) + @ManyToOne + @JsonView({ View.FieldsOnly.class }) + @IndexingDependency(reindexOnUpdate = ReindexOnUpdate.SHALLOW) + @Fetch(FetchMode.JOIN) + private AssemblyComponent geneGenomicLocationAssociationObject; + + @FullTextField(analyzer = "autocompleteAnalyzer", searchAnalyzer = "autocompleteSearchAnalyzer") + @KeywordField(name = "strand_keyword", aggregable = Aggregable.YES, sortable = Sortable.YES, searchable = Searchable.YES, normalizer = "sortNormalizer") + @JsonView({ View.FieldsOnly.class }) + 
@Column(length = 1) + private String strand; +} diff --git a/src/main/java/org/alliancegenome/curation_api/services/Gff3Service.java b/src/main/java/org/alliancegenome/curation_api/services/Gff3Service.java index 4b41cfec2..93c23daa0 100644 --- a/src/main/java/org/alliancegenome/curation_api/services/Gff3Service.java +++ b/src/main/java/org/alliancegenome/curation_api/services/Gff3Service.java @@ -11,13 +11,15 @@ import org.alliancegenome.curation_api.dao.GenomeAssemblyDAO; import org.alliancegenome.curation_api.dao.TranscriptDAO; import org.alliancegenome.curation_api.enums.BackendBulkDataProvider; -import org.alliancegenome.curation_api.exceptions.ValidationException; import org.alliancegenome.curation_api.exceptions.ObjectValidationException; +import org.alliancegenome.curation_api.exceptions.ValidationException; import org.alliancegenome.curation_api.model.entities.CodingSequence; import org.alliancegenome.curation_api.model.entities.Exon; +import org.alliancegenome.curation_api.model.entities.Gene; import org.alliancegenome.curation_api.model.entities.Transcript; import org.alliancegenome.curation_api.model.entities.associations.codingSequenceAssociations.CodingSequenceGenomicLocationAssociation; import org.alliancegenome.curation_api.model.entities.associations.exonAssociations.ExonGenomicLocationAssociation; +import org.alliancegenome.curation_api.model.entities.associations.geneAssociations.GeneGenomicLocationAssociation; import org.alliancegenome.curation_api.model.entities.associations.transcriptAssociations.TranscriptCodingSequenceAssociation; import org.alliancegenome.curation_api.model.entities.associations.transcriptAssociations.TranscriptExonAssociation; import org.alliancegenome.curation_api.model.entities.associations.transcriptAssociations.TranscriptGeneAssociation; @@ -26,6 +28,7 @@ import org.alliancegenome.curation_api.response.SearchResponse; import 
org.alliancegenome.curation_api.services.associations.codingSequenceAssociations.CodingSequenceGenomicLocationAssociationService; import org.alliancegenome.curation_api.services.associations.exonAssociations.ExonGenomicLocationAssociationService; +import org.alliancegenome.curation_api.services.associations.geneAssociations.GeneGenomicLocationAssociationService; import org.alliancegenome.curation_api.services.associations.transcriptAssociations.TranscriptCodingSequenceAssociationService; import org.alliancegenome.curation_api.services.associations.transcriptAssociations.TranscriptExonAssociationService; import org.alliancegenome.curation_api.services.associations.transcriptAssociations.TranscriptGeneAssociationService; @@ -51,12 +54,14 @@ public class Gff3Service { @Inject ExonGenomicLocationAssociationService exonLocationService; @Inject CodingSequenceGenomicLocationAssociationService cdsLocationService; @Inject TranscriptGenomicLocationAssociationService transcriptLocationService; + @Inject GeneGenomicLocationAssociationService geneLocationService; @Inject TranscriptGeneAssociationService transcriptGeneService; @Inject TranscriptCodingSequenceAssociationService transcriptCdsService; @Inject TranscriptExonAssociationService transcriptExonService; @Inject DataProviderService dataProviderService; @Inject NcbiTaxonTermService ncbiTaxonTermService; @Inject Gff3DtoValidator gff3DtoValidator; + @Inject GeneService geneService; @Transactional public void loadExonLocationAssociations(ImmutablePair> gffEntryPair, List idsAdded, BackendBulkDataProvider dataProvider, String assemblyId) throws ValidationException { @@ -140,6 +145,42 @@ public void loadTranscriptLocationAssociations(ImmutablePair> gffEntryPair, List idsAdded, BackendBulkDataProvider dataProvider, String assemblyId) throws ValidationException { + Gff3DTO gffEntry = gffEntryPair.getKey(); + Map attributes = gffEntryPair.getValue(); + if (StringUtils.isBlank(assemblyId)) { + throw new 
ObjectValidationException(gffEntry, "Cannot load associations without assembly"); + } + + if (!Gff3Constants.GENE_TYPES.contains(gffEntry.getType())) { + throw new ObjectValidationException(gffEntry, "Invalid Type: " + gffEntry.getType() + " for Gene Location"); + } + + String geneCurie; + String identifyingAttribute; + if (attributes.containsKey("gene_id")) { + geneCurie = attributes.get("gene_id"); + identifyingAttribute = "gene_id"; + } else if (attributes.containsKey("ID")) { + geneCurie = attributes.get("ID"); + identifyingAttribute = "ID"; + } else { + throw new ObjectValidationException(gffEntry, "attributes - ID - " + ValidationConstants.REQUIRED_MESSAGE); + } + + Gene gene = geneService.findByIdentifierString(geneCurie); + if (gene == null) { + throw new ObjectValidationException(gffEntry, "attributes - " + identifyingAttribute + " - " + ValidationConstants.INVALID_MESSAGE + " (" + geneCurie + ")"); /* report the identifier value that was actually looked up, which may come from gene_id rather than ID */ + } + + GeneGenomicLocationAssociation geneLocation = gff3DtoValidator.validateGeneLocation(gffEntry, gene, assemblyId, dataProvider); + if (geneLocation != null) { + idsAdded.add(geneLocation.getId()); + geneLocationService.addAssociationToSubject(geneLocation); + } + } + @Transactional public void loadExonParentChildAssociations(ImmutablePair> gffEntryPair, List idsAdded, BackendBulkDataProvider dataProvider) throws ValidationException { Gff3DTO gffEntry = gffEntryPair.getKey(); @@ -216,7 +257,7 @@ public Map getGeneIdCurieMap(List gffData, BackendBulkD Map geneIdCurieMap = new HashMap<>(); for (Gff3DTO gffEntry : gffData) { - if (gffEntry.getType().contains("gene")) { + if (Gff3Constants.GENE_TYPES.contains(gffEntry.getType())) { Map attributes = Gff3AttributesHelper.getAttributes(gffEntry, dataProvider); if (attributes.containsKey("gene_id") && attributes.containsKey("ID")) { geneIdCurieMap.put(attributes.get("ID"), attributes.get("gene_id")); diff --git
a/src/main/java/org/alliancegenome/curation_api/services/associations/geneAssociations/GeneGenomicLocationAssociationService.java b/src/main/java/org/alliancegenome/curation_api/services/associations/geneAssociations/GeneGenomicLocationAssociationService.java new file mode 100644 index 000000000..17701acbe --- /dev/null +++ b/src/main/java/org/alliancegenome/curation_api/services/associations/geneAssociations/GeneGenomicLocationAssociationService.java @@ -0,0 +1,124 @@ +package org.alliancegenome.curation_api.services.associations.geneAssociations; + +import java.time.OffsetDateTime; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.stream.Collectors; + +import org.alliancegenome.curation_api.constants.EntityFieldConstants; +import org.alliancegenome.curation_api.dao.PersonDAO; +import org.alliancegenome.curation_api.dao.associations.geneAssociations.GeneGenomicLocationAssociationDAO; +import org.alliancegenome.curation_api.enums.BackendBulkDataProvider; +import org.alliancegenome.curation_api.exceptions.ApiErrorException; +import org.alliancegenome.curation_api.model.entities.Gene; +import org.alliancegenome.curation_api.model.entities.associations.geneAssociations.GeneGenomicLocationAssociation; +import org.alliancegenome.curation_api.response.ObjectResponse; +import org.alliancegenome.curation_api.response.SearchResponse; +import org.alliancegenome.curation_api.services.PersonService; +import org.alliancegenome.curation_api.services.base.BaseEntityCrudService; +import org.apache.commons.lang.StringUtils; + +import jakarta.annotation.PostConstruct; +import jakarta.enterprise.context.RequestScoped; +import jakarta.inject.Inject; +import jakarta.transaction.Transactional; +import lombok.extern.jbosslog.JBossLog; + +@JBossLog +@RequestScoped +public class GeneGenomicLocationAssociationService extends BaseEntityCrudService { + + @Inject GeneGenomicLocationAssociationDAO 
geneGenomicLocationAssociationDAO; + @Inject PersonDAO personDAO; + @Inject PersonService personService; + + @Override + @PostConstruct + protected void init() { + setSQLDao(geneGenomicLocationAssociationDAO); + } + + + public List getIdsByDataProvider(BackendBulkDataProvider dataProvider) { + Map params = new HashMap<>(); + params.put(EntityFieldConstants.GENE_ASSOCIATION_SUBJECT_DATA_PROVIDER, dataProvider.sourceOrganization); + if (StringUtils.equals(dataProvider.sourceOrganization, "RGD")) { + params.put(EntityFieldConstants.GENE_ASSOCIATION_SUBJECT_TAXON, dataProvider.canonicalTaxonCurie); + } + List associationIds = geneGenomicLocationAssociationDAO.findIdsByParams(params); + associationIds.removeIf(Objects::isNull); + + return associationIds; + } + + @Override + @Transactional + public GeneGenomicLocationAssociation deprecateOrDelete(Long id, Boolean throwApiError, String loadDescription, Boolean deprecate) { + GeneGenomicLocationAssociation association = geneGenomicLocationAssociationDAO.find(id); + + if (association == null) { + String errorMessage = "Could not find GeneGenomicLocationAssociation with id: " + id; + if (throwApiError) { + ObjectResponse response = new ObjectResponse<>(); + response.addErrorMessage("id", errorMessage); + throw new ApiErrorException(response); + } + log.error(errorMessage); + return null; + } + if (deprecate) { + if (!association.getObsolete()) { + association.setObsolete(true); + if (authenticatedPerson.getId() != null) { + association.setUpdatedBy(personDAO.find(authenticatedPerson.getId())); + } else { + association.setUpdatedBy(personService.fetchByUniqueIdOrCreate(loadDescription)); + } + association.setDateUpdated(OffsetDateTime.now()); + return geneGenomicLocationAssociationDAO.persist(association); + } + return association; + } + + geneGenomicLocationAssociationDAO.remove(association.getId()); + + return null; + } + + public ObjectResponse getLocationAssociation(Long geneId, Long assemblyComponentId) { + 
GeneGenomicLocationAssociation association = null; + + Map params = new HashMap<>(); + params.put(EntityFieldConstants.GENE_ASSOCIATION_SUBJECT + ".id", geneId); + params.put("geneGenomicLocationAssociationObject.id", assemblyComponentId); + + SearchResponse resp = geneGenomicLocationAssociationDAO.findByParams(params); + if (resp != null && resp.getSingleResult() != null) { + association = resp.getSingleResult(); + } + + ObjectResponse response = new ObjectResponse<>(); + response.setEntity(association); + + return response; + } + + public void addAssociationToSubject(GeneGenomicLocationAssociation association) { + Gene gene = association.getGeneAssociationSubject(); + + List currentSubjectAssociations = gene.getGeneGenomicLocationAssociations(); + if (currentSubjectAssociations == null) { + currentSubjectAssociations = new ArrayList<>(); + } + + List currentSubjectAssociationIds = currentSubjectAssociations.stream() + .map(GeneGenomicLocationAssociation::getId).collect(Collectors.toList()); + + if (!currentSubjectAssociationIds.contains(association.getId())) { + currentSubjectAssociations.add(association); + } + } +} diff --git a/src/main/java/org/alliancegenome/curation_api/services/helpers/gff3/Gff3AttributesHelper.java b/src/main/java/org/alliancegenome/curation_api/services/helpers/gff3/Gff3AttributesHelper.java index 7d5872df1..af1f0834a 100644 --- a/src/main/java/org/alliancegenome/curation_api/services/helpers/gff3/Gff3AttributesHelper.java +++ b/src/main/java/org/alliancegenome/curation_api/services/helpers/gff3/Gff3AttributesHelper.java @@ -98,6 +98,21 @@ public static List>> getTranscriptGff ph.finishProcess(); return retGffData; } + + + public static List>> getGeneGffData(List gffData, BackendBulkDataProvider dataProvider) { + List>> retGffData = new ArrayList<>(); + ProcessDisplayHelper ph = new ProcessDisplayHelper(); + ph.startProcess("GFF Gene pre-processing for " + dataProvider.name(), gffData.size()); + for (Gff3DTO originalGffEntry : gffData) { 
+ if (Gff3Constants.GENE_TYPES.contains(originalGffEntry.getType())) { + processGffEntry(originalGffEntry, retGffData, dataProvider); + } + ph.progressProcess(); + } + ph.finishProcess(); + return retGffData; + } private static void processGffEntry(Gff3DTO originalGffEntry, List>> retGffData, BackendBulkDataProvider dataProvider) { Map attributes = getAttributes(originalGffEntry, dataProvider); diff --git a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/Gff3DtoValidator.java b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/Gff3DtoValidator.java index 289cbedf6..b856cd886 100644 --- a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/Gff3DtoValidator.java +++ b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/Gff3DtoValidator.java @@ -13,6 +13,7 @@ import org.alliancegenome.curation_api.dao.TranscriptDAO; import org.alliancegenome.curation_api.dao.associations.codingSequenceAssociations.CodingSequenceGenomicLocationAssociationDAO; import org.alliancegenome.curation_api.dao.associations.exonAssociations.ExonGenomicLocationAssociationDAO; +import org.alliancegenome.curation_api.dao.associations.geneAssociations.GeneGenomicLocationAssociationDAO; import org.alliancegenome.curation_api.dao.associations.transcriptAssociations.TranscriptCodingSequenceAssociationDAO; import org.alliancegenome.curation_api.dao.associations.transcriptAssociations.TranscriptExonAssociationDAO; import org.alliancegenome.curation_api.dao.associations.transcriptAssociations.TranscriptGeneAssociationDAO; @@ -30,6 +31,7 @@ import org.alliancegenome.curation_api.model.entities.Transcript; import org.alliancegenome.curation_api.model.entities.associations.codingSequenceAssociations.CodingSequenceGenomicLocationAssociation; import org.alliancegenome.curation_api.model.entities.associations.exonAssociations.ExonGenomicLocationAssociation; +import 
org.alliancegenome.curation_api.model.entities.associations.geneAssociations.GeneGenomicLocationAssociation; import org.alliancegenome.curation_api.model.entities.associations.transcriptAssociations.TranscriptCodingSequenceAssociation; import org.alliancegenome.curation_api.model.entities.associations.transcriptAssociations.TranscriptExonAssociation; import org.alliancegenome.curation_api.model.entities.associations.transcriptAssociations.TranscriptGeneAssociation; @@ -60,6 +62,7 @@ public class Gff3DtoValidator { @Inject GeneService geneService; @Inject ExonGenomicLocationAssociationDAO exonLocationDAO; @Inject TranscriptGenomicLocationAssociationDAO transcriptLocationDAO; + @Inject GeneGenomicLocationAssociationDAO geneLocationDAO; @Inject TranscriptGeneAssociationDAO transcriptGeneDAO; @Inject TranscriptExonAssociationDAO transcriptExonDAO; @Inject TranscriptCodingSequenceAssociationDAO transcriptCdsDAO; @@ -281,6 +284,33 @@ public TranscriptGenomicLocationAssociation validateTranscriptLocation(Gff3DTO g return transcriptLocationDAO.persist(locationResponse.getEntity()); } + + @Transactional + public GeneGenomicLocationAssociation validateGeneLocation(Gff3DTO gffEntry, Gene gene, String assemblyId, BackendBulkDataProvider dataProvider) throws ObjectValidationException { + AssemblyComponent assemblyComponent = null; + GeneGenomicLocationAssociation locationAssociation = new GeneGenomicLocationAssociation(); + if (StringUtils.isNotBlank(gffEntry.getSeqId())) { + assemblyComponent = assemblyComponentService.fetchOrCreate(gffEntry.getSeqId(), assemblyId, dataProvider.canonicalTaxonCurie, dataProvider); + Map params = new HashMap<>(); + params.put(EntityFieldConstants.GENE_ASSOCIATION_SUBJECT + ".id", gene.getId()); + params.put(EntityFieldConstants.GENE_GENOMIC_LOCATION_ASSOCIATION_OBJECT_ASSEMBLY, assemblyId); + SearchResponse locationSearchResponse = geneLocationDAO.findByParams(params); + if (locationSearchResponse != null && 
locationSearchResponse.getSingleResult() != null) { + locationAssociation = locationSearchResponse.getSingleResult(); + } + locationAssociation.setGeneGenomicLocationAssociationObject(assemblyComponent); + } + locationAssociation.setGeneAssociationSubject(gene); + + locationAssociation.setStrand(gffEntry.getStrand()); + + ObjectResponse locationResponse = validateLocationAssociation(locationAssociation, gffEntry, assemblyComponent); + if (locationResponse.hasErrors()) { + throw new ObjectValidationException(gffEntry, locationResponse.errorMessagesString()); + } + + return geneLocationDAO.persist(locationResponse.getEntity()); + } @Transactional public TranscriptGeneAssociation validateTranscriptGeneAssociation(Gff3DTO gffEntry, Transcript transcript, Map attributes, Map geneIdCurieMap) throws ObjectValidationException { diff --git a/src/main/resources/db/migration/v0.38.0.3__gene_genomic_location_association.sql b/src/main/resources/db/migration/v0.38.0.3__gene_genomic_location_association.sql new file mode 100644 index 000000000..43045f7a5 --- /dev/null +++ b/src/main/resources/db/migration/v0.38.0.3__gene_genomic_location_association.sql @@ -0,0 +1,108 @@ +CREATE TABLE genegenomiclocationassociation ( + id bigint CONSTRAINT genegenomiclocationassociation_pkey PRIMARY KEY, + datecreated timestamp(6) with time zone, + dateupdated timestamp(6) with time zone, + dbdatecreated timestamp(6) with time zone, + dbdateupdated timestamp(6) with time zone, + internal boolean DEFAULT false NOT NULL, + obsolete boolean DEFAULT false NOT NULL, + createdby_id bigint, + updatedby_id bigint, + "start" integer, + "end" integer, + strand varchar(1), + relation_id bigint, + geneassociationsubject_id bigint, + genegenomiclocationassociationobject_id bigint +); + +ALTER TABLE genegenomiclocationassociation ADD CONSTRAINT genegenomiclocationassociation_relation_id_fk + FOREIGN KEY (relation_id) REFERENCES vocabularyterm(id); +ALTER TABLE genegenomiclocationassociation ADD CONSTRAINT 
genegenomiclocationassociation_gasubject_id_fk + FOREIGN KEY (geneassociationsubject_id) REFERENCES gene(id); +ALTER TABLE genegenomiclocationassociation ADD CONSTRAINT genegenomiclocationassociation_gglaobject_id_fk + FOREIGN KEY (genegenomiclocationassociationobject_id) REFERENCES assemblycomponent(id); +ALTER TABLE genegenomiclocationassociation ADD CONSTRAINT genegenomiclocationassociation_createdby_id_fk + FOREIGN KEY (createdby_id) REFERENCES person(id); +ALTER TABLE genegenomiclocationassociation ADD CONSTRAINT genegenomiclocationassociation_updatedby_id_fk + FOREIGN KEY (updatedby_id) REFERENCES person(id); + +CREATE INDEX geneGenomicLocationAssociation_internal_index ON genegenomiclocationassociation + USING btree (internal); +CREATE INDEX geneGenomicLocationAssociation_obsolete_index ON genegenomiclocationassociation + USING btree (obsolete); +CREATE INDEX geneGenomicLocationAssociation_strand_index ON genegenomiclocationassociation + USING btree (strand); +CREATE INDEX geneGenomicLocationAssociation_createdBy_index ON genegenomiclocationassociation + USING btree (createdby_id); +CREATE INDEX geneGenomicLocationAssociation_updatedBy_index ON genegenomiclocationassociation + USING btree (updatedby_id); +CREATE INDEX geneGenomicLocationAssociation_relation_index ON genegenomiclocationassociation + USING btree (relation_id); +CREATE INDEX geneGenomicLocationAssociation_subject_index ON genegenomiclocationassociation + USING btree (geneassociationsubject_id); +CREATE INDEX geneGenomicLocationAssociation_object_index ON genegenomiclocationassociation + USING btree (genegenomiclocationassociationobject_id); + +CREATE SEQUENCE genegenomiclocationassociation_seq + START WITH 1 + INCREMENT BY 50 + NO MINVALUE + NO MAXVALUE + CACHE 1; + +CREATE TABLE genegenomiclocationassociation_informationcontententity ( + association_id bigint NOT NULL, + evidence_id bigint NOT NULL +); + +ALTER TABLE genegenomiclocationassociation_informationcontententity ADD CONSTRAINT 
genegla_ice_association_id_fk + FOREIGN KEY (association_id) REFERENCES genegenomiclocationassociation(id); +ALTER TABLE genegenomiclocationassociation_informationcontententity ADD CONSTRAINT genegla_ice_evidence_id_fk + FOREIGN KEY (evidence_id) REFERENCES informationcontententity(id); /* evidence_id, not association_id, is the column that points at informationcontententity */ + +CREATE INDEX idxiqhlffl5a2w5p3rkqb7wbqp1m ON genegenomiclocationassociation_informationcontententity + USING btree (association_id); +CREATE INDEX idx35kn4cryxq4hb1h46lqrnixbm ON genegenomiclocationassociation_informationcontententity + USING btree (evidence_id); + +INSERT INTO bulkload (id, backendbulkloadtype, name, bulkloadstatus, group_id) + SELECT nextval('bulkload_seq'), 'GFF_GENE', 'FB GFF Gene Load', 'STOPPED', id FROM bulkloadgroup WHERE name = 'File Management System (FMS) GFF Loads'; +INSERT INTO bulkload (id, backendbulkloadtype, name, bulkloadstatus, group_id) + SELECT nextval('bulkload_seq'), 'GFF_GENE', 'Human GFF Gene Load', 'STOPPED', id FROM bulkloadgroup WHERE name = 'File Management System (FMS) GFF Loads'; +INSERT INTO bulkload (id, backendbulkloadtype, name, bulkloadstatus, group_id) + SELECT nextval('bulkload_seq'), 'GFF_GENE', 'MGI GFF Gene Load', 'STOPPED', id FROM bulkloadgroup WHERE name = 'File Management System (FMS) GFF Loads'; +INSERT INTO bulkload (id, backendbulkloadtype, name, bulkloadstatus, group_id) + SELECT nextval('bulkload_seq'), 'GFF_GENE', 'RGD GFF Gene Load', 'STOPPED', id FROM bulkloadgroup WHERE name = 'File Management System (FMS) GFF Loads'; +INSERT INTO bulkload (id, backendbulkloadtype, name, bulkloadstatus, group_id) + SELECT nextval('bulkload_seq'), 'GFF_GENE', 'SGD GFF Gene Load', 'STOPPED', id FROM bulkloadgroup WHERE name = 'File Management System (FMS) GFF Loads'; +INSERT INTO bulkload (id, backendbulkloadtype, name, bulkloadstatus, group_id) + SELECT nextval('bulkload_seq'), 'GFF_GENE', 'WB GFF Gene Load', 'STOPPED', id FROM bulkloadgroup WHERE name = 'File Management System (FMS) GFF Loads'; +INSERT INTO bulkload (id,
backendbulkloadtype, name, bulkloadstatus, group_id) + SELECT nextval('bulkload_seq'), 'GFF_GENE', 'XBXL GFF Gene Load', 'STOPPED', id FROM bulkloadgroup WHERE name = 'File Management System (FMS) GFF Loads'; +INSERT INTO bulkload (id, backendbulkloadtype, name, bulkloadstatus, group_id) + SELECT nextval('bulkload_seq'), 'GFF_GENE', 'XBXT GFF Gene Load', 'STOPPED', id FROM bulkloadgroup WHERE name = 'File Management System (FMS) GFF Loads'; +INSERT INTO bulkload (id, backendbulkloadtype, name, bulkloadstatus, group_id) + SELECT nextval('bulkload_seq'), 'GFF_GENE', 'ZFIN GFF Gene Load', 'STOPPED', id FROM bulkloadgroup WHERE name = 'File Management System (FMS) GFF Loads'; + +INSERT INTO bulkscheduledload (id, cronschedule, scheduleactive) + SELECT id, '0 0 22 ? * SUN-THU', false FROM bulkload WHERE backendbulkloadtype = 'GFF_GENE'; + +INSERT INTO bulkfmsload (id, fmsdatatype, fmsdatasubtype) + SELECT id, 'GFF', 'FB' FROM bulkload WHERE name = 'FB GFF Gene Load'; +INSERT INTO bulkfmsload (id, fmsdatatype, fmsdatasubtype) + SELECT id, 'GFF', 'HUMAN' FROM bulkload WHERE name = 'Human GFF Gene Load'; +INSERT INTO bulkfmsload (id, fmsdatatype, fmsdatasubtype) + SELECT id, 'GFF', 'MGI' FROM bulkload WHERE name = 'MGI GFF Gene Load'; +INSERT INTO bulkfmsload (id, fmsdatatype, fmsdatasubtype) + SELECT id, 'GFF', 'RGD' FROM bulkload WHERE name = 'RGD GFF Gene Load'; +INSERT INTO bulkfmsload (id, fmsdatatype, fmsdatasubtype) + SELECT id, 'GFF', 'SGD' FROM bulkload WHERE name = 'SGD GFF Gene Load'; +INSERT INTO bulkfmsload (id, fmsdatatype, fmsdatasubtype) + SELECT id, 'GFF', 'WB' FROM bulkload WHERE name = 'WB GFF Gene Load'; +INSERT INTO bulkfmsload (id, fmsdatatype, fmsdatasubtype) + SELECT id, 'GFF', 'XBXL' FROM bulkload WHERE name = 'XBXL GFF Gene Load'; +INSERT INTO bulkfmsload (id, fmsdatatype, fmsdatasubtype) + SELECT id, 'GFF', 'XBXT' FROM bulkload WHERE name = 'XBXT GFF Gene Load'; +INSERT INTO bulkfmsload (id, fmsdatatype, fmsdatasubtype) + SELECT id, 'GFF', 'ZFIN' 
FROM bulkload WHERE name = 'ZFIN GFF Gene Load'; \ No newline at end of file diff --git a/src/test/java/org/alliancegenome/curation_api/Gff3BulkUploadITCase.java b/src/test/java/org/alliancegenome/curation_api/Gff3BulkUploadITCase.java index 205203b3c..1ea345976 100644 --- a/src/test/java/org/alliancegenome/curation_api/Gff3BulkUploadITCase.java +++ b/src/test/java/org/alliancegenome/curation_api/Gff3BulkUploadITCase.java @@ -43,20 +43,23 @@ public void init() { private final String transcriptBulkPostEndpoint = "/api/transcript/bulk/WB_WBcel235/transcripts"; private final String exonBulkPostEndpoint = "/api/exon/bulk/WB_WBcel235/exons"; private final String cdsBulkPostEndpoint = "/api/cds/bulk/WB_WBcel235/codingSequences"; + private final String geneLocationBulkPostEndpoint = "/api/genegenomiclocation/bulk/WB_WBcel235/geneLocations"; private final String gffDataTestFilePath = "src/test/resources/bulk/fms/08_gff_data/"; private final String transcriptGetEndpoint = "/api/transcript/"; private final String exonGetEndpoint = "/api/exon/"; private final String cdsGetEndpoint = "/api/cds/"; + private final String geneGetEndpoint = "/api/gene/"; private final String transcriptId = "WB:Y74C9A.2a.1"; private final String exonUniqueId = "WB:Y74C9A.2a_exon|WB:Y74C9A.2a.1|I|1|100|+"; private final String cdsUniqueId = "WB:Y74C9A.2a|WB:Y74C9A.2a.1|I|10|100|+"; + private final String geneCurie = "WB:WBGene00022276"; private void loadRequiredEntities() throws Exception { createSoTerm("SO:0000234", "mRNA", false); createSoTerm("SO:0001035", "piRNA", false); createSoTerm("SO:0000147", "exon", false); createSoTerm("SO:0000316", "CDS", false); - createGene("WB:WBGene00022276", "NCBITaxon:6239", getVocabularyTerm(getVocabulary(VocabularyConstants.NAME_TYPE_VOCABULARY), "nomenclature_symbol"), false); + createGene(geneCurie, "NCBITaxon:6239", getVocabularyTerm(getVocabulary(VocabularyConstants.NAME_TYPE_VOCABULARY), "nomenclature_symbol"), false); } @Test @@ -93,7 +96,7 @@ public void 
gff3DataBulkUploadTranscriptEntity() throws Exception { body("entity.transcriptGenomicLocationAssociations[0].strand", is("+")). body("entity.transcriptGeneAssociations", hasSize(1)). body("entity.transcriptGeneAssociations[0].relation.name", is("is_child_of")). - body("entity.transcriptGeneAssociations[0].transcriptGeneAssociationObject.modEntityId", is("WB:WBGene00022276")); + body("entity.transcriptGeneAssociations[0].transcriptGeneAssociationObject.modEntityId", is(geneCurie)); } @@ -267,5 +270,30 @@ public void gff3DataBulkUploadInvalidFields() throws Exception { checkBulkLoadRecordCounts(exonBulkPostEndpoint, gffDataTestFilePath + "IV_04_invalid_exon_parent.json", params); checkBulkLoadRecordCounts(cdsBulkPostEndpoint, gffDataTestFilePath + "IV_05_invalid_cds_parent.json", params); } + + @Test + @Order(8) + public void gff3DataBulkUploadGeneLocation() throws Exception { + HashMap> params = new HashMap<>(); + params.put("Locations", createCountParams(1, 0, 1, 0)); + + checkBulkLoadRecordCounts(geneLocationBulkPostEndpoint, gffDataTestFilePath + "GFF_04_gene.json", params); + + RestAssured.given(). + when(). + get(geneGetEndpoint + geneCurie). + then(). + statusCode(200). + body("entity.modEntityId", is(geneCurie)). + body("entity.geneGenomicLocationAssociations", hasSize(1)). + body("entity.geneGenomicLocationAssociations[0].relation.name", is("located_on")). + body("entity.geneGenomicLocationAssociations[0].geneGenomicLocationAssociationObject.name", is("I")). + body("entity.geneGenomicLocationAssociations[0].geneGenomicLocationAssociationObject.modEntityId", is("RefSeq:NC_003279.8")). + body("entity.geneGenomicLocationAssociations[0].geneGenomicLocationAssociationObject.taxon.curie", is("NCBITaxon:6239")). + body("entity.geneGenomicLocationAssociations[0].start", is(1)). + body("entity.geneGenomicLocationAssociations[0].end", is(1005)). 
+ body("entity.geneGenomicLocationAssociations[0].strand", is("+")); + + } } diff --git a/src/test/resources/bulk/fms/08_gff_data/GFF_04_gene.json b/src/test/resources/bulk/fms/08_gff_data/GFF_04_gene.json new file mode 100644 index 000000000..911bec308 --- /dev/null +++ b/src/test/resources/bulk/fms/08_gff_data/GFF_04_gene.json @@ -0,0 +1,15 @@ +[ + { + "seqId": "I", + "source": "WormBase", + "type": "gene", + "start": 1, + "end": 1005, + "strand": "+", + "attributes": [ + "ID=Gene:WBGene00022276", + "Name=TestGene", + "gene_id=WB:WBGene00022276" + ] + } +] From 07ef5c83096704e8463d730bf1f22e368e6340ea Mon Sep 17 00:00:00 2001 From: Olin Blodgett Date: Thu, 7 Nov 2024 09:21:55 -0500 Subject: [PATCH 032/118] Fixed bug with synonyms --- .../services/base/BaseOntologyTermService.java | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/main/java/org/alliancegenome/curation_api/services/base/BaseOntologyTermService.java b/src/main/java/org/alliancegenome/curation_api/services/base/BaseOntologyTermService.java index a612843b6..01d798db8 100644 --- a/src/main/java/org/alliancegenome/curation_api/services/base/BaseOntologyTermService.java +++ b/src/main/java/org/alliancegenome/curation_api/services/base/BaseOntologyTermService.java @@ -222,9 +222,9 @@ private void handleSynonyms(OntologyTerm dbTerm, OntologyTerm incomingTerm) { } else { newSynonyms = incomingTerm.getSynonyms().stream().collect(Collectors.toSet()); } - List newSynonymNames = currentSynonyms.stream().map(Synonym::getName).collect(Collectors.toList()); + List newSynonymNames = newSynonyms.stream().map(Synonym::getName).collect(Collectors.toList()); - newSynonyms.forEach(syn -> { + for (Synonym syn: newSynonyms) { if (!currentSynonymNames.contains(syn.getName())) { SearchResponse response = synonymDAO.findByField("name", syn.getName()); Synonym synonym; @@ -235,13 +235,12 @@ private void handleSynonyms(OntologyTerm dbTerm, OntologyTerm incomingTerm) { } 
dbTerm.getSynonyms().add(synonym); } - }); - - currentSynonyms.forEach(syn -> { + } + for (Synonym syn: currentSynonyms) { if (!newSynonymNames.contains(syn.getName())) { dbTerm.getSynonyms().remove(syn); } - }); + } } From 53e8e2134072e6e592c46b04559575e904bfbba2 Mon Sep 17 00:00:00 2001 From: markquintontulloch Date: Thu, 7 Nov 2024 14:30:31 +0000 Subject: [PATCH 033/118] Skip unrecognised genes --- .../curation_api/services/Gff3Service.java | 3 ++- .../curation_api/Gff3BulkUploadITCase.java | 6 ++++++ .../fms/08_gff_data/UR_01_unrecognised_gene.json | 15 +++++++++++++++ 3 files changed, 23 insertions(+), 1 deletion(-) create mode 100644 src/test/resources/bulk/fms/08_gff_data/UR_01_unrecognised_gene.json diff --git a/src/main/java/org/alliancegenome/curation_api/services/Gff3Service.java b/src/main/java/org/alliancegenome/curation_api/services/Gff3Service.java index 93c23daa0..53992dc80 100644 --- a/src/main/java/org/alliancegenome/curation_api/services/Gff3Service.java +++ b/src/main/java/org/alliancegenome/curation_api/services/Gff3Service.java @@ -11,6 +11,7 @@ import org.alliancegenome.curation_api.dao.GenomeAssemblyDAO; import org.alliancegenome.curation_api.dao.TranscriptDAO; import org.alliancegenome.curation_api.enums.BackendBulkDataProvider; +import org.alliancegenome.curation_api.exceptions.KnownIssueValidationException; import org.alliancegenome.curation_api.exceptions.ObjectValidationException; import org.alliancegenome.curation_api.exceptions.ValidationException; import org.alliancegenome.curation_api.model.entities.CodingSequence; @@ -171,7 +172,7 @@ public void loadGeneLocationAssociations(ImmutablePair Date: Thu, 7 Nov 2024 14:35:25 +0000 Subject: [PATCH 034/118] Prettify --- .../cliapp/src/containers/dataLoadsPage/DataLoadsComponent.js | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/main/cliapp/src/containers/dataLoadsPage/DataLoadsComponent.js b/src/main/cliapp/src/containers/dataLoadsPage/DataLoadsComponent.js index 
960960ce3..19cc43d00 100644 --- a/src/main/cliapp/src/containers/dataLoadsPage/DataLoadsComponent.js +++ b/src/main/cliapp/src/containers/dataLoadsPage/DataLoadsComponent.js @@ -708,7 +708,9 @@ export const DataLoadsComponent = () => { }; const exemptTypes = (loadType) => { - return loadType === 'GFF_EXON' || loadType === 'GFF_TRANSCRIPT' || loadType === 'GFF_CDS' || loadType === 'GFF_GENE'; + return ( + loadType === 'GFF_EXON' || loadType === 'GFF_TRANSCRIPT' || loadType === 'GFF_CDS' || loadType === 'GFF_GENE' + ); }; const fileWithinSchemaRange = (fileVersion, loadType) => { From c4b5c6713f2a300a71a0c39afefa72130785f208 Mon Sep 17 00:00:00 2001 From: markquintontulloch Date: Thu, 7 Nov 2024 14:56:04 +0000 Subject: [PATCH 035/118] Catch KnownIssueValidationException --- .../curation_api/jobs/executors/gff/Gff3GeneExecutor.java | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/executors/gff/Gff3GeneExecutor.java b/src/main/java/org/alliancegenome/curation_api/jobs/executors/gff/Gff3GeneExecutor.java index fb00c1192..0e5cc4c4b 100644 --- a/src/main/java/org/alliancegenome/curation_api/jobs/executors/gff/Gff3GeneExecutor.java +++ b/src/main/java/org/alliancegenome/curation_api/jobs/executors/gff/Gff3GeneExecutor.java @@ -7,6 +7,7 @@ import java.util.zip.GZIPInputStream; import org.alliancegenome.curation_api.enums.BackendBulkDataProvider; +import org.alliancegenome.curation_api.exceptions.KnownIssueValidationException; import org.alliancegenome.curation_api.exceptions.ObjectUpdateException; import org.alliancegenome.curation_api.exceptions.ObjectUpdateException.ObjectUpdateExceptionData; import org.alliancegenome.curation_api.jobs.util.CsvSchemaBuilder; @@ -26,6 +27,7 @@ import com.fasterxml.jackson.dataformat.csv.CsvParser; import com.fasterxml.jackson.dataformat.csv.CsvSchema; +import io.quarkus.logging.Log; import jakarta.enterprise.context.ApplicationScoped; import jakarta.inject.Inject; @@ -101,6 +103,9 @@ 
private boolean runLoad( } catch (ObjectUpdateException e) { history.incrementFailed(countType); addException(history, e.getData()); + } catch (KnownIssueValidationException e) { + Log.debug(e.getMessage()); + history.incrementSkipped(); } catch (Exception e) { e.printStackTrace(); history.incrementFailed(countType); From 796b6e6b8149c59318b78b9af08d096eb5588c25 Mon Sep 17 00:00:00 2001 From: markquintontulloch Date: Thu, 7 Nov 2024 15:03:48 +0000 Subject: [PATCH 036/118] Fix counts check --- .../org/alliancegenome/curation_api/Gff3BulkUploadITCase.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/test/java/org/alliancegenome/curation_api/Gff3BulkUploadITCase.java b/src/test/java/org/alliancegenome/curation_api/Gff3BulkUploadITCase.java index 7a40cefee..9d166187c 100644 --- a/src/test/java/org/alliancegenome/curation_api/Gff3BulkUploadITCase.java +++ b/src/test/java/org/alliancegenome/curation_api/Gff3BulkUploadITCase.java @@ -299,7 +299,9 @@ public void gff3DataBulkUploadGeneLocation() throws Exception { @Test @Order(9) public void gff3SkipUnrecognisedGene() throws Exception { - checkSkippedBulkLoad(geneLocationBulkPostEndpoint, gffDataTestFilePath + "UR_01_unrecognised_gene.json"); + HashMap> params = new HashMap<>(); + params.put("Locations", createCountParams(1, 0, 0, 1)); + checkBulkLoadRecordCounts(geneLocationBulkPostEndpoint, gffDataTestFilePath + "UR_01_unrecognised_gene.json", params); } } From 1a9018c4bbe9307d3434b2bc47ac9b3e2111d409 Mon Sep 17 00:00:00 2001 From: markquintontulloch Date: Thu, 7 Nov 2024 15:12:22 +0000 Subject: [PATCH 037/118] Add count type --- .../curation_api/jobs/executors/gff/Gff3GeneExecutor.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/executors/gff/Gff3GeneExecutor.java b/src/main/java/org/alliancegenome/curation_api/jobs/executors/gff/Gff3GeneExecutor.java index 0e5cc4c4b..f4834db0f 100644 --- 
a/src/main/java/org/alliancegenome/curation_api/jobs/executors/gff/Gff3GeneExecutor.java +++ b/src/main/java/org/alliancegenome/curation_api/jobs/executors/gff/Gff3GeneExecutor.java @@ -105,7 +105,7 @@ private boolean runLoad( addException(history, e.getData()); } catch (KnownIssueValidationException e) { Log.debug(e.getMessage()); - history.incrementSkipped(); + history.incrementSkipped(countType); } catch (Exception e) { e.printStackTrace(); history.incrementFailed(countType); From a11d21594f8951b38b662ab3d6f0b8080018d4a5 Mon Sep 17 00:00:00 2001 From: Olin Blodgett Date: Thu, 7 Nov 2024 10:18:25 -0500 Subject: [PATCH 038/118] Removed files historys when loads don't run --- .../curation_api/jobs/processors/BulkLoadProcessor.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/processors/BulkLoadProcessor.java b/src/main/java/org/alliancegenome/curation_api/jobs/processors/BulkLoadProcessor.java index 86c18ec11..8b1adb526 100644 --- a/src/main/java/org/alliancegenome/curation_api/jobs/processors/BulkLoadProcessor.java +++ b/src/main/java/org/alliancegenome/curation_api/jobs/processors/BulkLoadProcessor.java @@ -148,6 +148,8 @@ protected void processFilePath(BulkLoad bulkLoad, String localFilePath, Boolean Log.info("Cleaning up downloaded file: " + localFilePath); new File(localFilePath).delete(); bulkLoadFile.setLocalFilePath(null); + Log.info("File already exists not running load"); + return; } history.setBulkLoad(bulkLoad); From a6803a1cd106fa9d800005e655cd2f3c4d11b692 Mon Sep 17 00:00:00 2001 From: Olin Blodgett Date: Thu, 7 Nov 2024 10:27:33 -0500 Subject: [PATCH 039/118] Simplified logic for loads --- .../jobs/processors/BulkLoadProcessor.java | 29 +++++++++++-------- 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/processors/BulkLoadProcessor.java b/src/main/java/org/alliancegenome/curation_api/jobs/processors/BulkLoadProcessor.java index 
8b1adb526..e61b1a910 100644 --- a/src/main/java/org/alliancegenome/curation_api/jobs/processors/BulkLoadProcessor.java +++ b/src/main/java/org/alliancegenome/curation_api/jobs/processors/BulkLoadProcessor.java @@ -131,6 +131,12 @@ protected void processFilePath(BulkLoad bulkLoad, String localFilePath, Boolean bulkLoadFile.setBulkloadCleanUp(BulkLoadCleanUp.YES); } bulkLoadFileDAO.persist(bulkLoadFile); + history.setBulkLoad(bulkLoad); + history.setBulkLoadFile(bulkLoadFile); + bulkLoadFileHistoryDAO.persist(history); + + Log.info("Firing Pending Bulk File History Event: " + history.getId()); + pendingFileJobEvents.fireAsync(new PendingLoadJobEvent(history.getId())); } else if (load.getBulkloadStatus().isForced()) { bulkLoadFile = bulkLoadFiles.getResults().get(0); if (history.getBulkloadStatus().isNotRunning()) { @@ -142,27 +148,26 @@ protected void processFilePath(BulkLoad bulkLoad, String localFilePath, Boolean Log.info("Cleaning up downloaded file: " + localFilePath); new File(localFilePath).delete(); } + if (cleanUp) { + bulkLoadFile.setBulkloadCleanUp(BulkLoadCleanUp.YES); + } + bulkLoadFileDAO.merge(bulkLoadFile); + history.setBulkLoad(bulkLoad); + history.setBulkLoadFile(bulkLoadFile); + bulkLoadFileHistoryDAO.persist(history); + + Log.info("Firing Pending Bulk File History Event: " + history.getId()); + pendingFileJobEvents.fireAsync(new PendingLoadJobEvent(history.getId())); } else { Log.info("Bulk File already exists not creating it"); bulkLoadFile = bulkLoadFiles.getResults().get(0); Log.info("Cleaning up downloaded file: " + localFilePath); new File(localFilePath).delete(); bulkLoadFile.setLocalFilePath(null); + bulkLoadFileDAO.merge(bulkLoadFile); Log.info("File already exists not running load"); - return; } - - history.setBulkLoad(bulkLoad); - history.setBulkLoadFile(bulkLoadFile); - bulkLoadFileHistoryDAO.persist(history); - if (cleanUp) { - bulkLoadFile.setBulkloadCleanUp(BulkLoadCleanUp.YES); - } - bulkLoadFileDAO.merge(bulkLoadFile); - 
bulkLoadDAO.merge(load); - Log.info("Firing Pending Bulk File History Event: " + history.getId()); - pendingFileJobEvents.fireAsync(new PendingLoadJobEvent(history.getId())); } protected void startLoad(BulkLoad load) { From 917225560d73f5a6f72fcfd87064001b4faa2554 Mon Sep 17 00:00:00 2001 From: markquintontulloch Date: Thu, 7 Nov 2024 23:23:35 +0000 Subject: [PATCH 040/118] Ensure gene_id attribute has prefix --- .../services/helpers/gff3/Gff3AttributesHelper.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/alliancegenome/curation_api/services/helpers/gff3/Gff3AttributesHelper.java b/src/main/java/org/alliancegenome/curation_api/services/helpers/gff3/Gff3AttributesHelper.java index af1f0834a..4444cc999 100644 --- a/src/main/java/org/alliancegenome/curation_api/services/helpers/gff3/Gff3AttributesHelper.java +++ b/src/main/java/org/alliancegenome/curation_api/services/helpers/gff3/Gff3AttributesHelper.java @@ -27,7 +27,7 @@ public static Map getAttributes(Gff3DTO dto, BackendBulkDataProv } // Ensure identifiers have MOD prefix - for (String key : List.of("ID", "Parent")) { + for (String key : List.of("ID", "Parent", "gene_id")) { if (attributes.containsKey(key)) { String idsString = attributes.get(key); if (StringUtils.equals(dataProvider.sourceOrganization, "WB")) { From 4234159e4b3ca858e56241366d271a0cae2e7ad0 Mon Sep 17 00:00:00 2001 From: markquintontulloch Date: Thu, 7 Nov 2024 23:25:31 +0000 Subject: [PATCH 041/118] Fix cleanup bug for XB GFF loads --- .../curation_api/services/CodingSequenceService.java | 2 +- .../org/alliancegenome/curation_api/services/ExonService.java | 2 +- .../alliancegenome/curation_api/services/TranscriptService.java | 2 +- .../CodingSequenceGenomicLocationAssociationService.java | 2 +- .../exonAssociations/ExonGenomicLocationAssociationService.java | 2 +- .../geneAssociations/GeneGenomicLocationAssociationService.java | 2 +- .../TranscriptCodingSequenceAssociationService.java | 2 +- 
.../TranscriptExonAssociationService.java | 2 +- .../TranscriptGeneAssociationService.java | 2 +- .../TranscriptGenomicLocationAssociationService.java | 2 +- 10 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/main/java/org/alliancegenome/curation_api/services/CodingSequenceService.java b/src/main/java/org/alliancegenome/curation_api/services/CodingSequenceService.java index badc7798b..62fcfdcd3 100644 --- a/src/main/java/org/alliancegenome/curation_api/services/CodingSequenceService.java +++ b/src/main/java/org/alliancegenome/curation_api/services/CodingSequenceService.java @@ -34,7 +34,7 @@ protected void init() { public List getIdsByDataProvider(BackendBulkDataProvider dataProvider) { Map params = new HashMap<>(); params.put(EntityFieldConstants.DATA_PROVIDER, dataProvider.sourceOrganization); - if (StringUtils.equals(dataProvider.sourceOrganization, "RGD")) { + if (StringUtils.equals(dataProvider.sourceOrganization, "RGD") || StringUtils.equals(dataProvider.sourceOrganization, "XB")) { params.put(EntityFieldConstants.TAXON, dataProvider.canonicalTaxonCurie); } List ids = codingSequenceDAO.findIdsByParams(params); diff --git a/src/main/java/org/alliancegenome/curation_api/services/ExonService.java b/src/main/java/org/alliancegenome/curation_api/services/ExonService.java index d5cbfddda..09710dfb4 100644 --- a/src/main/java/org/alliancegenome/curation_api/services/ExonService.java +++ b/src/main/java/org/alliancegenome/curation_api/services/ExonService.java @@ -32,7 +32,7 @@ protected void init() { public List getIdsByDataProvider(BackendBulkDataProvider dataProvider) { Map params = new HashMap<>(); params.put(EntityFieldConstants.DATA_PROVIDER, dataProvider.sourceOrganization); - if (StringUtils.equals(dataProvider.sourceOrganization, "RGD")) { + if (StringUtils.equals(dataProvider.sourceOrganization, "RGD") || StringUtils.equals(dataProvider.sourceOrganization, "XB")) { params.put(EntityFieldConstants.TAXON, dataProvider.canonicalTaxonCurie); } 
List ids = exonDAO.findIdsByParams(params); diff --git a/src/main/java/org/alliancegenome/curation_api/services/TranscriptService.java b/src/main/java/org/alliancegenome/curation_api/services/TranscriptService.java index abc9f8fbf..4c8816b8e 100644 --- a/src/main/java/org/alliancegenome/curation_api/services/TranscriptService.java +++ b/src/main/java/org/alliancegenome/curation_api/services/TranscriptService.java @@ -34,7 +34,7 @@ protected void init() { public List getIdsByDataProvider(BackendBulkDataProvider dataProvider) { Map params = new HashMap<>(); params.put(EntityFieldConstants.DATA_PROVIDER, dataProvider.sourceOrganization); - if (StringUtils.equals(dataProvider.sourceOrganization, "RGD")) { + if (StringUtils.equals(dataProvider.sourceOrganization, "RGD") || StringUtils.equals(dataProvider.sourceOrganization, "XB")) { params.put(EntityFieldConstants.TAXON, dataProvider.canonicalTaxonCurie); } List ids = transcriptDAO.findIdsByParams(params); diff --git a/src/main/java/org/alliancegenome/curation_api/services/associations/codingSequenceAssociations/CodingSequenceGenomicLocationAssociationService.java b/src/main/java/org/alliancegenome/curation_api/services/associations/codingSequenceAssociations/CodingSequenceGenomicLocationAssociationService.java index 0ca58b1fb..2af2decd9 100644 --- a/src/main/java/org/alliancegenome/curation_api/services/associations/codingSequenceAssociations/CodingSequenceGenomicLocationAssociationService.java +++ b/src/main/java/org/alliancegenome/curation_api/services/associations/codingSequenceAssociations/CodingSequenceGenomicLocationAssociationService.java @@ -45,7 +45,7 @@ protected void init() { public List getIdsByDataProvider(BackendBulkDataProvider dataProvider) { Map params = new HashMap<>(); params.put(EntityFieldConstants.CODING_SEQUENCE_ASSOCIATION_SUBJECT_DATA_PROVIDER, dataProvider.sourceOrganization); - if (StringUtils.equals(dataProvider.sourceOrganization, "RGD")) { + if 
(StringUtils.equals(dataProvider.sourceOrganization, "RGD") || StringUtils.equals(dataProvider.sourceOrganization, "XB")) { params.put(EntityFieldConstants.CODING_SEQUENCE_ASSOCIATION_SUBJECT_TAXON, dataProvider.canonicalTaxonCurie); } List associationIds = codingSequenceGenomicLocationAssociationDAO.findIdsByParams(params); diff --git a/src/main/java/org/alliancegenome/curation_api/services/associations/exonAssociations/ExonGenomicLocationAssociationService.java b/src/main/java/org/alliancegenome/curation_api/services/associations/exonAssociations/ExonGenomicLocationAssociationService.java index d6840d409..adb1a45c7 100644 --- a/src/main/java/org/alliancegenome/curation_api/services/associations/exonAssociations/ExonGenomicLocationAssociationService.java +++ b/src/main/java/org/alliancegenome/curation_api/services/associations/exonAssociations/ExonGenomicLocationAssociationService.java @@ -45,7 +45,7 @@ protected void init() { public List getIdsByDataProvider(BackendBulkDataProvider dataProvider) { Map params = new HashMap<>(); params.put(EntityFieldConstants.EXON_ASSOCIATION_SUBJECT_DATA_PROVIDER, dataProvider.sourceOrganization); - if (StringUtils.equals(dataProvider.sourceOrganization, "RGD")) { + if (StringUtils.equals(dataProvider.sourceOrganization, "RGD") || StringUtils.equals(dataProvider.sourceOrganization, "XB")) { params.put(EntityFieldConstants.EXON_ASSOCIATION_SUBJECT_TAXON, dataProvider.canonicalTaxonCurie); } List associationIds = exonGenomicLocationAssociationDAO.findIdsByParams(params); diff --git a/src/main/java/org/alliancegenome/curation_api/services/associations/geneAssociations/GeneGenomicLocationAssociationService.java b/src/main/java/org/alliancegenome/curation_api/services/associations/geneAssociations/GeneGenomicLocationAssociationService.java index 17701acbe..da6b25f85 100644 --- a/src/main/java/org/alliancegenome/curation_api/services/associations/geneAssociations/GeneGenomicLocationAssociationService.java +++ 
b/src/main/java/org/alliancegenome/curation_api/services/associations/geneAssociations/GeneGenomicLocationAssociationService.java @@ -45,7 +45,7 @@ protected void init() { public List getIdsByDataProvider(BackendBulkDataProvider dataProvider) { Map params = new HashMap<>(); params.put(EntityFieldConstants.GENE_ASSOCIATION_SUBJECT_DATA_PROVIDER, dataProvider.sourceOrganization); - if (StringUtils.equals(dataProvider.sourceOrganization, "RGD")) { + if (StringUtils.equals(dataProvider.sourceOrganization, "RGD") || StringUtils.equals(dataProvider.sourceOrganization, "XB")) { params.put(EntityFieldConstants.GENE_ASSOCIATION_SUBJECT_TAXON, dataProvider.canonicalTaxonCurie); } List associationIds = geneGenomicLocationAssociationDAO.findIdsByParams(params); diff --git a/src/main/java/org/alliancegenome/curation_api/services/associations/transcriptAssociations/TranscriptCodingSequenceAssociationService.java b/src/main/java/org/alliancegenome/curation_api/services/associations/transcriptAssociations/TranscriptCodingSequenceAssociationService.java index ca9bc103e..b859c62b8 100644 --- a/src/main/java/org/alliancegenome/curation_api/services/associations/transcriptAssociations/TranscriptCodingSequenceAssociationService.java +++ b/src/main/java/org/alliancegenome/curation_api/services/associations/transcriptAssociations/TranscriptCodingSequenceAssociationService.java @@ -46,7 +46,7 @@ protected void init() { public List getIdsByDataProvider(BackendBulkDataProvider dataProvider) { Map params = new HashMap<>(); params.put(EntityFieldConstants.TRANSCRIPT_ASSOCIATION_SUBJECT_DATA_PROVIDER, dataProvider.sourceOrganization); - if (StringUtils.equals(dataProvider.sourceOrganization, "RGD")) { + if (StringUtils.equals(dataProvider.sourceOrganization, "RGD") || StringUtils.equals(dataProvider.sourceOrganization, "XB")) { params.put(EntityFieldConstants.TRANSCRIPT_ASSOCIATION_SUBJECT_TAXON, dataProvider.canonicalTaxonCurie); } List associationIds = 
transcriptCodingSequenceAssociationDAO.findIdsByParams(params); diff --git a/src/main/java/org/alliancegenome/curation_api/services/associations/transcriptAssociations/TranscriptExonAssociationService.java b/src/main/java/org/alliancegenome/curation_api/services/associations/transcriptAssociations/TranscriptExonAssociationService.java index 57602635c..34cbbff5d 100644 --- a/src/main/java/org/alliancegenome/curation_api/services/associations/transcriptAssociations/TranscriptExonAssociationService.java +++ b/src/main/java/org/alliancegenome/curation_api/services/associations/transcriptAssociations/TranscriptExonAssociationService.java @@ -46,7 +46,7 @@ protected void init() { public List getIdsByDataProvider(BackendBulkDataProvider dataProvider) { Map params = new HashMap<>(); params.put(EntityFieldConstants.TRANSCRIPT_ASSOCIATION_SUBJECT_DATA_PROVIDER, dataProvider.sourceOrganization); - if (StringUtils.equals(dataProvider.sourceOrganization, "RGD")) { + if (StringUtils.equals(dataProvider.sourceOrganization, "RGD") || StringUtils.equals(dataProvider.sourceOrganization, "XB")) { params.put(EntityFieldConstants.TRANSCRIPT_ASSOCIATION_SUBJECT_TAXON, dataProvider.canonicalTaxonCurie); } List associationIds = transcriptExonAssociationDAO.findIdsByParams(params); diff --git a/src/main/java/org/alliancegenome/curation_api/services/associations/transcriptAssociations/TranscriptGeneAssociationService.java b/src/main/java/org/alliancegenome/curation_api/services/associations/transcriptAssociations/TranscriptGeneAssociationService.java index 256d9a003..c0cf3d08f 100644 --- a/src/main/java/org/alliancegenome/curation_api/services/associations/transcriptAssociations/TranscriptGeneAssociationService.java +++ b/src/main/java/org/alliancegenome/curation_api/services/associations/transcriptAssociations/TranscriptGeneAssociationService.java @@ -46,7 +46,7 @@ protected void init() { public List getIdsByDataProvider(BackendBulkDataProvider dataProvider) { Map params = new HashMap<>(); 
params.put(EntityFieldConstants.TRANSCRIPT_ASSOCIATION_SUBJECT_DATA_PROVIDER, dataProvider.sourceOrganization); - if (StringUtils.equals(dataProvider.sourceOrganization, "RGD")) { + if (StringUtils.equals(dataProvider.sourceOrganization, "RGD") || StringUtils.equals(dataProvider.sourceOrganization, "XB")) { params.put(EntityFieldConstants.TRANSCRIPT_ASSOCIATION_SUBJECT_TAXON, dataProvider.canonicalTaxonCurie); } List associationIds = transcriptGeneAssociationDAO.findIdsByParams(params); diff --git a/src/main/java/org/alliancegenome/curation_api/services/associations/transcriptAssociations/TranscriptGenomicLocationAssociationService.java b/src/main/java/org/alliancegenome/curation_api/services/associations/transcriptAssociations/TranscriptGenomicLocationAssociationService.java index babdc301f..34cb99c19 100644 --- a/src/main/java/org/alliancegenome/curation_api/services/associations/transcriptAssociations/TranscriptGenomicLocationAssociationService.java +++ b/src/main/java/org/alliancegenome/curation_api/services/associations/transcriptAssociations/TranscriptGenomicLocationAssociationService.java @@ -45,7 +45,7 @@ protected void init() { public List getIdsByDataProvider(BackendBulkDataProvider dataProvider) { Map params = new HashMap<>(); params.put(EntityFieldConstants.TRANSCRIPT_ASSOCIATION_SUBJECT_DATA_PROVIDER, dataProvider.sourceOrganization); - if (StringUtils.equals(dataProvider.sourceOrganization, "RGD")) { + if (StringUtils.equals(dataProvider.sourceOrganization, "RGD") || StringUtils.equals(dataProvider.sourceOrganization, "XB")) { params.put(EntityFieldConstants.TRANSCRIPT_ASSOCIATION_SUBJECT_TAXON, dataProvider.canonicalTaxonCurie); } List associationIds = transcriptGenomicLocationAssociationDAO.findIdsByParams(params); From 42a29ce8b517aec59b627c7d6e424ae46834f6ae Mon Sep 17 00:00:00 2001 From: VarunReddy1111 Date: Wed, 6 Nov 2024 19:12:51 -0500 Subject: [PATCH 042/118] Adding vocabulary terms to genetic sex vocabulary --- 
...atasetSampleAnnotationFmsDTOValidator.java | 28 +++++++++++++++---- ..._adding_synonyms_htpsample_genetic_sex.sql | 2 ++ 2 files changed, 24 insertions(+), 6 deletions(-) create mode 100644 src/main/resources/db/migration/v0.38.0.3__adding_synonyms_htpsample_genetic_sex.sql diff --git a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/HTPExpressionDatasetSampleAnnotationFmsDTOValidator.java b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/HTPExpressionDatasetSampleAnnotationFmsDTOValidator.java index b441b5798..74cec096e 100644 --- a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/HTPExpressionDatasetSampleAnnotationFmsDTOValidator.java +++ b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/HTPExpressionDatasetSampleAnnotationFmsDTOValidator.java @@ -29,6 +29,7 @@ import org.alliancegenome.curation_api.model.ingest.dto.fms.BioSampleGenomicInformationFmsDTO; import org.alliancegenome.curation_api.model.ingest.dto.fms.HTPExpressionDatasetSampleAnnotationFmsDTO; import org.alliancegenome.curation_api.model.ingest.dto.fms.WhereExpressedFmsDTO; +import org.alliancegenome.curation_api.model.input.Pagination; import org.alliancegenome.curation_api.response.ObjectResponse; import org.alliancegenome.curation_api.response.SearchResponse; import org.alliancegenome.curation_api.services.AffectedGenomicModelService; @@ -167,7 +168,7 @@ public HTPExpressionDatasetSampleAnnotation validateHTPExpressionDatasetSampleAn } else if (htpSampleAnnotation.getGenomicInformation().getBioSampleAllele() != null) { identifierString = htpSampleAnnotation.getGenomicInformation().getBioSampleAllele().getIdentifier(); } - if (!identifierString.equals(dto.getGenomicInformation().getBiosampleId())) { + if (!identifierString.equals(dto.getGenomicInformation().getBiosampleId()) || (htpSampleAnnotation.getGenomicInformation().getBioSampleAgmType() == null && 
StringUtils.isNotEmpty(dto.getGenomicInformation().getIdType()))) { validateGenomicInformation(dto.getGenomicInformation(), htpSampleAnnotation, htpSampleAnnotationResponse); } if (StringUtils.isNotEmpty(dto.getGenomicInformation().getBioSampleText())) { @@ -181,9 +182,22 @@ public HTPExpressionDatasetSampleAnnotation validateHTPExpressionDatasetSampleAn } if (StringUtils.isNotEmpty(dto.getSex())) { - VocabularyTerm geneticSex = vocabularyTermService.getTermInVocabularyTermSet(VocabularyConstants.GENETIC_SEX_VOCABULARY, dto.getSex()).getEntity(); - if (geneticSex != null) { - htpSampleAnnotation.setGeneticSex(geneticSex); + Map params = new HashMap<>(); + params.put("name", dto.getSex()); + params.put("query_operator", "or"); + params.put("synonyms", dto.getSex()); + SearchResponse searchResponse = vocabularyTermService.findByParams(new Pagination(), params); + boolean added = false; + if (searchResponse.getTotalResults() > 0) { + for (VocabularyTerm tag : searchResponse.getResults()) { + if (tag.getVocabulary().getName().equals("Genetic Sex") && (tag.getName().equals(dto.getSex()) || tag.getSynonyms().contains(dto.getSex()))) { + htpSampleAnnotation.setGeneticSex(tag); + added = true; + } + } + } + if (!added) { + htpSampleAnnotationResponse.addErrorMessage("Sex", ValidationConstants.INVALID_MESSAGE + " (" + dto.getSex() + ")"); } } @@ -221,7 +235,7 @@ public HTPExpressionDatasetSampleAnnotation validateHTPExpressionDatasetSampleAn } if (StringUtils.isNotEmpty(dto.getSequencingFormat())) { - VocabularyTerm sequencingFormat = vocabularyTermService.getTermInVocabularyTermSet(VocabularyConstants.HTP_DATASET_SAMPLE_SEQUENCE_FORMAT_VOCABULARY, dto.getSequencingFormat()).getEntity(); + VocabularyTerm sequencingFormat = vocabularyTermService.getTermInVocabulary(VocabularyConstants.HTP_DATASET_SAMPLE_SEQUENCE_FORMAT_VOCABULARY, dto.getSequencingFormat()).getEntity(); if (sequencingFormat != null) { htpSampleAnnotation.setSequencingFormat(sequencingFormat); } @@ -281,9 
+295,11 @@ protected void validateGenomicInformation(BioSampleGenomicInformationFmsDTO dto, htpSampleAnnotationResponse.addErrorMessage("GenomicInformation - BioSampleId", ValidationConstants.INVALID_MESSAGE + " (" + identifierString + ")"); } else { htpSampleAnnotation.getGenomicInformation().setBioSampleAgm(agm); - VocabularyTerm agmType = vocabularyTermService.getTermInVocabularyTermSet(VocabularyConstants.AGM_SUBTYPE_VOCABULARY, dto.getIdType()).getEntity(); + VocabularyTerm agmType = vocabularyTermService.getTermInVocabulary(VocabularyConstants.AGM_SUBTYPE_VOCABULARY, dto.getIdType()).getEntity(); if (agmType != null) { htpSampleAnnotation.getGenomicInformation().setBioSampleAgmType(agmType); + } else { + htpSampleAnnotationResponse.addErrorMessage("GenomicInformation - IdType", ValidationConstants.INVALID_MESSAGE + " (" + dto.getIdType() + ")"); } } } diff --git a/src/main/resources/db/migration/v0.38.0.3__adding_synonyms_htpsample_genetic_sex.sql b/src/main/resources/db/migration/v0.38.0.3__adding_synonyms_htpsample_genetic_sex.sql new file mode 100644 index 000000000..38311b0f4 --- /dev/null +++ b/src/main/resources/db/migration/v0.38.0.3__adding_synonyms_htpsample_genetic_sex.sql @@ -0,0 +1,2 @@ +INSERT INTO vocabularyterm_synonyms (vocabularyterm_id, synonyms) SELECT id, 'unknown' FROM vocabularyterm WHERE name = 'unknown sex'; +INSERT INTO vocabularyterm_synonyms (vocabularyterm_id, synonyms) SELECT id, 'pooled' FROM vocabularyterm WHERE name = 'pooled sexes'; From 531d8b628a43bea807458156a35d01ac716927e0 Mon Sep 17 00:00:00 2001 From: VarunReddy1111 Date: Thu, 7 Nov 2024 20:50:14 -0600 Subject: [PATCH 043/118] changing migration version --- .../HTPExpressionDatasetSampleAnnotationFmsDTOValidator.java | 2 +- ...sql => v0.38.0.4__adding_synonyms_htpsample_genetic_sex.sql} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename src/main/resources/db/migration/{v0.38.0.3__adding_synonyms_htpsample_genetic_sex.sql => 
v0.38.0.4__adding_synonyms_htpsample_genetic_sex.sql} (100%) diff --git a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/HTPExpressionDatasetSampleAnnotationFmsDTOValidator.java b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/HTPExpressionDatasetSampleAnnotationFmsDTOValidator.java index 74cec096e..83f9f3fee 100644 --- a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/HTPExpressionDatasetSampleAnnotationFmsDTOValidator.java +++ b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/HTPExpressionDatasetSampleAnnotationFmsDTOValidator.java @@ -168,7 +168,7 @@ public HTPExpressionDatasetSampleAnnotation validateHTPExpressionDatasetSampleAn } else if (htpSampleAnnotation.getGenomicInformation().getBioSampleAllele() != null) { identifierString = htpSampleAnnotation.getGenomicInformation().getBioSampleAllele().getIdentifier(); } - if (!identifierString.equals(dto.getGenomicInformation().getBiosampleId()) || (htpSampleAnnotation.getGenomicInformation().getBioSampleAgmType() == null && StringUtils.isNotEmpty(dto.getGenomicInformation().getIdType()))) { + if (!identifierString.equals(dto.getGenomicInformation().getBiosampleId()) || htpSampleAnnotation.getGenomicInformation().getBioSampleAgmType() == null && StringUtils.isNotEmpty(dto.getGenomicInformation().getIdType())) { validateGenomicInformation(dto.getGenomicInformation(), htpSampleAnnotation, htpSampleAnnotationResponse); } if (StringUtils.isNotEmpty(dto.getGenomicInformation().getBioSampleText())) { diff --git a/src/main/resources/db/migration/v0.38.0.3__adding_synonyms_htpsample_genetic_sex.sql b/src/main/resources/db/migration/v0.38.0.4__adding_synonyms_htpsample_genetic_sex.sql similarity index 100% rename from src/main/resources/db/migration/v0.38.0.3__adding_synonyms_htpsample_genetic_sex.sql rename to src/main/resources/db/migration/v0.38.0.4__adding_synonyms_htpsample_genetic_sex.sql From 
95316449720c0f9ac613ea51bfd0c6f467eb9c62 Mon Sep 17 00:00:00 2001 From: Christian Pich Date: Fri, 8 Nov 2024 10:54:43 +0100 Subject: [PATCH 044/118] Add convenience method for genetic modifiers --- .../model/entities/DiseaseAnnotation.java | 117 +++++++++--------- 1 file changed, 61 insertions(+), 56 deletions(-) diff --git a/src/main/java/org/alliancegenome/curation_api/model/entities/DiseaseAnnotation.java b/src/main/java/org/alliancegenome/curation_api/model/entities/DiseaseAnnotation.java index 96b84b062..be26aa9e2 100644 --- a/src/main/java/org/alliancegenome/curation_api/model/entities/DiseaseAnnotation.java +++ b/src/main/java/org/alliancegenome/curation_api/model/entities/DiseaseAnnotation.java @@ -1,13 +1,20 @@ package org.alliancegenome.curation_api.model.entities; -import java.util.List; - +import com.fasterxml.jackson.annotation.JsonIgnore; +import com.fasterxml.jackson.annotation.JsonSubTypes; +import com.fasterxml.jackson.annotation.JsonSubTypes.Type; +import com.fasterxml.jackson.annotation.JsonTypeInfo; +import com.fasterxml.jackson.annotation.JsonView; +import jakarta.persistence.*; +import lombok.Data; +import lombok.EqualsAndHashCode; import org.alliancegenome.curation_api.constants.LinkMLSchemaConstants; import org.alliancegenome.curation_api.interfaces.AGRCurationSchemaVersion; import org.alliancegenome.curation_api.model.bridges.BooleanValueBridge; import org.alliancegenome.curation_api.model.entities.ontology.DOTerm; import org.alliancegenome.curation_api.model.entities.ontology.ECOTerm; import org.alliancegenome.curation_api.view.View; +import org.apache.commons.collections4.CollectionUtils; import org.eclipse.microprofile.openapi.annotations.media.Schema; import org.hibernate.search.engine.backend.types.Aggregable; import org.hibernate.search.engine.backend.types.Searchable; @@ -19,25 +26,9 @@ import org.hibernate.search.mapper.pojo.mapping.definition.annotation.IndexingDependency; import 
org.hibernate.search.mapper.pojo.mapping.definition.annotation.KeywordField; -import com.fasterxml.jackson.annotation.JsonIgnore; -import com.fasterxml.jackson.annotation.JsonSubTypes; -import com.fasterxml.jackson.annotation.JsonSubTypes.Type; -import com.fasterxml.jackson.annotation.JsonTypeInfo; -import com.fasterxml.jackson.annotation.JsonView; - -import jakarta.persistence.Column; -import jakarta.persistence.Entity; -import jakarta.persistence.Index; -import jakarta.persistence.Inheritance; -import jakarta.persistence.InheritanceType; -import jakarta.persistence.JoinColumn; -import jakarta.persistence.JoinTable; -import jakarta.persistence.ManyToMany; -import jakarta.persistence.ManyToOne; -import jakarta.persistence.Table; -import jakarta.persistence.Transient; -import lombok.Data; -import lombok.EqualsAndHashCode; +import java.util.ArrayList; +import java.util.List; +import java.util.Objects; @Inheritance(strategy = InheritanceType.JOINED) @JsonTypeInfo(use = JsonTypeInfo.Id.NAME, include = JsonTypeInfo.As.PROPERTY, property = "type") @@ -49,7 +40,7 @@ @Entity @Data @EqualsAndHashCode(onlyExplicitlyIncluded = true, callSuper = true) -@AGRCurationSchemaVersion(min = "2.8.0", max = LinkMLSchemaConstants.LATEST_RELEASE, dependencies = { Annotation.class }) +@AGRCurationSchemaVersion(min = "2.8.0", max = LinkMLSchemaConstants.LATEST_RELEASE, dependencies = {Annotation.class}) @Schema(name = "Disease_Annotation", description = "Annotation class representing a disease annotation") @Table(indexes = { @Index(name = "DiseaseAnnotation_internal_index", columnList = "internal"), @@ -73,29 +64,29 @@ public abstract class DiseaseAnnotation extends Annotation { @IndexedEmbedded(includePaths = {"curie", "name", "secondaryIdentifiers", "synonyms.name", "namespace", - "curie_keyword", "name_keyword", "secondaryIdentifiers_keyword", "synonyms.name_keyword", "namespace_keyword" }) + "curie_keyword", "name_keyword", "secondaryIdentifiers_keyword", "synonyms.name_keyword", 
"namespace_keyword"}) @IndexingDependency(reindexOnUpdate = ReindexOnUpdate.SHALLOW) @ManyToOne - @JsonView({ View.FieldsOnly.class, View.ForPublic.class }) + @JsonView({View.FieldsOnly.class, View.ForPublic.class}) private DOTerm diseaseAnnotationObject; @FullTextField(analyzer = "autocompleteAnalyzer", searchAnalyzer = "autocompleteSearchAnalyzer", valueBridge = @ValueBridgeRef(type = BooleanValueBridge.class)) @KeywordField(name = "negated_keyword", aggregable = Aggregable.YES, sortable = Sortable.YES, searchable = Searchable.YES, valueBridge = @ValueBridgeRef(type = BooleanValueBridge.class)) - @JsonView({ View.FieldsOnly.class, View.ForPublic.class }) + @JsonView({View.FieldsOnly.class, View.ForPublic.class}) @Column(columnDefinition = "boolean default false", nullable = false) private Boolean negated = false; @IndexedEmbedded(includePaths = {"name", "name_keyword"}) @IndexingDependency(reindexOnUpdate = ReindexOnUpdate.SHALLOW) @ManyToOne - @JsonView({ View.FieldsOnly.class, View.ForPublic.class }) + @JsonView({View.FieldsOnly.class, View.ForPublic.class}) private VocabularyTerm relation; @IndexedEmbedded(includePaths = {"curie", "name", "secondaryIdentifiers", "synonyms.name", "abbreviation", - "curie_keyword", "name_keyword", "secondaryIdentifiers_keyword", "synonyms.name_keyword", "abbreviation_keyword" }) + "curie_keyword", "name_keyword", "secondaryIdentifiers_keyword", "synonyms.name_keyword", "abbreviation_keyword"}) @IndexingDependency(reindexOnUpdate = ReindexOnUpdate.SHALLOW) @ManyToMany - @JsonView({ View.FieldsAndLists.class, View.DiseaseAnnotation.class, View.ForPublic.class }) + @JsonView({View.FieldsAndLists.class, View.DiseaseAnnotation.class, View.ForPublic.class}) @JoinTable( joinColumns = @JoinColumn(name = "diseaseannotation_id"), inverseJoinColumns = @JoinColumn(name = "evidencecodes_id"), @@ -107,16 +98,16 @@ public abstract class DiseaseAnnotation extends Annotation { private List evidenceCodes; @IndexedEmbedded(includePaths = { - 
"curie", "modEntityId", "modInternalId", "curie_keyword", "modEntityId_keyword", "modInternalId_keyword", - "geneSymbol.formatText", "geneSymbol.displayText", "geneSymbol.formatText_keyword", "geneSymbol.displayText_keyword", - "geneFullName.formatText", "geneFullName.displayText", "geneFullName.formatText_keyword", "geneFullName.displayText_keyword", - "geneSystematicName.formatText", "geneSystematicName.displayText", "geneSystematicName.formatText_keyword", "geneSystematicName.displayText_keyword", - "geneSynonyms.formatText", "geneSynonyms.displayText", "geneSynonyms.formatText_keyword", "geneSynonyms.displayText_keyword", - "geneSecondaryIds.secondaryId", "geneSecondaryIds.secondaryId_keyword", "name", "name_keyword", "symbol", "symbol_keyword" + "curie", "modEntityId", "modInternalId", "curie_keyword", "modEntityId_keyword", "modInternalId_keyword", + "geneSymbol.formatText", "geneSymbol.displayText", "geneSymbol.formatText_keyword", "geneSymbol.displayText_keyword", + "geneFullName.formatText", "geneFullName.displayText", "geneFullName.formatText_keyword", "geneFullName.displayText_keyword", + "geneSystematicName.formatText", "geneSystematicName.displayText", "geneSystematicName.formatText_keyword", "geneSystematicName.displayText_keyword", + "geneSynonyms.formatText", "geneSynonyms.displayText", "geneSynonyms.formatText_keyword", "geneSynonyms.displayText_keyword", + "geneSecondaryIds.secondaryId", "geneSecondaryIds.secondaryId_keyword", "name", "name_keyword", "symbol", "symbol_keyword" }) @IndexingDependency(reindexOnUpdate = ReindexOnUpdate.SHALLOW) @ManyToMany - @JsonView({ View.FieldsAndLists.class, View.DiseaseAnnotation.class, View.ForPublic.class }) + @JsonView({View.FieldsAndLists.class, View.DiseaseAnnotation.class, View.ForPublic.class}) @JoinTable( joinColumns = @JoinColumn(name = "diseaseannotation_id"), inverseJoinColumns = @JoinColumn(name = "with_id"), @@ -130,13 +121,13 @@ public abstract class DiseaseAnnotation extends Annotation { 
@IndexedEmbedded(includePaths = {"name", "name_keyword"}) @IndexingDependency(reindexOnUpdate = ReindexOnUpdate.SHALLOW) @ManyToOne - @JsonView({ View.FieldsOnly.class, View.ForPublic.class }) + @JsonView({View.FieldsOnly.class, View.ForPublic.class}) private VocabularyTerm annotationType; @IndexedEmbedded(includePaths = {"name", "name_keyword"}) @IndexingDependency(reindexOnUpdate = ReindexOnUpdate.SHALLOW) @ManyToMany - @JsonView({ View.FieldsAndLists.class, View.DiseaseAnnotation.class, View.ForPublic.class }) + @JsonView({View.FieldsAndLists.class, View.DiseaseAnnotation.class, View.ForPublic.class}) @JoinTable( joinColumns = @JoinColumn(name = "diseaseannotation_id"), inverseJoinColumns = @JoinColumn(name = "diseasequalifiers_id"), @@ -150,27 +141,27 @@ public abstract class DiseaseAnnotation extends Annotation { @IndexedEmbedded(includePaths = {"name", "name_keyword"}) @IndexingDependency(reindexOnUpdate = ReindexOnUpdate.SHALLOW) @ManyToOne - @JsonView({ View.FieldsOnly.class, View.ForPublic.class }) + @JsonView({View.FieldsOnly.class, View.ForPublic.class}) private VocabularyTerm geneticSex; @IndexedEmbedded(includePaths = {"sourceOrganization.abbreviation", "sourceOrganization.fullName", "sourceOrganization.shortName", "crossReference.displayName", "crossReference.referencedCurie", - "sourceOrganization.abbreviation_keyword", "sourceOrganization.fullName_keyword", "sourceOrganization.shortName_keyword", "crossReference.displayName_keyword", "crossReference.referencedCurie_keyword"}) + "sourceOrganization.abbreviation_keyword", "sourceOrganization.fullName_keyword", "sourceOrganization.shortName_keyword", "crossReference.displayName_keyword", "crossReference.referencedCurie_keyword"}) @IndexingDependency(reindexOnUpdate = ReindexOnUpdate.SHALLOW) @ManyToOne - @JsonView({ View.FieldsOnly.class, View.ForPublic.class }) + @JsonView({View.FieldsOnly.class, View.ForPublic.class}) private DataProvider secondaryDataProvider; @IndexedEmbedded(includePaths = { - 
"curie", "modEntityId", "modInternalId", "curie_keyword", "modEntityId_keyword", "modInternalId_keyword", - "geneSymbol.formatText", "geneSymbol.displayText", "geneSymbol.formatText_keyword", "geneSymbol.displayText_keyword", - "geneFullName.formatText", "geneFullName.displayText", "geneFullName.formatText_keyword", "geneFullName.displayText_keyword", - "geneSystematicName.formatText", "geneSystematicName.displayText", "geneSystematicName.formatText_keyword", "geneSystematicName.displayText_keyword", - "geneSynonyms.formatText", "geneSynonyms.displayText", "geneSynonyms.formatText_keyword", "geneSynonyms.displayText_keyword", - "geneSecondaryIds.secondaryId", "geneSecondaryIds.secondaryId_keyword", "name", "name_keyword", "symbol", "symbol_keyword" + "curie", "modEntityId", "modInternalId", "curie_keyword", "modEntityId_keyword", "modInternalId_keyword", + "geneSymbol.formatText", "geneSymbol.displayText", "geneSymbol.formatText_keyword", "geneSymbol.displayText_keyword", + "geneFullName.formatText", "geneFullName.displayText", "geneFullName.formatText_keyword", "geneFullName.displayText_keyword", + "geneSystematicName.formatText", "geneSystematicName.displayText", "geneSystematicName.formatText_keyword", "geneSystematicName.displayText_keyword", + "geneSynonyms.formatText", "geneSynonyms.displayText", "geneSynonyms.formatText_keyword", "geneSynonyms.displayText_keyword", + "geneSecondaryIds.secondaryId", "geneSecondaryIds.secondaryId_keyword", "name", "name_keyword", "symbol", "symbol_keyword" }) @IndexingDependency(reindexOnUpdate = ReindexOnUpdate.SHALLOW) @ManyToMany - @JsonView({ View.FieldsAndLists.class, View.DiseaseAnnotation.class, View.ForPublic.class }) + @JsonView({View.FieldsAndLists.class, View.DiseaseAnnotation.class, View.ForPublic.class}) @JoinTable( name = "diseaseannotation_modifiergene", joinColumns = @JoinColumn(name = "diseaseannotation_id"), @@ -183,15 +174,15 @@ public abstract class DiseaseAnnotation extends Annotation { private List 
diseaseGeneticModifierGenes; @IndexedEmbedded(includePaths = { - "curie", "modEntityId", "modInternalId", "curie_keyword", "modEntityId_keyword", "modInternalId_keyword", - "alleleSymbol.formatText", "alleleSymbol.displayText", "alleleSymbol.formatText_keyword", "alleleSymbol.displayText_keyword", - "alleleFullName.formatText", "alleleFullName.displayText", "alleleFullName.formatText_keyword", "alleleFullName.displayText_keyword", - "alleleSynonyms.formatText", "alleleSynonyms.displayText", "alleleSynonyms.formatText_keyword", "alleleSynonyms.displayText_keyword", - "alleleSecondaryIds.secondaryId", "alleleSecondaryIds.secondaryId_keyword" + "curie", "modEntityId", "modInternalId", "curie_keyword", "modEntityId_keyword", "modInternalId_keyword", + "alleleSymbol.formatText", "alleleSymbol.displayText", "alleleSymbol.formatText_keyword", "alleleSymbol.displayText_keyword", + "alleleFullName.formatText", "alleleFullName.displayText", "alleleFullName.formatText_keyword", "alleleFullName.displayText_keyword", + "alleleSynonyms.formatText", "alleleSynonyms.displayText", "alleleSynonyms.formatText_keyword", "alleleSynonyms.displayText_keyword", + "alleleSecondaryIds.secondaryId", "alleleSecondaryIds.secondaryId_keyword" }) @IndexingDependency(reindexOnUpdate = ReindexOnUpdate.SHALLOW) @ManyToMany - @JsonView({ View.FieldsAndLists.class, View.DiseaseAnnotation.class, View.ForPublic.class }) + @JsonView({View.FieldsAndLists.class, View.DiseaseAnnotation.class, View.ForPublic.class}) @JoinTable( name = "diseaseannotation_modifierallele", joinColumns = @JoinColumn(name = "diseaseannotation_id"), @@ -206,7 +197,7 @@ public abstract class DiseaseAnnotation extends Annotation { @IndexedEmbedded(includePaths = {"name", "name_keyword", "curie", "curie_keyword", "modEntityId", "modEntityId_keyword", "modInternalId", "modInternalId_keyword"}) @IndexingDependency(reindexOnUpdate = ReindexOnUpdate.SHALLOW) @ManyToMany - @JsonView({ View.FieldsAndLists.class, 
View.DiseaseAnnotation.class, View.ForPublic.class }) + @JsonView({View.FieldsAndLists.class, View.DiseaseAnnotation.class, View.ForPublic.class}) @JoinTable( name = "diseaseannotation_modifieragm", joinColumns = @JoinColumn(name = "diseaseannotation_id"), @@ -218,10 +209,24 @@ public abstract class DiseaseAnnotation extends Annotation { ) private List diseaseGeneticModifierAgms; + public List getDiseaseGeneticModifiers() { + List geneticModifiers = new ArrayList<>(); + if (CollectionUtils.isNotEmpty(getDiseaseGeneticModifierAlleles())) { + geneticModifiers.addAll(getDiseaseGeneticModifierAlleles().stream().filter(Objects::nonNull).toList()); + } + if (CollectionUtils.isNotEmpty(getDiseaseGeneticModifierGenes())) { + geneticModifiers.addAll(getDiseaseGeneticModifierGenes().stream().filter(Objects::nonNull).toList()); + } + if (CollectionUtils.isNotEmpty(getDiseaseGeneticModifierAgms())) { + geneticModifiers.addAll(getDiseaseGeneticModifierAgms().stream().filter(Objects::nonNull).toList()); + } + return geneticModifiers; + } + @IndexedEmbedded(includePaths = {"name", "name_keyword"}) @IndexingDependency(reindexOnUpdate = ReindexOnUpdate.SHALLOW) @ManyToOne - @JsonView({ View.FieldsOnly.class, View.ForPublic.class }) + @JsonView({View.FieldsOnly.class, View.ForPublic.class}) private VocabularyTerm diseaseGeneticModifierRelation; @Transient @@ -232,7 +237,7 @@ public abstract class DiseaseAnnotation extends Annotation { @Transient public abstract String getSubjectSpeciesName(); - + @Transient public abstract String getSubjectIdentifier(); From bd8d01f4379e5f001b59bf69e01c2a5765aaee30 Mon Sep 17 00:00:00 2001 From: Olin Blodgett Date: Fri, 8 Nov 2024 11:58:40 -0500 Subject: [PATCH 045/118] Added fix for curation cerebro admin link --- .../curation_api/controllers/APIVersionInfoController.java | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/main/java/org/alliancegenome/curation_api/controllers/APIVersionInfoController.java 
b/src/main/java/org/alliancegenome/curation_api/controllers/APIVersionInfoController.java index 3f2d12494..b1c51348a 100644 --- a/src/main/java/org/alliancegenome/curation_api/controllers/APIVersionInfoController.java +++ b/src/main/java/org/alliancegenome/curation_api/controllers/APIVersionInfoController.java @@ -59,7 +59,12 @@ public APIVersionInfo get() { info.setName(name); info.setAgrCurationSchemaVersions(linkMLClassVersions); info.setSubmittedClassSchemaVersions(submittedClassVersions); - info.setEsHost(esHost); + String[] array = esHost.split(","); + if (array.length > 0) { + info.setEsHost(array[0]); + } else { + info.setEsHost(esHost); + } info.setEnv(env); return info; } From d9d1b5c6eb20dcdfb5bf677e41164e1047b5126d Mon Sep 17 00:00:00 2001 From: VarunReddy1111 Date: Fri, 8 Nov 2024 12:24:15 -0600 Subject: [PATCH 046/118] Changed the ES protocol to http --- src/main/cliapp/src/containers/layout/SiteLayout.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/cliapp/src/containers/layout/SiteLayout.js b/src/main/cliapp/src/containers/layout/SiteLayout.js index 7c8838a6b..274a7c61c 100644 --- a/src/main/cliapp/src/containers/layout/SiteLayout.js +++ b/src/main/cliapp/src/containers/layout/SiteLayout.js @@ -375,7 +375,7 @@ export const SiteLayout = (props) => { { label: 'Search index UI (cerebro)', icon: 'pi pi-fw pi-home', - url: `http://cerebro.alliancegenome.org:9000/#!/overview?host=https://${siteContext?.apiVersion?.esHost}`, + url: `http://cerebro.alliancegenome.org:9000/#!/overview?host=http://${siteContext?.apiVersion?.esHost}`, target: '_blank', }, { From 88a73c24a8724bfa32c33c8cf5e78cbbc88634d6 Mon Sep 17 00:00:00 2001 From: Olin Blodgett Date: Fri, 8 Nov 2024 16:12:06 -0500 Subject: [PATCH 047/118] Added other synonym types --- .../services/helpers/GenericOntologyLoadHelper.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/src/main/java/org/alliancegenome/curation_api/services/helpers/GenericOntologyLoadHelper.java b/src/main/java/org/alliancegenome/curation_api/services/helpers/GenericOntologyLoadHelper.java index 05e8e35c1..613371037 100644 --- a/src/main/java/org/alliancegenome/curation_api/services/helpers/GenericOntologyLoadHelper.java +++ b/src/main/java/org/alliancegenome/curation_api/services/helpers/GenericOntologyLoadHelper.java @@ -17,6 +17,7 @@ import org.alliancegenome.curation_api.model.entities.ontology.OntologyTerm; import org.alliancegenome.curation_api.util.ProcessDisplayHelper; import org.apache.commons.collections.CollectionUtils; +import org.semanticweb.elk.owlapi.ElkReasonerFactory; import org.semanticweb.owlapi.apibinding.OWLManager; import org.semanticweb.owlapi.model.IRI; import org.semanticweb.owlapi.model.OWLAnnotation; @@ -30,7 +31,6 @@ import org.semanticweb.owlapi.model.OWLOntologyManager; import org.semanticweb.owlapi.reasoner.OWLReasoner; import org.semanticweb.owlapi.search.EntitySearcher; -import org.semanticweb.elk.owlapi.ElkReasonerFactory; import io.quarkus.logging.Log; @@ -317,7 +317,7 @@ private T parseAnnotation(OWLAnnotation annotation, OWLClass node, T term, Strin term.setObsolete(getBoolean(annotation.getValue())); } else if (key.equals("hasOBONamespace")) { term.setNamespace(getString(annotation.getValue())); - } else if (key.equals("hasExactSynonym") || key.equals("hasRelatedSynonym")) { + } else if (key.equals("hasExactSynonym") || key.equals("hasRelatedSynonym") || key.equals("hasNarrowSynonym") || key.equals("hasBroadSynonym")) { if (term.getSynonyms() == null) { term.setSynonyms(new ArrayList<>()); } From e76902b3039ec521316ef7cd798e3d1247e7db74 Mon Sep 17 00:00:00 2001 From: VarunReddy1111 Date: Mon, 11 Nov 2024 11:30:40 -0600 Subject: [PATCH 048/118] Updated Release notes --- RELEASE-NOTES.md | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md index 
673f39311..aa699123a 100644 --- a/RELEASE-NOTES.md +++ b/RELEASE-NOTES.md @@ -2,6 +2,41 @@ https://agr-jira.atlassian.net/wiki/spaces/ATEAM/overview +## v0.35.0 +* New features + * Load FBcv into the persistent store (SCRUM-2190) + * Implement Gene Type field/column for Genes data table (SCRUM-3983) + * Turn off auxiliary clean up for associated entities (e.g. clean up of disease annotations based on a gene load) - public site Indexer work (SCRUM-4150) + * SQTR Gene Associations import into Persistent store (SCRUM-4185, SCRUM-4187, SCRUM-4188) + * HTPdataset loaded into persistent store (SCRUM-4192,SCRUM-4303, SCRUM-4341) + * Make obsolete disease annotations hidden by default for all MOD default settings (SCRUM-4277) + * HTPdataset ZFIN Fms files having wrong naming of property (SCRUM-4328) + * Clean up erroneous DOTerms (SCRUM-4337) + * Importing expression annotations into the persistent store - Load ExpressionPattern, TemporalContext, and AnatomicalSite (SCRUM-3952) + * Implement programmatic, regular purging of exception message history for all exceptions older than 2 weeks (SCRUM-4377) + * HTPdatasetSample import P1 - Creating entities and migration file (SCRUM-4426) + * Reenable downloading of all exception messages for a given data load (SCRUM-4452) + * P1: Expression Atlas Import: Load accession data files from Ensembl, etc... 
(SCRUM-4118) + * GFF Loading (Load assembly, transcripts, exons, and CDSs without positional information; Load positional information for Transcripts, Exons, and CDSs; Load associations between Genes, Transcripts, Exons, and CDSs) (SCRUM-4174, SCRUM-4175, SCRUM-4176) + * Associate Expression Annotations with Go Slim Terms (SCRUM-4300) + * Implement caching of references for DQM loads (SCRUM-4336) + * Accommodate BIOGRID-ORCS loader data type: to establish 3rd part link out to BIOGRID from gene page phenotypes section P1 - create DTO, load via migration, and loading infrastructure (SCRUM-4440) + * Investigate: Accommodate BIOGRID-ORCS loader data type: to establish 3rd part link out to BIOGRID from gene page phenotypes section (SCRUM-4471) + * Load VariantGenomicLocationAssociation from FMS submissions and generate HGVSg identifiers (SCRUM-4499) + * Test effect of removing AuditedObject fields on disease annotation indexing speed (SCRUM-4520) + * Java model changes for disease genetic modifier changes to split based on modifier type (SCRUM-4523) + * Curation UI changes to Disease Annotations table to split out Genetic Modifiers into 3 separate fields for each of Gene, Allele and AGM modifier types (SCRUM-4524) +* Fixes and maintenance + * Edit cancelation button getting squeezed out of frame on alpha data tables (SCRUM-4236) + * Constructs data table on alpha-curation throwing 500 error and not loading (SCRUM-4259) + * Fix SQTR Gene Association Integration Tests (SCRUM-4299) + * Curl timing out when uploading to persistent store (SCRUM-3683) + * Annotations and interactions not indexed on alpha-curation (despite attempts to reindex) (SCRUM-4254) + * Loads not showing in the Data Loads page "Data Processing Info Table" widget (SCRUM-4428) + * New "Running Time" showing negative time counting down (SCRUM-4429) + * Molecule table columns expand automatically when sorting (SCRUM-4531) + + ## v0.34.0 * New features * Paralogy annotations loaded into persistent store 
(SCRUM-4086, SCRUM-4088, SCRUM-4089, SCRUM-4090) From dafd13d105f9e43d2918fa12a421e30d6efcd30a Mon Sep 17 00:00:00 2001 From: markquintontulloch Date: Mon, 11 Nov 2024 21:33:27 +0000 Subject: [PATCH 049/118] Orphaned dataprovider cleanup --- .../v0.38.0.5__dataprovider_cleanup.sql | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 src/main/resources/db/migration/v0.38.0.5__dataprovider_cleanup.sql diff --git a/src/main/resources/db/migration/v0.38.0.5__dataprovider_cleanup.sql b/src/main/resources/db/migration/v0.38.0.5__dataprovider_cleanup.sql new file mode 100644 index 000000000..bfe245581 --- /dev/null +++ b/src/main/resources/db/migration/v0.38.0.5__dataprovider_cleanup.sql @@ -0,0 +1,20 @@ +CREATE TABLE tmp_unattached_dataproviders ( + id bigint + ); + +INSERT INTO tmp_unattached_dataproviders (id) SELECT id FROM dataprovider; + +DELETE FROM tmp_unattached_dataproviders WHERE id IN (SELECT DISTINCT dataprovider_id FROM biologicalentity); +DELETE FROM tmp_unattached_dataproviders WHERE id IN (SELECT DISTINCT dataprovider_id FROM diseaseannotation); +DELETE FROM tmp_unattached_dataproviders WHERE id IN (SELECT DISTINCT dataprovider_id FROM geneexpressionannotation); +DELETE FROM tmp_unattached_dataproviders WHERE id IN (SELECT DISTINCT dataprovider_id FROM htpexpressiondatasetannotation); +DELETE FROM tmp_unattached_dataproviders WHERE id IN (SELECT DISTINCT dataprovider_id FROM htpexpressiondatasetsampleannotation); +DELETE FROM tmp_unattached_dataproviders WHERE id IN (SELECT DISTINCT dataprovider_id FROM phenotypeannotation); +DELETE FROM tmp_unattached_dataproviders WHERE id IN (SELECT DISTINCT dataprovider_id FROM reagent); +DELETE FROM tmp_unattached_dataproviders WHERE id IN (SELECT DISTINCT dataprovider_id FROM species); +DELETE FROM tmp_unattached_dataproviders WHERE id IN (SELECT DISTINCT secondarydataprovider_id FROM diseaseannotation); + +DELETE FROM dataprovider WHERE id IN (SELECT DISTINCT id FROM 
tmp_unattached_dataproviders); + +DROP TABLE tmp_unattached_dataproviders; + From 95887b4ed9b4460b4c00c42242a40d510c98d811 Mon Sep 17 00:00:00 2001 From: VarunReddy1111 Date: Mon, 11 Nov 2024 15:40:13 -0600 Subject: [PATCH 050/118] Fixing htpdataset table name column --- .../db/migration/v0.38.0.5__fixing_htpdataset_table.sql | 1 + 1 file changed, 1 insertion(+) create mode 100644 src/main/resources/db/migration/v0.38.0.5__fixing_htpdataset_table.sql diff --git a/src/main/resources/db/migration/v0.38.0.5__fixing_htpdataset_table.sql b/src/main/resources/db/migration/v0.38.0.5__fixing_htpdataset_table.sql new file mode 100644 index 000000000..aeea0b6e3 --- /dev/null +++ b/src/main/resources/db/migration/v0.38.0.5__fixing_htpdataset_table.sql @@ -0,0 +1 @@ +ALTER TABLE htpexpressiondatasetannotation ALTER COLUMN name TYPE text; \ No newline at end of file From dae9832be0f29ae5105803de5e577927a8456833 Mon Sep 17 00:00:00 2001 From: markquintontulloch Date: Mon, 11 Nov 2024 22:07:57 +0000 Subject: [PATCH 051/118] Bump migration version --- ...taprovider_cleanup.sql => v0.38.0.6__dataprovider_cleanup.sql} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename src/main/resources/db/migration/{v0.38.0.5__dataprovider_cleanup.sql => v0.38.0.6__dataprovider_cleanup.sql} (100%) diff --git a/src/main/resources/db/migration/v0.38.0.5__dataprovider_cleanup.sql b/src/main/resources/db/migration/v0.38.0.6__dataprovider_cleanup.sql similarity index 100% rename from src/main/resources/db/migration/v0.38.0.5__dataprovider_cleanup.sql rename to src/main/resources/db/migration/v0.38.0.6__dataprovider_cleanup.sql From 7377e45e3b275bd41d9c9f4fe0c310522e28f327 Mon Sep 17 00:00:00 2001 From: markquintontulloch Date: Tue, 12 Nov 2024 22:31:46 +0000 Subject: [PATCH 052/118] Set null counts as empty hash --- .../migration/v0.38.0.7__fix_bulkloadhistory_set_counts_bug.sql | 1 + 1 file changed, 1 insertion(+) create mode 100644 
src/main/resources/db/migration/v0.38.0.7__fix_bulkloadhistory_set_counts_bug.sql diff --git a/src/main/resources/db/migration/v0.38.0.7__fix_bulkloadhistory_set_counts_bug.sql b/src/main/resources/db/migration/v0.38.0.7__fix_bulkloadhistory_set_counts_bug.sql new file mode 100644 index 000000000..e45c15e04 --- /dev/null +++ b/src/main/resources/db/migration/v0.38.0.7__fix_bulkloadhistory_set_counts_bug.sql @@ -0,0 +1 @@ +UPDATE bulkloadfilehistory SET counts = '{}' WHERE counts IS NULL; \ No newline at end of file From 16edfbb21321ab8a19d30696d3531d9e3cd0abd2 Mon Sep 17 00:00:00 2001 From: markquintontulloch Date: Tue, 12 Nov 2024 23:08:34 +0000 Subject: [PATCH 053/118] Fix sorting of file histories by date --- .../cliapp/src/containers/dataLoadsPage/DataLoadsComponent.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/cliapp/src/containers/dataLoadsPage/DataLoadsComponent.js b/src/main/cliapp/src/containers/dataLoadsPage/DataLoadsComponent.js index 19cc43d00..4a6cfa014 100644 --- a/src/main/cliapp/src/containers/dataLoadsPage/DataLoadsComponent.js +++ b/src/main/cliapp/src/containers/dataLoadsPage/DataLoadsComponent.js @@ -599,8 +599,8 @@ export const DataLoadsComponent = () => { let filesWithoutDates = []; files.forEach((file) => { if (file.bulkloadStatus === 'FINISHED' || file.bulkloadStatus === 'STOPPED' || file.bulkloadStatus === 'FAILED') { - if (file.dateLastLoaded) { - lastLoadedDates.set(file.dateLastLoaded, file); + if (file.loadStarted) { + lastLoadedDates.set(file.loadStarted, file); } else { filesWithoutDates.push(file); } From cff10f7f378b5f0ae3506115f7b1df149e175cdb Mon Sep 17 00:00:00 2001 From: Andres Becerra Date: Tue, 12 Nov 2024 15:26:27 +0000 Subject: [PATCH 054/118] SCRUM-3953: load expression experiments afer annotations --- ...eneExpressionAnnotationCrudController.java | 6 +- ...eneExpressionExperimentCrudController.java | 34 +++++++++ .../dao/GeneExpressionExperimentDAO.java | 13 ++++ 
...GeneExpressionExperimentCrudInterface.java | 22 ++++++ .../executors/GeneExpressionExecutor.java | 53 +++++++++++--- .../model/entities/ExpressionExperiment.java | 65 +++++++++++++++++ .../entities/GeneExpressionExperiment.java | 21 ++++++ .../GeneExpressionAnnotationService.java | 9 ++- .../GeneExpressionExperimentService.java | 72 +++++++++++++++++++ ...eneExpressionAnnotationUniqueIdHelper.java | 9 +++ ...neExpressionAnnotationFmsDTOValidator.java | 21 ++++-- .../v0.38.0.7__gene_expression_experiment.sql | 39 ++++++++++ 12 files changed, 345 insertions(+), 19 deletions(-) create mode 100644 src/main/java/org/alliancegenome/curation_api/controllers/crud/GeneExpressionExperimentCrudController.java create mode 100644 src/main/java/org/alliancegenome/curation_api/dao/GeneExpressionExperimentDAO.java create mode 100644 src/main/java/org/alliancegenome/curation_api/interfaces/crud/GeneExpressionExperimentCrudInterface.java create mode 100644 src/main/java/org/alliancegenome/curation_api/model/entities/ExpressionExperiment.java create mode 100644 src/main/java/org/alliancegenome/curation_api/model/entities/GeneExpressionExperiment.java create mode 100644 src/main/java/org/alliancegenome/curation_api/services/GeneExpressionExperimentService.java create mode 100644 src/main/resources/db/migration/v0.38.0.7__gene_expression_experiment.sql diff --git a/src/main/java/org/alliancegenome/curation_api/controllers/crud/GeneExpressionAnnotationCrudController.java b/src/main/java/org/alliancegenome/curation_api/controllers/crud/GeneExpressionAnnotationCrudController.java index c86c11831..cb8ed315f 100644 --- a/src/main/java/org/alliancegenome/curation_api/controllers/crud/GeneExpressionAnnotationCrudController.java +++ b/src/main/java/org/alliancegenome/curation_api/controllers/crud/GeneExpressionAnnotationCrudController.java @@ -3,6 +3,7 @@ import jakarta.annotation.PostConstruct; import jakarta.enterprise.context.RequestScoped; import jakarta.inject.Inject; +import 
lombok.extern.jbosslog.JBossLog; import org.alliancegenome.curation_api.controllers.base.BaseEntityCrudController; import org.alliancegenome.curation_api.dao.GeneExpressionAnnotationDAO; import org.alliancegenome.curation_api.interfaces.crud.GeneExpressionAnnotationCrudInterface; @@ -16,6 +17,7 @@ import java.util.List; @RequestScoped +@JBossLog public class GeneExpressionAnnotationCrudController extends BaseEntityCrudController implements GeneExpressionAnnotationCrudInterface { @Inject GeneExpressionAnnotationService geneExpressionAnnotationService; @@ -32,7 +34,7 @@ public ObjectResponse getByIdentifier(String identifie } public APIResponse updateExpressionAnnotations(String dataProvider, List annotations) { - return geneExpressionExecutor.runLoadApi(geneExpressionAnnotationService, dataProvider, annotations); + APIResponse response = geneExpressionExecutor.runLoadApi(geneExpressionAnnotationService, dataProvider, annotations); + return response; } } - diff --git a/src/main/java/org/alliancegenome/curation_api/controllers/crud/GeneExpressionExperimentCrudController.java b/src/main/java/org/alliancegenome/curation_api/controllers/crud/GeneExpressionExperimentCrudController.java new file mode 100644 index 000000000..b44393683 --- /dev/null +++ b/src/main/java/org/alliancegenome/curation_api/controllers/crud/GeneExpressionExperimentCrudController.java @@ -0,0 +1,34 @@ +package org.alliancegenome.curation_api.controllers.crud; + +import jakarta.annotation.PostConstruct; +import jakarta.enterprise.context.RequestScoped; +import jakarta.inject.Inject; +import org.alliancegenome.curation_api.controllers.base.BaseEntityCrudController; +import org.alliancegenome.curation_api.dao.GeneExpressionExperimentDAO; +import org.alliancegenome.curation_api.interfaces.crud.GeneExpressionExperimentCrudInterface; +import org.alliancegenome.curation_api.jobs.executors.GeneExpressionExecutor; +import org.alliancegenome.curation_api.model.entities.GeneExpressionExperiment; +import 
org.alliancegenome.curation_api.response.ObjectResponse; +import org.alliancegenome.curation_api.services.GeneExpressionExperimentService; + + +@RequestScoped +public class GeneExpressionExperimentCrudController extends BaseEntityCrudController implements GeneExpressionExperimentCrudInterface { + + @Inject + GeneExpressionExperimentService geneExpressionExperimentService; + @Inject + GeneExpressionExecutor geneExpressionExecutor; + + @Override + @PostConstruct + protected void init() { + setService(geneExpressionExperimentService); + } + + public ObjectResponse getByIdentifier(String identifierString) { + return geneExpressionExperimentService.getByIdentifier(identifierString); + } + +} + diff --git a/src/main/java/org/alliancegenome/curation_api/dao/GeneExpressionExperimentDAO.java b/src/main/java/org/alliancegenome/curation_api/dao/GeneExpressionExperimentDAO.java new file mode 100644 index 000000000..ab3b8604a --- /dev/null +++ b/src/main/java/org/alliancegenome/curation_api/dao/GeneExpressionExperimentDAO.java @@ -0,0 +1,13 @@ +package org.alliancegenome.curation_api.dao; + +import jakarta.enterprise.context.ApplicationScoped; +import org.alliancegenome.curation_api.dao.base.BaseSQLDAO; +import org.alliancegenome.curation_api.model.entities.GeneExpressionExperiment; + +@ApplicationScoped +public class GeneExpressionExperimentDAO extends BaseSQLDAO { + + protected GeneExpressionExperimentDAO() { + super(GeneExpressionExperiment.class); + } +} diff --git a/src/main/java/org/alliancegenome/curation_api/interfaces/crud/GeneExpressionExperimentCrudInterface.java b/src/main/java/org/alliancegenome/curation_api/interfaces/crud/GeneExpressionExperimentCrudInterface.java new file mode 100644 index 000000000..4e868b9ed --- /dev/null +++ b/src/main/java/org/alliancegenome/curation_api/interfaces/crud/GeneExpressionExperimentCrudInterface.java @@ -0,0 +1,22 @@ +package org.alliancegenome.curation_api.interfaces.crud; + +import com.fasterxml.jackson.annotation.JsonView; 
+import jakarta.ws.rs.*; +import jakarta.ws.rs.core.MediaType; +import org.alliancegenome.curation_api.interfaces.base.BaseIdCrudInterface; +import org.alliancegenome.curation_api.model.entities.GeneExpressionExperiment; +import org.alliancegenome.curation_api.response.ObjectResponse; +import org.alliancegenome.curation_api.view.View; +import org.eclipse.microprofile.openapi.annotations.tags.Tag; + + +@Path("/gene-expression-experiment") +@Tag(name = "CRUD - Gene Expression Experiments") +@Produces(MediaType.APPLICATION_JSON) +@Consumes(MediaType.APPLICATION_JSON) +public interface GeneExpressionExperimentCrudInterface extends BaseIdCrudInterface { + @GET + @Path("/findBy/{identifier}") + @JsonView(View.FieldsAndLists.class) + ObjectResponse getByIdentifier(@PathParam("identifier") String identifier); +} diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/executors/GeneExpressionExecutor.java b/src/main/java/org/alliancegenome/curation_api/jobs/executors/GeneExpressionExecutor.java index a3898eca6..972914731 100644 --- a/src/main/java/org/alliancegenome/curation_api/jobs/executors/GeneExpressionExecutor.java +++ b/src/main/java/org/alliancegenome/curation_api/jobs/executors/GeneExpressionExecutor.java @@ -1,26 +1,33 @@ package org.alliancegenome.curation_api.jobs.executors; -import java.io.FileInputStream; -import java.util.ArrayList; -import java.util.List; -import java.util.zip.GZIPInputStream; - +import jakarta.enterprise.context.ApplicationScoped; +import jakarta.inject.Inject; +import lombok.extern.jbosslog.JBossLog; import org.alliancegenome.curation_api.enums.BackendBulkDataProvider; +import org.alliancegenome.curation_api.exceptions.ObjectUpdateException; import org.alliancegenome.curation_api.interfaces.AGRCurationSchemaVersion; import org.alliancegenome.curation_api.model.entities.GeneExpressionAnnotation; +import org.alliancegenome.curation_api.model.entities.GeneExpressionExperiment; import 
org.alliancegenome.curation_api.model.entities.bulkloads.BulkFMSLoad; import org.alliancegenome.curation_api.model.entities.bulkloads.BulkLoadFileHistory; import org.alliancegenome.curation_api.model.ingest.dto.fms.GeneExpressionIngestFmsDTO; import org.alliancegenome.curation_api.services.GeneExpressionAnnotationService; +import org.alliancegenome.curation_api.services.GeneExpressionExperimentService; +import org.alliancegenome.curation_api.util.ProcessDisplayHelper; import org.apache.commons.lang3.StringUtils; -import jakarta.enterprise.context.ApplicationScoped; -import jakarta.inject.Inject; +import java.io.FileInputStream; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.zip.GZIPInputStream; @ApplicationScoped +@JBossLog public class GeneExpressionExecutor extends LoadFileExecutor { - @Inject - GeneExpressionAnnotationService geneExpressionAnnotationService; + @Inject GeneExpressionAnnotationService geneExpressionAnnotationService; + @Inject GeneExpressionExperimentService geneExpressionExperimentService; public void execLoad(BulkLoadFileHistory bulkLoadFileHistory) { try { @@ -39,21 +46,47 @@ public void execLoad(BulkLoadFileHistory bulkLoadFileHistory) { bulkLoadFileHistory.setCount(geneExpressionIngestFmsDTO.getData().size()); updateHistory(bulkLoadFileHistory); - + List annotationIdsLoaded = new ArrayList<>(); List annotationIdsBefore = geneExpressionAnnotationService.getAnnotationIdsByDataProvider(dataProvider); boolean success = runLoad(geneExpressionAnnotationService, bulkLoadFileHistory, dataProvider, geneExpressionIngestFmsDTO.getData(), annotationIdsLoaded); + if (success) { runCleanup(geneExpressionAnnotationService, bulkLoadFileHistory, dataProvider.name(), annotationIdsBefore, annotationIdsLoaded, "gene expression annotation"); + loadExperiments(bulkLoadFileHistory); } bulkLoadFileHistory.finishLoad(); updateHistory(bulkLoadFileHistory); updateExceptions(bulkLoadFileHistory); + } 
catch (Exception e) { failLoad(bulkLoadFileHistory, e); e.printStackTrace(); } } + + private void loadExperiments(BulkLoadFileHistory history) { + ProcessDisplayHelper ph = new ProcessDisplayHelper(); + Map> experiments = geneExpressionAnnotationService.getExperiments(); + ph.startProcess("Saving experiments: ", experiments.size()); + for (String experimentId: experiments.keySet()) { + try { + GeneExpressionExperiment experiment = geneExpressionExperimentService.upsert(experimentId, experiments.get(experimentId)); + if (experiment != null) { + history.incrementCompleted(); + } + } catch (ObjectUpdateException e) { + history.incrementFailed(); + addException(history, e.getData()); + } catch (Exception e) { + e.printStackTrace(); + history.incrementFailed(); + addException(history, new ObjectUpdateException.ObjectUpdateExceptionData(experimentId, e.getMessage(), e.getStackTrace())); + } + ph.progressProcess(); + } + updateHistory(history); + } } diff --git a/src/main/java/org/alliancegenome/curation_api/model/entities/ExpressionExperiment.java b/src/main/java/org/alliancegenome/curation_api/model/entities/ExpressionExperiment.java new file mode 100644 index 000000000..8939d95e0 --- /dev/null +++ b/src/main/java/org/alliancegenome/curation_api/model/entities/ExpressionExperiment.java @@ -0,0 +1,65 @@ +package org.alliancegenome.curation_api.model.entities; + +import com.fasterxml.jackson.annotation.JsonSubTypes; +import com.fasterxml.jackson.annotation.JsonTypeInfo; +import com.fasterxml.jackson.annotation.JsonView; +import jakarta.persistence.*; +import lombok.Data; +import lombok.EqualsAndHashCode; +import org.alliancegenome.curation_api.constants.LinkMLSchemaConstants; +import org.alliancegenome.curation_api.interfaces.AGRCurationSchemaVersion; +import org.alliancegenome.curation_api.model.entities.base.SubmittedObject; +import org.alliancegenome.curation_api.model.entities.ontology.MMOTerm; +import org.alliancegenome.curation_api.view.View; +import 
org.hibernate.search.engine.backend.types.Aggregable; +import org.hibernate.search.engine.backend.types.Searchable; +import org.hibernate.search.engine.backend.types.Sortable; +import org.hibernate.search.mapper.pojo.automaticindexing.ReindexOnUpdate; +import org.hibernate.search.mapper.pojo.mapping.definition.annotation.*; + +import java.util.List; + +@MappedSuperclass +@JsonTypeInfo(use = JsonTypeInfo.Id.NAME, include = JsonTypeInfo.As.PROPERTY, property = "type") +@JsonSubTypes({ @JsonSubTypes.Type(value = GeneExpressionExperiment.class, name = "GeneExpressionExperiment") }) +@Indexed +@Data +@EqualsAndHashCode(onlyExplicitlyIncluded = true, callSuper = false) +@AGRCurationSchemaVersion(min = "1.7.3", max = LinkMLSchemaConstants.LATEST_RELEASE, dependencies = { SubmittedObject.class }, partial = true) +public abstract class ExpressionExperiment extends SubmittedObject { + + @FullTextField(analyzer = "autocompleteAnalyzer", searchAnalyzer = "autocompleteSearchAnalyzer") + @KeywordField(name = "uniqueId_keyword", aggregable = Aggregable.YES, sortable = Sortable.YES, searchable = Searchable.YES, normalizer = "sortNormalizer") + @Column(length = 3500) + @JsonView({ View.FieldsOnly.class }) + @EqualsAndHashCode.Include + private String uniqueId; + + @IndexedEmbedded(includePaths = {"geneSymbol.displayText", "geneSymbol.formatText", "geneSymbol.displayText_keyword", "geneSymbol.formatText_keyword", "curie", "curie_keyword", "taxon.curie", "taxon.name", "taxon.curie_keyword", "taxon.name_keyword"}) + @IndexingDependency(reindexOnUpdate = ReindexOnUpdate.SHALLOW) + @ManyToOne + @JsonView({View.FieldsOnly.class }) + private Gene entityAssayed; + + @IndexedEmbedded(includePaths = {"curie", "primaryCrossReferenceCurie", "crossReferences.referencedCurie", "curie_keyword", "primaryCrossReferenceCurie_keyword", "crossReferences.referencedCurie_keyword"}) + @IndexingDependency(reindexOnUpdate = ReindexOnUpdate.SHALLOW) + @ManyToOne + @JsonView({ View.FieldsOnly.class, 
View.ForPublic.class }) + private Reference singleReference; + + @IndexedEmbedded(includePaths = {"name", "name_keyword"}) + @IndexingDependency(reindexOnUpdate = ReindexOnUpdate.SHALLOW) + @ManyToOne + @JsonView({View.FieldsOnly.class}) + private MMOTerm expressionAssayUsed; + + @OneToMany(cascade = CascadeType.ALL, orphanRemoval = true) + @JsonView({View.FieldsAndLists.class}) + private List expressionAnnotations; + + @Transient private Allele specimenGenomicModel; + @Transient private List detectionReagents; + @Transient private List specimenAlleles; + @Transient private List relatedNotes; + @Transient private List conditionRelations; +} diff --git a/src/main/java/org/alliancegenome/curation_api/model/entities/GeneExpressionExperiment.java b/src/main/java/org/alliancegenome/curation_api/model/entities/GeneExpressionExperiment.java new file mode 100644 index 000000000..dc0745e33 --- /dev/null +++ b/src/main/java/org/alliancegenome/curation_api/model/entities/GeneExpressionExperiment.java @@ -0,0 +1,21 @@ +package org.alliancegenome.curation_api.model.entities; + +import jakarta.persistence.Entity; +import jakarta.persistence.Index; +import jakarta.persistence.Table; +import lombok.Data; +import lombok.EqualsAndHashCode; +import org.alliancegenome.curation_api.constants.LinkMLSchemaConstants; +import org.alliancegenome.curation_api.interfaces.AGRCurationSchemaVersion; +import org.hibernate.search.mapper.pojo.mapping.definition.annotation.Indexed; + +@Indexed +@Entity +@Data +@EqualsAndHashCode(onlyExplicitlyIncluded = true, callSuper = false) +@AGRCurationSchemaVersion(min = "1.7.3", max = LinkMLSchemaConstants.LATEST_RELEASE, dependencies = { ExpressionExperiment.class }, partial = true) +@Table(indexes = { + @Index(name = "on_index", columnList = "id") +}) +public class GeneExpressionExperiment extends ExpressionExperiment { +} diff --git a/src/main/java/org/alliancegenome/curation_api/services/GeneExpressionAnnotationService.java 
b/src/main/java/org/alliancegenome/curation_api/services/GeneExpressionAnnotationService.java index 1ffe515ac..e237bdbfa 100644 --- a/src/main/java/org/alliancegenome/curation_api/services/GeneExpressionAnnotationService.java +++ b/src/main/java/org/alliancegenome/curation_api/services/GeneExpressionAnnotationService.java @@ -4,6 +4,8 @@ import jakarta.enterprise.context.RequestScoped; import jakarta.inject.Inject; import jakarta.transaction.Transactional; +import lombok.Getter; +import lombok.extern.jbosslog.JBossLog; import org.alliancegenome.curation_api.constants.EntityFieldConstants; import org.alliancegenome.curation_api.dao.GeneExpressionAnnotationDAO; import org.alliancegenome.curation_api.enums.BackendBulkDataProvider; @@ -18,17 +20,22 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Set; +@JBossLog @RequestScoped public class GeneExpressionAnnotationService extends BaseAnnotationCrudService implements BaseUpsertServiceInterface { @Inject GeneExpressionAnnotationDAO geneExpressionAnnotationDAO; @Inject GeneExpressionAnnotationFmsDTOValidator geneExpressionAnnotationFmsDTOValidator; + @Getter + private Map> experiments; @Override @PostConstruct protected void init() { setSQLDao(geneExpressionAnnotationDAO); + experiments = new HashMap<>(); } public List getAnnotationIdsByDataProvider(BackendBulkDataProvider dataProvider) { @@ -44,7 +51,7 @@ public List getAnnotationIdsByDataProvider(BackendBulkDataProvider dataPro @Transactional @Override public GeneExpressionAnnotation upsert(GeneExpressionFmsDTO geneExpressionFmsDTO, BackendBulkDataProvider dataProvider) throws ValidationException { - GeneExpressionAnnotation geneExpressionAnnotation = geneExpressionAnnotationFmsDTOValidator.validateAnnotation(geneExpressionFmsDTO, dataProvider); + GeneExpressionAnnotation geneExpressionAnnotation = geneExpressionAnnotationFmsDTOValidator.validateAnnotation(geneExpressionFmsDTO, dataProvider, experiments); return 
geneExpressionAnnotationDAO.persist(geneExpressionAnnotation); } } diff --git a/src/main/java/org/alliancegenome/curation_api/services/GeneExpressionExperimentService.java b/src/main/java/org/alliancegenome/curation_api/services/GeneExpressionExperimentService.java new file mode 100644 index 000000000..4ff18aa19 --- /dev/null +++ b/src/main/java/org/alliancegenome/curation_api/services/GeneExpressionExperimentService.java @@ -0,0 +1,72 @@ +package org.alliancegenome.curation_api.services; + +import jakarta.annotation.PostConstruct; +import jakarta.enterprise.context.RequestScoped; +import jakarta.inject.Inject; +import jakarta.transaction.Transactional; +import lombok.extern.jbosslog.JBossLog; +import org.alliancegenome.curation_api.dao.GeneExpressionAnnotationDAO; +import org.alliancegenome.curation_api.dao.GeneExpressionExperimentDAO; +import org.alliancegenome.curation_api.exceptions.ValidationException; +import org.alliancegenome.curation_api.model.entities.GeneExpressionAnnotation; +import org.alliancegenome.curation_api.model.entities.GeneExpressionExperiment; +import org.alliancegenome.curation_api.response.SearchResponse; +import org.alliancegenome.curation_api.services.base.BaseEntityCrudService; +import org.alliancegenome.curation_api.services.ontology.MmoTermService; + +import java.util.ArrayList; +import java.util.List; +import java.util.Set; + +@JBossLog +@RequestScoped +public class GeneExpressionExperimentService extends BaseEntityCrudService { + + @Inject GeneExpressionExperimentDAO geneExpressionExperimentDAO; + @Inject GeneExpressionAnnotationDAO geneExpressionAnnotationDAO; + @Inject GeneService geneService; + @Inject MmoTermService mmoTermService; + @Inject ReferenceService referenceService; + + @Override + @PostConstruct + protected void init() { + setSQLDao(geneExpressionExperimentDAO); + } + + @Transactional + public GeneExpressionExperiment upsert(String experimentId, Set geneExpressionAnnotationIds) throws ValidationException { + 
GeneExpressionExperiment geneExpressionExperiment; + List annotations; + + // example of experimentId: Xenbase:XB-GENE-972235|AGRKB:101000000874667|MMO:0000658 + String[] definingFields = experimentId.split("\\|", 3); + String geneId = definingFields[0]; + String referenceId = definingFields[1]; + String assayId = definingFields[2]; + SearchResponse response = geneExpressionExperimentDAO.findByField("uniqueId", experimentId); + if (response != null && response.getSingleResult() != null) { + geneExpressionExperiment = response.getSingleResult(); + } else { + geneExpressionExperiment = new GeneExpressionExperiment(); + geneExpressionExperiment.setUniqueId(experimentId); + } + + geneExpressionExperiment.setEntityAssayed(geneService.findByIdentifierString(geneId)); + geneExpressionExperiment.setSingleReference(referenceService.getByCurie(referenceId).getEntity()); + geneExpressionExperiment.setExpressionAssayUsed(mmoTermService.findByCurie(assayId)); + geneExpressionExperiment.setInternal(false); + geneExpressionExperiment.setObsolete(false); + + annotations = geneExpressionExperiment.getExpressionAnnotations(); + if (annotations == null) { + annotations = new ArrayList<>(); + } + for (String geneExpressionAnnotationId: geneExpressionAnnotationIds) { + annotations.add(geneExpressionAnnotationDAO.findByField("uniqueId", geneExpressionAnnotationId).getSingleResult()); + } + geneExpressionExperiment.setExpressionAnnotations(annotations); + + return geneExpressionExperimentDAO.persist(geneExpressionExperiment); + } +} diff --git a/src/main/java/org/alliancegenome/curation_api/services/helpers/annotations/GeneExpressionAnnotationUniqueIdHelper.java b/src/main/java/org/alliancegenome/curation_api/services/helpers/annotations/GeneExpressionAnnotationUniqueIdHelper.java index e04ca58a8..a243051a6 100644 --- a/src/main/java/org/alliancegenome/curation_api/services/helpers/annotations/GeneExpressionAnnotationUniqueIdHelper.java +++ 
b/src/main/java/org/alliancegenome/curation_api/services/helpers/annotations/GeneExpressionAnnotationUniqueIdHelper.java @@ -25,4 +25,13 @@ public String generateUniqueId(GeneExpressionFmsDTO geneExpressionFmsDTO, String } return uniqueIdGeneratorHelper.getUniqueId(); } + + // UniqueID = geneId | evidenceReferenceCurie | assayId + public String generateExperimentId(GeneExpressionFmsDTO geneExpressionFmsDTO, String referenceCurie) { + UniqueIdGeneratorHelper uniqueIdGeneratorHelper = new UniqueIdGeneratorHelper(); + uniqueIdGeneratorHelper.add(geneExpressionFmsDTO.getGeneId()); + uniqueIdGeneratorHelper.add(referenceCurie); + uniqueIdGeneratorHelper.add(geneExpressionFmsDTO.getAssay()); + return uniqueIdGeneratorHelper.getUniqueId(); + } } diff --git a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/GeneExpressionAnnotationFmsDTOValidator.java b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/GeneExpressionAnnotationFmsDTOValidator.java index 392106309..bf2ffeea9 100644 --- a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/GeneExpressionAnnotationFmsDTOValidator.java +++ b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/GeneExpressionAnnotationFmsDTOValidator.java @@ -6,8 +6,8 @@ import org.alliancegenome.curation_api.constants.VocabularyConstants; import org.alliancegenome.curation_api.dao.GeneExpressionAnnotationDAO; import org.alliancegenome.curation_api.enums.BackendBulkDataProvider; -import org.alliancegenome.curation_api.exceptions.ValidationException; import org.alliancegenome.curation_api.exceptions.ObjectValidationException; +import org.alliancegenome.curation_api.exceptions.ValidationException; import org.alliancegenome.curation_api.model.entities.*; import org.alliancegenome.curation_api.model.entities.ontology.*; import org.alliancegenome.curation_api.model.ingest.dto.fms.GeneExpressionFmsDTO; @@ -27,8 +27,7 @@ import java.time.OffsetDateTime; import 
java.time.format.DateTimeParseException; -import java.util.ArrayList; -import java.util.List; +import java.util.*; @RequestScoped public class GeneExpressionAnnotationFmsDTOValidator { @@ -46,17 +45,19 @@ public class GeneExpressionAnnotationFmsDTOValidator { @Inject StageTermService stageTermService; @Inject OntologyTermService ontologyTermService; - public GeneExpressionAnnotation validateAnnotation(GeneExpressionFmsDTO geneExpressionFmsDTO, BackendBulkDataProvider dataProvider) throws ValidationException { + public GeneExpressionAnnotation validateAnnotation(GeneExpressionFmsDTO geneExpressionFmsDTO, BackendBulkDataProvider dataProvider, Map> experiments) throws ValidationException { ObjectResponse response = new ObjectResponse<>(); GeneExpressionAnnotation geneExpressionAnnotation; + String uniqueId; + String referenceCurie; ObjectResponse singleReferenceResponse = validateEvidence(geneExpressionFmsDTO); if (singleReferenceResponse.hasErrors()) { response.addErrorMessage("singleReference", singleReferenceResponse.errorMessagesString()); throw new ObjectValidationException(geneExpressionFmsDTO, response.errorMessagesString()); } else { - String referenceCurie = singleReferenceResponse.getEntity().getCurie(); - String uniqueId = geneExpressionAnnotationUniqueIdHelper.generateUniqueId(geneExpressionFmsDTO, referenceCurie); + referenceCurie = singleReferenceResponse.getEntity().getCurie(); + uniqueId = geneExpressionAnnotationUniqueIdHelper.generateUniqueId(geneExpressionFmsDTO, referenceCurie); SearchResponse annotationDB = geneExpressionAnnotationDAO.findByField("uniqueId", uniqueId); if (annotationDB != null && annotationDB.getSingleResult() != null) { geneExpressionAnnotation = annotationDB.getSingleResult(); @@ -130,6 +131,14 @@ public GeneExpressionAnnotation validateAnnotation(GeneExpressionFmsDTO geneExpr if (response.hasErrors()) { throw new ObjectValidationException(geneExpressionFmsDTO, response.errorMessagesString()); } + String experimentId = 
geneExpressionAnnotationUniqueIdHelper.generateExperimentId(geneExpressionFmsDTO, referenceCurie); + if (experiments.containsKey(experimentId)) { + experiments.get(experimentId).add(uniqueId); + } else { + Set expressionIds = new HashSet<>(); + expressionIds.add(uniqueId); + experiments.put(experimentId, expressionIds); + } return geneExpressionAnnotation; } diff --git a/src/main/resources/db/migration/v0.38.0.7__gene_expression_experiment.sql b/src/main/resources/db/migration/v0.38.0.7__gene_expression_experiment.sql new file mode 100644 index 000000000..a6096e759 --- /dev/null +++ b/src/main/resources/db/migration/v0.38.0.7__gene_expression_experiment.sql @@ -0,0 +1,39 @@ + +CREATE SEQUENCE public.geneexpressionexperiment_seq START WITH 1 INCREMENT BY 50 NO MINVALUE NO MAXVALUE CACHE 1; + +CREATE TABLE IF NOT EXISTS public.geneexpressionexperiment ( + id BIGINT CONSTRAINT expressionexperiment_pkey PRIMARY KEY, + uniqueid varchar(3500), + curie VARCHAR(255), + modentityid VARCHAR(255), + modinternalid VARCHAR(255), + singlereference_id BIGINT, + entityassayed_id BIGINT, + expressionassayused_id BIGINT, + specimengenomicmodel_id BIGINT, + dataprovider_id BIGINT, + datecreated timestamp without time zone, + dbdatecreated timestamp without time zone, + dateupdated timestamp without time zone, + dbdateupdated timestamp without time zone, + internal boolean DEFAULT false, + obsolete boolean DEFAULT false, + createdby_id bigint, + updatedby_id bigint +); + +CREATE SEQUENCE public.geneexpressionexperiment_geneexpressionannotation_seq START WITH 1 INCREMENT BY 50 NO MINVALUE NO MAXVALUE CACHE 1; + +CREATE TABLE IF NOT EXISTS public.geneexpressionexperiment_geneexpressionannotation ( + geneexpressionexperiment_id bigint NOT NULL, + expressionannotations_id bigint NOT NULL, + + CONSTRAINT gen_exp_exp_experiment_fkey FOREIGN KEY (geneexpressionexperiment_id) + REFERENCES public.geneexpressionexperiment (id) MATCH SIMPLE + ON UPDATE NO ACTION + ON DELETE NO ACTION, + 
CONSTRAINT gen_exp_exp_annotation_fkey FOREIGN KEY (expressionannotations_id) + REFERENCES public.geneexpressionannotation (id) MATCH SIMPLE + ON UPDATE NO ACTION + ON DELETE NO ACTION +); From 1bcf5acfc55bbd87a8ade4af45db92b330ccf3d9 Mon Sep 17 00:00:00 2001 From: Andres Becerra Date: Wed, 13 Nov 2024 14:09:35 +0000 Subject: [PATCH 055/118] SCRUM-3953: migration number --- ...n_experiment.sql => v0.38.0.8__gene_expression_experiment.sql} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename src/main/resources/db/migration/{v0.38.0.7__gene_expression_experiment.sql => v0.38.0.8__gene_expression_experiment.sql} (100%) diff --git a/src/main/resources/db/migration/v0.38.0.7__gene_expression_experiment.sql b/src/main/resources/db/migration/v0.38.0.8__gene_expression_experiment.sql similarity index 100% rename from src/main/resources/db/migration/v0.38.0.7__gene_expression_experiment.sql rename to src/main/resources/db/migration/v0.38.0.8__gene_expression_experiment.sql From 812385b230e57ac2e1130b09f82ef018224d79fd Mon Sep 17 00:00:00 2001 From: Andres Becerra Date: Thu, 14 Nov 2024 14:06:08 +0000 Subject: [PATCH 056/118] SCRUM-3953: fix migration and remove logging --- ...eneExpressionAnnotationCrudController.java | 2 - .../executors/GeneExpressionExecutor.java | 13 +++-- .../entities/GeneExpressionExperiment.java | 2 +- .../GeneExpressionAnnotationService.java | 2 - .../v0.38.0.8__gene_expression_experiment.sql | 49 +++++++++++-------- 5 files changed, 38 insertions(+), 30 deletions(-) diff --git a/src/main/java/org/alliancegenome/curation_api/controllers/crud/GeneExpressionAnnotationCrudController.java b/src/main/java/org/alliancegenome/curation_api/controllers/crud/GeneExpressionAnnotationCrudController.java index cb8ed315f..1e1ab09c7 100644 --- a/src/main/java/org/alliancegenome/curation_api/controllers/crud/GeneExpressionAnnotationCrudController.java +++ 
b/src/main/java/org/alliancegenome/curation_api/controllers/crud/GeneExpressionAnnotationCrudController.java @@ -3,7 +3,6 @@ import jakarta.annotation.PostConstruct; import jakarta.enterprise.context.RequestScoped; import jakarta.inject.Inject; -import lombok.extern.jbosslog.JBossLog; import org.alliancegenome.curation_api.controllers.base.BaseEntityCrudController; import org.alliancegenome.curation_api.dao.GeneExpressionAnnotationDAO; import org.alliancegenome.curation_api.interfaces.crud.GeneExpressionAnnotationCrudInterface; @@ -17,7 +16,6 @@ import java.util.List; @RequestScoped -@JBossLog public class GeneExpressionAnnotationCrudController extends BaseEntityCrudController implements GeneExpressionAnnotationCrudInterface { @Inject GeneExpressionAnnotationService geneExpressionAnnotationService; diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/executors/GeneExpressionExecutor.java b/src/main/java/org/alliancegenome/curation_api/jobs/executors/GeneExpressionExecutor.java index 972914731..d39a5c571 100644 --- a/src/main/java/org/alliancegenome/curation_api/jobs/executors/GeneExpressionExecutor.java +++ b/src/main/java/org/alliancegenome/curation_api/jobs/executors/GeneExpressionExecutor.java @@ -2,7 +2,6 @@ import jakarta.enterprise.context.ApplicationScoped; import jakarta.inject.Inject; -import lombok.extern.jbosslog.JBossLog; import org.alliancegenome.curation_api.enums.BackendBulkDataProvider; import org.alliancegenome.curation_api.exceptions.ObjectUpdateException; import org.alliancegenome.curation_api.interfaces.AGRCurationSchemaVersion; @@ -24,12 +23,15 @@ import java.util.zip.GZIPInputStream; @ApplicationScoped -@JBossLog public class GeneExpressionExecutor extends LoadFileExecutor { @Inject GeneExpressionAnnotationService geneExpressionAnnotationService; @Inject GeneExpressionExperimentService geneExpressionExperimentService; + static final String ANNOTATIONS = "gene expression annotations"; + static final String EXPERIMENTS = "gene 
expresion experiments"; public void execLoad(BulkLoadFileHistory bulkLoadFileHistory) { + + try { BulkFMSLoad fms = (BulkFMSLoad) bulkLoadFileHistory.getBulkLoad(); BackendBulkDataProvider dataProvider = BackendBulkDataProvider.valueOf(fms.getFmsDataSubType()); @@ -44,7 +46,7 @@ public void execLoad(BulkLoadFileHistory bulkLoadFileHistory) { } bulkLoadFileDAO.merge(bulkLoadFileHistory.getBulkLoadFile()); - bulkLoadFileHistory.setCount(geneExpressionIngestFmsDTO.getData().size()); + bulkLoadFileHistory.setCount(ANNOTATIONS, geneExpressionIngestFmsDTO.getData().size()); updateHistory(bulkLoadFileHistory); List annotationIdsLoaded = new ArrayList<>(); @@ -53,7 +55,8 @@ public void execLoad(BulkLoadFileHistory bulkLoadFileHistory) { boolean success = runLoad(geneExpressionAnnotationService, bulkLoadFileHistory, dataProvider, geneExpressionIngestFmsDTO.getData(), annotationIdsLoaded); if (success) { - runCleanup(geneExpressionAnnotationService, bulkLoadFileHistory, dataProvider.name(), annotationIdsBefore, annotationIdsLoaded, "gene expression annotation"); + runCleanup(geneExpressionAnnotationService, bulkLoadFileHistory, dataProvider.name(), annotationIdsBefore, annotationIdsLoaded, ANNOTATIONS); + bulkLoadFileHistory.setCount(EXPERIMENTS, geneExpressionAnnotationService.getExperiments().size()); loadExperiments(bulkLoadFileHistory); } @@ -70,7 +73,7 @@ public void execLoad(BulkLoadFileHistory bulkLoadFileHistory) { private void loadExperiments(BulkLoadFileHistory history) { ProcessDisplayHelper ph = new ProcessDisplayHelper(); Map> experiments = geneExpressionAnnotationService.getExperiments(); - ph.startProcess("Saving experiments: ", experiments.size()); + ph.startProcess("Saving " + EXPERIMENTS, experiments.size()); for (String experimentId: experiments.keySet()) { try { GeneExpressionExperiment experiment = geneExpressionExperimentService.upsert(experimentId, experiments.get(experimentId)); diff --git 
a/src/main/java/org/alliancegenome/curation_api/model/entities/GeneExpressionExperiment.java b/src/main/java/org/alliancegenome/curation_api/model/entities/GeneExpressionExperiment.java index dc0745e33..9f2caee9c 100644 --- a/src/main/java/org/alliancegenome/curation_api/model/entities/GeneExpressionExperiment.java +++ b/src/main/java/org/alliancegenome/curation_api/model/entities/GeneExpressionExperiment.java @@ -13,7 +13,7 @@ @Entity @Data @EqualsAndHashCode(onlyExplicitlyIncluded = true, callSuper = false) -@AGRCurationSchemaVersion(min = "1.7.3", max = LinkMLSchemaConstants.LATEST_RELEASE, dependencies = { ExpressionExperiment.class }, partial = true) +@AGRCurationSchemaVersion(min = "2.8.1", max = LinkMLSchemaConstants.LATEST_RELEASE, dependencies = { ExpressionExperiment.class }, partial = true) @Table(indexes = { @Index(name = "on_index", columnList = "id") }) diff --git a/src/main/java/org/alliancegenome/curation_api/services/GeneExpressionAnnotationService.java b/src/main/java/org/alliancegenome/curation_api/services/GeneExpressionAnnotationService.java index e237bdbfa..2d1229ed8 100644 --- a/src/main/java/org/alliancegenome/curation_api/services/GeneExpressionAnnotationService.java +++ b/src/main/java/org/alliancegenome/curation_api/services/GeneExpressionAnnotationService.java @@ -5,7 +5,6 @@ import jakarta.inject.Inject; import jakarta.transaction.Transactional; import lombok.Getter; -import lombok.extern.jbosslog.JBossLog; import org.alliancegenome.curation_api.constants.EntityFieldConstants; import org.alliancegenome.curation_api.dao.GeneExpressionAnnotationDAO; import org.alliancegenome.curation_api.enums.BackendBulkDataProvider; @@ -22,7 +21,6 @@ import java.util.Map; import java.util.Set; -@JBossLog @RequestScoped public class GeneExpressionAnnotationService extends BaseAnnotationCrudService implements BaseUpsertServiceInterface { diff --git a/src/main/resources/db/migration/v0.38.0.8__gene_expression_experiment.sql 
b/src/main/resources/db/migration/v0.38.0.8__gene_expression_experiment.sql index a6096e759..3024a8d5c 100644 --- a/src/main/resources/db/migration/v0.38.0.8__gene_expression_experiment.sql +++ b/src/main/resources/db/migration/v0.38.0.8__gene_expression_experiment.sql @@ -1,39 +1,48 @@ +CREATE SEQUENCE geneexpressionexperiment_seq START WITH 1 INCREMENT BY 50 NO MINVALUE NO MAXVALUE CACHE 1; -CREATE SEQUENCE public.geneexpressionexperiment_seq START WITH 1 INCREMENT BY 50 NO MINVALUE NO MAXVALUE CACHE 1; - -CREATE TABLE IF NOT EXISTS public.geneexpressionexperiment ( +CREATE TABLE geneexpressionexperiment ( id BIGINT CONSTRAINT expressionexperiment_pkey PRIMARY KEY, uniqueid varchar(3500), curie VARCHAR(255), modentityid VARCHAR(255), modinternalid VARCHAR(255), - singlereference_id BIGINT, - entityassayed_id BIGINT, - expressionassayused_id BIGINT, - specimengenomicmodel_id BIGINT, - dataprovider_id BIGINT, + singlereference_id BIGINT REFERENCES public.reference (id) MATCH SIMPLE ON UPDATE NO ACTION ON DELETE NO ACTION, + entityassayed_id BIGINT REFERENCES public.gene (id) MATCH SIMPLE ON UPDATE NO ACTION ON DELETE NO ACTION, + expressionassayused_id BIGINT REFERENCES public.ontologyterm (id) MATCH SIMPLE ON UPDATE NO ACTION ON DELETE NO ACTION, + specimengenomicmodel_id BIGINT REFERENCES public.allele (id) MATCH SIMPLE ON UPDATE NO ACTION ON DELETE NO ACTION, + dataprovider_id BIGINT REFERENCES public.dataprovider (id) MATCH SIMPLE ON UPDATE NO ACTION ON DELETE NO ACTION, datecreated timestamp without time zone, dbdatecreated timestamp without time zone, dateupdated timestamp without time zone, dbdateupdated timestamp without time zone, internal boolean DEFAULT false, obsolete boolean DEFAULT false, - createdby_id bigint, - updatedby_id bigint + createdby_id bigint REFERENCES public.person (id) MATCH SIMPLE ON UPDATE NO ACTION ON DELETE NO ACTION, + updatedby_id bigint REFERENCES public.person (id) MATCH SIMPLE ON UPDATE NO ACTION ON DELETE NO ACTION ); -CREATE 
SEQUENCE public.geneexpressionexperiment_geneexpressionannotation_seq START WITH 1 INCREMENT BY 50 NO MINVALUE NO MAXVALUE CACHE 1; +CREATE INDEX geneexpressionexperiment_uniqueid_index ON geneexpressionexperiment USING btree (uniqueid); +CREATE INDEX geneexpressionexperiment_curie_index ON geneexpressionexperiment USING btree (curie); +CREATE INDEX geneexpressionexperiment_modentityid_index ON geneexpressionexperiment USING btree (modentityid); +CREATE INDEX geneexpressionexperiment_modinternalid_index ON geneexpressionexperiment USING btree (modinternalid); +CREATE INDEX geneexpressionexperiment_singlereference_index ON geneexpressionexperiment USING btree (singlereference_id); +CREATE INDEX geneexpressionexperiment_entityassayedused_index ON geneexpressionexperiment USING btree (entityassayed_id); +CREATE INDEX geneexpressionexperiment_expressionassayused_index ON geneexpressionexperiment USING btree (expressionassayused_id); +CREATE INDEX geneexpressionexperiment_dataprovider_index ON geneexpressionexperiment USING btree (dataprovider_id); +CREATE INDEX geneexpressionexperiment_internal_index ON geneexpressionexperiment USING btree (internal); +CREATE INDEX geneexpressionexperiment_obsolete_index ON geneexpressionexperiment USING btree (obsolete); +CREATE INDEX geneexpressionexperiment_createdby_index ON geneexpressionexperiment USING btree (createdby_id); +CREATE INDEX geneexpressionexperiment_updatedby_index ON geneexpressionexperiment USING btree (updatedby_id); + +CREATE SEQUENCE geneexpressionexperiment_geneexpressionannotation_seq START WITH 1 INCREMENT BY 50 NO MINVALUE NO MAXVALUE CACHE 1; -CREATE TABLE IF NOT EXISTS public.geneexpressionexperiment_geneexpressionannotation ( +CREATE TABLE IF NOT EXISTS geneexpressionexperiment_geneexpressionannotation ( geneexpressionexperiment_id bigint NOT NULL, expressionannotations_id bigint NOT NULL, - CONSTRAINT gen_exp_exp_experiment_fkey FOREIGN KEY (geneexpressionexperiment_id) - REFERENCES 
public.geneexpressionexperiment (id) MATCH SIMPLE - ON UPDATE NO ACTION - ON DELETE NO ACTION, - CONSTRAINT gen_exp_exp_annotation_fkey FOREIGN KEY (expressionannotations_id) - REFERENCES public.geneexpressionannotation (id) MATCH SIMPLE - ON UPDATE NO ACTION - ON DELETE NO ACTION + CONSTRAINT gen_exp_exp_experiment_fkey FOREIGN KEY (geneexpressionexperiment_id) REFERENCES geneexpressionexperiment (id) MATCH SIMPLE ON UPDATE NO ACTION ON DELETE NO ACTION, + CONSTRAINT gen_exp_exp_annotation_fkey FOREIGN KEY (expressionannotations_id) REFERENCES geneexpressionannotation (id) MATCH SIMPLE ON UPDATE NO ACTION ON DELETE NO ACTION ); + +CREATE INDEX geneexpressionexperiment_gea_experiment_index ON geneexpressionexperiment_geneexpressionannotation USING btree (geneexpressionexperiment_id); +CREATE INDEX geneexpressionexperiment_gea_annotations_index ON geneexpressionexperiment_geneexpressionannotation USING btree (expressionannotations_id); From 81d693b95f28ead2219e520be7776436e304f9a3 Mon Sep 17 00:00:00 2001 From: markquintontulloch Date: Thu, 14 Nov 2024 14:13:52 +0000 Subject: [PATCH 057/118] Update entity counts page --- .../src/components/EntityCountsComponent.js | 29 +- src/main/cliapp/src/constants/Classes.js | 354 ++++++++++++++---- .../src/containers/dashboardPage/Dashboard.js | 11 +- 3 files changed, 314 insertions(+), 80 deletions(-) diff --git a/src/main/cliapp/src/components/EntityCountsComponent.js b/src/main/cliapp/src/components/EntityCountsComponent.js index 7cd9ad915..7a5333e73 100644 --- a/src/main/cliapp/src/components/EntityCountsComponent.js +++ b/src/main/cliapp/src/components/EntityCountsComponent.js @@ -28,6 +28,8 @@ export const EntityCountsComponent = () => { _tableData[type].push({ name: CLASSES[key].name, link: CLASSES[key].link, + type: CLASSES[key].type, + isIndexed: CLASSES[key].isIndexed, dbCount: res.entity[key]['dbCount'], esCount: res.entity[key]['esCount'], }); @@ -45,10 +47,9 @@ export const EntityCountsComponent = () => { return 
{rowData.name}; }; - const ROW_HIGHLIGHT_IGNORE = ['Disease Annotations', 'Literature References', 'Bulk Loads / Failed Loads']; - const getRowClass = (rowData) => { - if (ROW_HIGHLIGHT_IGNORE.includes(rowData.name)) return; + console.log(rowData); + if (!rowData.isIndexed) return; if (rowData?.dbCount !== rowData.esCount) { return 'bg-gray-500 text-white'; @@ -58,7 +59,7 @@ export const EntityCountsComponent = () => { return ( <>
-
+
{ />
-
+
{ />
-
+
+ getRowClass(rowData)} + > + + } + /> + +
+
{ useEffect(() => { let _tableData = {}; - const excludedEntities = [ - 'AGMDiseaseAnnotation', - 'AlleleDiseaseAnnotation', - 'GeneDiseaseAnnotation', - 'AGMPhenotypeAnnotation', - 'AllelePhenotypeAnnotation', - 'GenePhenotypeAnnotation', - ]; - for (const key in CLASSES) { const { type } = CLASSES[key]; if (!_tableData[type]) { _tableData[type] = []; } - if (!excludedEntities.includes(key)) { + if (CLASSES[key].hasTable) { _tableData[type].push({ name: CLASSES[key].name, link: CLASSES[key].link, From cf56fcbcfba512fb92609162055b29658fe01298 Mon Sep 17 00:00:00 2001 From: markquintontulloch Date: Thu, 14 Nov 2024 14:18:20 +0000 Subject: [PATCH 058/118] Update GenomeAssembly --- src/main/cliapp/src/constants/Classes.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/cliapp/src/constants/Classes.js b/src/main/cliapp/src/constants/Classes.js index 0c1bbf406..37e0a9145 100644 --- a/src/main/cliapp/src/constants/Classes.js +++ b/src/main/cliapp/src/constants/Classes.js @@ -125,7 +125,7 @@ export const CLASSES = Object.freeze({ link: '/#/genomeAssemblies', type: 'entity', hasTable: false, - isIndexed: false, + isIndexed: true, }, GeneExpressionAnnotation: { name: 'Gene Expression Annotations', From 1b787235b3914c6a76f1149c6b068514546b17a7 Mon Sep 17 00:00:00 2001 From: Andres Becerra Date: Thu, 14 Nov 2024 14:25:15 +0000 Subject: [PATCH 059/118] SCRUM-3953: linkml version --- .../curation_api/model/entities/ExpressionExperiment.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/alliancegenome/curation_api/model/entities/ExpressionExperiment.java b/src/main/java/org/alliancegenome/curation_api/model/entities/ExpressionExperiment.java index 8939d95e0..e38936d74 100644 --- a/src/main/java/org/alliancegenome/curation_api/model/entities/ExpressionExperiment.java +++ b/src/main/java/org/alliancegenome/curation_api/model/entities/ExpressionExperiment.java @@ -25,7 +25,7 @@ @Indexed @Data 
@EqualsAndHashCode(onlyExplicitlyIncluded = true, callSuper = false) -@AGRCurationSchemaVersion(min = "1.7.3", max = LinkMLSchemaConstants.LATEST_RELEASE, dependencies = { SubmittedObject.class }, partial = true) +@AGRCurationSchemaVersion(min = "2.8.1", max = LinkMLSchemaConstants.LATEST_RELEASE, dependencies = { SubmittedObject.class }, partial = true) public abstract class ExpressionExperiment extends SubmittedObject { @FullTextField(analyzer = "autocompleteAnalyzer", searchAnalyzer = "autocompleteSearchAnalyzer") From e0c89817559ccc4d90a8050f708a44b8f4496273 Mon Sep 17 00:00:00 2001 From: Andres Becerra Date: Thu, 14 Nov 2024 14:49:43 +0000 Subject: [PATCH 060/118] SCRUM-3953: change list to set --- .../curation_api/model/entities/ExpressionExperiment.java | 3 ++- .../services/GeneExpressionExperimentService.java | 7 +++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/main/java/org/alliancegenome/curation_api/model/entities/ExpressionExperiment.java b/src/main/java/org/alliancegenome/curation_api/model/entities/ExpressionExperiment.java index e38936d74..50aa2a99a 100644 --- a/src/main/java/org/alliancegenome/curation_api/model/entities/ExpressionExperiment.java +++ b/src/main/java/org/alliancegenome/curation_api/model/entities/ExpressionExperiment.java @@ -18,6 +18,7 @@ import org.hibernate.search.mapper.pojo.mapping.definition.annotation.*; import java.util.List; +import java.util.Set; @MappedSuperclass @JsonTypeInfo(use = JsonTypeInfo.Id.NAME, include = JsonTypeInfo.As.PROPERTY, property = "type") @@ -55,7 +56,7 @@ public abstract class ExpressionExperiment extends SubmittedObject { @OneToMany(cascade = CascadeType.ALL, orphanRemoval = true) @JsonView({View.FieldsAndLists.class}) - private List expressionAnnotations; + private Set expressionAnnotations; @Transient private Allele specimenGenomicModel; @Transient private List detectionReagents; diff --git 
a/src/main/java/org/alliancegenome/curation_api/services/GeneExpressionExperimentService.java b/src/main/java/org/alliancegenome/curation_api/services/GeneExpressionExperimentService.java index 4ff18aa19..7ffdd7cd6 100644 --- a/src/main/java/org/alliancegenome/curation_api/services/GeneExpressionExperimentService.java +++ b/src/main/java/org/alliancegenome/curation_api/services/GeneExpressionExperimentService.java @@ -14,8 +14,7 @@ import org.alliancegenome.curation_api.services.base.BaseEntityCrudService; import org.alliancegenome.curation_api.services.ontology.MmoTermService; -import java.util.ArrayList; -import java.util.List; +import java.util.HashSet; import java.util.Set; @JBossLog @@ -37,7 +36,7 @@ protected void init() { @Transactional public GeneExpressionExperiment upsert(String experimentId, Set geneExpressionAnnotationIds) throws ValidationException { GeneExpressionExperiment geneExpressionExperiment; - List annotations; + Set annotations; // example of experimentId: Xenbase:XB-GENE-972235|AGRKB:101000000874667|MMO:0000658 String[] definingFields = experimentId.split("\\|", 3); @@ -60,7 +59,7 @@ public GeneExpressionExperiment upsert(String experimentId, Set geneExpr annotations = geneExpressionExperiment.getExpressionAnnotations(); if (annotations == null) { - annotations = new ArrayList<>(); + annotations = new HashSet<>(); } for (String geneExpressionAnnotationId: geneExpressionAnnotationIds) { annotations.add(geneExpressionAnnotationDAO.findByField("uniqueId", geneExpressionAnnotationId).getSingleResult()); From 5ef07ea78a03363a1083cb04bfeb0105b2e5e220 Mon Sep 17 00:00:00 2001 From: markquintontulloch Date: Thu, 14 Nov 2024 15:50:29 +0000 Subject: [PATCH 061/118] Remove links for entities without tables --- src/main/cliapp/src/components/EntityCountsComponent.js | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/main/cliapp/src/components/EntityCountsComponent.js b/src/main/cliapp/src/components/EntityCountsComponent.js index 
7a5333e73..13073b14b 100644 --- a/src/main/cliapp/src/components/EntityCountsComponent.js +++ b/src/main/cliapp/src/components/EntityCountsComponent.js @@ -29,6 +29,7 @@ export const EntityCountsComponent = () => { name: CLASSES[key].name, link: CLASSES[key].link, type: CLASSES[key].type, + hasTable: CLASSES[key].hasTable, isIndexed: CLASSES[key].isIndexed, dbCount: res.entity[key]['dbCount'], esCount: res.entity[key]['esCount'], @@ -44,6 +45,9 @@ export const EntityCountsComponent = () => { }, []); const nameHyperlinkTemplate = (rowData) => { + if (!rowData.hasTable) { + return rowData.name; + } return {rowData.name}; }; From 10e1b8c0c7fca464fac63c21aa1beaf8bf9cbb3a Mon Sep 17 00:00:00 2001 From: markquintontulloch Date: Thu, 14 Nov 2024 15:59:29 +0000 Subject: [PATCH 062/118] Use link presence instead of hasTable to allow linking to parent tables --- .../src/components/EntityCountsComponent.js | 3 +-- src/main/cliapp/src/constants/Classes.js | 21 ------------------- 2 files changed, 1 insertion(+), 23 deletions(-) diff --git a/src/main/cliapp/src/components/EntityCountsComponent.js b/src/main/cliapp/src/components/EntityCountsComponent.js index 13073b14b..d1caed616 100644 --- a/src/main/cliapp/src/components/EntityCountsComponent.js +++ b/src/main/cliapp/src/components/EntityCountsComponent.js @@ -29,7 +29,6 @@ export const EntityCountsComponent = () => { name: CLASSES[key].name, link: CLASSES[key].link, type: CLASSES[key].type, - hasTable: CLASSES[key].hasTable, isIndexed: CLASSES[key].isIndexed, dbCount: res.entity[key]['dbCount'], esCount: res.entity[key]['esCount'], @@ -45,7 +44,7 @@ export const EntityCountsComponent = () => { }, []); const nameHyperlinkTemplate = (rowData) => { - if (!rowData.hasTable) { + if (!rowData.link) { return rowData.name; } return {rowData.name}; diff --git a/src/main/cliapp/src/constants/Classes.js b/src/main/cliapp/src/constants/Classes.js index 37e0a9145..4f9fd3acf 100644 --- a/src/main/cliapp/src/constants/Classes.js +++ 
b/src/main/cliapp/src/constants/Classes.js @@ -98,7 +98,6 @@ export const CLASSES = Object.freeze({ Reference: { name: 'Literature References', link: '/#/references', type: 'entity', hasTable: true, isIndexed: false }, SequenceTargetingReagent: { name: 'Sequence Targeting Reagents', - link: '/#/sqtrs', type: 'entity', hasTable: false, isIndexed: true, @@ -108,56 +107,48 @@ export const CLASSES = Object.freeze({ Exon: { name: 'Exons', link: '/#/exons', type: 'entity', hasTable: false, isIndexed: false }, CodingSequence: { name: 'Coding Sequences', - link: '/#/codingSequences', type: 'entity', hasTable: false, isIndexed: false, }, AssemblyComponent: { name: 'Assembly Components', - link: '/#/assemblyComponents', type: 'entity', hasTable: false, isIndexed: false, }, GenomeAssembly: { name: 'Genome Assemblies', - link: '/#/genomeAssemblies', type: 'entity', hasTable: false, isIndexed: true, }, GeneExpressionAnnotation: { name: 'Gene Expression Annotations', - link: '/#/geneExpressionAnnotations', type: 'entity', hasTable: false, isIndexed: true, }, PredictedVariantConsequence: { name: 'Predicted Variant Consequences', - link: '/#/predictedVariantConsequences', type: 'entity', hasTable: false, isIndexed: false, }, HTPExpressionDatasetAnnotation: { name: 'HTP Expression Dataset Annotations', - link: '/#/htpExpressionDatasetAnnotations', type: 'entity', hasTable: false, isIndexed: false, }, HTPExpressionDatasetSampleAnnotation: { name: 'HTP Expression Dataset Sample Annotations', - link: '/#/htpExpressionDatasetSampleAnnotations', type: 'entity', hasTable: false, isIndexed: false, }, GeneOntologyAnnotation: { name: 'Gene Ontology Annotations', - link: '/#/geneOntologyAnnotations', type: 'entity', hasTable: false, isIndexed: false, @@ -226,73 +217,61 @@ export const CLASSES = Object.freeze({ AlleleGeneAssociation: { name: 'Allele Gene Associations', - link: '/#/alleleGeneAssociations', type: 'association', hasTable: false, }, AlleleVariantAssociation: { name: 'Allele 
Variant Associations', - link: '/#/alleleVariantAssociations', type: 'association', hasTable: false, }, CodingSequenceGenomicLocationAssociation: { name: 'CDS Genomic Locations', - link: '/#/cdsGenomicLocations', type: 'association', hasTable: false, }, ExonGenomicLocationAssociation: { name: 'Exon Genomic Locations', - link: '/#/exonGenomicLocations', type: 'association', hasTable: false, }, GeneGenomicLocationAssociation: { name: 'Gene Genomic Locations', - link: '/#/geneGenomicLocations', type: 'association', hasTable: false, }, TranscriptGenomicLocationAssociation: { name: 'Transcript Genomic Locations', - link: '/#/transcriptGenomicLocations', type: 'association', hasTable: false, }, VariantGenomicLocationAssociation: { name: 'Variant Genomic Locations', - link: '/#/variantGenomicLocations', type: 'association', hasTable: false, }, ConstructGenomicEntityAssociation: { name: 'Construct Genomic Entity Associations', - link: '/#/constructGenomicEntityAssociations', type: 'association', hasTable: false, }, SequenceTargetingReagentGeneAssociation: { name: 'SQTR Gene Associations', - link: '/#/sqtrGeneAssociations', type: 'association', hasTable: false, }, TranscriptCodingSequenceAssociation: { name: 'Transcript CDS Associations', - link: '/#/transcriptCodingSequenceAssociations', type: 'association', hasTable: false, }, TranscriptExonAssociation: { name: 'Transcript Exon Associations', - link: '/#/transcriptExonAssociations', type: 'association', hasTable: false, }, TranscriptGeneAssociation: { name: 'Transcript Gene Associations', - link: '/#/transcriptGeneAssociations', type: 'association', hasTable: false, }, From 27baf84ebc4869dd1c8e1d58bdf35b0948f40fff Mon Sep 17 00:00:00 2001 From: markquintontulloch Date: Thu, 14 Nov 2024 16:13:07 +0000 Subject: [PATCH 063/118] Failsafe check for links on dashboard --- src/main/cliapp/src/containers/dashboardPage/Dashboard.js | 3 +++ 1 file changed, 3 insertions(+) diff --git 
a/src/main/cliapp/src/containers/dashboardPage/Dashboard.js b/src/main/cliapp/src/containers/dashboardPage/Dashboard.js index 7ab4e2623..856dda775 100644 --- a/src/main/cliapp/src/containers/dashboardPage/Dashboard.js +++ b/src/main/cliapp/src/containers/dashboardPage/Dashboard.js @@ -26,6 +26,9 @@ export const Dashboard = () => { }, []); const nameHyperlinkTemplate = (rowData) => { + if (!rowData.link) { + return rowData.name; + } return {rowData.name}; }; From e91c79d0ea3acd61c39681111aa47d77631050c4 Mon Sep 17 00:00:00 2001 From: markquintontulloch Date: Thu, 14 Nov 2024 18:25:11 +0000 Subject: [PATCH 064/118] Remove dead links --- src/main/cliapp/src/constants/Classes.js | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/main/cliapp/src/constants/Classes.js b/src/main/cliapp/src/constants/Classes.js index 4f9fd3acf..0ec2de27d 100644 --- a/src/main/cliapp/src/constants/Classes.js +++ b/src/main/cliapp/src/constants/Classes.js @@ -103,8 +103,8 @@ export const CLASSES = Object.freeze({ isIndexed: true, }, Species: { name: 'Species', link: '/#/species', type: 'entity', hasTable: true, isIndexed: true }, - Transcript: { name: 'Transcripts', link: '/#/transcripts', type: 'entity', hasTable: false, isIndexed: false }, - Exon: { name: 'Exons', link: '/#/exons', type: 'entity', hasTable: false, isIndexed: false }, + Transcript: { name: 'Transcripts', type: 'entity', hasTable: false, isIndexed: false }, + Exon: { name: 'Exons', type: 'entity', hasTable: false, isIndexed: false }, CodingSequence: { name: 'Coding Sequences', type: 'entity', @@ -153,8 +153,8 @@ export const CLASSES = Object.freeze({ hasTable: false, isIndexed: false, }, - GeneToGeneParalogy: { name: 'Paralogy', link: '/#/paralogy', type: 'entity', hasTable: false, isIndexed: false }, - GeneToGeneOrthology: { name: 'Orthology', link: '/#/orthology', type: 'entity', hasTable: false, isIndexed: false }, + GeneToGeneParalogy: { name: 'Paralogy', type: 'entity', hasTable: false, 
isIndexed: false }, + GeneToGeneOrthology: { name: 'Orthology', type: 'entity', hasTable: false, isIndexed: false }, DOTerm: { name: 'DO', link: '/#/ontology/do', type: 'ontology', hasTable: true, isIndexed: true }, CHEBITerm: { name: 'CHEBI', link: '/#/ontology/chebi', type: 'ontology', hasTable: true, isIndexed: true }, From 3261735d9dc9f563c625e7ddcfa39ae757b24b08 Mon Sep 17 00:00:00 2001 From: markquintontulloch Date: Thu, 14 Nov 2024 18:39:19 +0000 Subject: [PATCH 065/118] Column rename --- src/main/cliapp/src/components/EntityCountsComponent.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/cliapp/src/components/EntityCountsComponent.js b/src/main/cliapp/src/components/EntityCountsComponent.js index d1caed616..857410235 100644 --- a/src/main/cliapp/src/components/EntityCountsComponent.js +++ b/src/main/cliapp/src/components/EntityCountsComponent.js @@ -115,7 +115,7 @@ export const EntityCountsComponent = () => { } /> From d749d997a6f958b8b3a3b0533810f4e3fd63e7fa Mon Sep 17 00:00:00 2001 From: Andres Becerra Date: Fri, 15 Nov 2024 13:19:13 +0000 Subject: [PATCH 066/118] SCRUM-3953: add @INDEX annotations --- .../jobs/executors/GeneExpressionExecutor.java | 15 +++++++-------- .../model/entities/ExpressionExperiment.java | 2 +- .../model/entities/GeneExpressionExperiment.java | 15 +++++++++++++-- 3 files changed, 21 insertions(+), 11 deletions(-) diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/executors/GeneExpressionExecutor.java b/src/main/java/org/alliancegenome/curation_api/jobs/executors/GeneExpressionExecutor.java index d39a5c571..2377886e3 100644 --- a/src/main/java/org/alliancegenome/curation_api/jobs/executors/GeneExpressionExecutor.java +++ b/src/main/java/org/alliancegenome/curation_api/jobs/executors/GeneExpressionExecutor.java @@ -26,9 +26,8 @@ public class GeneExpressionExecutor extends LoadFileExecutor { @Inject GeneExpressionAnnotationService geneExpressionAnnotationService; @Inject 
GeneExpressionExperimentService geneExpressionExperimentService; - static final String ANNOTATIONS = "gene expression annotations"; - static final String EXPERIMENTS = "gene expresion experiments"; - + static final String ANNOTATIONS = "gen_exp_annotations"; + static final String EXPERIMENTS = "gen_exp_experiments"; public void execLoad(BulkLoadFileHistory bulkLoadFileHistory) { @@ -55,7 +54,7 @@ public void execLoad(BulkLoadFileHistory bulkLoadFileHistory) { boolean success = runLoad(geneExpressionAnnotationService, bulkLoadFileHistory, dataProvider, geneExpressionIngestFmsDTO.getData(), annotationIdsLoaded); if (success) { - runCleanup(geneExpressionAnnotationService, bulkLoadFileHistory, dataProvider.name(), annotationIdsBefore, annotationIdsLoaded, ANNOTATIONS); + runCleanup(geneExpressionAnnotationService, bulkLoadFileHistory, dataProvider.name(), annotationIdsBefore, annotationIdsLoaded, "gene expression annotations"); bulkLoadFileHistory.setCount(EXPERIMENTS, geneExpressionAnnotationService.getExperiments().size()); loadExperiments(bulkLoadFileHistory); } @@ -73,19 +72,19 @@ public void execLoad(BulkLoadFileHistory bulkLoadFileHistory) { private void loadExperiments(BulkLoadFileHistory history) { ProcessDisplayHelper ph = new ProcessDisplayHelper(); Map> experiments = geneExpressionAnnotationService.getExperiments(); - ph.startProcess("Saving " + EXPERIMENTS, experiments.size()); + ph.startProcess("Saving gene expression experiments: ", experiments.size()); for (String experimentId: experiments.keySet()) { try { GeneExpressionExperiment experiment = geneExpressionExperimentService.upsert(experimentId, experiments.get(experimentId)); if (experiment != null) { - history.incrementCompleted(); + history.incrementCompleted(EXPERIMENTS); } } catch (ObjectUpdateException e) { - history.incrementFailed(); + history.incrementFailed(EXPERIMENTS); addException(history, e.getData()); } catch (Exception e) { e.printStackTrace(); - history.incrementFailed(); + 
history.incrementFailed(EXPERIMENTS); addException(history, new ObjectUpdateException.ObjectUpdateExceptionData(experimentId, e.getMessage(), e.getStackTrace())); } ph.progressProcess(); diff --git a/src/main/java/org/alliancegenome/curation_api/model/entities/ExpressionExperiment.java b/src/main/java/org/alliancegenome/curation_api/model/entities/ExpressionExperiment.java index 50aa2a99a..9ee6bbd92 100644 --- a/src/main/java/org/alliancegenome/curation_api/model/entities/ExpressionExperiment.java +++ b/src/main/java/org/alliancegenome/curation_api/model/entities/ExpressionExperiment.java @@ -26,7 +26,7 @@ @Indexed @Data @EqualsAndHashCode(onlyExplicitlyIncluded = true, callSuper = false) -@AGRCurationSchemaVersion(min = "2.8.1", max = LinkMLSchemaConstants.LATEST_RELEASE, dependencies = { SubmittedObject.class }, partial = true) +@AGRCurationSchemaVersion(min = "2.8.0", max = LinkMLSchemaConstants.LATEST_RELEASE, dependencies = { SubmittedObject.class }, partial = true) public abstract class ExpressionExperiment extends SubmittedObject { @FullTextField(analyzer = "autocompleteAnalyzer", searchAnalyzer = "autocompleteSearchAnalyzer") diff --git a/src/main/java/org/alliancegenome/curation_api/model/entities/GeneExpressionExperiment.java b/src/main/java/org/alliancegenome/curation_api/model/entities/GeneExpressionExperiment.java index 9f2caee9c..d9d587f5f 100644 --- a/src/main/java/org/alliancegenome/curation_api/model/entities/GeneExpressionExperiment.java +++ b/src/main/java/org/alliancegenome/curation_api/model/entities/GeneExpressionExperiment.java @@ -13,9 +13,20 @@ @Entity @Data @EqualsAndHashCode(onlyExplicitlyIncluded = true, callSuper = false) -@AGRCurationSchemaVersion(min = "2.8.1", max = LinkMLSchemaConstants.LATEST_RELEASE, dependencies = { ExpressionExperiment.class }, partial = true) +@AGRCurationSchemaVersion(min = "2.8.0", max = LinkMLSchemaConstants.LATEST_RELEASE, dependencies = { ExpressionExperiment.class }, partial = true) @Table(indexes = { - 
@Index(name = "on_index", columnList = "id") + @Index(name = "geneexpressionexperiment_uniqueid_index", columnList = "uniqueid"), + @Index(name = "geneexpressionexperiment_curie_index", columnList = "curie"), + @Index(name = "geneexpressionexperiment_modentityid_index", columnList = "modinternalid"), + @Index(name = "geneexpressionexperiment_modinternalid_index", columnList = "modinternalid"), + @Index(name = "geneexpressionexperiment_singlereference_index", columnList = "singlereference_id"), + @Index(name = "geneexpressionexperiment_entityassayedused_index", columnList = "entityassayed_id"), + @Index(name = "geneexpressionexperiment_expressionassayused_index", columnList = "expressionassayused_id"), + @Index(name = "geneexpressionexperiment_dataprovider_index", columnList = "dataprovider_id"), + @Index(name = "geneexpressionexperiment_internal_index", columnList = "internal"), + @Index(name = "geneexpressionexperiment_obsolete_index", columnList = "obsolete"), + @Index(name = "geneexpressionexperiment_createdby_index", columnList = "createdby_id"), + @Index(name = "geneexpressionexperiment_updatedby_index", columnList = "updatedby_id") }) public class GeneExpressionExperiment extends ExpressionExperiment { } From 12184f2f6a60c060fc00e7e444eb5152e4ca0e04 Mon Sep 17 00:00:00 2001 From: Andres Becerra Date: Fri, 15 Nov 2024 14:29:17 +0000 Subject: [PATCH 067/118] SCRUM-3953: rename constraints --- .../db/migration/v0.38.0.8__gene_expression_experiment.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/resources/db/migration/v0.38.0.8__gene_expression_experiment.sql b/src/main/resources/db/migration/v0.38.0.8__gene_expression_experiment.sql index 3024a8d5c..7cb52db6c 100644 --- a/src/main/resources/db/migration/v0.38.0.8__gene_expression_experiment.sql +++ b/src/main/resources/db/migration/v0.38.0.8__gene_expression_experiment.sql @@ -40,8 +40,8 @@ CREATE TABLE IF NOT EXISTS geneexpressionexperiment_geneexpressionannotation ( 
geneexpressionexperiment_id bigint NOT NULL, expressionannotations_id bigint NOT NULL, - CONSTRAINT gen_exp_exp_experiment_fkey FOREIGN KEY (geneexpressionexperiment_id) REFERENCES geneexpressionexperiment (id) MATCH SIMPLE ON UPDATE NO ACTION ON DELETE NO ACTION, - CONSTRAINT gen_exp_exp_annotation_fkey FOREIGN KEY (expressionannotations_id) REFERENCES geneexpressionannotation (id) MATCH SIMPLE ON UPDATE NO ACTION ON DELETE NO ACTION + CONSTRAINT geexperiment_geannotation_geexperiment_id_fk FOREIGN KEY (geneexpressionexperiment_id) REFERENCES geneexpressionexperiment (id) MATCH SIMPLE ON UPDATE NO ACTION ON DELETE NO ACTION, + CONSTRAINT geexperiment_geannotation_geannotation_id_fk FOREIGN KEY (expressionannotations_id) REFERENCES geneexpressionannotation (id) MATCH SIMPLE ON UPDATE NO ACTION ON DELETE NO ACTION ); CREATE INDEX geneexpressionexperiment_gea_experiment_index ON geneexpressionexperiment_geneexpressionannotation USING btree (geneexpressionexperiment_id); From f9d36c4b719074a915058dbb1d8df53273e2334d Mon Sep 17 00:00:00 2001 From: Andres Becerra Date: Fri, 15 Nov 2024 17:29:04 +0000 Subject: [PATCH 068/118] SCRUM-3953: add experiments cleanup --- .../executors/GeneExpressionExecutor.java | 21 ++++++++++++++----- .../GeneExpressionExperimentService.java | 16 ++++++++++---- 2 files changed, 28 insertions(+), 9 deletions(-) diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/executors/GeneExpressionExecutor.java b/src/main/java/org/alliancegenome/curation_api/jobs/executors/GeneExpressionExecutor.java index 2377886e3..8aec23e22 100644 --- a/src/main/java/org/alliancegenome/curation_api/jobs/executors/GeneExpressionExecutor.java +++ b/src/main/java/org/alliancegenome/curation_api/jobs/executors/GeneExpressionExecutor.java @@ -28,9 +28,9 @@ public class GeneExpressionExecutor extends LoadFileExecutor { @Inject GeneExpressionExperimentService geneExpressionExperimentService; static final String ANNOTATIONS = "gen_exp_annotations"; static final 
String EXPERIMENTS = "gen_exp_experiments"; - public void execLoad(BulkLoadFileHistory bulkLoadFileHistory) { + public void execLoad(BulkLoadFileHistory bulkLoadFileHistory) { try { BulkFMSLoad fms = (BulkFMSLoad) bulkLoadFileHistory.getBulkLoad(); BackendBulkDataProvider dataProvider = BackendBulkDataProvider.valueOf(fms.getFmsDataSubType()); @@ -51,12 +51,17 @@ public void execLoad(BulkLoadFileHistory bulkLoadFileHistory) { List annotationIdsLoaded = new ArrayList<>(); List annotationIdsBefore = geneExpressionAnnotationService.getAnnotationIdsByDataProvider(dataProvider); + List experimentIdsLoaded = new ArrayList<>(); + List experimentIdsBefore = geneExpressionExperimentService.getExperimentIdsByDataProvider(dataProvider); + + bulkLoadFileHistory.setCount(ANNOTATIONS, geneExpressionIngestFmsDTO.getData().size()); boolean success = runLoad(geneExpressionAnnotationService, bulkLoadFileHistory, dataProvider, geneExpressionIngestFmsDTO.getData(), annotationIdsLoaded); if (success) { - runCleanup(geneExpressionAnnotationService, bulkLoadFileHistory, dataProvider.name(), annotationIdsBefore, annotationIdsLoaded, "gene expression annotations"); + runCleanup(geneExpressionAnnotationService, bulkLoadFileHistory, dataProvider.name(), annotationIdsBefore, annotationIdsLoaded, ANNOTATIONS); bulkLoadFileHistory.setCount(EXPERIMENTS, geneExpressionAnnotationService.getExperiments().size()); - loadExperiments(bulkLoadFileHistory); + loadExperiments(bulkLoadFileHistory, dataProvider, experimentIdsLoaded); + runCleanup(geneExpressionExperimentService, bulkLoadFileHistory, dataProvider.name(), experimentIdsBefore, experimentIdsLoaded, EXPERIMENTS); } bulkLoadFileHistory.finishLoad(); @@ -69,14 +74,16 @@ public void execLoad(BulkLoadFileHistory bulkLoadFileHistory) { } } - private void loadExperiments(BulkLoadFileHistory history) { + + private void loadExperiments(BulkLoadFileHistory history, BackendBulkDataProvider dataProvider, List experimentIdsLoaded) { ProcessDisplayHelper ph 
= new ProcessDisplayHelper(); Map> experiments = geneExpressionAnnotationService.getExperiments(); ph.startProcess("Saving gene expression experiments: ", experiments.size()); for (String experimentId: experiments.keySet()) { try { - GeneExpressionExperiment experiment = geneExpressionExperimentService.upsert(experimentId, experiments.get(experimentId)); + GeneExpressionExperiment experiment = geneExpressionExperimentService.upsert(experimentId, experiments.get(experimentId), dataProvider); if (experiment != null) { + experimentIdsLoaded.add(experiment.getId()); history.incrementCompleted(EXPERIMENTS); } } catch (ObjectUpdateException e) { @@ -91,4 +98,8 @@ private void loadExperiments(BulkLoadFileHistory history) { } updateHistory(history); } + +// private void cleanUpExperiments(BulkLoadFileHistory bulkLoadFileHistory) { +// System.out.println("cleaning ..."); +// } } diff --git a/src/main/java/org/alliancegenome/curation_api/services/GeneExpressionExperimentService.java b/src/main/java/org/alliancegenome/curation_api/services/GeneExpressionExperimentService.java index 7ffdd7cd6..70e702750 100644 --- a/src/main/java/org/alliancegenome/curation_api/services/GeneExpressionExperimentService.java +++ b/src/main/java/org/alliancegenome/curation_api/services/GeneExpressionExperimentService.java @@ -5,8 +5,10 @@ import jakarta.inject.Inject; import jakarta.transaction.Transactional; import lombok.extern.jbosslog.JBossLog; +import org.alliancegenome.curation_api.constants.EntityFieldConstants; import org.alliancegenome.curation_api.dao.GeneExpressionAnnotationDAO; import org.alliancegenome.curation_api.dao.GeneExpressionExperimentDAO; +import org.alliancegenome.curation_api.enums.BackendBulkDataProvider; import org.alliancegenome.curation_api.exceptions.ValidationException; import org.alliancegenome.curation_api.model.entities.GeneExpressionAnnotation; import org.alliancegenome.curation_api.model.entities.GeneExpressionExperiment; @@ -14,8 +16,7 @@ import 
org.alliancegenome.curation_api.services.base.BaseEntityCrudService; import org.alliancegenome.curation_api.services.ontology.MmoTermService; -import java.util.HashSet; -import java.util.Set; +import java.util.*; @JBossLog @RequestScoped @@ -26,6 +27,7 @@ public class GeneExpressionExperimentService extends BaseEntityCrudService getExperimentIdsByDataProvider(BackendBulkDataProvider dataProvider) { + Map params = new HashMap<>(); + params.put(EntityFieldConstants.DATA_PROVIDER, dataProvider.sourceOrganization); + return geneExpressionExperimentDAO.findIdsByParams(params); + } + @Transactional - public GeneExpressionExperiment upsert(String experimentId, Set geneExpressionAnnotationIds) throws ValidationException { + public GeneExpressionExperiment upsert(String experimentId, Set geneExpressionAnnotationIds, BackendBulkDataProvider dataProvider) throws ValidationException { GeneExpressionExperiment geneExpressionExperiment; Set annotations; @@ -50,7 +58,7 @@ public GeneExpressionExperiment upsert(String experimentId, Set geneExpr geneExpressionExperiment = new GeneExpressionExperiment(); geneExpressionExperiment.setUniqueId(experimentId); } - + geneExpressionExperiment.setDataProvider(dataProviderService.getDefaultDataProvider(dataProvider.sourceOrganization)); geneExpressionExperiment.setEntityAssayed(geneService.findByIdentifierString(geneId)); geneExpressionExperiment.setSingleReference(referenceService.getByCurie(referenceId).getEntity()); geneExpressionExperiment.setExpressionAssayUsed(mmoTermService.findByCurie(assayId)); From 673190b7b246ac6241327a0f7c4aaedd16da7642 Mon Sep 17 00:00:00 2001 From: Adam Gibson Date: Mon, 18 Nov 2024 10:12:54 -0600 Subject: [PATCH 069/118] SCRUM-4572 left justify ListDialog columns --- .../src/components/Templates/dialog/ListDialogTemplate.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/cliapp/src/components/Templates/dialog/ListDialogTemplate.js 
b/src/main/cliapp/src/components/Templates/dialog/ListDialogTemplate.js index 13da123e8..3dd8c92ad 100644 --- a/src/main/cliapp/src/components/Templates/dialog/ListDialogTemplate.js +++ b/src/main/cliapp/src/components/Templates/dialog/ListDialogTemplate.js @@ -9,7 +9,7 @@ export const ListDialogTemplate = ({ entities, handleOpen, getTextField, underli const listTemplate = (item) => (
); From 42262519726553cc154a91d2b2fd6c3404f5ced1 Mon Sep 17 00:00:00 2001 From: Andres Becerra Date: Tue, 19 Nov 2024 13:42:26 +0000 Subject: [PATCH 070/118] SCRUM-3953: override runloadApi --- ...eneExpressionAnnotationCrudController.java | 2 +- .../executors/GeneExpressionExecutor.java | 31 +++++++++++++------ ...neExpressionAnnotationFmsDTOValidator.java | 8 ++--- .../v0.38.0.8__gene_expression_experiment.sql | 2 -- .../ExpressionBulkUploadFmsITCase.java | 18 +++++++++-- 5 files changed, 42 insertions(+), 19 deletions(-) diff --git a/src/main/java/org/alliancegenome/curation_api/controllers/crud/GeneExpressionAnnotationCrudController.java b/src/main/java/org/alliancegenome/curation_api/controllers/crud/GeneExpressionAnnotationCrudController.java index 1e1ab09c7..42322337b 100644 --- a/src/main/java/org/alliancegenome/curation_api/controllers/crud/GeneExpressionAnnotationCrudController.java +++ b/src/main/java/org/alliancegenome/curation_api/controllers/crud/GeneExpressionAnnotationCrudController.java @@ -32,7 +32,7 @@ public ObjectResponse getByIdentifier(String identifie } public APIResponse updateExpressionAnnotations(String dataProvider, List annotations) { - APIResponse response = geneExpressionExecutor.runLoadApi(geneExpressionAnnotationService, dataProvider, annotations); + APIResponse response = geneExpressionExecutor.runLoadAPI(geneExpressionAnnotationService, dataProvider, annotations); return response; } } diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/executors/GeneExpressionExecutor.java b/src/main/java/org/alliancegenome/curation_api/jobs/executors/GeneExpressionExecutor.java index 8aec23e22..b8747d8d3 100644 --- a/src/main/java/org/alliancegenome/curation_api/jobs/executors/GeneExpressionExecutor.java +++ b/src/main/java/org/alliancegenome/curation_api/jobs/executors/GeneExpressionExecutor.java @@ -9,7 +9,10 @@ import org.alliancegenome.curation_api.model.entities.GeneExpressionExperiment; import 
org.alliancegenome.curation_api.model.entities.bulkloads.BulkFMSLoad; import org.alliancegenome.curation_api.model.entities.bulkloads.BulkLoadFileHistory; +import org.alliancegenome.curation_api.model.ingest.dto.fms.GeneExpressionFmsDTO; import org.alliancegenome.curation_api.model.ingest.dto.fms.GeneExpressionIngestFmsDTO; +import org.alliancegenome.curation_api.response.APIResponse; +import org.alliancegenome.curation_api.response.LoadHistoryResponce; import org.alliancegenome.curation_api.services.GeneExpressionAnnotationService; import org.alliancegenome.curation_api.services.GeneExpressionExperimentService; import org.alliancegenome.curation_api.util.ProcessDisplayHelper; @@ -26,8 +29,8 @@ public class GeneExpressionExecutor extends LoadFileExecutor { @Inject GeneExpressionAnnotationService geneExpressionAnnotationService; @Inject GeneExpressionExperimentService geneExpressionExperimentService; - static final String ANNOTATIONS = "gen_exp_annotations"; - static final String EXPERIMENTS = "gen_exp_experiments"; + static final String ANNOTATIONS = "Annotations"; + static final String EXPERIMENTS = "Experiments"; public void execLoad(BulkLoadFileHistory bulkLoadFileHistory) { @@ -54,12 +57,10 @@ public void execLoad(BulkLoadFileHistory bulkLoadFileHistory) { List experimentIdsLoaded = new ArrayList<>(); List experimentIdsBefore = geneExpressionExperimentService.getExperimentIdsByDataProvider(dataProvider); - bulkLoadFileHistory.setCount(ANNOTATIONS, geneExpressionIngestFmsDTO.getData().size()); boolean success = runLoad(geneExpressionAnnotationService, bulkLoadFileHistory, dataProvider, geneExpressionIngestFmsDTO.getData(), annotationIdsLoaded); if (success) { runCleanup(geneExpressionAnnotationService, bulkLoadFileHistory, dataProvider.name(), annotationIdsBefore, annotationIdsLoaded, ANNOTATIONS); - bulkLoadFileHistory.setCount(EXPERIMENTS, geneExpressionAnnotationService.getExperiments().size()); loadExperiments(bulkLoadFileHistory, dataProvider, 
experimentIdsLoaded); runCleanup(geneExpressionExperimentService, bulkLoadFileHistory, dataProvider.name(), experimentIdsBefore, experimentIdsLoaded, EXPERIMENTS); } @@ -74,11 +75,27 @@ public void execLoad(BulkLoadFileHistory bulkLoadFileHistory) { } } + public APIResponse runLoadAPI(GeneExpressionAnnotationService service, String dataProviderName, List objectList) { + List idsLoaded = new ArrayList<>(); + BulkLoadFileHistory history = new BulkLoadFileHistory(objectList.size()); + history = bulkLoadFileHistoryDAO.persist(history); + BackendBulkDataProvider dataProvider = null; + if (dataProviderName != null) { + dataProvider = BackendBulkDataProvider.valueOf(dataProviderName); + } + boolean success = runLoad(service, history, dataProvider, objectList, idsLoaded, true); + if (success) { + loadExperiments(history, dataProvider, new ArrayList<>()); + } + history.finishLoad(); + return new LoadHistoryResponce(history); + } - private void loadExperiments(BulkLoadFileHistory history, BackendBulkDataProvider dataProvider, List experimentIdsLoaded) { + private void loadExperiments(BulkLoadFileHistory history, BackendBulkDataProvider dataProvider, List experimentIdsLoaded) { ProcessDisplayHelper ph = new ProcessDisplayHelper(); Map> experiments = geneExpressionAnnotationService.getExperiments(); ph.startProcess("Saving gene expression experiments: ", experiments.size()); + history.setCount(EXPERIMENTS, geneExpressionAnnotationService.getExperiments().size()); for (String experimentId: experiments.keySet()) { try { GeneExpressionExperiment experiment = geneExpressionExperimentService.upsert(experimentId, experiments.get(experimentId), dataProvider); @@ -98,8 +115,4 @@ private void loadExperiments(BulkLoadFileHistory history, BackendBulkDataProvide } updateHistory(history); } - -// private void cleanUpExperiments(BulkLoadFileHistory bulkLoadFileHistory) { -// System.out.println("cleaning ..."); -// } } diff --git 
a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/GeneExpressionAnnotationFmsDTOValidator.java b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/GeneExpressionAnnotationFmsDTOValidator.java index bf2ffeea9..ee5e5b322 100644 --- a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/GeneExpressionAnnotationFmsDTOValidator.java +++ b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/GeneExpressionAnnotationFmsDTOValidator.java @@ -47,14 +47,13 @@ public class GeneExpressionAnnotationFmsDTOValidator { public GeneExpressionAnnotation validateAnnotation(GeneExpressionFmsDTO geneExpressionFmsDTO, BackendBulkDataProvider dataProvider, Map> experiments) throws ValidationException { ObjectResponse response = new ObjectResponse<>(); - GeneExpressionAnnotation geneExpressionAnnotation; - String uniqueId; - String referenceCurie; + GeneExpressionAnnotation geneExpressionAnnotation = new GeneExpressionAnnotation(); + String uniqueId = "empty"; + String referenceCurie = "empty"; ObjectResponse singleReferenceResponse = validateEvidence(geneExpressionFmsDTO); if (singleReferenceResponse.hasErrors()) { response.addErrorMessage("singleReference", singleReferenceResponse.errorMessagesString()); - throw new ObjectValidationException(geneExpressionFmsDTO, response.errorMessagesString()); } else { referenceCurie = singleReferenceResponse.getEntity().getCurie(); uniqueId = geneExpressionAnnotationUniqueIdHelper.generateUniqueId(geneExpressionFmsDTO, referenceCurie); @@ -62,7 +61,6 @@ public GeneExpressionAnnotation validateAnnotation(GeneExpressionFmsDTO geneExpr if (annotationDB != null && annotationDB.getSingleResult() != null) { geneExpressionAnnotation = annotationDB.getSingleResult(); } else { - geneExpressionAnnotation = new GeneExpressionAnnotation(); geneExpressionAnnotation.setUniqueId(uniqueId); } if (geneExpressionAnnotation.getExpressionPattern() == null) { diff --git 
a/src/main/resources/db/migration/v0.38.0.8__gene_expression_experiment.sql b/src/main/resources/db/migration/v0.38.0.8__gene_expression_experiment.sql index 7cb52db6c..dbf6a1a9f 100644 --- a/src/main/resources/db/migration/v0.38.0.8__gene_expression_experiment.sql +++ b/src/main/resources/db/migration/v0.38.0.8__gene_expression_experiment.sql @@ -34,8 +34,6 @@ CREATE INDEX geneexpressionexperiment_obsolete_index ON geneexpressionexperiment CREATE INDEX geneexpressionexperiment_createdby_index ON geneexpressionexperiment USING btree (createdby_id); CREATE INDEX geneexpressionexperiment_updatedby_index ON geneexpressionexperiment USING btree (updatedby_id); -CREATE SEQUENCE geneexpressionexperiment_geneexpressionannotation_seq START WITH 1 INCREMENT BY 50 NO MINVALUE NO MAXVALUE CACHE 1; - CREATE TABLE IF NOT EXISTS geneexpressionexperiment_geneexpressionannotation ( geneexpressionexperiment_id bigint NOT NULL, expressionannotations_id bigint NOT NULL, diff --git a/src/test/java/org/alliancegenome/curation_api/ExpressionBulkUploadFmsITCase.java b/src/test/java/org/alliancegenome/curation_api/ExpressionBulkUploadFmsITCase.java index 9d9d9ab02..bdf753e26 100644 --- a/src/test/java/org/alliancegenome/curation_api/ExpressionBulkUploadFmsITCase.java +++ b/src/test/java/org/alliancegenome/curation_api/ExpressionBulkUploadFmsITCase.java @@ -30,6 +30,7 @@ public class ExpressionBulkUploadFmsITCase extends BaseITCase { private final String expressionBulkPostEndpoint = "/api/gene-expression-annotation/bulk/ZFIN/annotationFile"; private final String expressionTestFilePath = "src/test/resources/bulk/fms/07_expression/"; private final String expressionFindEndpoint = "/api/gene-expression-annotation/find?limit=100&page=0"; + private final String experimentFindEndpoint = "/api/gene-expression-experiment/find?limit=100&page=0"; private final String taxon = "NCBITaxon:7955"; private final String gene = "GEXPTEST:GENE001"; private final String mmoTerm = "GEXPTEST:assay001"; @@ -37,10 
+38,10 @@ public class ExpressionBulkUploadFmsITCase extends BaseITCase { private final String agrReferenceId = "AGRKB:101000000668377"; private final String publicationId = "PMID:009"; private final String agrPublicationId = "AGRKB:101000000668376"; - + private final String pipe = "|"; + private final String experimentUniqueIdExpected = gene + pipe + agrPublicationId + pipe + mmoTerm; private final String stageTermId = "ZFS:001"; private final String stageUberonTermId = "UBERON:001"; - private final String anatomicalStructureTermId = "ANAT:001"; private final String anatomicalSubstructureTermId = "ANAT:002"; private final String cellularComponentTermId = "GOTEST:0012"; @@ -66,6 +67,19 @@ public void init() { public void expressionBulkUploadAllFields() throws Exception { loadRequiredEntities(); checkSuccessfulBulkLoad(expressionBulkPostEndpoint, expressionTestFilePath + "AF_01_all_fields.json"); + + RestAssured.given().when() + .header("Content-Type", "application/json") + .body("{}") + .post(experimentFindEndpoint) + .then() + .statusCode(200) + .body("totalResults", is(1)) + .body("results", hasSize(1)) + .body("results[0].dataProvider.sourceOrganization.abbreviation", is("ZFIN")) + .body("results[0].uniqueId", is(experimentUniqueIdExpected)) + .body("results[0].expressionAnnotations.size()", is(1)); + RestAssured.given().when() .header("Content-Type", "application/json") .body("{}") From 12267a93b71afef5e117a630cd19ff3244e52e88 Mon Sep 17 00:00:00 2001 From: Andres Becerra Date: Tue, 19 Nov 2024 13:45:38 +0000 Subject: [PATCH 071/118] SCRUM-3953: fix indentation --- .../curation_api/jobs/executors/GeneExpressionExecutor.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/executors/GeneExpressionExecutor.java b/src/main/java/org/alliancegenome/curation_api/jobs/executors/GeneExpressionExecutor.java index b8747d8d3..58ec6fa4c 100644 --- 
a/src/main/java/org/alliancegenome/curation_api/jobs/executors/GeneExpressionExecutor.java +++ b/src/main/java/org/alliancegenome/curation_api/jobs/executors/GeneExpressionExecutor.java @@ -91,7 +91,7 @@ public APIResponse runLoadAPI(GeneExpressionAnnotationService service, String da return new LoadHistoryResponce(history); } - private void loadExperiments(BulkLoadFileHistory history, BackendBulkDataProvider dataProvider, List experimentIdsLoaded) { + private void loadExperiments(BulkLoadFileHistory history, BackendBulkDataProvider dataProvider, List experimentIdsLoaded) { ProcessDisplayHelper ph = new ProcessDisplayHelper(); Map> experiments = geneExpressionAnnotationService.getExperiments(); ph.startProcess("Saving gene expression experiments: ", experiments.size()); From 6db58e9c732d74cbb781e76581f1921723191c1b Mon Sep 17 00:00:00 2001 From: Andres Becerra Date: Tue, 19 Nov 2024 14:03:18 +0000 Subject: [PATCH 072/118] SCRUM-3953: refactor runLoad --- .../executors/GeneExpressionExecutor.java | 2 +- .../GeneGeneticInteractionExecutor.java | 2 +- .../GeneMolecularInteractionExecutor.java | 6 +-- .../jobs/executors/LoadFileExecutor.java | 46 ++++++++----------- .../jobs/executors/ParalogyExecutor.java | 2 +- 5 files changed, 26 insertions(+), 32 deletions(-) diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/executors/GeneExpressionExecutor.java b/src/main/java/org/alliancegenome/curation_api/jobs/executors/GeneExpressionExecutor.java index 58ec6fa4c..fee8b5d52 100644 --- a/src/main/java/org/alliancegenome/curation_api/jobs/executors/GeneExpressionExecutor.java +++ b/src/main/java/org/alliancegenome/curation_api/jobs/executors/GeneExpressionExecutor.java @@ -83,7 +83,7 @@ public APIResponse runLoadAPI(GeneExpressionAnnotationService service, String da if (dataProviderName != null) { dataProvider = BackendBulkDataProvider.valueOf(dataProviderName); } - boolean success = runLoad(service, history, dataProvider, objectList, idsLoaded, true); + boolean 
success = runLoad(service, history, dataProvider, objectList, idsLoaded, true, "Records"); if (success) { loadExperiments(history, dataProvider, new ArrayList<>()); } diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/executors/GeneGeneticInteractionExecutor.java b/src/main/java/org/alliancegenome/curation_api/jobs/executors/GeneGeneticInteractionExecutor.java index 74e9c0259..6bdb7dc9d 100644 --- a/src/main/java/org/alliancegenome/curation_api/jobs/executors/GeneGeneticInteractionExecutor.java +++ b/src/main/java/org/alliancegenome/curation_api/jobs/executors/GeneGeneticInteractionExecutor.java @@ -41,7 +41,7 @@ public void execLoad(BulkLoadFileHistory bulkLoadFileHistory) { bulkLoadFileHistory.setCount(interactionData.size()); updateHistory(bulkLoadFileHistory); - boolean success = runLoad(geneGeneticInteractionService, bulkLoadFileHistory, null, interactionData, interactionIdsLoaded, false); + boolean success = runLoad(geneGeneticInteractionService, bulkLoadFileHistory, null, interactionData, interactionIdsLoaded, false, "Records"); if (success) { runCleanup(geneInteractionService, bulkLoadFileHistory, "COMBINED", interactionIdsBefore, interactionIdsLoaded, "gene genetic interaction"); } diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/executors/GeneMolecularInteractionExecutor.java b/src/main/java/org/alliancegenome/curation_api/jobs/executors/GeneMolecularInteractionExecutor.java index 482ee00be..d3c829ae1 100644 --- a/src/main/java/org/alliancegenome/curation_api/jobs/executors/GeneMolecularInteractionExecutor.java +++ b/src/main/java/org/alliancegenome/curation_api/jobs/executors/GeneMolecularInteractionExecutor.java @@ -37,11 +37,11 @@ public void execLoad(BulkLoadFileHistory bulkLoadFileHistory) { List interactionIdsLoaded = new ArrayList<>(); List interactionIdsBefore = geneMolecularInteractionDAO.findAllIds().getResults(); - + bulkLoadFileHistory.setCount(interactionData.size()); updateHistory(bulkLoadFileHistory); - - boolean 
success = runLoad(geneMolecularInteractionService, bulkLoadFileHistory, null, interactionData, interactionIdsLoaded, false); + + boolean success = runLoad(geneMolecularInteractionService, bulkLoadFileHistory, null, interactionData, interactionIdsLoaded, false, "Records"); if (success) { runCleanup(geneInteractionService, bulkLoadFileHistory, "COMBINED", interactionIdsBefore, interactionIdsLoaded, "gene molecular interaction"); } diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/executors/LoadFileExecutor.java b/src/main/java/org/alliancegenome/curation_api/jobs/executors/LoadFileExecutor.java index 451177395..2e38cd1f8 100644 --- a/src/main/java/org/alliancegenome/curation_api/jobs/executors/LoadFileExecutor.java +++ b/src/main/java/org/alliancegenome/curation_api/jobs/executors/LoadFileExecutor.java @@ -1,14 +1,8 @@ package org.alliancegenome.curation_api.jobs.executors; -import java.io.FileInputStream; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.LinkedHashSet; -import java.util.List; -import java.util.Set; -import java.util.stream.Collectors; -import java.util.zip.GZIPInputStream; - +import com.fasterxml.jackson.databind.ObjectMapper; +import io.quarkus.logging.Log; +import jakarta.inject.Inject; import org.alliancegenome.curation_api.dao.loads.BulkLoadFileDAO; import org.alliancegenome.curation_api.dao.loads.BulkLoadFileExceptionDAO; import org.alliancegenome.curation_api.dao.loads.BulkLoadFileHistoryDAO; @@ -35,10 +29,10 @@ import org.apache.commons.collections4.ListUtils; import org.apache.commons.lang3.StringUtils; -import com.fasterxml.jackson.databind.ObjectMapper; - -import io.quarkus.logging.Log; -import jakarta.inject.Inject; +import java.io.FileInputStream; +import java.util.*; +import java.util.stream.Collectors; +import java.util.zip.GZIPInputStream; public class LoadFileExecutor { @@ -124,9 +118,9 @@ protected IngestDTO readIngestFile(BulkLoadFileHistory bulkLoadFileHistory, Clas if 
(StringUtils.isNotBlank(ingestDto.getAllianceMemberReleaseVersion())) { bulkLoadFileHistory.getBulkLoadFile().setAllianceMemberReleaseVersion(ingestDto.getAllianceMemberReleaseVersion()); } - + bulkLoadFileDAO.merge(bulkLoadFileHistory.getBulkLoadFile()); - + if (!checkSchemaVersion(bulkLoadFileHistory, dtoClass)) { return null; } @@ -190,16 +184,16 @@ public APIResponse runLoadApi(BaseU if (dataProviderName != null) { dataProvider = BackendBulkDataProvider.valueOf(dataProviderName); } - runLoad(service, history, dataProvider, objectList, idsLoaded, true); + runLoad(service, history, dataProvider, objectList, idsLoaded, true, "Records"); history.finishLoad(); return new LoadHistoryResponce(history); } - + protected boolean runLoad(BaseUpsertServiceInterface service, BulkLoadFileHistory history, BackendBulkDataProvider dataProvider, List objectList, List idsAdded) { - return runLoad(service, history, dataProvider, objectList, idsAdded, true); + return runLoad(service, history, dataProvider, objectList, idsAdded, true, "Records"); } - protected boolean runLoad(BaseUpsertServiceInterface service, BulkLoadFileHistory history, BackendBulkDataProvider dataProvider, List objectList, List idsAdded, Boolean terminateFailing) { + protected boolean runLoad(BaseUpsertServiceInterface service, BulkLoadFileHistory history, BackendBulkDataProvider dataProvider, List objectList, List idsAdded, Boolean terminateFailing, String countType) { ProcessDisplayHelper ph = new ProcessDisplayHelper(); ph.addDisplayHandler(loadProcessDisplayService); if (CollectionUtils.isNotEmpty(objectList)) { @@ -208,24 +202,24 @@ protected boolean runLoad(BaseUpser loadMessage = loadMessage + " for " + dataProvider.name(); } ph.startProcess(loadMessage, objectList.size()); - + updateHistory(history); for (T dtoObject : objectList) { try { E dbObject = service.upsert(dtoObject, dataProvider); - history.incrementCompleted(); + history.incrementCompleted(countType); if (idsAdded != null) { 
idsAdded.add(dbObject.getId()); } } catch (ObjectUpdateException e) { - history.incrementFailed(); + history.incrementFailed(countType); addException(history, e.getData()); } catch (KnownIssueValidationException e) { Log.debug(e.getMessage()); history.incrementSkipped(); } catch (Exception e) { e.printStackTrace(); - history.incrementFailed(); + history.incrementFailed(countType); addException(history, new ObjectUpdateExceptionData(dtoObject, e.getMessage(), e.getStackTrace())); } if (terminateFailing && history.getErrorRate() > 0.25) { @@ -259,12 +253,12 @@ protected boolean runLoad(BaseUpser Log.debug("runLoad: Remove: " + dataProviderName + " " + idsToRemove.size()); String countType = loadTypeString + " Deleted"; - + long existingDeletes = history.getCount(countType).getTotal() == null ? 0 : history.getCount(countType).getTotal(); history.setCount(countType, idsToRemove.size() + existingDeletes); String loadDescription = dataProviderName + " " + loadTypeString + " bulk load (" + history.getBulkLoadFile().getMd5Sum() + ")"; - + ProcessDisplayHelper ph = new ProcessDisplayHelper(10000); ph.startProcess("Deletion/deprecation of: " + dataProviderName + " " + loadTypeString, idsToRemove.size()); //updateHistory(history); @@ -302,7 +296,7 @@ protected void failLoad(BulkLoadFileHistory bulkLoadFileHistory, Exception e) { slackNotifier.slackalert(bulkLoadFileHistory); updateHistory(bulkLoadFileHistory); } - + protected void failLoadAboveErrorRateCutoff(BulkLoadFileHistory bulkLoadFileHistory) { bulkLoadFileHistory.setBulkloadStatus(JobStatus.FAILED); bulkLoadFileHistory.setErrorMessage("Failure rate exceeded cutoff"); diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/executors/ParalogyExecutor.java b/src/main/java/org/alliancegenome/curation_api/jobs/executors/ParalogyExecutor.java index a4f5ff276..8e6095894 100644 --- a/src/main/java/org/alliancegenome/curation_api/jobs/executors/ParalogyExecutor.java +++ 
b/src/main/java/org/alliancegenome/curation_api/jobs/executors/ParalogyExecutor.java @@ -48,7 +48,7 @@ public void execLoad(BulkLoadFileHistory bulkLoadFileHistory) { bulkLoadFileHistory.setCount(paralogyData.getData().size()); updateHistory(bulkLoadFileHistory); - boolean success = runLoad(geneToGeneParalogyService, bulkLoadFileHistory, dataProvider, paralogyData.getData(), paralogyIdsLoaded, false); + boolean success = runLoad(geneToGeneParalogyService, bulkLoadFileHistory, dataProvider, paralogyData.getData(), paralogyIdsLoaded, false, "Records"); if (success) { runCleanup(geneToGeneParalogyService, bulkLoadFileHistory, fms.getFmsDataSubType(), paralogyPairsBefore, paralogyIdsLoaded, fms.getFmsDataType(), false); From 9d1662cb4f95d8aaf6434344f0bd367146de64cb Mon Sep 17 00:00:00 2001 From: Andres Becerra Date: Tue, 19 Nov 2024 14:46:06 +0000 Subject: [PATCH 073/118] SCRUM-3953: add more checks --- .../curation_api/jobs/executors/LoadFileExecutor.java | 2 +- .../curation_api/ExpressionBulkUploadFmsITCase.java | 10 +++++++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/executors/LoadFileExecutor.java b/src/main/java/org/alliancegenome/curation_api/jobs/executors/LoadFileExecutor.java index 2e38cd1f8..929edeb0a 100644 --- a/src/main/java/org/alliancegenome/curation_api/jobs/executors/LoadFileExecutor.java +++ b/src/main/java/org/alliancegenome/curation_api/jobs/executors/LoadFileExecutor.java @@ -216,7 +216,7 @@ protected boolean runLoad(BaseUpser addException(history, e.getData()); } catch (KnownIssueValidationException e) { Log.debug(e.getMessage()); - history.incrementSkipped(); + history.incrementSkipped(countType); } catch (Exception e) { e.printStackTrace(); history.incrementFailed(countType); diff --git a/src/test/java/org/alliancegenome/curation_api/ExpressionBulkUploadFmsITCase.java b/src/test/java/org/alliancegenome/curation_api/ExpressionBulkUploadFmsITCase.java index 
bdf753e26..c02becbf1 100644 --- a/src/test/java/org/alliancegenome/curation_api/ExpressionBulkUploadFmsITCase.java +++ b/src/test/java/org/alliancegenome/curation_api/ExpressionBulkUploadFmsITCase.java @@ -53,6 +53,8 @@ public class ExpressionBulkUploadFmsITCase extends BaseITCase { private final String anatomicalStructureUberonTermId2 = "UBERON:005"; private final String anatomicalSubstructureUberonTermId1 = "UBERON:006"; private final String anatomicalSubstructureUberonTermId2 = "UBERON:007"; + private final String annotationUniqueIdExpected = String.join(pipe, mmoTerm, gene, agrPublicationId, stageTermId, + "stage1", "trunk", anatomicalStructureTermId, cellularComponentTermId); @BeforeEach public void init() { @@ -78,7 +80,13 @@ public void expressionBulkUploadAllFields() throws Exception { .body("results", hasSize(1)) .body("results[0].dataProvider.sourceOrganization.abbreviation", is("ZFIN")) .body("results[0].uniqueId", is(experimentUniqueIdExpected)) - .body("results[0].expressionAnnotations.size()", is(1)); + .body("results[0].expressionAnnotations.size()", is(1)) + .body("results[0].entityAssayed.modEntityId", is(gene)) + .body("results[0].singleReference.crossReferences[0].referencedCurie", is(publicationId)) + .body("results[0].expressionAssayUsed.curie", is(mmoTerm)) + .body("results[0].obsolete", is(false)) + .body("results[0].internal", is(false)) + .body("results[0].expressionAnnotations[0].uniqueId", is(annotationUniqueIdExpected)); RestAssured.given().when() .header("Content-Type", "application/json") From 0010c39432736a6fbb56193376cf93d806aec19d Mon Sep 17 00:00:00 2001 From: Andres Becerra Date: Tue, 19 Nov 2024 15:09:47 +0000 Subject: [PATCH 074/118] SCRUM-3953: more overloading for countType --- .../GeneGeneticInteractionExecutor.java | 24 +++++++++---------- .../GeneMolecularInteractionExecutor.java | 24 +++++++++---------- .../jobs/executors/LoadFileExecutor.java | 8 +++++++ .../jobs/executors/ParalogyExecutor.java | 17 +++++++------ 4 files 
changed, 38 insertions(+), 35 deletions(-) diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/executors/GeneGeneticInteractionExecutor.java b/src/main/java/org/alliancegenome/curation_api/jobs/executors/GeneGeneticInteractionExecutor.java index 6bdb7dc9d..4d6a5084e 100644 --- a/src/main/java/org/alliancegenome/curation_api/jobs/executors/GeneGeneticInteractionExecutor.java +++ b/src/main/java/org/alliancegenome/curation_api/jobs/executors/GeneGeneticInteractionExecutor.java @@ -1,10 +1,11 @@ package org.alliancegenome.curation_api.jobs.executors; -import java.io.FileInputStream; -import java.util.ArrayList; -import java.util.List; -import java.util.zip.GZIPInputStream; - +import com.fasterxml.jackson.databind.MappingIterator; +import com.fasterxml.jackson.dataformat.csv.CsvMapper; +import com.fasterxml.jackson.dataformat.csv.CsvParser; +import com.fasterxml.jackson.dataformat.csv.CsvSchema; +import jakarta.enterprise.context.ApplicationScoped; +import jakarta.inject.Inject; import org.alliancegenome.curation_api.dao.GeneGeneticInteractionDAO; import org.alliancegenome.curation_api.jobs.util.CsvSchemaBuilder; import org.alliancegenome.curation_api.model.entities.bulkloads.BulkLoadFileHistory; @@ -12,13 +13,10 @@ import org.alliancegenome.curation_api.services.GeneGeneticInteractionService; import org.alliancegenome.curation_api.services.GeneInteractionService; -import com.fasterxml.jackson.databind.MappingIterator; -import com.fasterxml.jackson.dataformat.csv.CsvMapper; -import com.fasterxml.jackson.dataformat.csv.CsvParser; -import com.fasterxml.jackson.dataformat.csv.CsvSchema; - -import jakarta.enterprise.context.ApplicationScoped; -import jakarta.inject.Inject; +import java.io.FileInputStream; +import java.util.ArrayList; +import java.util.List; +import java.util.zip.GZIPInputStream; @ApplicationScoped public class GeneGeneticInteractionExecutor extends LoadFileExecutor { @@ -41,7 +39,7 @@ public void execLoad(BulkLoadFileHistory 
bulkLoadFileHistory) { bulkLoadFileHistory.setCount(interactionData.size()); updateHistory(bulkLoadFileHistory); - boolean success = runLoad(geneGeneticInteractionService, bulkLoadFileHistory, null, interactionData, interactionIdsLoaded, false, "Records"); + boolean success = runLoad(geneGeneticInteractionService, bulkLoadFileHistory, null, interactionData, interactionIdsLoaded, false); if (success) { runCleanup(geneInteractionService, bulkLoadFileHistory, "COMBINED", interactionIdsBefore, interactionIdsLoaded, "gene genetic interaction"); } diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/executors/GeneMolecularInteractionExecutor.java b/src/main/java/org/alliancegenome/curation_api/jobs/executors/GeneMolecularInteractionExecutor.java index d3c829ae1..1714fd19d 100644 --- a/src/main/java/org/alliancegenome/curation_api/jobs/executors/GeneMolecularInteractionExecutor.java +++ b/src/main/java/org/alliancegenome/curation_api/jobs/executors/GeneMolecularInteractionExecutor.java @@ -1,10 +1,11 @@ package org.alliancegenome.curation_api.jobs.executors; -import java.io.FileInputStream; -import java.util.ArrayList; -import java.util.List; -import java.util.zip.GZIPInputStream; - +import com.fasterxml.jackson.databind.MappingIterator; +import com.fasterxml.jackson.dataformat.csv.CsvMapper; +import com.fasterxml.jackson.dataformat.csv.CsvParser; +import com.fasterxml.jackson.dataformat.csv.CsvSchema; +import jakarta.enterprise.context.ApplicationScoped; +import jakarta.inject.Inject; import org.alliancegenome.curation_api.dao.GeneMolecularInteractionDAO; import org.alliancegenome.curation_api.jobs.util.CsvSchemaBuilder; import org.alliancegenome.curation_api.model.entities.bulkloads.BulkLoadFileHistory; @@ -12,13 +13,10 @@ import org.alliancegenome.curation_api.services.GeneInteractionService; import org.alliancegenome.curation_api.services.GeneMolecularInteractionService; -import com.fasterxml.jackson.databind.MappingIterator; -import 
com.fasterxml.jackson.dataformat.csv.CsvMapper; -import com.fasterxml.jackson.dataformat.csv.CsvParser; -import com.fasterxml.jackson.dataformat.csv.CsvSchema; - -import jakarta.enterprise.context.ApplicationScoped; -import jakarta.inject.Inject; +import java.io.FileInputStream; +import java.util.ArrayList; +import java.util.List; +import java.util.zip.GZIPInputStream; @ApplicationScoped public class GeneMolecularInteractionExecutor extends LoadFileExecutor { @@ -41,7 +39,7 @@ public void execLoad(BulkLoadFileHistory bulkLoadFileHistory) { bulkLoadFileHistory.setCount(interactionData.size()); updateHistory(bulkLoadFileHistory); - boolean success = runLoad(geneMolecularInteractionService, bulkLoadFileHistory, null, interactionData, interactionIdsLoaded, false, "Records"); + boolean success = runLoad(geneMolecularInteractionService, bulkLoadFileHistory, null, interactionData, interactionIdsLoaded, false); if (success) { runCleanup(geneInteractionService, bulkLoadFileHistory, "COMBINED", interactionIdsBefore, interactionIdsLoaded, "gene molecular interaction"); } diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/executors/LoadFileExecutor.java b/src/main/java/org/alliancegenome/curation_api/jobs/executors/LoadFileExecutor.java index 929edeb0a..7f619c3d0 100644 --- a/src/main/java/org/alliancegenome/curation_api/jobs/executors/LoadFileExecutor.java +++ b/src/main/java/org/alliancegenome/curation_api/jobs/executors/LoadFileExecutor.java @@ -193,6 +193,14 @@ protected boolean runLoad(BaseUpser return runLoad(service, history, dataProvider, objectList, idsAdded, true, "Records"); } + protected boolean runLoad(BaseUpsertServiceInterface service, BulkLoadFileHistory history, BackendBulkDataProvider dataProvider, List objectList, List idsAdded, Boolean terminateFailing) { + return runLoad(service, history, dataProvider, objectList, idsAdded, terminateFailing, "Records"); + } + + protected boolean runLoad(BaseUpsertServiceInterface service, BulkLoadFileHistory 
history, BackendBulkDataProvider dataProvider, List objectList, List idsAdded, String countType) { + return runLoad(service, history, dataProvider, objectList, idsAdded, true, countType); + } + protected boolean runLoad(BaseUpsertServiceInterface service, BulkLoadFileHistory history, BackendBulkDataProvider dataProvider, List objectList, List idsAdded, Boolean terminateFailing, String countType) { ProcessDisplayHelper ph = new ProcessDisplayHelper(); ph.addDisplayHandler(loadProcessDisplayService); diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/executors/ParalogyExecutor.java b/src/main/java/org/alliancegenome/curation_api/jobs/executors/ParalogyExecutor.java index 8e6095894..113871b64 100644 --- a/src/main/java/org/alliancegenome/curation_api/jobs/executors/ParalogyExecutor.java +++ b/src/main/java/org/alliancegenome/curation_api/jobs/executors/ParalogyExecutor.java @@ -1,10 +1,8 @@ package org.alliancegenome.curation_api.jobs.executors; -import java.io.FileInputStream; -import java.util.ArrayList; -import java.util.List; -import java.util.zip.GZIPInputStream; - +import io.quarkus.logging.Log; +import jakarta.enterprise.context.ApplicationScoped; +import jakarta.inject.Inject; import org.alliancegenome.curation_api.dao.GeneToGeneParalogyDAO; import org.alliancegenome.curation_api.enums.BackendBulkDataProvider; import org.alliancegenome.curation_api.interfaces.AGRCurationSchemaVersion; @@ -15,9 +13,10 @@ import org.alliancegenome.curation_api.services.GeneToGeneParalogyService; import org.apache.commons.lang3.StringUtils; -import io.quarkus.logging.Log; -import jakarta.enterprise.context.ApplicationScoped; -import jakarta.inject.Inject; +import java.io.FileInputStream; +import java.util.ArrayList; +import java.util.List; +import java.util.zip.GZIPInputStream; @ApplicationScoped public class ParalogyExecutor extends LoadFileExecutor { @@ -48,7 +47,7 @@ public void execLoad(BulkLoadFileHistory bulkLoadFileHistory) { 
bulkLoadFileHistory.setCount(paralogyData.getData().size()); updateHistory(bulkLoadFileHistory); - boolean success = runLoad(geneToGeneParalogyService, bulkLoadFileHistory, dataProvider, paralogyData.getData(), paralogyIdsLoaded, false, "Records"); + boolean success = runLoad(geneToGeneParalogyService, bulkLoadFileHistory, dataProvider, paralogyData.getData(), paralogyIdsLoaded, false); if (success) { runCleanup(geneToGeneParalogyService, bulkLoadFileHistory, fms.getFmsDataSubType(), paralogyPairsBefore, paralogyIdsLoaded, fms.getFmsDataType(), false); From 11514f86ac501e21a8797f61bfdde41a7228420c Mon Sep 17 00:00:00 2001 From: markquintontulloch Date: Wed, 20 Nov 2024 16:52:27 +0000 Subject: [PATCH 075/118] Add fields to JsonIgnore --- .../ConstructGenomicEntityAssociation.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/main/java/org/alliancegenome/curation_api/model/entities/associations/constructAssociations/ConstructGenomicEntityAssociation.java b/src/main/java/org/alliancegenome/curation_api/model/entities/associations/constructAssociations/ConstructGenomicEntityAssociation.java index b848b3b0c..bee923f50 100644 --- a/src/main/java/org/alliancegenome/curation_api/model/entities/associations/constructAssociations/ConstructGenomicEntityAssociation.java +++ b/src/main/java/org/alliancegenome/curation_api/model/entities/associations/constructAssociations/ConstructGenomicEntityAssociation.java @@ -74,7 +74,8 @@ public class ConstructGenomicEntityAssociation extends EvidenceAssociation { @JsonView({ View.FieldsOnly.class }) @JsonIgnoreProperties({ "alleleGeneAssociations", "constructGenomicEntityAssociations", "sequenceTargetingReagentGeneAssociations", - "transcriptGenomicLocationAssociations", "exonGenomicLocationAssociations", "codingSequenceGenomicLocationAssociations" + "transcriptGenomicLocationAssociations", "exonGenomicLocationAssociations", "codingSequenceGenomicLocationAssociations", + "transcriptGeneAssociations", 
"geneGenomicLocationAssociations", "transcriptExonAssociations", "transcriptCodingSequenceAssociations" }) private GenomicEntity constructGenomicEntityAssociationObject; From b08e4e59ea5aa95adc8818b7e1d923d5dca3eec0 Mon Sep 17 00:00:00 2001 From: Christian Pich Date: Wed, 20 Nov 2024 19:07:20 +0100 Subject: [PATCH 076/118] SCRUM-4190 (#1714) * SCRUM-4190 make xenbase and human gaf file load * SCRUM-4190 fix checkstyle, create new flyway script * adjust xenbase references as org and species * convert to FMS upload * remove unused import * ZFIN-4190 performance enhance cleanup method for construct Genomic Entity Association * Revert "ZFIN-4190 performance enhance cleanup method for construct Genomic Entity Association" This reverts commit c50905c8b2db5412f44702918e1b1271477d0505. * rename flyway scripts * remove superfluous script --- .../GeneOntologyAnnotationExecutor.java | 22 ++--- .../GeneOntologyAnnotationService.java | 27 ++++-- .../services/OrganizationService.java | 18 ++-- .../db/migration/v0.38.0.10__gaf-load.sql | 90 +++++++++++++++++++ 4 files changed, 130 insertions(+), 27 deletions(-) create mode 100644 src/main/resources/db/migration/v0.38.0.10__gaf-load.sql diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/executors/GeneOntologyAnnotationExecutor.java b/src/main/java/org/alliancegenome/curation_api/jobs/executors/GeneOntologyAnnotationExecutor.java index 9e1737611..f6391b5ab 100644 --- a/src/main/java/org/alliancegenome/curation_api/jobs/executors/GeneOntologyAnnotationExecutor.java +++ b/src/main/java/org/alliancegenome/curation_api/jobs/executors/GeneOntologyAnnotationExecutor.java @@ -6,8 +6,8 @@ import org.alliancegenome.curation_api.exceptions.ObjectUpdateException; import org.alliancegenome.curation_api.model.entities.GeneOntologyAnnotation; import org.alliancegenome.curation_api.model.entities.Organization; +import org.alliancegenome.curation_api.model.entities.bulkloads.BulkFMSLoad; import 
org.alliancegenome.curation_api.model.entities.bulkloads.BulkLoadFileHistory; -import org.alliancegenome.curation_api.model.entities.bulkloads.BulkURLLoad; import org.alliancegenome.curation_api.model.ingest.dto.GeneOntologyAnnotationDTO; import org.alliancegenome.curation_api.services.GeneOntologyAnnotationService; import org.alliancegenome.curation_api.services.OrganizationService; @@ -33,16 +33,11 @@ public class GeneOntologyAnnotationExecutor extends LoadFileExecutor { public void execLoad(BulkLoadFileHistory bulkLoadFileHistory) throws IOException { - String url = ((BulkURLLoad) bulkLoadFileHistory.getBulkLoad()).getBulkloadUrl(); - - String[] tok = url.split("/"); - String orgAbbrev = tok[tok.length - 1].toUpperCase(); - String abbr = orgAbbrev.split("\\.")[0]; + String abbr = ((BulkFMSLoad) bulkLoadFileHistory.getBulkLoad()).getFmsDataSubType(); Organization organization = organizationService.getByAbbr(abbr).getEntity(); // curie, List Map> uiMap = new HashMap<>(); - Set orgIDs = new HashSet<>(); GZIPInputStream stream = new GZIPInputStream(new FileInputStream(bulkLoadFileHistory.getBulkLoadFile().getLocalFilePath())); try (BufferedReader br = new BufferedReader(new InputStreamReader(stream))) { Stream lines = br.lines(); @@ -51,10 +46,9 @@ public void execLoad(BulkLoadFileHistory bulkLoadFileHistory) throws IOException lines.filter(s -> !s.startsWith("!") && StringUtils.isNotEmpty(s)).forEach(s -> { String[] token = s.split("\t"); String orgID = token[0]; - orgIDs.add(orgID); String modID = token[1]; String goID = token[4]; - if (abbr.equals(orgID)) { + if (abbr.equalsIgnoreCase(orgID) || orgID.equalsIgnoreCase("Xenbase") || abbr.equals("HUMAN") && orgID.equals("RGD")) { List goIDs = uiMap.computeIfAbsent(modID, list -> new ArrayList<>()); goIDs.add(goID); } @@ -77,7 +71,15 @@ public void execLoad(BulkLoadFileHistory bulkLoadFileHistory) throws IOException .stream() .map(entry -> entry.getValue().stream().map(goID -> { GeneOntologyAnnotationDTO dto = new 
GeneOntologyAnnotationDTO(); - dto.setGeneIdentifier(abbr + ":" + entry.getKey()); + String prefix = abbr; + if (abbr.equalsIgnoreCase("XB")) { + prefix = "Xenbase"; + } + if (abbr.equalsIgnoreCase("HUMAN")) { + prefix = null; + } + String geneIdentifier = prefix != null ? prefix + ":" + entry.getKey() : entry.getKey(); + dto.setGeneIdentifier(geneIdentifier); dto.setGoTermCurie(goID); return dto; }).toList()).flatMap(Collection::stream).toList(); diff --git a/src/main/java/org/alliancegenome/curation_api/services/GeneOntologyAnnotationService.java b/src/main/java/org/alliancegenome/curation_api/services/GeneOntologyAnnotationService.java index 90ff6d504..2c7575e7f 100644 --- a/src/main/java/org/alliancegenome/curation_api/services/GeneOntologyAnnotationService.java +++ b/src/main/java/org/alliancegenome/curation_api/services/GeneOntologyAnnotationService.java @@ -15,14 +15,17 @@ import org.alliancegenome.curation_api.response.ObjectResponse; import org.alliancegenome.curation_api.services.base.BaseEntityCrudService; import org.alliancegenome.curation_api.services.validation.DataProviderValidator; +import org.apache.commons.collections.CollectionUtils; +import java.util.ArrayList; import java.util.HashMap; +import java.util.List; import java.util.Map; @RequestScoped public class GeneOntologyAnnotationService extends BaseEntityCrudService { - private Species species; + private List species; public static final String RESOURCE_DESCRIPTOR_PREFIX = "ENSEMBL"; public static final String RESOURCE_DESCRIPTOR_PAGE_NAME = "default"; // @@ -32,7 +35,6 @@ public class GeneOntologyAnnotationService extends BaseEntityCrudService gafMap = new HashMap<>(); - @Inject @AuthenticatedUser protected Person authenticatedPerson; @@ -92,7 +94,8 @@ private void addNewRecordToMap(GeneOntologyAnnotation gafNew, GeneOntologyAnnota public Long getGeneID(GeneOntologyAnnotationDTO uiEntity, String orgAbbreviation) { if (accessionGeneMap.isEmpty()) { - accessionGeneMap = 
geneDAO.getAllGeneIdsPerSpecies(getSpecies(orgAbbreviation)); + List speciesList = getSpecies(orgAbbreviation); + speciesList.forEach(species -> accessionGeneMap.putAll(geneDAO.getAllGeneIdsPerSpecies(species))); } Long geneID = accessionGeneMap.get(uiEntity.getGeneIdentifier()); return geneID; @@ -106,16 +109,24 @@ private Long getGOID(GeneOntologyAnnotationDTO uiEntity) { return goID; } - private Species getSpecies(String orgAbbreviation) { - if (species != null) { + private List getSpecies(String orgAbbreviation) { + if (CollectionUtils.isNotEmpty(species)) { return species; } - Map map = new HashMap<>(); - map.put("displayName", orgAbbreviation); - species = speciesDAO.findByParams(map).getSingleResult(); + if (orgAbbreviation.equalsIgnoreCase("XB")) { + species = new ArrayList<>(); + species.add(getSingleSpecies("XBXL")); + species.add(getSingleSpecies("XBXT")); + } else { + species = List.of(getSingleSpecies(orgAbbreviation)); + } return species; } + private Species getSingleSpecies(String orgAbbreviation) { + return speciesDAO.findByField("displayName", orgAbbreviation).getSingleResult(); + } + public ObjectResponse validate(DataProvider uiEntity) { return dataProviderValidator.validateDataProvider(uiEntity, null, true); } diff --git a/src/main/java/org/alliancegenome/curation_api/services/OrganizationService.java b/src/main/java/org/alliancegenome/curation_api/services/OrganizationService.java index 3b4d20b4b..672c720e9 100644 --- a/src/main/java/org/alliancegenome/curation_api/services/OrganizationService.java +++ b/src/main/java/org/alliancegenome/curation_api/services/OrganizationService.java @@ -1,23 +1,23 @@ package org.alliancegenome.curation_api.services; -import java.util.Date; -import java.util.HashMap; - +import io.quarkus.logging.Log; +import jakarta.annotation.PostConstruct; +import jakarta.enterprise.context.RequestScoped; +import jakarta.inject.Inject; import org.alliancegenome.curation_api.dao.OrganizationDAO; import 
org.alliancegenome.curation_api.model.entities.Organization; import org.alliancegenome.curation_api.response.ObjectResponse; import org.alliancegenome.curation_api.response.SearchResponse; import org.alliancegenome.curation_api.services.base.BaseEntityCrudService; -import io.quarkus.logging.Log; -import jakarta.annotation.PostConstruct; -import jakarta.enterprise.context.RequestScoped; -import jakarta.inject.Inject; +import java.util.Date; +import java.util.HashMap; @RequestScoped public class OrganizationService extends BaseEntityCrudService { - @Inject OrganizationDAO organizationDAO; + @Inject + OrganizationDAO organizationDAO; Date orgRequest; HashMap orgIdCacheMap = new HashMap<>(); @@ -54,7 +54,7 @@ public ObjectResponse getById(Long id) { public ObjectResponse getByAbbr(String abbr) { Organization org = null; - SearchResponse orgResponse = null; + SearchResponse orgResponse; if (orgRequest != null) { if (orgAbbrCacheMap.containsKey(abbr)) { diff --git a/src/main/resources/db/migration/v0.38.0.10__gaf-load.sql b/src/main/resources/db/migration/v0.38.0.10__gaf-load.sql new file mode 100644 index 000000000..6818da615 --- /dev/null +++ b/src/main/resources/db/migration/v0.38.0.10__gaf-load.sql @@ -0,0 +1,90 @@ +-- delete old bulk URL load +delete +from bulkurlload +where id in (select id FROM bulkload WHERE backendbulkloadtype = 'GAF'); + +delete from bulkfmsload +where id in (select id FROM bulkload WHERE backendbulkloadtype = 'GAF'); + +delete +from bulkscheduledload +where id in (select id FROM bulkload WHERE backendbulkloadtype = 'GAF'); + +delete from bulkloadfilehistory +where bulkload_id in (select id FROM bulkload WHERE backendbulkloadtype = 'GAF'); + +delete from bulkload +where id in (select id FROM bulkload WHERE backendbulkloadtype = 'GAF'); + +delete from bulkloadgroup where name = 'File Management System (FMS) GAF Loads'; +-- Create bulk loads got the GAF load +INSERT INTO bulkloadgroup (id, name) +VALUES (nextval('bulkloadgroup_seq'), 'File 
Management System (FMS) GAF Loads'); +INSERT INTO bulkload (id, backendbulkloadtype, name, bulkloadstatus, group_id) +SELECT nextval('bulkload_seq'), 'GAF', 'FB GAF Load', 'STOPPED', id +FROM bulkloadgroup +WHERE name = 'File Management System (FMS) GAF Loads'; +INSERT INTO bulkload (id, backendbulkloadtype, name, bulkloadstatus, group_id) +SELECT nextval('bulkload_seq'), 'GAF', 'HUMAN GAF Load', 'STOPPED', id +FROM bulkloadgroup +WHERE name = 'File Management System (FMS) GAF Loads'; +INSERT INTO bulkload (id, backendbulkloadtype, name, bulkloadstatus, group_id) +SELECT nextval('bulkload_seq'), 'GAF', 'MGI GAF Load', 'STOPPED', id +FROM bulkloadgroup +WHERE name = 'File Management System (FMS) GAF Loads'; +INSERT INTO bulkload (id, backendbulkloadtype, name, bulkloadstatus, group_id) +SELECT nextval('bulkload_seq'), 'GAF', 'RGD GAF Load', 'STOPPED', id +FROM bulkloadgroup +WHERE name = 'File Management System (FMS) GAF Loads'; +INSERT INTO bulkload (id, backendbulkloadtype, name, bulkloadstatus, group_id) +SELECT nextval('bulkload_seq'), 'GAF', 'SGD GAF Load', 'STOPPED', id +FROM bulkloadgroup +WHERE name = 'File Management System (FMS) GAF Loads'; +INSERT INTO bulkload (id, backendbulkloadtype, name, bulkloadstatus, group_id) +SELECT nextval('bulkload_seq'), 'GAF', 'WB GAF Load', 'STOPPED', id +FROM bulkloadgroup +WHERE name = 'File Management System (FMS) GAF Loads'; +INSERT INTO bulkload (id, backendbulkloadtype, name, bulkloadstatus, group_id) +SELECT nextval('bulkload_seq'), 'GAF', 'XB GAF Load', 'STOPPED', id +FROM bulkloadgroup +WHERE name = 'File Management System (FMS) GAF Loads'; +INSERT INTO bulkload (id, backendbulkloadtype, name, bulkloadstatus, group_id) +SELECT nextval('bulkload_seq'), 'GAF', 'ZFIN GAF Load', 'STOPPED', id +FROM bulkloadgroup +WHERE name = 'File Management System (FMS) GAF Loads'; +INSERT INTO bulkscheduledload (id, cronschedule, scheduleactive) +SELECT id, '0 0 22 ? 
* SUN-THU', false +FROM bulkload +WHERE backendbulkloadtype = 'GAF'; +INSERT INTO bulkfmsload (id, fmsdatatype, fmsdatasubtype) +SELECT id, 'GAF', 'FB' +FROM bulkload +WHERE name = 'FB GAF Load'; +INSERT INTO bulkfmsload (id, fmsdatatype, fmsdatasubtype) +SELECT id, 'GAF', 'HUMAN' +FROM bulkload +WHERE name = 'HUMAN GAF Load'; +INSERT INTO bulkfmsload (id, fmsdatatype, fmsdatasubtype) +SELECT id, 'GAF', 'MGI' +FROM bulkload +WHERE name = 'MGI GAF Load'; +INSERT INTO bulkfmsload (id, fmsdatatype, fmsdatasubtype) +SELECT id, 'GAF', 'RGD' +FROM bulkload +WHERE name = 'RGD GAF Load'; +INSERT INTO bulkfmsload (id, fmsdatatype, fmsdatasubtype) +SELECT id, 'GAF', 'SGD' +FROM bulkload +WHERE name = 'SGD GAF Load'; +INSERT INTO bulkfmsload (id, fmsdatatype, fmsdatasubtype) +SELECT id, 'GAF', 'WB' +FROM bulkload +WHERE name = 'WB GAF Load'; +INSERT INTO bulkfmsload (id, fmsdatatype, fmsdatasubtype) +SELECT id, 'GAF', 'XB' +FROM bulkload +WHERE name = 'XB GAF Load'; +INSERT INTO bulkfmsload (id, fmsdatatype, fmsdatasubtype) +SELECT id, 'GAF', 'ZFIN' +FROM bulkload +WHERE name = 'ZFIN GAF Load'; From 9cfca0132ae10fd8109369f677f90b12e6d97f78 Mon Sep 17 00:00:00 2001 From: Andres Becerra Date: Wed, 20 Nov 2024 18:57:24 +0000 Subject: [PATCH 077/118] SCRUM-3953: add missing XBXL load --- .../constants/EntityFieldConstants.java | 15 +++++++++------ .../services/GeneExpressionExperimentService.java | 4 ++++ .../GeneExpressionAnnotationFmsDTOValidator.java | 1 + .../v0.38.0.12__gene_expression_xbxl.sql | 8 ++++++++ 4 files changed, 22 insertions(+), 6 deletions(-) create mode 100644 src/main/resources/db/migration/v0.38.0.12__gene_expression_xbxl.sql diff --git a/src/main/java/org/alliancegenome/curation_api/constants/EntityFieldConstants.java b/src/main/java/org/alliancegenome/curation_api/constants/EntityFieldConstants.java index 49c1cc358..557a17593 100644 --- a/src/main/java/org/alliancegenome/curation_api/constants/EntityFieldConstants.java +++ 
b/src/main/java/org/alliancegenome/curation_api/constants/EntityFieldConstants.java @@ -11,10 +11,11 @@ private EntityFieldConstants() { public static final String SOURCE_ORGANIZATION = "sourceOrganization.abbreviation"; public static final String DATA_PROVIDER = "dataProvider." + SOURCE_ORGANIZATION; public static final String SECONDARY_DATA_PROVIDER = "secondaryDataProvider." + SOURCE_ORGANIZATION; - + public static final String DA_SUBJECT = "diseaseAnnotationSubject"; public static final String EA_SUBJECT = "expressionAnnotationSubject"; public static final String PA_SUBJECT = "phenotypeAnnotationSubject"; + public static final String EXP_EXPERIMENT_SUBJECT = "entityAssayed"; public static final String ALLELE_ASSOCIATION_SUBJECT = "alleleAssociationSubject"; public static final String CODING_SEQUENCE_ASSOCIATION_SUBJECT = "codingSequenceAssociationSubject"; public static final String CONSTRUCT_ASSOCIATION_SUBJECT = "constructAssociationSubject"; @@ -23,11 +24,13 @@ private EntityFieldConstants() { public static final String VARIANT_ASSOCIATION_SUBJECT = "variantAssociationSubject"; public static final String SQTR_ASSOCIATION_SUBJECT = "sequenceTargetingReagentAssociationSubject"; public static final String TRANSCRIPT_ASSOCIATION_SUBJECT = "transcriptAssociationSubject"; - + + public static final String DA_SUBJECT_TAXON = DA_SUBJECT + "." + TAXON; public static final String EA_SUBJECT_TAXON = EA_SUBJECT + "." + TAXON; public static final String PA_SUBJECT_TAXON = PA_SUBJECT + "." + TAXON; - + public static final String EXP_EXPERIMENT_TAXON = EXP_EXPERIMENT_SUBJECT + "." + TAXON; + public static final String ALLELE_ASSOCIATION_SUBJECT_DATA_PROVIDER = ALLELE_ASSOCIATION_SUBJECT + "." + DATA_PROVIDER; public static final String CODING_SEQUENCE_ASSOCIATION_SUBJECT_DATA_PROVIDER = CODING_SEQUENCE_ASSOCIATION_SUBJECT + "." + DATA_PROVIDER; public static final String CONSTRUCT_ASSOCIATION_SUBJECT_DATA_PROVIDER = CONSTRUCT_ASSOCIATION_SUBJECT + "." 
+ DATA_PROVIDER; @@ -36,13 +39,13 @@ private EntityFieldConstants() { public static final String VARIANT_ASSOCIATION_SUBJECT_DATA_PROVIDER = VARIANT_ASSOCIATION_SUBJECT + "." + DATA_PROVIDER; public static final String SQTR_ASSOCIATION_SUBJECT_DATA_PROVIDER = SQTR_ASSOCIATION_SUBJECT + "." + DATA_PROVIDER; public static final String TRANSCRIPT_ASSOCIATION_SUBJECT_DATA_PROVIDER = TRANSCRIPT_ASSOCIATION_SUBJECT + "." + DATA_PROVIDER; - + public static final String CODING_SEQUENCE_ASSOCIATION_SUBJECT_TAXON = CODING_SEQUENCE_ASSOCIATION_SUBJECT + "." + TAXON; public static final String EXON_ASSOCIATION_SUBJECT_TAXON = EXON_ASSOCIATION_SUBJECT + "." + TAXON; public static final String GENE_ASSOCIATION_SUBJECT_TAXON = GENE_ASSOCIATION_SUBJECT + "." + TAXON; public static final String TRANSCRIPT_ASSOCIATION_SUBJECT_TAXON = TRANSCRIPT_ASSOCIATION_SUBJECT + "." + TAXON; public static final String VARIANT_ASSOCIATION_SUBJECT_TAXON = VARIANT_ASSOCIATION_SUBJECT + "." + TAXON; - + public static final String GENOMIC_LOCATION_ASSOCIATION_OBJECT = "GenomicLocationAssociationObject"; public static final String CODING_SEQUENCE_GENOMIC_LOCATION_ASSOCIATION_OBJECT = "codingSequence" + GENOMIC_LOCATION_ASSOCIATION_OBJECT; public static final String EXON_GENOMIC_LOCATION_ASSOCIATION_OBJECT = "exon" + GENOMIC_LOCATION_ASSOCIATION_OBJECT; @@ -53,7 +56,7 @@ private EntityFieldConstants() { public static final String EXON_GENOMIC_LOCATION_ASSOCIATION_OBJECT_ASSEMBLY = "exon" + GENOMIC_LOCATION_ASSOCIATION_OBJECT + "." + ASSEMBLY; public static final String TRANSCRIPT_GENOMIC_LOCATION_ASSOCIATION_OBJECT_ASSEMBLY = "transcript" + GENOMIC_LOCATION_ASSOCIATION_OBJECT + "." + ASSEMBLY; public static final String GENE_GENOMIC_LOCATION_ASSOCIATION_OBJECT_ASSEMBLY = "gene" + GENOMIC_LOCATION_ASSOCIATION_OBJECT + "." + ASSEMBLY; - + public static final String SUBJECT_GENE_DATA_PROVIDER = "subjectGene." + DATA_PROVIDER; public static final String SUBJECT_GENE_TAXON = "subjectGene." 
+ TAXON; } diff --git a/src/main/java/org/alliancegenome/curation_api/services/GeneExpressionExperimentService.java b/src/main/java/org/alliancegenome/curation_api/services/GeneExpressionExperimentService.java index 70e702750..4d1a8514f 100644 --- a/src/main/java/org/alliancegenome/curation_api/services/GeneExpressionExperimentService.java +++ b/src/main/java/org/alliancegenome/curation_api/services/GeneExpressionExperimentService.java @@ -15,6 +15,7 @@ import org.alliancegenome.curation_api.response.SearchResponse; import org.alliancegenome.curation_api.services.base.BaseEntityCrudService; import org.alliancegenome.curation_api.services.ontology.MmoTermService; +import org.apache.commons.lang.StringUtils; import java.util.*; @@ -38,6 +39,9 @@ protected void init() { public List getExperimentIdsByDataProvider(BackendBulkDataProvider dataProvider) { Map params = new HashMap<>(); params.put(EntityFieldConstants.DATA_PROVIDER, dataProvider.sourceOrganization); + if (StringUtils.equals(dataProvider.sourceOrganization, "RGD") || StringUtils.equals(dataProvider.sourceOrganization, "XB")) { + params.put(EntityFieldConstants.EXP_EXPERIMENT_TAXON, dataProvider.canonicalTaxonCurie); + } return geneExpressionExperimentDAO.findIdsByParams(params); } diff --git a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/GeneExpressionAnnotationFmsDTOValidator.java b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/GeneExpressionAnnotationFmsDTOValidator.java index ee5e5b322..ec6a8a36f 100644 --- a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/GeneExpressionAnnotationFmsDTOValidator.java +++ b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/GeneExpressionAnnotationFmsDTOValidator.java @@ -48,6 +48,7 @@ public class GeneExpressionAnnotationFmsDTOValidator { public GeneExpressionAnnotation validateAnnotation(GeneExpressionFmsDTO geneExpressionFmsDTO, BackendBulkDataProvider dataProvider, 
Map> experiments) throws ValidationException { ObjectResponse response = new ObjectResponse<>(); GeneExpressionAnnotation geneExpressionAnnotation = new GeneExpressionAnnotation(); + geneExpressionAnnotation.setExpressionPattern(new ExpressionPattern()); String uniqueId = "empty"; String referenceCurie = "empty"; diff --git a/src/main/resources/db/migration/v0.38.0.12__gene_expression_xbxl.sql b/src/main/resources/db/migration/v0.38.0.12__gene_expression_xbxl.sql new file mode 100644 index 000000000..9fd5fd29b --- /dev/null +++ b/src/main/resources/db/migration/v0.38.0.12__gene_expression_xbxl.sql @@ -0,0 +1,8 @@ +INSERT INTO bulkload (id, backendbulkloadtype, name, bulkloadstatus, group_id) +SELECT nextval('bulkload_seq'), 'EXPRESSION', 'XBXL Expression Load', 'STOPPED', id FROM bulkloadgroup WHERE name = 'File Management System (FMS) Expression Loads'; + +INSERT INTO bulkscheduledload (id, cronschedule, scheduleactive) +SELECT id, '0 0 22 ? * SUN-THU', true FROM bulkload WHERE name = 'XBXL Expression Load'; + +INSERT INTO bulkfmsload (id, fmsdatatype, fmsdatasubtype) +SELECT id, 'EXPRESSION', 'XBXL' FROM bulkload WHERE name = 'XBXL Expression Load'; From 56f6b57bcf304708bc678d4bb8a0d1bbd180785d Mon Sep 17 00:00:00 2001 From: Christian Pich Date: Wed, 11 Sep 2024 15:52:46 +0200 Subject: [PATCH 078/118] SCRUM-4436 improve construct association load performance --- .../curation_api/dao/ConstructDAO.java | 9 ++++- .../curation_api/dao/GenomicEntityDAO.java | 10 +++++- .../services/ConstructService.java | 2 +- .../services/GenomicEntityService.java | 2 +- ...tGenomicEntityAssociationDTOValidator.java | 36 +++++++++---------- 5 files changed, 36 insertions(+), 23 deletions(-) diff --git a/src/main/java/org/alliancegenome/curation_api/dao/ConstructDAO.java b/src/main/java/org/alliancegenome/curation_api/dao/ConstructDAO.java index 1d588c9ee..f721fb531 100644 --- a/src/main/java/org/alliancegenome/curation_api/dao/ConstructDAO.java +++ 
b/src/main/java/org/alliancegenome/curation_api/dao/ConstructDAO.java @@ -17,7 +17,9 @@ protected ConstructDAO() { } public Map getConstructIdMap() { - Map constructIdMap = new HashMap<>(); + if (constructIdMap.size() > 0) { + return constructIdMap; + } Query q = entityManager.createNativeQuery("SELECT a.id, a.modEntityId, a.modInternalId FROM Reagent as a where exists (select * from construct as g where g.id = a.id)"); List ids = q.getResultList(); ids.forEach(record -> { @@ -31,4 +33,9 @@ public Map getConstructIdMap() { return constructIdMap; } + private Map constructIdMap = new HashMap<>(); + + public long getConstructIdByModID(String modID) { + return getConstructIdMap().get(modID); + } } diff --git a/src/main/java/org/alliancegenome/curation_api/dao/GenomicEntityDAO.java b/src/main/java/org/alliancegenome/curation_api/dao/GenomicEntityDAO.java index bc6d184da..6c94020a7 100644 --- a/src/main/java/org/alliancegenome/curation_api/dao/GenomicEntityDAO.java +++ b/src/main/java/org/alliancegenome/curation_api/dao/GenomicEntityDAO.java @@ -17,7 +17,9 @@ protected GenomicEntityDAO() { } public Map getGenomicEntityIdMap() { - Map genomicEntityIdMap = new HashMap<>(); + if (genomicEntityIdMap.size() > 0) { + return genomicEntityIdMap; + } Query q = entityManager.createNativeQuery("SELECT a.id, a.modEntityId, a.modInternalId FROM BiologicalEntity as a where exists (select * from genomicentity as g where g.id = a.id)"); List ids = q.getResultList(); ids.forEach(record -> { @@ -31,4 +33,10 @@ public Map getGenomicEntityIdMap() { return genomicEntityIdMap; } + private Map genomicEntityIdMap = new HashMap<>(); + + public long getGenomicEntityIdByModID(String modID) { + return getGenomicEntityIdMap().get(modID); + } + } diff --git a/src/main/java/org/alliancegenome/curation_api/services/ConstructService.java b/src/main/java/org/alliancegenome/curation_api/services/ConstructService.java index 5b7dbd368..ca1c025a3 100644 --- 
a/src/main/java/org/alliancegenome/curation_api/services/ConstructService.java +++ b/src/main/java/org/alliancegenome/curation_api/services/ConstructService.java @@ -143,7 +143,7 @@ public Map getConstructIdMap() { private Map constructIdMap = new HashMap<>(); public Long getIdByModID(String modID) { - return getConstructIdMap().get(modID); + return constructDAO.getConstructIdByModID(modID); } public Construct getShallowEntity(Long id) { diff --git a/src/main/java/org/alliancegenome/curation_api/services/GenomicEntityService.java b/src/main/java/org/alliancegenome/curation_api/services/GenomicEntityService.java index a0887dca4..fa0db1163 100644 --- a/src/main/java/org/alliancegenome/curation_api/services/GenomicEntityService.java +++ b/src/main/java/org/alliancegenome/curation_api/services/GenomicEntityService.java @@ -51,7 +51,7 @@ public Map getGenomicEntityIdMap() { private Map genomicEntityIdMap = new HashMap<>(); public Long getIdByModID(String modID) { - return getGenomicEntityIdMap().get(modID); + return genomicEntityDAO.getGenomicEntityIdByModID(modID); } public GenomicEntity getShallowEntity(Long id) { diff --git a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/associations/constructAssociations/ConstructGenomicEntityAssociationDTOValidator.java b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/associations/constructAssociations/ConstructGenomicEntityAssociationDTOValidator.java index d5b8a7c31..8080ac3b3 100644 --- a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/associations/constructAssociations/ConstructGenomicEntityAssociationDTOValidator.java +++ b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/associations/constructAssociations/ConstructGenomicEntityAssociationDTOValidator.java @@ -1,9 +1,7 @@ package org.alliancegenome.curation_api.services.validation.dto.associations.constructAssociations; -import java.util.ArrayList; -import java.util.HashMap; -import 
java.util.List; - +import jakarta.enterprise.context.RequestScoped; +import jakarta.inject.Inject; import org.alliancegenome.curation_api.constants.ValidationConstants; import org.alliancegenome.curation_api.constants.VocabularyConstants; import org.alliancegenome.curation_api.dao.associations.constructAssociations.ConstructGenomicEntityAssociationDAO; @@ -27,17 +25,23 @@ import org.apache.commons.collections.CollectionUtils; import org.apache.commons.lang3.StringUtils; -import jakarta.enterprise.context.RequestScoped; -import jakarta.inject.Inject; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; @RequestScoped public class ConstructGenomicEntityAssociationDTOValidator extends EvidenceAssociationDTOValidator { - @Inject ConstructService constructService; - @Inject GenomicEntityService genomicEntityService; - @Inject NoteDTOValidator noteDtoValidator; - @Inject VocabularyTermService vocabularyTermService; - @Inject ConstructGenomicEntityAssociationDAO constructGenomicEntityAssociationDAO; + @Inject + ConstructService constructService; + @Inject + GenomicEntityService genomicEntityService; + @Inject + NoteDTOValidator noteDtoValidator; + @Inject + VocabularyTermService vocabularyTermService; + @Inject + ConstructGenomicEntityAssociationDAO constructGenomicEntityAssociationDAO; private ObjectResponse assocResponse; @@ -47,10 +51,7 @@ public ConstructGenomicEntityAssociation validateConstructGenomicEntityAssociati Construct construct = null; if (StringUtils.isNotBlank(dto.getConstructIdentifier())) { - Long constructId = constructService.getIdByModID(dto.getConstructIdentifier()); - if (constructId != null) { - construct = constructService.getShallowEntity(constructId); - } + construct = constructService.getShallowEntity(constructService.getIdByModID(dto.getConstructIdentifier())); if (construct == null) { assocResponse.addErrorMessage("construct_identifier", ValidationConstants.INVALID_MESSAGE); } else { @@ -66,10 +67,7 @@ public 
ConstructGenomicEntityAssociation validateConstructGenomicEntityAssociati if (StringUtils.isBlank(dto.getGenomicEntityIdentifier())) { assocResponse.addErrorMessage("genomic_entity_identifier", ValidationConstants.REQUIRED_MESSAGE); } else { - Long genomicEntityId = genomicEntityService.getIdByModID(dto.getGenomicEntityIdentifier()); - if (genomicEntityId != null) { - genomicEntity = genomicEntityService.getShallowEntity(genomicEntityId); - } + genomicEntity = genomicEntityService.getShallowEntity(genomicEntityService.getIdByModID(dto.getGenomicEntityIdentifier())); if (genomicEntity == null) { assocResponse.addErrorMessage("genomic_entity_identifier", ValidationConstants.INVALID_MESSAGE + " (" + dto.getGenomicEntityIdentifier() + ")"); } From b25ceff221778a7658624282e5324b1020256b9f Mon Sep 17 00:00:00 2001 From: Christian Pich Date: Fri, 13 Sep 2024 10:14:35 +0200 Subject: [PATCH 079/118] change ApplicationScoped to RequestScoped --- .../org/alliancegenome/curation_api/dao/ConstructDAO.java | 4 ++-- .../org/alliancegenome/curation_api/dao/GenomicEntityDAO.java | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/main/java/org/alliancegenome/curation_api/dao/ConstructDAO.java b/src/main/java/org/alliancegenome/curation_api/dao/ConstructDAO.java index f721fb531..229ca711d 100644 --- a/src/main/java/org/alliancegenome/curation_api/dao/ConstructDAO.java +++ b/src/main/java/org/alliancegenome/curation_api/dao/ConstructDAO.java @@ -1,6 +1,6 @@ package org.alliancegenome.curation_api.dao; -import jakarta.enterprise.context.ApplicationScoped; +import jakarta.enterprise.context.RequestScoped; import jakarta.persistence.Query; import org.alliancegenome.curation_api.dao.base.BaseSQLDAO; import org.alliancegenome.curation_api.model.entities.Construct; @@ -9,7 +9,7 @@ import java.util.List; import java.util.Map; -@ApplicationScoped +@RequestScoped public class ConstructDAO extends BaseSQLDAO { protected ConstructDAO() { diff --git 
a/src/main/java/org/alliancegenome/curation_api/dao/GenomicEntityDAO.java b/src/main/java/org/alliancegenome/curation_api/dao/GenomicEntityDAO.java index 6c94020a7..0373bf9c4 100644 --- a/src/main/java/org/alliancegenome/curation_api/dao/GenomicEntityDAO.java +++ b/src/main/java/org/alliancegenome/curation_api/dao/GenomicEntityDAO.java @@ -1,6 +1,6 @@ package org.alliancegenome.curation_api.dao; -import jakarta.enterprise.context.ApplicationScoped; +import jakarta.enterprise.context.RequestScoped; import jakarta.persistence.Query; import org.alliancegenome.curation_api.dao.base.BaseSQLDAO; import org.alliancegenome.curation_api.model.entities.GenomicEntity; @@ -9,7 +9,7 @@ import java.util.List; import java.util.Map; -@ApplicationScoped +@RequestScoped public class GenomicEntityDAO extends BaseSQLDAO { protected GenomicEntityDAO() { From ab1b734dd04c145fb34d4a17651efa3b501d4ae9 Mon Sep 17 00:00:00 2001 From: Christian Pich Date: Wed, 18 Sep 2024 11:10:11 +0200 Subject: [PATCH 080/118] move caching of id-modID map from dao to service level --- .../curation_api/dao/ConstructDAO.java | 12 +++--------- .../curation_api/dao/GenomicEntityDAO.java | 13 +++---------- .../curation_api/services/ConstructService.java | 2 +- .../curation_api/services/GenomicEntityService.java | 2 +- 4 files changed, 8 insertions(+), 21 deletions(-) diff --git a/src/main/java/org/alliancegenome/curation_api/dao/ConstructDAO.java b/src/main/java/org/alliancegenome/curation_api/dao/ConstructDAO.java index 229ca711d..860ee4d3e 100644 --- a/src/main/java/org/alliancegenome/curation_api/dao/ConstructDAO.java +++ b/src/main/java/org/alliancegenome/curation_api/dao/ConstructDAO.java @@ -1,5 +1,6 @@ package org.alliancegenome.curation_api.dao; +import jakarta.enterprise.context.ApplicationScoped; import jakarta.enterprise.context.RequestScoped; import jakarta.persistence.Query; import org.alliancegenome.curation_api.dao.base.BaseSQLDAO; @@ -9,7 +10,7 @@ import java.util.List; import java.util.Map; 
-@RequestScoped +@ApplicationScoped public class ConstructDAO extends BaseSQLDAO { protected ConstructDAO() { @@ -17,9 +18,7 @@ protected ConstructDAO() { } public Map getConstructIdMap() { - if (constructIdMap.size() > 0) { - return constructIdMap; - } + Map constructIdMap = new HashMap<>(); Query q = entityManager.createNativeQuery("SELECT a.id, a.modEntityId, a.modInternalId FROM Reagent as a where exists (select * from construct as g where g.id = a.id)"); List ids = q.getResultList(); ids.forEach(record -> { @@ -33,9 +32,4 @@ public Map getConstructIdMap() { return constructIdMap; } - private Map constructIdMap = new HashMap<>(); - - public long getConstructIdByModID(String modID) { - return getConstructIdMap().get(modID); - } } diff --git a/src/main/java/org/alliancegenome/curation_api/dao/GenomicEntityDAO.java b/src/main/java/org/alliancegenome/curation_api/dao/GenomicEntityDAO.java index 0373bf9c4..4f58ec5e2 100644 --- a/src/main/java/org/alliancegenome/curation_api/dao/GenomicEntityDAO.java +++ b/src/main/java/org/alliancegenome/curation_api/dao/GenomicEntityDAO.java @@ -1,5 +1,6 @@ package org.alliancegenome.curation_api.dao; +import jakarta.enterprise.context.ApplicationScoped; import jakarta.enterprise.context.RequestScoped; import jakarta.persistence.Query; import org.alliancegenome.curation_api.dao.base.BaseSQLDAO; @@ -9,7 +10,7 @@ import java.util.List; import java.util.Map; -@RequestScoped +@ApplicationScoped public class GenomicEntityDAO extends BaseSQLDAO { protected GenomicEntityDAO() { @@ -17,9 +18,7 @@ protected GenomicEntityDAO() { } public Map getGenomicEntityIdMap() { - if (genomicEntityIdMap.size() > 0) { - return genomicEntityIdMap; - } + Map genomicEntityIdMap = new HashMap<>(); Query q = entityManager.createNativeQuery("SELECT a.id, a.modEntityId, a.modInternalId FROM BiologicalEntity as a where exists (select * from genomicentity as g where g.id = a.id)"); List ids = q.getResultList(); ids.forEach(record -> { @@ -33,10 +32,4 @@ public 
Map getGenomicEntityIdMap() { return genomicEntityIdMap; } - private Map genomicEntityIdMap = new HashMap<>(); - - public long getGenomicEntityIdByModID(String modID) { - return getGenomicEntityIdMap().get(modID); - } - } diff --git a/src/main/java/org/alliancegenome/curation_api/services/ConstructService.java b/src/main/java/org/alliancegenome/curation_api/services/ConstructService.java index ca1c025a3..5b7dbd368 100644 --- a/src/main/java/org/alliancegenome/curation_api/services/ConstructService.java +++ b/src/main/java/org/alliancegenome/curation_api/services/ConstructService.java @@ -143,7 +143,7 @@ public Map getConstructIdMap() { private Map constructIdMap = new HashMap<>(); public Long getIdByModID(String modID) { - return constructDAO.getConstructIdByModID(modID); + return getConstructIdMap().get(modID); } public Construct getShallowEntity(Long id) { diff --git a/src/main/java/org/alliancegenome/curation_api/services/GenomicEntityService.java b/src/main/java/org/alliancegenome/curation_api/services/GenomicEntityService.java index fa0db1163..a0887dca4 100644 --- a/src/main/java/org/alliancegenome/curation_api/services/GenomicEntityService.java +++ b/src/main/java/org/alliancegenome/curation_api/services/GenomicEntityService.java @@ -51,7 +51,7 @@ public Map getGenomicEntityIdMap() { private Map genomicEntityIdMap = new HashMap<>(); public Long getIdByModID(String modID) { - return genomicEntityDAO.getGenomicEntityIdByModID(modID); + return getGenomicEntityIdMap().get(modID); } public GenomicEntity getShallowEntity(Long id) { From 9f8a199006ff9f980fdea86808d1378c0ce04d07 Mon Sep 17 00:00:00 2001 From: Christian Pich Date: Wed, 18 Sep 2024 16:25:50 +0200 Subject: [PATCH 081/118] cleanup code --- .../java/org/alliancegenome/curation_api/dao/ConstructDAO.java | 1 - .../org/alliancegenome/curation_api/dao/GenomicEntityDAO.java | 1 - 2 files changed, 2 deletions(-) diff --git a/src/main/java/org/alliancegenome/curation_api/dao/ConstructDAO.java 
b/src/main/java/org/alliancegenome/curation_api/dao/ConstructDAO.java index 860ee4d3e..1d588c9ee 100644 --- a/src/main/java/org/alliancegenome/curation_api/dao/ConstructDAO.java +++ b/src/main/java/org/alliancegenome/curation_api/dao/ConstructDAO.java @@ -1,7 +1,6 @@ package org.alliancegenome.curation_api.dao; import jakarta.enterprise.context.ApplicationScoped; -import jakarta.enterprise.context.RequestScoped; import jakarta.persistence.Query; import org.alliancegenome.curation_api.dao.base.BaseSQLDAO; import org.alliancegenome.curation_api.model.entities.Construct; diff --git a/src/main/java/org/alliancegenome/curation_api/dao/GenomicEntityDAO.java b/src/main/java/org/alliancegenome/curation_api/dao/GenomicEntityDAO.java index 4f58ec5e2..bc6d184da 100644 --- a/src/main/java/org/alliancegenome/curation_api/dao/GenomicEntityDAO.java +++ b/src/main/java/org/alliancegenome/curation_api/dao/GenomicEntityDAO.java @@ -1,7 +1,6 @@ package org.alliancegenome.curation_api.dao; import jakarta.enterprise.context.ApplicationScoped; -import jakarta.enterprise.context.RequestScoped; import jakarta.persistence.Query; import org.alliancegenome.curation_api.dao.base.BaseSQLDAO; import org.alliancegenome.curation_api.model.entities.GenomicEntity; From ee7dd27ae4ddabb8e3388f9c87637ecac1030260 Mon Sep 17 00:00:00 2001 From: Christian Pich Date: Fri, 27 Sep 2024 14:02:54 +0200 Subject: [PATCH 082/118] put reference in cache --- .../curation_api/dao/ReferenceDAO.java | 32 ++++++++++++++-- .../services/ReferenceService.java | 37 +++++++++++-------- .../EvidenceAssociationDTOValidator.java | 12 +++--- ...tGenomicEntityAssociationDTOValidator.java | 2 +- 4 files changed, 57 insertions(+), 26 deletions(-) diff --git a/src/main/java/org/alliancegenome/curation_api/dao/ReferenceDAO.java b/src/main/java/org/alliancegenome/curation_api/dao/ReferenceDAO.java index 3edde99bb..87f6431d3 100644 --- a/src/main/java/org/alliancegenome/curation_api/dao/ReferenceDAO.java +++ 
b/src/main/java/org/alliancegenome/curation_api/dao/ReferenceDAO.java @@ -1,11 +1,13 @@ package org.alliancegenome.curation_api.dao; -import org.alliancegenome.curation_api.dao.base.BaseSQLDAO; -import org.alliancegenome.curation_api.model.entities.Reference; - import jakarta.enterprise.context.ApplicationScoped; import jakarta.persistence.Query; import jakarta.transaction.Transactional; +import org.alliancegenome.curation_api.dao.base.BaseSQLDAO; +import org.alliancegenome.curation_api.model.entities.Reference; + +import java.util.*; +import java.util.stream.Collectors; @ApplicationScoped public class ReferenceDAO extends BaseSQLDAO { @@ -35,4 +37,28 @@ protected void deleteReferenceForeignKey(String table, String column, String ori jpqlQuery.executeUpdate(); } + public HashMap getReferenceMap() { + HashMap referenceIdMap = new HashMap<>(); + Query q = entityManager.createNativeQuery(""" + SELECT ref.id, cr.referencedcurie + FROM Reference as ref, reference_crossreference as assoc, Crossreference as cr + where assoc.reference_id = ref.id + and assoc.crossreferences_id = cr.id + """); + List ids = q.getResultList(); + Set refIDs = ids.stream().map(object -> (Long) object[0]).collect(Collectors.toSet()); + Map> idMap = ids.stream().collect(Collectors.groupingBy(o -> (Long) o[0])); + List refs = new ArrayList<>(); + refIDs.forEach(id -> { + Reference reference = entityManager.getReference(Reference.class, id); + refs.add(reference); + referenceIdMap.put(String.valueOf(id), reference); + if (idMap.get(id) != null) { + idMap.get(id).forEach(objects -> { + referenceIdMap.put((String) objects[1], reference); + }); + } + }); + return referenceIdMap; + } } diff --git a/src/main/java/org/alliancegenome/curation_api/services/ReferenceService.java b/src/main/java/org/alliancegenome/curation_api/services/ReferenceService.java index 4d8d97576..c1017a389 100644 --- a/src/main/java/org/alliancegenome/curation_api/services/ReferenceService.java +++ 
b/src/main/java/org/alliancegenome/curation_api/services/ReferenceService.java @@ -1,10 +1,10 @@ package org.alliancegenome.curation_api.services; -import java.util.ArrayList; -import java.util.Date; -import java.util.HashMap; -import java.util.List; - +import io.quarkus.logging.Log; +import jakarta.annotation.PostConstruct; +import jakarta.enterprise.context.RequestScoped; +import jakarta.inject.Inject; +import jakarta.transaction.Transactional; import org.alliancegenome.curation_api.dao.ReferenceDAO; import org.alliancegenome.curation_api.model.entities.Reference; import org.alliancegenome.curation_api.response.ObjectResponse; @@ -12,18 +12,19 @@ import org.alliancegenome.curation_api.services.base.BaseEntityCrudService; import org.alliancegenome.curation_api.services.helpers.references.ReferenceSynchronisationHelper; -import io.quarkus.logging.Log; -import jakarta.annotation.PostConstruct; -import jakarta.enterprise.context.RequestScoped; -import jakarta.inject.Inject; -import jakarta.transaction.Transactional; +import java.util.ArrayList; +import java.util.Date; +import java.util.HashMap; +import java.util.List; @RequestScoped public class ReferenceService extends BaseEntityCrudService { - @Inject ReferenceDAO referenceDAO; - @Inject ReferenceSynchronisationHelper refSyncHelper; - + @Inject + ReferenceDAO referenceDAO; + @Inject + ReferenceSynchronisationHelper refSyncHelper; + Date referenceRequest; HashMap referenceCacheMap = new HashMap<>(); @@ -56,8 +57,12 @@ public Reference retrieveFromDbOrLiteratureService(String curieOrXref) { reference = referenceCacheMap.get(curieOrXref); } else { Log.debug("Reference not cached, caching reference: (" + curieOrXref + ")"); - reference = findOrCreateReference(curieOrXref); - referenceCacheMap.put(curieOrXref, reference); + if (referenceCacheMap.isEmpty()) { + referenceCacheMap = referenceDAO.getReferenceMap(); + } else { + reference = findOrCreateReference(curieOrXref); + referenceCacheMap.put(curieOrXref, reference); 
+ } } } else { reference = findOrCreateReference(curieOrXref); @@ -65,7 +70,7 @@ public Reference retrieveFromDbOrLiteratureService(String curieOrXref) { } return reference; } - + private Reference findOrCreateReference(String curieOrXref) { Reference reference = null; diff --git a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/associations/EvidenceAssociationDTOValidator.java b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/associations/EvidenceAssociationDTOValidator.java index 4441063db..328669890 100644 --- a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/associations/EvidenceAssociationDTOValidator.java +++ b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/associations/EvidenceAssociationDTOValidator.java @@ -1,8 +1,7 @@ package org.alliancegenome.curation_api.services.validation.dto.associations; -import java.util.ArrayList; -import java.util.List; - +import jakarta.enterprise.context.RequestScoped; +import jakarta.inject.Inject; import org.alliancegenome.curation_api.constants.ValidationConstants; import org.alliancegenome.curation_api.model.entities.EvidenceAssociation; import org.alliancegenome.curation_api.model.entities.InformationContentEntity; @@ -12,13 +11,14 @@ import org.alliancegenome.curation_api.services.validation.dto.base.BaseDTOValidator; import org.apache.commons.collections.CollectionUtils; -import jakarta.enterprise.context.RequestScoped; -import jakarta.inject.Inject; +import java.util.ArrayList; +import java.util.List; @RequestScoped public class EvidenceAssociationDTOValidator extends BaseDTOValidator { - @Inject InformationContentEntityService informationContentEntityService; + @Inject + InformationContentEntityService informationContentEntityService; public ObjectResponse validateEvidenceAssociationDTO(E association, D dto) { ObjectResponse assocResponse = validateAuditedObjectDTO(association, dto); diff --git 
a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/associations/constructAssociations/ConstructGenomicEntityAssociationDTOValidator.java b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/associations/constructAssociations/ConstructGenomicEntityAssociationDTOValidator.java index 8080ac3b3..39b11c42e 100644 --- a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/associations/constructAssociations/ConstructGenomicEntityAssociationDTOValidator.java +++ b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/associations/constructAssociations/ConstructGenomicEntityAssociationDTOValidator.java @@ -57,6 +57,7 @@ public ConstructGenomicEntityAssociation validateConstructGenomicEntityAssociati } else { if (beDataProvider != null && !construct.getDataProvider().getSourceOrganization().getAbbreviation().equals(beDataProvider.sourceOrganization)) { assocResponse.addErrorMessage("construct_identifier", ValidationConstants.INVALID_MESSAGE + " for " + beDataProvider.name() + " load"); + return null; } } } else { @@ -72,7 +73,6 @@ public ConstructGenomicEntityAssociation validateConstructGenomicEntityAssociati assocResponse.addErrorMessage("genomic_entity_identifier", ValidationConstants.INVALID_MESSAGE + " (" + dto.getGenomicEntityIdentifier() + ")"); } } - ConstructGenomicEntityAssociation association = null; if (construct != null && StringUtils.isNotBlank(dto.getGenomicEntityRelationName()) && genomicEntity != null) { HashMap params = new HashMap<>(); From b06fe92c4412f3b523a2d93279517d66b2074c4a Mon Sep 17 00:00:00 2001 From: Christian Pich Date: Wed, 13 Nov 2024 17:20:20 +0100 Subject: [PATCH 083/118] adjust the model: only use Gene, allele and AGMs as genomic entities for ConstructGenomicEntityAssoc --- .../curation_api/dao/ReferenceDAO.java | 25 +++++++++++++++++++ .../curation_api/jobs/JobScheduler.java | 2 ++ .../jobs/processors/BulkLoadProcessor.java | 1 - 
.../model/entities/AffectedGenomicModel.java | 12 +++++++++ .../curation_api/model/entities/Allele.java | 22 +++++++++------- .../curation_api/model/entities/Gene.java | 13 ++++++++++ .../model/entities/GenomicEntity.java | 9 ------- .../ConstructGenomicEntityAssociation.java | 2 +- .../InformationContentEntityService.java | 4 +++ .../services/ReferenceService.java | 22 ++++++++++++++++ ...structGenomicEntityAssociationService.java | 6 ++++- .../EvidenceAssociationDTOValidator.java | 1 - 12 files changed, 97 insertions(+), 22 deletions(-) diff --git a/src/main/java/org/alliancegenome/curation_api/dao/ReferenceDAO.java b/src/main/java/org/alliancegenome/curation_api/dao/ReferenceDAO.java index 87f6431d3..1c716ae73 100644 --- a/src/main/java/org/alliancegenome/curation_api/dao/ReferenceDAO.java +++ b/src/main/java/org/alliancegenome/curation_api/dao/ReferenceDAO.java @@ -61,4 +61,29 @@ public HashMap getReferenceMap() { }); return referenceIdMap; } + + public HashMap getShallowReferenceMap() { + HashMap referenceIdMap = new HashMap<>(); + Query q = entityManager.createNativeQuery(""" + SELECT ref.id, cr.referencedcurie + FROM Reference as ref, reference_crossreference as assoc, Crossreference as cr + where assoc.reference_id = ref.id + and assoc.crossreferences_id = cr.id + """); + List ids = q.getResultList(); + Set refIDs = ids.stream().map(object -> (Long) object[0]).collect(Collectors.toSet()); + Map> idMap = ids.stream().collect(Collectors.groupingBy(o -> (Long) o[0])); + List refs = new ArrayList<>(); + refIDs.forEach(id -> { + Reference reference = getShallowEntity(Reference.class, id); + refs.add(reference); + referenceIdMap.put(String.valueOf(id), reference); + if (idMap.get(id) != null) { + idMap.get(id).forEach(objects -> { + referenceIdMap.put((String) objects[1], reference); + }); + } + }); + return referenceIdMap; + } } diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/JobScheduler.java 
b/src/main/java/org/alliancegenome/curation_api/jobs/JobScheduler.java index f121192d3..dfc12d884 100644 --- a/src/main/java/org/alliancegenome/curation_api/jobs/JobScheduler.java +++ b/src/main/java/org/alliancegenome/curation_api/jobs/JobScheduler.java @@ -109,7 +109,9 @@ public void init() { } } +/* @Scheduled(every = "1s") +*/ public void scheduleCronGroupJobs() { if (loadSchedulingEnabled) { if (sem.tryAcquire()) { diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/processors/BulkLoadProcessor.java b/src/main/java/org/alliancegenome/curation_api/jobs/processors/BulkLoadProcessor.java index e61b1a910..ec73003b2 100644 --- a/src/main/java/org/alliancegenome/curation_api/jobs/processors/BulkLoadProcessor.java +++ b/src/main/java/org/alliancegenome/curation_api/jobs/processors/BulkLoadProcessor.java @@ -208,7 +208,6 @@ protected void startLoad(BulkLoadFileHistory bulkLoadFileHistory) { protected void endLoad(BulkLoadFileHistory bulkLoadFileHistory, String message, JobStatus status) { if (bulkLoadFileHistory.getBulkLoadFile().getLocalFilePath() != null) { Log.info("Removing old input file: " + bulkLoadFileHistory.getBulkLoadFile().getLocalFilePath()); - new File(bulkLoadFileHistory.getBulkLoadFile().getLocalFilePath()).delete(); bulkLoadFileHistory.getBulkLoadFile().setLocalFilePath(null); bulkLoadFileDAO.merge(bulkLoadFileHistory.getBulkLoadFile()); } diff --git a/src/main/java/org/alliancegenome/curation_api/model/entities/AffectedGenomicModel.java b/src/main/java/org/alliancegenome/curation_api/model/entities/AffectedGenomicModel.java index bf2b61b71..93e9d514a 100644 --- a/src/main/java/org/alliancegenome/curation_api/model/entities/AffectedGenomicModel.java +++ b/src/main/java/org/alliancegenome/curation_api/model/entities/AffectedGenomicModel.java @@ -4,6 +4,7 @@ import org.alliancegenome.curation_api.constants.LinkMLSchemaConstants; import org.alliancegenome.curation_api.interfaces.AGRCurationSchemaVersion; +import 
org.alliancegenome.curation_api.model.entities.associations.constructAssociations.ConstructGenomicEntityAssociation; import org.alliancegenome.curation_api.view.View; import org.eclipse.microprofile.openapi.annotations.media.Schema; import org.hibernate.search.mapper.pojo.automaticindexing.ReindexOnUpdate; @@ -43,4 +44,15 @@ public class AffectedGenomicModel extends GenomicEntity { @ManyToOne @JsonView({ View.FieldsOnly.class, View.ForPublic.class }) private VocabularyTerm subtype; + + @IndexedEmbedded(includePaths = { + "constructAssociationSubject.curie", "constructAssociationSubject.constructSymbol.displayText", "constructAssociationSubject.constructSymbol.formatText", + "constructAssociationSubject.constructFullName.displayText", "constructAssociationSubject.constructFullName.formatText", "constructAssociationSubject.modEntityId", + "constructAssociationSubject.curie_keyword", "constructAssociationSubject.constructSymbol.displayText_keyword", "constructAssociationSubject.constructSymbol.formatText_keyword", + "constructAssociationSubject.constructFullName.displayText_keyword", "constructAssociationSubject.constructFullName.formatText_keyword", "constructAssociationSubject.modEntityId_keyword" + }) + @OneToMany(mappedBy = "constructGenomicEntityAssociationObject", cascade = CascadeType.ALL, orphanRemoval = true) + @JsonView({ View.FieldsAndLists.class, View.GeneDetailView.class }) + private List constructGenomicEntityAssociations; + } diff --git a/src/main/java/org/alliancegenome/curation_api/model/entities/Allele.java b/src/main/java/org/alliancegenome/curation_api/model/entities/Allele.java index d5d9a1bbc..7abed646a 100644 --- a/src/main/java/org/alliancegenome/curation_api/model/entities/Allele.java +++ b/src/main/java/org/alliancegenome/curation_api/model/entities/Allele.java @@ -2,11 +2,13 @@ import java.util.List; +import jakarta.persistence.*; import org.alliancegenome.curation_api.constants.LinkMLSchemaConstants; import 
org.alliancegenome.curation_api.interfaces.AGRCurationSchemaVersion; import org.alliancegenome.curation_api.model.bridges.BooleanAndNullValueBridge; import org.alliancegenome.curation_api.model.entities.associations.alleleAssociations.AlleleGeneAssociation; import org.alliancegenome.curation_api.model.entities.associations.alleleAssociations.AlleleVariantAssociation; +import org.alliancegenome.curation_api.model.entities.associations.constructAssociations.ConstructGenomicEntityAssociation; import org.alliancegenome.curation_api.model.entities.slotAnnotations.alleleSlotAnnotations.AlleleDatabaseStatusSlotAnnotation; import org.alliancegenome.curation_api.model.entities.slotAnnotations.alleleSlotAnnotations.AlleleFullNameSlotAnnotation; import org.alliancegenome.curation_api.model.entities.slotAnnotations.alleleSlotAnnotations.AlleleFunctionalImpactSlotAnnotation; @@ -34,15 +36,6 @@ import com.fasterxml.jackson.annotation.JsonManagedReference; import com.fasterxml.jackson.annotation.JsonView; -import jakarta.persistence.CascadeType; -import jakarta.persistence.Entity; -import jakarta.persistence.Index; -import jakarta.persistence.JoinTable; -import jakarta.persistence.ManyToMany; -import jakarta.persistence.ManyToOne; -import jakarta.persistence.OneToMany; -import jakarta.persistence.OneToOne; -import jakarta.persistence.Table; import lombok.Data; import lombok.EqualsAndHashCode; import lombok.ToString; @@ -212,4 +205,15 @@ public class Allele extends GenomicEntity { @Index(name = "allele_note_relatednotes_index", columnList = "relatedNotes_id")}) private List relatedNotes; + + @IndexedEmbedded(includePaths = { + "constructAssociationSubject.curie", "constructAssociationSubject.constructSymbol.displayText", "constructAssociationSubject.constructSymbol.formatText", + "constructAssociationSubject.constructFullName.displayText", "constructAssociationSubject.constructFullName.formatText", "constructAssociationSubject.modEntityId", + 
"constructAssociationSubject.curie_keyword", "constructAssociationSubject.constructSymbol.displayText_keyword", "constructAssociationSubject.constructSymbol.formatText_keyword", + "constructAssociationSubject.constructFullName.displayText_keyword", "constructAssociationSubject.constructFullName.formatText_keyword", "constructAssociationSubject.modEntityId_keyword" + }) + @OneToMany(mappedBy = "constructGenomicEntityAssociationObject", cascade = CascadeType.ALL, orphanRemoval = true) + @JsonView({ View.FieldsAndLists.class, View.GeneDetailView.class }) + private List constructGenomicEntityAssociations; + } diff --git a/src/main/java/org/alliancegenome/curation_api/model/entities/Gene.java b/src/main/java/org/alliancegenome/curation_api/model/entities/Gene.java index cdeaa6496..bd59979d6 100644 --- a/src/main/java/org/alliancegenome/curation_api/model/entities/Gene.java +++ b/src/main/java/org/alliancegenome/curation_api/model/entities/Gene.java @@ -5,6 +5,7 @@ import org.alliancegenome.curation_api.constants.LinkMLSchemaConstants; import org.alliancegenome.curation_api.interfaces.AGRCurationSchemaVersion; import org.alliancegenome.curation_api.model.entities.associations.alleleAssociations.AlleleGeneAssociation; +import org.alliancegenome.curation_api.model.entities.associations.constructAssociations.ConstructGenomicEntityAssociation; import org.alliancegenome.curation_api.model.entities.associations.geneAssociations.GeneGenomicLocationAssociation; import org.alliancegenome.curation_api.model.entities.associations.sequenceTargetingReagentAssociations.SequenceTargetingReagentGeneAssociation; import org.alliancegenome.curation_api.model.entities.associations.transcriptAssociations.TranscriptGeneAssociation; @@ -119,4 +120,16 @@ public class Gene extends GenomicEntity { @OneToMany(mappedBy = "geneAssociationSubject", cascade = CascadeType.ALL, orphanRemoval = true) @JsonView({ View.FieldsAndLists.class, View.GeneDetailView.class }) private List 
geneGenomicLocationAssociations; + + + @IndexedEmbedded(includePaths = { + "constructAssociationSubject.curie", "constructAssociationSubject.constructSymbol.displayText", "constructAssociationSubject.constructSymbol.formatText", + "constructAssociationSubject.constructFullName.displayText", "constructAssociationSubject.constructFullName.formatText", "constructAssociationSubject.modEntityId", + "constructAssociationSubject.curie_keyword", "constructAssociationSubject.constructSymbol.displayText_keyword", "constructAssociationSubject.constructSymbol.formatText_keyword", + "constructAssociationSubject.constructFullName.displayText_keyword", "constructAssociationSubject.constructFullName.formatText_keyword", "constructAssociationSubject.modEntityId_keyword" + }) + @OneToMany(mappedBy = "constructGenomicEntityAssociationObject", cascade = CascadeType.ALL, orphanRemoval = true) + @JsonView({ View.FieldsAndLists.class, View.GeneDetailView.class }) + private List constructGenomicEntityAssociations; + } diff --git a/src/main/java/org/alliancegenome/curation_api/model/entities/GenomicEntity.java b/src/main/java/org/alliancegenome/curation_api/model/entities/GenomicEntity.java index 3bc9a17c8..8c7519081 100644 --- a/src/main/java/org/alliancegenome/curation_api/model/entities/GenomicEntity.java +++ b/src/main/java/org/alliancegenome/curation_api/model/entities/GenomicEntity.java @@ -41,14 +41,5 @@ public class GenomicEntity extends BiologicalEntity { private List crossReferences; - @IndexedEmbedded(includePaths = { - "constructAssociationSubject.curie", "constructAssociationSubject.constructSymbol.displayText", "constructAssociationSubject.constructSymbol.formatText", - "constructAssociationSubject.constructFullName.displayText", "constructAssociationSubject.constructFullName.formatText", "constructAssociationSubject.modEntityId", - "constructAssociationSubject.curie_keyword", "constructAssociationSubject.constructSymbol.displayText_keyword", 
"constructAssociationSubject.constructSymbol.formatText_keyword", - "constructAssociationSubject.constructFullName.displayText_keyword", "constructAssociationSubject.constructFullName.formatText_keyword", "constructAssociationSubject.modEntityId_keyword" - }) - @OneToMany(mappedBy = "constructGenomicEntityAssociationObject", cascade = CascadeType.ALL, orphanRemoval = true) - @JsonView({ View.FieldsAndLists.class, View.GeneDetailView.class }) - private List constructGenomicEntityAssociations; } diff --git a/src/main/java/org/alliancegenome/curation_api/model/entities/associations/constructAssociations/ConstructGenomicEntityAssociation.java b/src/main/java/org/alliancegenome/curation_api/model/entities/associations/constructAssociations/ConstructGenomicEntityAssociation.java index bee923f50..87e0bdef8 100644 --- a/src/main/java/org/alliancegenome/curation_api/model/entities/associations/constructAssociations/ConstructGenomicEntityAssociation.java +++ b/src/main/java/org/alliancegenome/curation_api/model/entities/associations/constructAssociations/ConstructGenomicEntityAssociation.java @@ -35,7 +35,7 @@ @Entity @Data @EqualsAndHashCode(onlyExplicitlyIncluded = true, callSuper = true) -@ToString(callSuper = true) +@ToString(callSuper = false) @AGRCurationSchemaVersion(min = "2.2.0", max = LinkMLSchemaConstants.LATEST_RELEASE, dependencies = { EvidenceAssociation.class }) @Schema(name = "ConstructGenomicEntityAssociation", description = "POJO representing an association between a construct and a genomic entity") diff --git a/src/main/java/org/alliancegenome/curation_api/services/InformationContentEntityService.java b/src/main/java/org/alliancegenome/curation_api/services/InformationContentEntityService.java index 645bdacb0..c76141f38 100644 --- a/src/main/java/org/alliancegenome/curation_api/services/InformationContentEntityService.java +++ b/src/main/java/org/alliancegenome/curation_api/services/InformationContentEntityService.java @@ -28,4 +28,8 @@ public 
InformationContentEntity retrieveFromDbOrLiteratureService(String curieOr return ice; } + + public InformationContentEntity retrieveFromDbOrLiteratureServicea(String pmid) { + return referenceService.retrieveShallowReferenceFromDbOrLiteratureService(pmid); + } } diff --git a/src/main/java/org/alliancegenome/curation_api/services/ReferenceService.java b/src/main/java/org/alliancegenome/curation_api/services/ReferenceService.java index c1017a389..20128b092 100644 --- a/src/main/java/org/alliancegenome/curation_api/services/ReferenceService.java +++ b/src/main/java/org/alliancegenome/curation_api/services/ReferenceService.java @@ -26,7 +26,9 @@ public class ReferenceService extends BaseEntityCrudService referenceCacheMap = new HashMap<>(); + HashMap shallowReferenceCacheMap = new HashMap<>(); @Override @PostConstruct @@ -71,6 +73,26 @@ public Reference retrieveFromDbOrLiteratureService(String curieOrXref) { return reference; } + @Transactional + public Reference retrieveShallowReferenceFromDbOrLiteratureService(String pmid) { + Reference reference = null; + if (shallowReferenceCacheMap.containsKey(pmid)) { + reference = shallowReferenceCacheMap.get(pmid); + } else { + Log.debug("Reference not cached, caching reference: (" + pmid + ")"); + if (shallowReferenceCacheMap.isEmpty()) { + shallowReferenceCacheMap = referenceDAO.getShallowReferenceMap(); + reference = shallowReferenceCacheMap.get(pmid); + } else { +/* + reference = findOrCreateReference(curieOrXref); + referenceCacheMap.put(curieOrXref, reference); +*/ + } + } + return reference; + } + private Reference findOrCreateReference(String curieOrXref) { Reference reference = null; diff --git a/src/main/java/org/alliancegenome/curation_api/services/associations/constructAssociations/ConstructGenomicEntityAssociationService.java b/src/main/java/org/alliancegenome/curation_api/services/associations/constructAssociations/ConstructGenomicEntityAssociationService.java index 7a0098d77..817635105 100644 --- 
a/src/main/java/org/alliancegenome/curation_api/services/associations/constructAssociations/ConstructGenomicEntityAssociationService.java +++ b/src/main/java/org/alliancegenome/curation_api/services/associations/constructAssociations/ConstructGenomicEntityAssociationService.java @@ -59,9 +59,11 @@ public ObjectResponse upsert(ConstructGenomic return null; } dbEntity = constructGenomicEntityAssociationDAO.persist(dbEntity); +/* addAssociationToConstruct(dbEntity); addAssociationToGenomicEntity(dbEntity); - return new ObjectResponse(dbEntity); +*/ + return new ObjectResponse<>(dbEntity); } public ObjectResponse validate(ConstructGenomicEntityAssociation uiEntity) { @@ -150,6 +152,7 @@ private void addAssociationToConstruct(ConstructGenomicEntityAssociation associa constructDAO.persist(construct); } +/* private void addAssociationToGenomicEntity(ConstructGenomicEntityAssociation association) { GenomicEntity genomicEntity = association.getConstructGenomicEntityAssociationObject(); List currentAssociations = genomicEntity.getConstructGenomicEntityAssociations(); @@ -164,4 +167,5 @@ private void addAssociationToGenomicEntity(ConstructGenomicEntityAssociation ass genomicEntity.setConstructGenomicEntityAssociations(currentAssociations); genomicEntityDAO.persist(genomicEntity); } +*/ } diff --git a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/associations/EvidenceAssociationDTOValidator.java b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/associations/EvidenceAssociationDTOValidator.java index 328669890..7614a644d 100644 --- a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/associations/EvidenceAssociationDTOValidator.java +++ b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/associations/EvidenceAssociationDTOValidator.java @@ -22,7 +22,6 @@ public class EvidenceAssociationDTOValidator extends BaseDTOValidator { public ObjectResponse validateEvidenceAssociationDTO(E association, D 
dto) { ObjectResponse assocResponse = validateAuditedObjectDTO(association, dto); - association = assocResponse.getEntity(); if (CollectionUtils.isNotEmpty(dto.getEvidenceCuries())) { List evidence = new ArrayList<>(); From e91fb66de519a1f06290ffa50001e307d2b68fdf Mon Sep 17 00:00:00 2001 From: Christian Pich Date: Thu, 21 Nov 2024 07:49:27 +0100 Subject: [PATCH 084/118] SCRUM-4436 cleanup process should retrieve shallow association object --- ...structGenomicEntityAssociationService.java | 67 ++++++++----------- 1 file changed, 27 insertions(+), 40 deletions(-) diff --git a/src/main/java/org/alliancegenome/curation_api/services/associations/constructAssociations/ConstructGenomicEntityAssociationService.java b/src/main/java/org/alliancegenome/curation_api/services/associations/constructAssociations/ConstructGenomicEntityAssociationService.java index 817635105..b24a06819 100644 --- a/src/main/java/org/alliancegenome/curation_api/services/associations/constructAssociations/ConstructGenomicEntityAssociationService.java +++ b/src/main/java/org/alliancegenome/curation_api/services/associations/constructAssociations/ConstructGenomicEntityAssociationService.java @@ -1,13 +1,10 @@ package org.alliancegenome.curation_api.services.associations.constructAssociations; -import java.time.OffsetDateTime; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Objects; -import java.util.stream.Collectors; - +import io.quarkus.logging.Log; +import jakarta.annotation.PostConstruct; +import jakarta.enterprise.context.RequestScoped; +import jakarta.inject.Inject; +import jakarta.transaction.Transactional; import org.alliancegenome.curation_api.constants.EntityFieldConstants; import org.alliancegenome.curation_api.dao.ConstructDAO; import org.alliancegenome.curation_api.dao.GenomicEntityDAO; @@ -18,7 +15,6 @@ import org.alliancegenome.curation_api.exceptions.ValidationException; import 
org.alliancegenome.curation_api.interfaces.crud.BaseUpsertServiceInterface; import org.alliancegenome.curation_api.model.entities.Construct; -import org.alliancegenome.curation_api.model.entities.GenomicEntity; import org.alliancegenome.curation_api.model.entities.associations.constructAssociations.ConstructGenomicEntityAssociation; import org.alliancegenome.curation_api.model.ingest.dto.associations.constructAssociations.ConstructGenomicEntityAssociationDTO; import org.alliancegenome.curation_api.response.ObjectResponse; @@ -28,23 +24,28 @@ import org.alliancegenome.curation_api.services.validation.associations.constructAssociations.ConstructGenomicEntityAssociationValidator; import org.alliancegenome.curation_api.services.validation.dto.associations.constructAssociations.ConstructGenomicEntityAssociationDTOValidator; -import io.quarkus.logging.Log; -import jakarta.annotation.PostConstruct; -import jakarta.enterprise.context.RequestScoped; -import jakarta.inject.Inject; -import jakarta.transaction.Transactional; +import java.time.OffsetDateTime; +import java.util.*; +import java.util.stream.Collectors; @RequestScoped public class ConstructGenomicEntityAssociationService extends BaseAssociationDTOCrudService implements BaseUpsertServiceInterface { - @Inject ConstructGenomicEntityAssociationDAO constructGenomicEntityAssociationDAO; - @Inject ConstructGenomicEntityAssociationValidator constructGenomicEntityAssociationValidator; - @Inject ConstructGenomicEntityAssociationDTOValidator constructGenomicEntityAssociationDtoValidator; - @Inject ConstructDAO constructDAO; - @Inject GenomicEntityDAO genomicEntityDAO; - @Inject PersonService personService; - @Inject PersonDAO personDAO; + @Inject + ConstructGenomicEntityAssociationDAO constructGenomicEntityAssociationDAO; + @Inject + ConstructGenomicEntityAssociationValidator constructGenomicEntityAssociationValidator; + @Inject + ConstructGenomicEntityAssociationDTOValidator constructGenomicEntityAssociationDtoValidator; + 
@Inject + ConstructDAO constructDAO; + @Inject + GenomicEntityDAO genomicEntityDAO; + @Inject + PersonService personService; + @Inject + PersonDAO personDAO; @Override @PostConstruct @@ -87,11 +88,11 @@ public List getAssociationsByDataProvider(BackendBulkDataProvider dataProv } @Transactional - public ConstructGenomicEntityAssociation deprecateOrDeleteAssociation(Long id, Boolean throwApiError, String loadDescription, Boolean deprecate) { - ConstructGenomicEntityAssociation association = constructGenomicEntityAssociationDAO.find(id); + public ConstructGenomicEntityAssociation deprecateOrDelete(Long id, Boolean throwApiError, String requestSource, Boolean deprecate) { + ConstructGenomicEntityAssociation object = dao.getShallowEntity(ConstructGenomicEntityAssociation.class, id); - if (association == null) { - String errorMessage = "Could not find ConstructGenomicEntityAssociation with id: " + id; + if (object == null) { + String errorMessage = "Could not find entity with id: " + id; if (throwApiError) { ObjectResponse response = new ObjectResponse<>(); response.addErrorMessage("id", errorMessage); @@ -100,21 +101,7 @@ public ConstructGenomicEntityAssociation deprecateOrDeleteAssociation(Long id, B Log.error(errorMessage); return null; } - if (deprecate) { - if (!association.getObsolete()) { - association.setObsolete(true); - if (authenticatedPerson != null) { - association.setUpdatedBy(personDAO.find(authenticatedPerson.getId())); - } else { - association.setUpdatedBy(personService.fetchByUniqueIdOrCreate(loadDescription)); - } - association.setDateUpdated(OffsetDateTime.now()); - return constructGenomicEntityAssociationDAO.persist(association); - } - } - - constructGenomicEntityAssociationDAO.remove(association.getId()); - + dao.remove(id); return null; } From b52a68ccdf27e8982525eecded4d18e01ec04ccb Mon Sep 17 00:00:00 2001 From: Christian Pich Date: Thu, 21 Nov 2024 07:55:36 +0100 Subject: [PATCH 085/118] undo commenting out scheduler --- 
.../java/org/alliancegenome/curation_api/jobs/JobScheduler.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/JobScheduler.java b/src/main/java/org/alliancegenome/curation_api/jobs/JobScheduler.java index dfc12d884..f121192d3 100644 --- a/src/main/java/org/alliancegenome/curation_api/jobs/JobScheduler.java +++ b/src/main/java/org/alliancegenome/curation_api/jobs/JobScheduler.java @@ -109,9 +109,7 @@ public void init() { } } -/* @Scheduled(every = "1s") -*/ public void scheduleCronGroupJobs() { if (loadSchedulingEnabled) { if (sem.tryAcquire()) { From 08975ca60f36947855ad4f2ee2de5d052c70d505 Mon Sep 17 00:00:00 2001 From: Christian Pich Date: Thu, 21 Nov 2024 08:00:30 +0100 Subject: [PATCH 086/118] remove unused imports --- .../ConstructGenomicEntityAssociationService.java | 1 - 1 file changed, 1 deletion(-) diff --git a/src/main/java/org/alliancegenome/curation_api/services/associations/constructAssociations/ConstructGenomicEntityAssociationService.java b/src/main/java/org/alliancegenome/curation_api/services/associations/constructAssociations/ConstructGenomicEntityAssociationService.java index b24a06819..258396355 100644 --- a/src/main/java/org/alliancegenome/curation_api/services/associations/constructAssociations/ConstructGenomicEntityAssociationService.java +++ b/src/main/java/org/alliancegenome/curation_api/services/associations/constructAssociations/ConstructGenomicEntityAssociationService.java @@ -24,7 +24,6 @@ import org.alliancegenome.curation_api.services.validation.associations.constructAssociations.ConstructGenomicEntityAssociationValidator; import org.alliancegenome.curation_api.services.validation.dto.associations.constructAssociations.ConstructGenomicEntityAssociationDTOValidator; -import java.time.OffsetDateTime; import java.util.*; import java.util.stream.Collectors; From 84bba2d1b664ba1215a2b2c3bd0917af56ec5b57 Mon Sep 17 00:00:00 2001 From: Christian Pich Date: Thu, 21 Nov 2024 10:44:11 
+0100 Subject: [PATCH 087/118] remove unused imports --- .../ConstructGenomicEntityAssociation.java | 43 +++++++------------ 1 file changed, 15 insertions(+), 28 deletions(-) diff --git a/src/main/java/org/alliancegenome/curation_api/model/entities/associations/constructAssociations/ConstructGenomicEntityAssociation.java b/src/main/java/org/alliancegenome/curation_api/model/entities/associations/constructAssociations/ConstructGenomicEntityAssociation.java index 87e0bdef8..90bc0ec44 100644 --- a/src/main/java/org/alliancegenome/curation_api/model/entities/associations/constructAssociations/ConstructGenomicEntityAssociation.java +++ b/src/main/java/org/alliancegenome/curation_api/model/entities/associations/constructAssociations/ConstructGenomicEntityAssociation.java @@ -1,14 +1,14 @@ package org.alliancegenome.curation_api.model.entities.associations.constructAssociations; -import java.util.List; - +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import com.fasterxml.jackson.annotation.JsonView; +import jakarta.persistence.*; +import lombok.Data; +import lombok.EqualsAndHashCode; +import lombok.ToString; import org.alliancegenome.curation_api.constants.LinkMLSchemaConstants; import org.alliancegenome.curation_api.interfaces.AGRCurationSchemaVersion; -import org.alliancegenome.curation_api.model.entities.Construct; -import org.alliancegenome.curation_api.model.entities.EvidenceAssociation; -import org.alliancegenome.curation_api.model.entities.GenomicEntity; -import org.alliancegenome.curation_api.model.entities.Note; -import org.alliancegenome.curation_api.model.entities.VocabularyTerm; +import org.alliancegenome.curation_api.model.entities.*; import org.alliancegenome.curation_api.view.View; import org.eclipse.microprofile.openapi.annotations.media.Schema; import org.hibernate.annotations.Fetch; @@ -17,26 +17,13 @@ import org.hibernate.search.mapper.pojo.mapping.definition.annotation.IndexedEmbedded; import 
org.hibernate.search.mapper.pojo.mapping.definition.annotation.IndexingDependency; -import com.fasterxml.jackson.annotation.JsonIgnoreProperties; -import com.fasterxml.jackson.annotation.JsonView; - -import jakarta.persistence.CascadeType; -import jakarta.persistence.Entity; -import jakarta.persistence.Index; -import jakarta.persistence.JoinColumn; -import jakarta.persistence.JoinTable; -import jakarta.persistence.ManyToOne; -import jakarta.persistence.OneToMany; -import jakarta.persistence.Table; -import lombok.Data; -import lombok.EqualsAndHashCode; -import lombok.ToString; +import java.util.List; @Entity @Data @EqualsAndHashCode(onlyExplicitlyIncluded = true, callSuper = true) @ToString(callSuper = false) -@AGRCurationSchemaVersion(min = "2.2.0", max = LinkMLSchemaConstants.LATEST_RELEASE, dependencies = { EvidenceAssociation.class }) +@AGRCurationSchemaVersion(min = "2.2.0", max = LinkMLSchemaConstants.LATEST_RELEASE, dependencies = {EvidenceAssociation.class}) @Schema(name = "ConstructGenomicEntityAssociation", description = "POJO representing an association between a construct and a genomic entity") @Table(indexes = { @@ -57,21 +44,21 @@ public class ConstructGenomicEntityAssociation extends EvidenceAssociation { "curie_keyword", "constructSymbol.displayText_keyword", "constructSymbol.formatText_keyword", "constructFullName.displayText_keyword", "constructFullName.formatText_keyword", "modEntityId_keyword", "modInternalId_keyword"}) @ManyToOne - @JsonView({ View.FieldsOnly.class }) + @JsonView({View.FieldsOnly.class}) @JsonIgnoreProperties("constructGenomicEntityAssociations") @Fetch(FetchMode.JOIN) private Construct constructAssociationSubject; - + @IndexedEmbedded(includePaths = {"name", "name_keyword"}) @IndexingDependency(reindexOnUpdate = ReindexOnUpdate.SHALLOW) @ManyToOne - @JsonView({ View.FieldsOnly.class }) + @JsonView({View.FieldsOnly.class}) private VocabularyTerm relation; - + @IndexedEmbedded(includeDepth = 1) @IndexingDependency(reindexOnUpdate 
= ReindexOnUpdate.SHALLOW) @ManyToOne - @JsonView({ View.FieldsOnly.class }) + @JsonView({View.FieldsOnly.class}) @JsonIgnoreProperties({ "alleleGeneAssociations", "constructGenomicEntityAssociations", "sequenceTargetingReagentGeneAssociations", "transcriptGenomicLocationAssociations", "exonGenomicLocationAssociations", "codingSequenceGenomicLocationAssociations", @@ -85,7 +72,7 @@ public class ConstructGenomicEntityAssociation extends EvidenceAssociation { }) @IndexingDependency(reindexOnUpdate = ReindexOnUpdate.SHALLOW) @OneToMany(cascade = CascadeType.ALL, orphanRemoval = true) - @JsonView({ View.FieldsAndLists.class, View.ConstructView.class }) + @JsonView({View.FieldsAndLists.class, View.ConstructView.class}) @JoinTable( joinColumns = @JoinColumn(name = "constructgenomicentityassociation_id"), inverseJoinColumns = @JoinColumn(name = "relatedNotes_id"), From 8bdc2667f01737eeffb1cf8f2e038e72a8d69a32 Mon Sep 17 00:00:00 2001 From: Andres Becerra Date: Thu, 21 Nov 2024 10:25:10 +0000 Subject: [PATCH 088/118] SCRUM-3953: change after review --- .../dto/fms/GeneExpressionAnnotationFmsDTOValidator.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/GeneExpressionAnnotationFmsDTOValidator.java b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/GeneExpressionAnnotationFmsDTOValidator.java index ec6a8a36f..89d849ac1 100644 --- a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/GeneExpressionAnnotationFmsDTOValidator.java +++ b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/GeneExpressionAnnotationFmsDTOValidator.java @@ -48,7 +48,6 @@ public class GeneExpressionAnnotationFmsDTOValidator { public GeneExpressionAnnotation validateAnnotation(GeneExpressionFmsDTO geneExpressionFmsDTO, BackendBulkDataProvider dataProvider, Map> experiments) throws ValidationException { ObjectResponse response = new 
ObjectResponse<>(); GeneExpressionAnnotation geneExpressionAnnotation = new GeneExpressionAnnotation(); - geneExpressionAnnotation.setExpressionPattern(new ExpressionPattern()); String uniqueId = "empty"; String referenceCurie = "empty"; @@ -64,12 +63,13 @@ public GeneExpressionAnnotation validateAnnotation(GeneExpressionFmsDTO geneExpr } else { geneExpressionAnnotation.setUniqueId(uniqueId); } - if (geneExpressionAnnotation.getExpressionPattern() == null) { - geneExpressionAnnotation.setExpressionPattern(new ExpressionPattern()); - } geneExpressionAnnotation.setSingleReference(singleReferenceResponse.getEntity()); } + if (geneExpressionAnnotation.getExpressionPattern() == null) { + geneExpressionAnnotation.setExpressionPattern(new ExpressionPattern()); + } + if (ObjectUtils.isEmpty(geneExpressionFmsDTO.getGeneId())) { response.addErrorMessage("geneId - ", ValidationConstants.REQUIRED_MESSAGE + " (" + geneExpressionFmsDTO.getGeneId() + ")"); } else { From 1588444c9669b6e5beb57bd7e36151d316f04e30 Mon Sep 17 00:00:00 2001 From: Christian Pich Date: Thu, 21 Nov 2024 11:34:02 +0100 Subject: [PATCH 089/118] remove unused imports --- .../model/entities/GenomicEntity.java | 26 +++++++------------ 1 file changed, 9 insertions(+), 17 deletions(-) diff --git a/src/main/java/org/alliancegenome/curation_api/model/entities/GenomicEntity.java b/src/main/java/org/alliancegenome/curation_api/model/entities/GenomicEntity.java index 8c7519081..8e2486814 100644 --- a/src/main/java/org/alliancegenome/curation_api/model/entities/GenomicEntity.java +++ b/src/main/java/org/alliancegenome/curation_api/model/entities/GenomicEntity.java @@ -1,31 +1,24 @@ package org.alliancegenome.curation_api.model.entities; -import java.util.List; - +import com.fasterxml.jackson.annotation.JsonView; +import jakarta.persistence.*; +import lombok.Data; +import lombok.EqualsAndHashCode; +import lombok.ToString; import org.alliancegenome.curation_api.constants.LinkMLSchemaConstants; import 
org.alliancegenome.curation_api.interfaces.AGRCurationSchemaVersion; -import org.alliancegenome.curation_api.model.entities.associations.constructAssociations.ConstructGenomicEntityAssociation; import org.alliancegenome.curation_api.view.View; import org.hibernate.search.mapper.pojo.automaticindexing.ReindexOnUpdate; import org.hibernate.search.mapper.pojo.mapping.definition.annotation.IndexedEmbedded; import org.hibernate.search.mapper.pojo.mapping.definition.annotation.IndexingDependency; -import com.fasterxml.jackson.annotation.JsonView; - -import jakarta.persistence.CascadeType; -import jakarta.persistence.Entity; -import jakarta.persistence.Index; -import jakarta.persistence.JoinTable; -import jakarta.persistence.OneToMany; -import lombok.Data; -import lombok.EqualsAndHashCode; -import lombok.ToString; +import java.util.List; @Entity @Data @EqualsAndHashCode(onlyExplicitlyIncluded = true, callSuper = true) -@ToString(exclude = { "constructGenomicEntityAssociations" }, callSuper = true) -@AGRCurationSchemaVersion(min = "1.5.0", max = LinkMLSchemaConstants.LATEST_RELEASE, dependencies = { BiologicalEntity.class }) +@ToString(exclude = {"constructGenomicEntityAssociations"}, callSuper = true) +@AGRCurationSchemaVersion(min = "1.5.0", max = LinkMLSchemaConstants.LATEST_RELEASE, dependencies = {BiologicalEntity.class}) public class GenomicEntity extends BiologicalEntity { @IndexedEmbedded(includePaths = {"referencedCurie", "displayName", "resourceDescriptorPage.name", "referencedCurie_keyword", "displayName_keyword", "resourceDescriptorPage.name_keyword"}) @@ -37,9 +30,8 @@ public class GenomicEntity extends BiologicalEntity { @Index(columnList = "crossreferences_id", name = "genomicentity_crossreference_crossreferences_index") }) @EqualsAndHashCode.Include - @JsonView({ View.FieldsAndLists.class, View.AlleleView.class, View.GeneView.class, View.AffectedGenomicModelView.class, View.VariantView.class }) + @JsonView({View.FieldsAndLists.class, View.AlleleView.class, 
View.GeneView.class, View.AffectedGenomicModelView.class, View.VariantView.class}) private List crossReferences; - } From 39f8f39714aa178b2a4241e3711fcfac66042410 Mon Sep 17 00:00:00 2001 From: VarunReddy1111 Date: Wed, 20 Nov 2024 19:11:13 -0600 Subject: [PATCH 090/118] Adding integration tests --- ...atasetSampleAnnotationFmsDTOValidator.java | 7 +- ...ssionDatasetSampleAnnotationFmsITCase.java | 152 ++++++++++++++++++ .../curation_api/base/BaseITCase.java | 25 +++ .../AF_01_all_fields.json | 69 ++++++++ ...R_01_empty_sample_id_and_sample_title.json | 69 ++++++++ .../ER_02_empty_sample_type.json | 69 ++++++++ ...empty_biosample_id_and_biosample_text.json | 70 ++++++++ .../ER_04_empty_assay_type.json | 69 ++++++++ .../ER_05_empty_dataset_ids.json | 67 ++++++++ .../IV_01_invalid_sex.json | 69 ++++++++ .../IV_02_invalid_assay_type.json | 69 ++++++++ .../IV_03_invalid_sample_type.json | 69 ++++++++ .../IV_04_invalid_taxon_id.json | 69 ++++++++ ...alid_genomic_information_biosample_id.json | 69 ++++++++ .../MR_01_no_sample_id_and_sample_title.json | 65 ++++++++ .../MR_02_no_sample_type.json | 68 ++++++++ ...03_no_biosample_id_and_biosample_text.json | 68 ++++++++ .../MR_04_no_assay_type.json | 68 ++++++++ .../MR_05_no_dataset_ids.json | 66 ++++++++ ...E_01_update_empty_non_required_fields.json | 39 +++++ .../UM_01_update_no_non_required_fields.json | 31 ++++ 21 files changed, 1343 insertions(+), 4 deletions(-) create mode 100644 src/test/java/org/alliancegenome/curation_api/HTPExpressionDatasetSampleAnnotationFmsITCase.java create mode 100644 src/test/resources/bulk/fms/12_htp_dataset_sample/AF_01_all_fields.json create mode 100644 src/test/resources/bulk/fms/12_htp_dataset_sample/ER_01_empty_sample_id_and_sample_title.json create mode 100644 src/test/resources/bulk/fms/12_htp_dataset_sample/ER_02_empty_sample_type.json create mode 100644 src/test/resources/bulk/fms/12_htp_dataset_sample/ER_03_empty_biosample_id_and_biosample_text.json create mode 100644 
src/test/resources/bulk/fms/12_htp_dataset_sample/ER_04_empty_assay_type.json create mode 100644 src/test/resources/bulk/fms/12_htp_dataset_sample/ER_05_empty_dataset_ids.json create mode 100644 src/test/resources/bulk/fms/12_htp_dataset_sample/IV_01_invalid_sex.json create mode 100644 src/test/resources/bulk/fms/12_htp_dataset_sample/IV_02_invalid_assay_type.json create mode 100644 src/test/resources/bulk/fms/12_htp_dataset_sample/IV_03_invalid_sample_type.json create mode 100644 src/test/resources/bulk/fms/12_htp_dataset_sample/IV_04_invalid_taxon_id.json create mode 100644 src/test/resources/bulk/fms/12_htp_dataset_sample/IV_05_invalid_genomic_information_biosample_id.json create mode 100644 src/test/resources/bulk/fms/12_htp_dataset_sample/MR_01_no_sample_id_and_sample_title.json create mode 100644 src/test/resources/bulk/fms/12_htp_dataset_sample/MR_02_no_sample_type.json create mode 100644 src/test/resources/bulk/fms/12_htp_dataset_sample/MR_03_no_biosample_id_and_biosample_text.json create mode 100644 src/test/resources/bulk/fms/12_htp_dataset_sample/MR_04_no_assay_type.json create mode 100644 src/test/resources/bulk/fms/12_htp_dataset_sample/MR_05_no_dataset_ids.json create mode 100644 src/test/resources/bulk/fms/12_htp_dataset_sample/UE_01_update_empty_non_required_fields.json create mode 100644 src/test/resources/bulk/fms/12_htp_dataset_sample/UM_01_update_no_non_required_fields.json diff --git a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/HTPExpressionDatasetSampleAnnotationFmsDTOValidator.java b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/HTPExpressionDatasetSampleAnnotationFmsDTOValidator.java index 83f9f3fee..e5eabcb9b 100644 --- a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/HTPExpressionDatasetSampleAnnotationFmsDTOValidator.java +++ 
b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/HTPExpressionDatasetSampleAnnotationFmsDTOValidator.java @@ -88,7 +88,6 @@ public HTPExpressionDatasetSampleAnnotation validateHTPExpressionDatasetSampleAn htpSampleAnnotation = searchResponse.getSingleResult(); } } else { - htpSampleAnnotationResponse.addErrorMessage("SampleId", ValidationConstants.INVALID_MESSAGE + " (" + curie + ")"); htpSampleAnnotation = new HTPExpressionDatasetSampleAnnotation(); } } else { @@ -190,7 +189,7 @@ public HTPExpressionDatasetSampleAnnotation validateHTPExpressionDatasetSampleAn boolean added = false; if (searchResponse.getTotalResults() > 0) { for (VocabularyTerm tag : searchResponse.getResults()) { - if (tag.getVocabulary().getName().equals("Genetic Sex") && (tag.getName().equals(dto.getSex()) || tag.getSynonyms().contains(dto.getSex()))) { + if (tag.getVocabulary().getVocabularyLabel().equals("genetic_sex") && (tag.getName().equals(dto.getSex()) || tag.getSynonyms().contains(dto.getSex()))) { htpSampleAnnotation.setGeneticSex(tag); added = true; } @@ -243,14 +242,14 @@ public HTPExpressionDatasetSampleAnnotation validateHTPExpressionDatasetSampleAn if (StringUtils.isNotEmpty(dto.getTaxonId())) { ObjectResponse taxonResponse = ncbiTaxonTermService.getByCurie(dto.getTaxonId()); - if (backendBulkDataProvider != null && (backendBulkDataProvider.name().equals("RGD") || backendBulkDataProvider.name().equals("HUMAN")) && !taxonResponse.getEntity().getCurie().equals(backendBulkDataProvider.canonicalTaxonCurie)) { + if (backendBulkDataProvider != null && (backendBulkDataProvider.name().equals("RGD") || backendBulkDataProvider.name().equals("HUMAN")) && !taxonResponse.getEntity().getCurie().equals(backendBulkDataProvider.canonicalTaxonCurie) || taxonResponse.getEntity() == null) { htpSampleAnnotationResponse.addErrorMessage("taxonId", ValidationConstants.INVALID_MESSAGE + " (" + dto.getTaxonId() + ") for " + backendBulkDataProvider.name() + " load"); } 
htpSampleAnnotation.setTaxon(taxonResponse.getEntity()); } if (dto.getMicroarraySampleDetails() != null) { - if (htpSampleAnnotation.getMicroarraySampleDetails() == null) { + if (htpSampleAnnotation.getMicroarraySampleDetails() == null && (dto.getMicroarraySampleDetails().getChannelId() != null || dto.getMicroarraySampleDetails().getChannelNum() != null)) { htpSampleAnnotation.setMicroarraySampleDetails(new MicroarraySampleDetails()); } if (StringUtils.isNotEmpty(dto.getMicroarraySampleDetails().getChannelId())) { diff --git a/src/test/java/org/alliancegenome/curation_api/HTPExpressionDatasetSampleAnnotationFmsITCase.java b/src/test/java/org/alliancegenome/curation_api/HTPExpressionDatasetSampleAnnotationFmsITCase.java new file mode 100644 index 000000000..e7458c4f8 --- /dev/null +++ b/src/test/java/org/alliancegenome/curation_api/HTPExpressionDatasetSampleAnnotationFmsITCase.java @@ -0,0 +1,152 @@ +package org.alliancegenome.curation_api; + +import org.alliancegenome.curation_api.base.BaseITCase; +import org.alliancegenome.curation_api.resources.TestContainerResource; +import org.junit.jupiter.api.Order; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.MethodOrderer; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.TestInstance; +import org.junit.jupiter.api.TestMethodOrder; + +import io.quarkus.test.common.QuarkusTestResource; +import io.quarkus.test.junit.QuarkusIntegrationTest; +import io.restassured.RestAssured; +import io.restassured.config.HttpClientConfig; +import io.restassured.config.RestAssuredConfig; + +import static org.hamcrest.Matchers.hasSize; +import static org.hamcrest.Matchers.hasKey; +import static org.hamcrest.Matchers.is; +import static org.hamcrest.Matchers.not; + +@QuarkusIntegrationTest +@QuarkusTestResource(TestContainerResource.Initializer.class) +@TestMethodOrder(MethodOrderer.OrderAnnotation.class) +@TestInstance(TestInstance.Lifecycle.PER_CLASS) 
+@DisplayName("612 - HTPExpressionDatasetSampleAnnotation bulk upload - FMS") +@Order(612) +public class HTPExpressionDatasetSampleAnnotationFmsITCase extends BaseITCase { + + @BeforeEach + public void init() { + RestAssured.config = RestAssuredConfig.config() + .httpClient(HttpClientConfig.httpClientConfig() + .setParam("http.socket.timeout", 100000) + .setParam("http.connection.timeout", 100000)); + } + + private final String htpDatasetSampleBulkPostEndpoint = "/api/htpexpressiondatasetsampleannotation/bulk/FB/htpexpressiondatasetsampleannotationfile"; + private final String htpDatasetSampleTestFilePath = "src/test/resources/bulk/fms/12_htp_dataset_sample/"; + private final String htpDatasetSampleFindEndpoint = "/api/htpexpressiondatasetsampleannotation/find?limit=100&page=0"; + private final String mmoTerm = "HTPMMO:assay001"; + private final String obiTermCurie = "HTPOBI:sample001"; + + + private void loadRequiredEntities() throws Exception { + createMmoTerm(mmoTerm, "assay001"); + createObiTerm(obiTermCurie, "sample001"); + } + + @Test + @Order(1) + public void htpDatasetSampleBulkUploadCheckFields() throws Exception { + loadRequiredEntities(); + checkSuccessfulBulkLoad(htpDatasetSampleBulkPostEndpoint, htpDatasetSampleTestFilePath + "AF_01_all_fields.json"); + + RestAssured.given().when().header("Content-Type", "application/json").body("{}") + .post(htpDatasetSampleFindEndpoint) + .then().statusCode(200).body("totalResults", is(1)).body("results", hasSize(1)) + .body("results[0].htpExpressionSample.curie", is("GEO:GSE38764")) + .body("results[0].datasetIds", hasSize(1)) + .body("results[0].datasetIds[0].curie", is("GEO:GSE38764")) + .body("results[0].htpExpressionSampleTitle", is("TEST TITLE")) + .body("results[0].expressionAssayUsed.curie", is("HTPMMO:assay001")) + .body("results[0].htpExpressionSampleType.curie", is("HTPOBI:sample001")) + .body("results[0].htpExpressionSampleAge.stage.developmentalStageStart.curie", is("ZFS:001")) + 
.body("results[0].genomicInformation.bioSampleAllele.modEntityId", is("AGA:Allele0001")) + .body("results[0].microarraySampleDetails.channelId", is("WB:[cgc4349]:fem-3:A")) + .body("results[0].microarraySampleDetails.channelNumber", is(1)) + .body("results[0].geneticSex.name", is("hermaphrodite")) + .body("results[0].sequencingFormat.name", is("single")) + .body("results[0].htpExpressionSampleLocations[0].anatomicalStructure.curie", is("ANAT:001")) + .body("results[0].htpExpressionSampleLocations[0].anatomicalSubstructure.curie", is("ANAT:002")) + .body("results[0].htpExpressionSampleLocations[0].cellularComponentTerm.curie", is("GOTEST:0012")) + .body("results[0].htpExpressionSampleLocations[0].cellularComponentRibbonTerm.curie", is("GOSLIMTEST:0012")) + .body("results[0].taxon.curie", is("NCBITaxon:6239")) + .body("results[0].abundance", is("abundance test")) + .body("results[0].assemblyVersions[0]", is("Zv9")); + } + + @Test + @Order(2) + public void htpDatasetSampleBulkUploadMissingRequiredFields() throws Exception { + + checkFailedBulkLoad(htpDatasetSampleBulkPostEndpoint, htpDatasetSampleTestFilePath + "MR_01_no_sample_id_and_sample_title.json"); + checkFailedBulkLoad(htpDatasetSampleBulkPostEndpoint, htpDatasetSampleTestFilePath + "MR_02_no_sample_type.json"); + checkFailedBulkLoad(htpDatasetSampleBulkPostEndpoint, htpDatasetSampleTestFilePath + "MR_03_no_biosample_id_and_biosample_text.json"); + checkFailedBulkLoad(htpDatasetSampleBulkPostEndpoint, htpDatasetSampleTestFilePath + "MR_04_no_assay_type.json"); + checkFailedBulkLoad(htpDatasetSampleBulkPostEndpoint, htpDatasetSampleTestFilePath + "MR_05_no_dataset_ids.json"); + } + + @Test + @Order(3) + public void htpDatasetSampleBulkUploadEmptyRequiredFields() throws Exception { + + checkFailedBulkLoad(htpDatasetSampleBulkPostEndpoint, htpDatasetSampleTestFilePath + "ER_01_empty_sample_id_and_sample_title.json"); + checkFailedBulkLoad(htpDatasetSampleBulkPostEndpoint, htpDatasetSampleTestFilePath + 
"ER_02_empty_sample_type.json"); + checkFailedBulkLoad(htpDatasetSampleBulkPostEndpoint, htpDatasetSampleTestFilePath + "ER_03_empty_biosample_id_and_biosample_text.json"); + checkFailedBulkLoad(htpDatasetSampleBulkPostEndpoint, htpDatasetSampleTestFilePath + "ER_04_empty_assay_type.json"); + checkFailedBulkLoad(htpDatasetSampleBulkPostEndpoint, htpDatasetSampleTestFilePath + "ER_05_empty_dataset_ids.json"); + } + + @Test + @Order(4) + public void htpDatasetSampleBulkUploadInvalidFields() throws Exception { + + checkFailedBulkLoad(htpDatasetSampleBulkPostEndpoint, htpDatasetSampleTestFilePath + "IV_01_invalid_sex.json"); + checkFailedBulkLoad(htpDatasetSampleBulkPostEndpoint, htpDatasetSampleTestFilePath + "IV_02_invalid_assay_type.json"); + checkFailedBulkLoad(htpDatasetSampleBulkPostEndpoint, htpDatasetSampleTestFilePath + "IV_03_invalid_sample_type.json"); + checkFailedBulkLoad(htpDatasetSampleBulkPostEndpoint, htpDatasetSampleTestFilePath + "IV_04_invalid_taxon_id.json"); + checkFailedBulkLoad(htpDatasetSampleBulkPostEndpoint, htpDatasetSampleTestFilePath + "IV_05_invalid_genomic_information_biosample_id.json"); + } + + @Test + @Order(5) + public void htpDatasetSampleBulkUploadUpdateMissingNonRequiredFields() throws Exception { + checkSuccessfulBulkLoad(htpDatasetSampleBulkPostEndpoint, htpDatasetSampleTestFilePath + "UM_01_update_no_non_required_fields.json"); + + RestAssured.given().when().header("Content-Type", "application/json").body("{}") + .post(htpDatasetSampleFindEndpoint).then().statusCode(200).body("totalResults", is(1)) + .body("results", hasSize(1)) + .body("results[0]", not(hasKey("abundance"))) + .body("results[0]", not(hasKey("htpExpressionSampleTitle"))) + .body("results[0]", not(hasKey("taxon"))) + .body("results[0]", not(hasKey("geneticSex"))) + .body("results[0]", not(hasKey("sequencingFormat"))) + .body("results[0]", not(hasKey("microarraySampleDetails"))) + .body("results[0]", not(hasKey("htpExpressionSampleLocations"))); + } + + @Test + 
@Order(6) + public void htpDatasetSampleBulkUploadUpdateEmptyNonRequiredFields() throws Exception { + + checkSuccessfulBulkLoad(htpDatasetSampleBulkPostEndpoint, htpDatasetSampleTestFilePath + "UE_01_update_empty_non_required_fields.json"); + + RestAssured.given().when().header("Content-Type", "application/json").body("{}") + .post(htpDatasetSampleFindEndpoint).then().statusCode(200).body("totalResults", is(1)) + .body("results", hasSize(1)) + .body("results[0]", not(hasKey("htpExpressionSampleAge"))) + .body("results[0]", not(hasKey("abundance"))) + .body("results[0]", not(hasKey("htpExpressionSampleTitle"))) + .body("results[0]", not(hasKey("taxon"))) + .body("results[0]", not(hasKey("geneticSex"))) + .body("results[0]", not(hasKey("sequencingFormat"))) + .body("results[0]", not(hasKey("microarraySampleDetails"))) + .body("results[0]", not(hasKey("htpExpressionSampleLocations"))); + } + + +} diff --git a/src/test/java/org/alliancegenome/curation_api/base/BaseITCase.java b/src/test/java/org/alliancegenome/curation_api/base/BaseITCase.java index 34bb40b0a..bb0b3b981 100644 --- a/src/test/java/org/alliancegenome/curation_api/base/BaseITCase.java +++ b/src/test/java/org/alliancegenome/curation_api/base/BaseITCase.java @@ -50,6 +50,7 @@ import org.alliancegenome.curation_api.model.entities.ontology.GOTerm; import org.alliancegenome.curation_api.model.entities.ontology.MITerm; import org.alliancegenome.curation_api.model.entities.ontology.MMOTerm; +import org.alliancegenome.curation_api.model.entities.ontology.OBITerm; import org.alliancegenome.curation_api.model.entities.ontology.MPTerm; import org.alliancegenome.curation_api.model.entities.ontology.NCBITaxonTerm; import org.alliancegenome.curation_api.model.entities.ontology.OntologyTerm; @@ -604,6 +605,25 @@ public MMOTerm createMmoTerm(String curie, String name) throws Exception { return response.getEntity(); } + public OBITerm createObiTerm(String curie, String name) throws Exception { + OBITerm obiTerm = new 
OBITerm(); + obiTerm.setCurie(curie); + obiTerm.setName(name); + obiTerm.setObsolete(false); + obiTerm.setSecondaryIdentifiers(List.of(curie + "secondary")); + + ObjectResponse response = RestAssured.given(). + contentType("application/json"). + body(obiTerm). + when(). + put("/api/obiterm"). + then(). + statusCode(200). + extract().body().as(getObjectResponseTypeRefOBITerm()); + + return response.getEntity(); + } + public MPTerm createMpTerm(String curie, String name) { return createMpTerm(curie, name, false); } @@ -1308,6 +1328,11 @@ private TypeRef> getObjectResponseTypeRefMMOTerm() { }; } + private TypeRef> getObjectResponseTypeRefOBITerm() { + return new TypeRef>() { + }; + } + private TypeRef> getObjectResponseTypeRefMPTerm() { return new TypeRef>() { }; diff --git a/src/test/resources/bulk/fms/12_htp_dataset_sample/AF_01_all_fields.json b/src/test/resources/bulk/fms/12_htp_dataset_sample/AF_01_all_fields.json new file mode 100644 index 000000000..334f007dc --- /dev/null +++ b/src/test/resources/bulk/fms/12_htp_dataset_sample/AF_01_all_fields.json @@ -0,0 +1,69 @@ +{ + "data": [ + { + "datasetIds" : [ + "GEO:GSE38764" + ], + "sampleId" : { + "primaryId" : "GEO:GSE38764" + }, + "assayType" : "HTPMMO:assay001", + "sampleType" : "HTPOBI:sample001", + "sampleAge" : { + "stage" : { + "stageName" : "stage1", + "stageTermId" : "ZFS:001", + "stageUberonSlimTerm" : { + "uberonTerm" : "UBERON:001" + } + } + }, + "abundance" : "abundance test", + "sampleTitle" : "TEST TITLE", + "taxonId" : "NCBITaxon:6239", + "genomicInformation" : { + "biosampleId" : "AGA:Allele0001", + "idType" : "fish" + }, + "sex" : "hermaphrodite", + "assemblyVersions" : [ "Zv9" ], + "sequencingFormat" : "single", + "microarraySampleDetails" : { + "channelId" : "WB:[cgc4349]:fem-3:A", + "channelNum" : 1 + }, + "sampleLocations" : [ + { + "whereExpressedStatement" : "trunk", + "anatomicalStructureTermId" : "ANAT:001", + "anatomicalSubStructureTermId" : "ANAT:002", + "cellularComponentTermId" : 
"GOTEST:0012", + "anatomicalStructureQualifierTermId": "UBERON:002", + "anatomicalSubStructureQualifierTermId": "UBERON:003", + "cellularComponentQualifierTermId": "FBCV:001", + "anatomicalStructureUberonSlimTermIds" : [ + {"uberonTerm" : "UBERON:004"}, + {"uberonTerm" : "UBERON:005"} + ], + "anatomicalSubStructureUberonSlimTermIds" : [ + {"uberonTerm" : "UBERON:006"}, + {"uberonTerm" : "UBERON:007"} + ] + } + ] + } + ], + "metaData": { + "dataProvider": { + "crossReference": { + "id": "FB", + "pages": [ + "homepage" + ] + }, + "type": "curated" + }, + "dateProduced": "2024-07-12T12:36:21-04:00", + "release": "2024_03" + } +} \ No newline at end of file diff --git a/src/test/resources/bulk/fms/12_htp_dataset_sample/ER_01_empty_sample_id_and_sample_title.json b/src/test/resources/bulk/fms/12_htp_dataset_sample/ER_01_empty_sample_id_and_sample_title.json new file mode 100644 index 000000000..8cd99c768 --- /dev/null +++ b/src/test/resources/bulk/fms/12_htp_dataset_sample/ER_01_empty_sample_id_and_sample_title.json @@ -0,0 +1,69 @@ +{ + "data": [ + { + "datasetIds" : [ + "GEO:GSE38764" + ], + "sampleId" : { + + }, + "assayType" : "HTPMMO:assay001", + "sampleType" : "HTPOBI:sample001", + "sampleAge" : { + "stage" : { + "stageName" : "stage1", + "stageTermId" : "ZFS:001", + "stageUberonSlimTerm" : { + "uberonTerm" : "UBERON:001" + } + } + }, + "abundance" : "abundance test", + "sampleTitle" : "", + "taxonId" : "NCBITaxon:6239", + "genomicInformation" : { + "biosampleId" : "AGA:Allele0001", + "idType" : "fish" + }, + "sex" : "hermaphrodite", + "assemblyVersions" : [ "Zv9" ], + "sequencingFormat" : "single", + "microarraySampleDetails" : { + "channelId" : "WB:[cgc4349]:fem-3:A", + "channelNum" : 1 + }, + "sampleLocations" : [ + { + "whereExpressedStatement" : "trunk", + "anatomicalStructureTermId" : "ANAT:001", + "anatomicalSubStructureTermId" : "ANAT:002", + "cellularComponentTermId" : "GOTEST:0012", + "anatomicalStructureQualifierTermId": "UBERON:002", + 
"anatomicalSubStructureQualifierTermId": "UBERON:003", + "cellularComponentQualifierTermId": "FBCV:001", + "anatomicalStructureUberonSlimTermIds" : [ + {"uberonTerm" : "UBERON:004"}, + {"uberonTerm" : "UBERON:005"} + ], + "anatomicalSubStructureUberonSlimTermIds" : [ + {"uberonTerm" : "UBERON:006"}, + {"uberonTerm" : "UBERON:007"} + ] + } + ] + } + ], + "metaData": { + "dataProvider": { + "crossReference": { + "id": "FB", + "pages": [ + "homepage" + ] + }, + "type": "curated" + }, + "dateProduced": "2024-07-12T12:36:21-04:00", + "release": "2024_03" + } +} \ No newline at end of file diff --git a/src/test/resources/bulk/fms/12_htp_dataset_sample/ER_02_empty_sample_type.json b/src/test/resources/bulk/fms/12_htp_dataset_sample/ER_02_empty_sample_type.json new file mode 100644 index 000000000..4b06ff68b --- /dev/null +++ b/src/test/resources/bulk/fms/12_htp_dataset_sample/ER_02_empty_sample_type.json @@ -0,0 +1,69 @@ +{ + "data": [ + { + "datasetIds" : [ + "GEO:GSE38764" + ], + "sampleId" : { + "primaryId" : "GEO:GSE38764" + }, + "assayType" : "HTPMMO:assay001", + "sampleType" : "", + "sampleAge" : { + "stage" : { + "stageName" : "stage1", + "stageTermId" : "ZFS:001", + "stageUberonSlimTerm" : { + "uberonTerm" : "UBERON:001" + } + } + }, + "abundance" : "abundance test", + "sampleTitle" : "TEST TITLE", + "taxonId" : "NCBITaxon:6239", + "genomicInformation" : { + "biosampleId" : "AGA:Allele0001", + "idType" : "fish" + }, + "sex" : "hermaphrodite", + "assemblyVersions" : [ "Zv9" ], + "sequencingFormat" : "single", + "microarraySampleDetails" : { + "channelId" : "WB:[cgc4349]:fem-3:A", + "channelNum" : 1 + }, + "sampleLocations" : [ + { + "whereExpressedStatement" : "trunk", + "anatomicalStructureTermId" : "ANAT:001", + "anatomicalSubStructureTermId" : "ANAT:002", + "cellularComponentTermId" : "GOTEST:0012", + "anatomicalStructureQualifierTermId": "UBERON:002", + "anatomicalSubStructureQualifierTermId": "UBERON:003", + "cellularComponentQualifierTermId": "FBCV:001", + 
"anatomicalStructureUberonSlimTermIds" : [ + {"uberonTerm" : "UBERON:004"}, + {"uberonTerm" : "UBERON:005"} + ], + "anatomicalSubStructureUberonSlimTermIds" : [ + {"uberonTerm" : "UBERON:006"}, + {"uberonTerm" : "UBERON:007"} + ] + } + ] + } + ], + "metaData": { + "dataProvider": { + "crossReference": { + "id": "FB", + "pages": [ + "homepage" + ] + }, + "type": "curated" + }, + "dateProduced": "2024-07-12T12:36:21-04:00", + "release": "2024_03" + } +} \ No newline at end of file diff --git a/src/test/resources/bulk/fms/12_htp_dataset_sample/ER_03_empty_biosample_id_and_biosample_text.json b/src/test/resources/bulk/fms/12_htp_dataset_sample/ER_03_empty_biosample_id_and_biosample_text.json new file mode 100644 index 000000000..317d58e59 --- /dev/null +++ b/src/test/resources/bulk/fms/12_htp_dataset_sample/ER_03_empty_biosample_id_and_biosample_text.json @@ -0,0 +1,70 @@ +{ + "data": [ + { + "datasetIds" : [ + "GEO:GSE38764" + ], + "sampleId" : { + "primaryId" : "GEO:GSE38764" + }, + "assayType" : "HTPMMO:assay001", + "sampleType" : "HTPOBI:sample001", + "sampleAge" : { + "stage" : { + "stageName" : "stage1", + "stageTermId" : "ZFS:001", + "stageUberonSlimTerm" : { + "uberonTerm" : "UBERON:001" + } + } + }, + "abundance" : "abundance test", + "sampleTitle" : "TEST TITLE", + "taxonId" : "NCBITaxon:6239", + "genomicInformation" : { + "biosampleId" : "", + "idType" : "fish", + "bioSampleText": "" + }, + "sex" : "hermaphrodite", + "assemblyVersions" : [ "Zv9" ], + "sequencingFormat" : "single", + "microarraySampleDetails" : { + "channelId" : "WB:[cgc4349]:fem-3:A", + "channelNum" : 1 + }, + "sampleLocations" : [ + { + "whereExpressedStatement" : "trunk", + "anatomicalStructureTermId" : "ANAT:001", + "anatomicalSubStructureTermId" : "ANAT:002", + "cellularComponentTermId" : "GOTEST:0012", + "anatomicalStructureQualifierTermId": "UBERON:002", + "anatomicalSubStructureQualifierTermId": "UBERON:003", + "cellularComponentQualifierTermId": "FBCV:001", + 
"anatomicalStructureUberonSlimTermIds" : [ + {"uberonTerm" : "UBERON:004"}, + {"uberonTerm" : "UBERON:005"} + ], + "anatomicalSubStructureUberonSlimTermIds" : [ + {"uberonTerm" : "UBERON:006"}, + {"uberonTerm" : "UBERON:007"} + ] + } + ] + } + ], + "metaData": { + "dataProvider": { + "crossReference": { + "id": "FB", + "pages": [ + "homepage" + ] + }, + "type": "curated" + }, + "dateProduced": "2024-07-12T12:36:21-04:00", + "release": "2024_03" + } +} \ No newline at end of file diff --git a/src/test/resources/bulk/fms/12_htp_dataset_sample/ER_04_empty_assay_type.json b/src/test/resources/bulk/fms/12_htp_dataset_sample/ER_04_empty_assay_type.json new file mode 100644 index 000000000..71eadb24b --- /dev/null +++ b/src/test/resources/bulk/fms/12_htp_dataset_sample/ER_04_empty_assay_type.json @@ -0,0 +1,69 @@ +{ + "data": [ + { + "datasetIds" : [ + "GEO:GSE38764" + ], + "sampleId" : { + "primaryId" : "GEO:GSE38764" + }, + "assayType" : "", + "sampleType" : "HTPOBI:sample001", + "sampleAge" : { + "stage" : { + "stageName" : "stage1", + "stageTermId" : "ZFS:001", + "stageUberonSlimTerm" : { + "uberonTerm" : "UBERON:001" + } + } + }, + "abundance" : "abundance test", + "sampleTitle" : "TEST TITLE", + "taxonId" : "NCBITaxon:6239", + "genomicInformation" : { + "biosampleId" : "AGA:Allele0001", + "idType" : "fish" + }, + "sex" : "hermaphrodite", + "assemblyVersions" : [ "Zv9" ], + "sequencingFormat" : "single", + "microarraySampleDetails" : { + "channelId" : "WB:[cgc4349]:fem-3:A", + "channelNum" : 1 + }, + "sampleLocations" : [ + { + "whereExpressedStatement" : "trunk", + "anatomicalStructureTermId" : "ANAT:001", + "anatomicalSubStructureTermId" : "ANAT:002", + "cellularComponentTermId" : "GOTEST:0012", + "anatomicalStructureQualifierTermId": "UBERON:002", + "anatomicalSubStructureQualifierTermId": "UBERON:003", + "cellularComponentQualifierTermId": "FBCV:001", + "anatomicalStructureUberonSlimTermIds" : [ + {"uberonTerm" : "UBERON:004"}, + {"uberonTerm" : "UBERON:005"} + 
], + "anatomicalSubStructureUberonSlimTermIds" : [ + {"uberonTerm" : "UBERON:006"}, + {"uberonTerm" : "UBERON:007"} + ] + } + ] + } + ], + "metaData": { + "dataProvider": { + "crossReference": { + "id": "FB", + "pages": [ + "homepage" + ] + }, + "type": "curated" + }, + "dateProduced": "2024-07-12T12:36:21-04:00", + "release": "2024_03" + } +} \ No newline at end of file diff --git a/src/test/resources/bulk/fms/12_htp_dataset_sample/ER_05_empty_dataset_ids.json b/src/test/resources/bulk/fms/12_htp_dataset_sample/ER_05_empty_dataset_ids.json new file mode 100644 index 000000000..c8fd4bfff --- /dev/null +++ b/src/test/resources/bulk/fms/12_htp_dataset_sample/ER_05_empty_dataset_ids.json @@ -0,0 +1,67 @@ +{ + "data": [ + { + "datasetIds" : [], + "sampleId" : { + "primaryId" : "GEO:GSE38764" + }, + "assayType" : "HTPMMO:assay001", + "sampleType" : "HTPOBI:sample001", + "sampleAge" : { + "stage" : { + "stageName" : "stage1", + "stageTermId" : "ZFS:001", + "stageUberonSlimTerm" : { + "uberonTerm" : "UBERON:001" + } + } + }, + "abundance" : "abundance test", + "sampleTitle" : "TEST TITLE", + "taxonId" : "NCBITaxon:6239", + "genomicInformation" : { + "biosampleId" : "AGA:Allele0001", + "idType" : "fish" + }, + "sex" : "hermaphrodite", + "assemblyVersions" : [ "Zv9" ], + "sequencingFormat" : "single", + "microarraySampleDetails" : { + "channelId" : "WB:[cgc4349]:fem-3:A", + "channelNum" : 1 + }, + "sampleLocations" : [ + { + "whereExpressedStatement" : "trunk", + "anatomicalStructureTermId" : "ANAT:001", + "anatomicalSubStructureTermId" : "ANAT:002", + "cellularComponentTermId" : "GOTEST:0012", + "anatomicalStructureQualifierTermId": "UBERON:002", + "anatomicalSubStructureQualifierTermId": "UBERON:003", + "cellularComponentQualifierTermId": "FBCV:001", + "anatomicalStructureUberonSlimTermIds" : [ + {"uberonTerm" : "UBERON:004"}, + {"uberonTerm" : "UBERON:005"} + ], + "anatomicalSubStructureUberonSlimTermIds" : [ + {"uberonTerm" : "UBERON:006"}, + {"uberonTerm" : 
"UBERON:007"} + ] + } + ] + } + ], + "metaData": { + "dataProvider": { + "crossReference": { + "id": "FB", + "pages": [ + "homepage" + ] + }, + "type": "curated" + }, + "dateProduced": "2024-07-12T12:36:21-04:00", + "release": "2024_03" + } +} \ No newline at end of file diff --git a/src/test/resources/bulk/fms/12_htp_dataset_sample/IV_01_invalid_sex.json b/src/test/resources/bulk/fms/12_htp_dataset_sample/IV_01_invalid_sex.json new file mode 100644 index 000000000..6d252b133 --- /dev/null +++ b/src/test/resources/bulk/fms/12_htp_dataset_sample/IV_01_invalid_sex.json @@ -0,0 +1,69 @@ +{ + "data": [ + { + "datasetIds" : [ + "GEO:GSE38764" + ], + "sampleId" : { + "primaryId" : "GEO:GSE38764" + }, + "assayType" : "HTPMMO:assay001", + "sampleType" : "HTPOBI:sample001", + "sampleAge" : { + "stage" : { + "stageName" : "stage1", + "stageTermId" : "ZFS:001", + "stageUberonSlimTerm" : { + "uberonTerm" : "UBERON:001" + } + } + }, + "abundance" : "abundance test", + "sampleTitle" : "TEST TITLE", + "taxonId" : "NCBITaxon:6239", + "genomicInformation" : { + "biosampleId" : "AGA:Allele0001", + "idType" : "fish" + }, + "sex" : "invalid", + "assemblyVersions" : [ "Zv9" ], + "sequencingFormat" : "single", + "microarraySampleDetails" : { + "channelId" : "WB:[cgc4349]:fem-3:A", + "channelNum" : 1 + }, + "sampleLocations" : [ + { + "whereExpressedStatement" : "trunk", + "anatomicalStructureTermId" : "ANAT:001", + "anatomicalSubStructureTermId" : "ANAT:002", + "cellularComponentTermId" : "GOTEST:0012", + "anatomicalStructureQualifierTermId": "UBERON:002", + "anatomicalSubStructureQualifierTermId": "UBERON:003", + "cellularComponentQualifierTermId": "FBCV:001", + "anatomicalStructureUberonSlimTermIds" : [ + {"uberonTerm" : "UBERON:004"}, + {"uberonTerm" : "UBERON:005"} + ], + "anatomicalSubStructureUberonSlimTermIds" : [ + {"uberonTerm" : "UBERON:006"}, + {"uberonTerm" : "UBERON:007"} + ] + } + ] + } + ], + "metaData": { + "dataProvider": { + "crossReference": { + "id": "FB", + "pages": 
[ + "homepage" + ] + }, + "type": "curated" + }, + "dateProduced": "2024-07-12T12:36:21-04:00", + "release": "2024_03" + } +} \ No newline at end of file diff --git a/src/test/resources/bulk/fms/12_htp_dataset_sample/IV_02_invalid_assay_type.json b/src/test/resources/bulk/fms/12_htp_dataset_sample/IV_02_invalid_assay_type.json new file mode 100644 index 000000000..c47b06986 --- /dev/null +++ b/src/test/resources/bulk/fms/12_htp_dataset_sample/IV_02_invalid_assay_type.json @@ -0,0 +1,69 @@ +{ + "data": [ + { + "datasetIds" : [ + "GEO:GSE38764" + ], + "sampleId" : { + "primaryId" : "GEO:GSE38764" + }, + "assayType" : "invalid_assay_type", + "sampleType" : "HTPOBI:sample001", + "sampleAge" : { + "stage" : { + "stageName" : "stage1", + "stageTermId" : "ZFS:001", + "stageUberonSlimTerm" : { + "uberonTerm" : "UBERON:001" + } + } + }, + "abundance" : "abundance test", + "sampleTitle" : "TEST TITLE", + "taxonId" : "NCBITaxon:6239", + "genomicInformation" : { + "biosampleId" : "AGA:Allele0001", + "idType" : "fish" + }, + "sex" : "hermaphrodite", + "assemblyVersions" : [ "Zv9" ], + "sequencingFormat" : "single", + "microarraySampleDetails" : { + "channelId" : "WB:[cgc4349]:fem-3:A", + "channelNum" : 1 + }, + "sampleLocations" : [ + { + "whereExpressedStatement" : "trunk", + "anatomicalStructureTermId" : "ANAT:001", + "anatomicalSubStructureTermId" : "ANAT:002", + "cellularComponentTermId" : "GOTEST:0012", + "anatomicalStructureQualifierTermId": "UBERON:002", + "anatomicalSubStructureQualifierTermId": "UBERON:003", + "cellularComponentQualifierTermId": "FBCV:001", + "anatomicalStructureUberonSlimTermIds" : [ + {"uberonTerm" : "UBERON:004"}, + {"uberonTerm" : "UBERON:005"} + ], + "anatomicalSubStructureUberonSlimTermIds" : [ + {"uberonTerm" : "UBERON:006"}, + {"uberonTerm" : "UBERON:007"} + ] + } + ] + } + ], + "metaData": { + "dataProvider": { + "crossReference": { + "id": "FB", + "pages": [ + "homepage" + ] + }, + "type": "curated" + }, + "dateProduced": 
"2024-07-12T12:36:21-04:00", + "release": "2024_03" + } +} \ No newline at end of file diff --git a/src/test/resources/bulk/fms/12_htp_dataset_sample/IV_03_invalid_sample_type.json b/src/test/resources/bulk/fms/12_htp_dataset_sample/IV_03_invalid_sample_type.json new file mode 100644 index 000000000..50b876cc1 --- /dev/null +++ b/src/test/resources/bulk/fms/12_htp_dataset_sample/IV_03_invalid_sample_type.json @@ -0,0 +1,69 @@ +{ + "data": [ + { + "datasetIds" : [ + "GEO:GSE38764" + ], + "sampleId" : { + "primaryId" : "GEO:GSE38764" + }, + "assayType" : "HTPMMO:assay001", + "sampleType" : "invalid_sample_type", + "sampleAge" : { + "stage" : { + "stageName" : "stage1", + "stageTermId" : "ZFS:001", + "stageUberonSlimTerm" : { + "uberonTerm" : "UBERON:001" + } + } + }, + "abundance" : "abundance test", + "sampleTitle" : "TEST TITLE", + "taxonId" : "NCBITaxon:6239", + "genomicInformation" : { + "biosampleId" : "AGA:Allele0001", + "idType" : "fish" + }, + "sex" : "hermaphrodite", + "assemblyVersions" : [ "Zv9" ], + "sequencingFormat" : "single", + "microarraySampleDetails" : { + "channelId" : "WB:[cgc4349]:fem-3:A", + "channelNum" : 1 + }, + "sampleLocations" : [ + { + "whereExpressedStatement" : "trunk", + "anatomicalStructureTermId" : "ANAT:001", + "anatomicalSubStructureTermId" : "ANAT:002", + "cellularComponentTermId" : "GOTEST:0012", + "anatomicalStructureQualifierTermId": "UBERON:002", + "anatomicalSubStructureQualifierTermId": "UBERON:003", + "cellularComponentQualifierTermId": "FBCV:001", + "anatomicalStructureUberonSlimTermIds" : [ + {"uberonTerm" : "UBERON:004"}, + {"uberonTerm" : "UBERON:005"} + ], + "anatomicalSubStructureUberonSlimTermIds" : [ + {"uberonTerm" : "UBERON:006"}, + {"uberonTerm" : "UBERON:007"} + ] + } + ] + } + ], + "metaData": { + "dataProvider": { + "crossReference": { + "id": "FB", + "pages": [ + "homepage" + ] + }, + "type": "curated" + }, + "dateProduced": "2024-07-12T12:36:21-04:00", + "release": "2024_03" + } +} \ No newline at end of 
file diff --git a/src/test/resources/bulk/fms/12_htp_dataset_sample/IV_04_invalid_taxon_id.json b/src/test/resources/bulk/fms/12_htp_dataset_sample/IV_04_invalid_taxon_id.json new file mode 100644 index 000000000..243fc0ece --- /dev/null +++ b/src/test/resources/bulk/fms/12_htp_dataset_sample/IV_04_invalid_taxon_id.json @@ -0,0 +1,69 @@ +{ + "data": [ + { + "datasetIds" : [ + "GEO:GSE38764" + ], + "sampleId" : { + "primaryId" : "GEO:GSE38764" + }, + "assayType" : "HTPMMO:assay001", + "sampleType" : "HTPOBI:sample001", + "sampleAge" : { + "stage" : { + "stageName" : "stage1", + "stageTermId" : "ZFS:001", + "stageUberonSlimTerm" : { + "uberonTerm" : "UBERON:001" + } + } + }, + "abundance" : "abundance test", + "sampleTitle" : "TEST TITLE", + "taxonId" : "INVALID_TAXON_ID", + "genomicInformation" : { + "biosampleId" : "AGA:Allele0001", + "idType" : "fish" + }, + "sex" : "hermaphrodite", + "assemblyVersions" : [ "Zv9" ], + "sequencingFormat" : "single", + "microarraySampleDetails" : { + "channelId" : "WB:[cgc4349]:fem-3:A", + "channelNum" : 1 + }, + "sampleLocations" : [ + { + "whereExpressedStatement" : "trunk", + "anatomicalStructureTermId" : "ANAT:001", + "anatomicalSubStructureTermId" : "ANAT:002", + "cellularComponentTermId" : "GOTEST:0012", + "anatomicalStructureQualifierTermId": "UBERON:002", + "anatomicalSubStructureQualifierTermId": "UBERON:003", + "cellularComponentQualifierTermId": "FBCV:001", + "anatomicalStructureUberonSlimTermIds" : [ + {"uberonTerm" : "UBERON:004"}, + {"uberonTerm" : "UBERON:005"} + ], + "anatomicalSubStructureUberonSlimTermIds" : [ + {"uberonTerm" : "UBERON:006"}, + {"uberonTerm" : "UBERON:007"} + ] + } + ] + } + ], + "metaData": { + "dataProvider": { + "crossReference": { + "id": "FB", + "pages": [ + "homepage" + ] + }, + "type": "curated" + }, + "dateProduced": "2024-07-12T12:36:21-04:00", + "release": "2024_03" + } +} \ No newline at end of file diff --git 
a/src/test/resources/bulk/fms/12_htp_dataset_sample/IV_05_invalid_genomic_information_biosample_id.json b/src/test/resources/bulk/fms/12_htp_dataset_sample/IV_05_invalid_genomic_information_biosample_id.json new file mode 100644 index 000000000..b3abe5cad --- /dev/null +++ b/src/test/resources/bulk/fms/12_htp_dataset_sample/IV_05_invalid_genomic_information_biosample_id.json @@ -0,0 +1,69 @@ +{ + "data": [ + { + "datasetIds" : [ + "GEO:GSE38764" + ], + "sampleId" : { + "primaryId" : "GEO:GSE38764" + }, + "assayType" : "HTPMMO:assay001", + "sampleType" : "HTPOBI:sample001", + "sampleAge" : { + "stage" : { + "stageName" : "stage1", + "stageTermId" : "ZFS:001", + "stageUberonSlimTerm" : { + "uberonTerm" : "UBERON:001" + } + } + }, + "abundance" : "abundance test", + "sampleTitle" : "TEST TITLE", + "taxonId" : "NCBITaxon:6239", + "genomicInformation" : { + "biosampleId" : "invalid_biosample_id", + "idType" : "fish" + }, + "sex" : "hermaphrodite", + "assemblyVersions" : [ "Zv9" ], + "sequencingFormat" : "single", + "microarraySampleDetails" : { + "channelId" : "WB:[cgc4349]:fem-3:A", + "channelNum" : 1 + }, + "sampleLocations" : [ + { + "whereExpressedStatement" : "trunk", + "anatomicalStructureTermId" : "ANAT:001", + "anatomicalSubStructureTermId" : "ANAT:002", + "cellularComponentTermId" : "GOTEST:0012", + "anatomicalStructureQualifierTermId": "UBERON:002", + "anatomicalSubStructureQualifierTermId": "UBERON:003", + "cellularComponentQualifierTermId": "FBCV:001", + "anatomicalStructureUberonSlimTermIds" : [ + {"uberonTerm" : "UBERON:004"}, + {"uberonTerm" : "UBERON:005"} + ], + "anatomicalSubStructureUberonSlimTermIds" : [ + {"uberonTerm" : "UBERON:006"}, + {"uberonTerm" : "UBERON:007"} + ] + } + ] + } + ], + "metaData": { + "dataProvider": { + "crossReference": { + "id": "FB", + "pages": [ + "homepage" + ] + }, + "type": "curated" + }, + "dateProduced": "2024-07-12T12:36:21-04:00", + "release": "2024_03" + } +} \ No newline at end of file diff --git 
a/src/test/resources/bulk/fms/12_htp_dataset_sample/MR_01_no_sample_id_and_sample_title.json b/src/test/resources/bulk/fms/12_htp_dataset_sample/MR_01_no_sample_id_and_sample_title.json new file mode 100644 index 000000000..2f0abfbf5 --- /dev/null +++ b/src/test/resources/bulk/fms/12_htp_dataset_sample/MR_01_no_sample_id_and_sample_title.json @@ -0,0 +1,65 @@ +{ + "data": [ + { + "datasetIds" : [ + "GEO:GSE38764" + ], + "assayType" : "HTPMMO:assay001", + "sampleType" : "HTPOBI:sample001", + "sampleAge" : { + "stage" : { + "stageName" : "stage1", + "stageTermId" : "ZFS:001", + "stageUberonSlimTerm" : { + "uberonTerm" : "UBERON:001" + } + } + }, + "abundance" : "abundance test", + "taxonId" : "NCBITaxon:6239", + "genomicInformation" : { + "biosampleId" : "AGA:Allele0001", + "idType" : "fish" + }, + "sex" : "hermaphrodite", + "assemblyVersions" : [ "Zv9" ], + "sequencingFormat" : "single", + "microarraySampleDetails" : { + "channelId" : "WB:[cgc4349]:fem-3:A", + "channelNum" : 1 + }, + "sampleLocations" : [ + { + "whereExpressedStatement" : "trunk", + "anatomicalStructureTermId" : "ANAT:001", + "anatomicalSubStructureTermId" : "ANAT:002", + "cellularComponentTermId" : "GOTEST:0012", + "anatomicalStructureQualifierTermId": "UBERON:002", + "anatomicalSubStructureQualifierTermId": "UBERON:003", + "cellularComponentQualifierTermId": "FBCV:001", + "anatomicalStructureUberonSlimTermIds" : [ + {"uberonTerm" : "UBERON:004"}, + {"uberonTerm" : "UBERON:005"} + ], + "anatomicalSubStructureUberonSlimTermIds" : [ + {"uberonTerm" : "UBERON:006"}, + {"uberonTerm" : "UBERON:007"} + ] + } + ] + } + ], + "metaData": { + "dataProvider": { + "crossReference": { + "id": "FB", + "pages": [ + "homepage" + ] + }, + "type": "curated" + }, + "dateProduced": "2024-07-12T12:36:21-04:00", + "release": "2024_03" + } +} \ No newline at end of file diff --git a/src/test/resources/bulk/fms/12_htp_dataset_sample/MR_02_no_sample_type.json 
b/src/test/resources/bulk/fms/12_htp_dataset_sample/MR_02_no_sample_type.json new file mode 100644 index 000000000..d7ec1f06d --- /dev/null +++ b/src/test/resources/bulk/fms/12_htp_dataset_sample/MR_02_no_sample_type.json @@ -0,0 +1,68 @@ +{ + "data": [ + { + "datasetIds" : [ + "GEO:GSE38764" + ], + "sampleId" : { + "primaryId" : "GEO:GSE38764" + }, + "assayType" : "HTPMMO:assay001", + "sampleAge" : { + "stage" : { + "stageName" : "stage1", + "stageTermId" : "ZFS:001", + "stageUberonSlimTerm" : { + "uberonTerm" : "UBERON:001" + } + } + }, + "abundance" : "abundance test", + "sampleTitle" : "TEST TITLE", + "taxonId" : "NCBITaxon:6239", + "genomicInformation" : { + "biosampleId" : "AGA:Allele0001", + "idType" : "fish" + }, + "sex" : "hermaphrodite", + "assemblyVersions" : [ "Zv9" ], + "sequencingFormat" : "single", + "microarraySampleDetails" : { + "channelId" : "WB:[cgc4349]:fem-3:A", + "channelNum" : 1 + }, + "sampleLocations" : [ + { + "whereExpressedStatement" : "trunk", + "anatomicalStructureTermId" : "ANAT:001", + "anatomicalSubStructureTermId" : "ANAT:002", + "cellularComponentTermId" : "GOTEST:0012", + "anatomicalStructureQualifierTermId": "UBERON:002", + "anatomicalSubStructureQualifierTermId": "UBERON:003", + "cellularComponentQualifierTermId": "FBCV:001", + "anatomicalStructureUberonSlimTermIds" : [ + {"uberonTerm" : "UBERON:004"}, + {"uberonTerm" : "UBERON:005"} + ], + "anatomicalSubStructureUberonSlimTermIds" : [ + {"uberonTerm" : "UBERON:006"}, + {"uberonTerm" : "UBERON:007"} + ] + } + ] + } + ], + "metaData": { + "dataProvider": { + "crossReference": { + "id": "FB", + "pages": [ + "homepage" + ] + }, + "type": "curated" + }, + "dateProduced": "2024-07-12T12:36:21-04:00", + "release": "2024_03" + } +} \ No newline at end of file diff --git a/src/test/resources/bulk/fms/12_htp_dataset_sample/MR_03_no_biosample_id_and_biosample_text.json b/src/test/resources/bulk/fms/12_htp_dataset_sample/MR_03_no_biosample_id_and_biosample_text.json new file mode 100644 
index 000000000..84c94759c --- /dev/null +++ b/src/test/resources/bulk/fms/12_htp_dataset_sample/MR_03_no_biosample_id_and_biosample_text.json @@ -0,0 +1,68 @@ +{ + "data": [ + { + "datasetIds" : [ + "GEO:GSE38764" + ], + "sampleId" : { + "primaryId" : "GEO:GSE38764" + }, + "assayType" : "HTPMMO:assay001", + "sampleType" : "HTPOBI:sample001", + "sampleAge" : { + "stage" : { + "stageName" : "stage1", + "stageTermId" : "ZFS:001", + "stageUberonSlimTerm" : { + "uberonTerm" : "UBERON:001" + } + } + }, + "abundance" : "abundance test", + "sampleTitle" : "TEST TITLE", + "taxonId" : "NCBITaxon:6239", + "genomicInformation" : { + "idType" : "fish" + }, + "sex" : "hermaphrodite", + "assemblyVersions" : [ "Zv9" ], + "sequencingFormat" : "single", + "microarraySampleDetails" : { + "channelId" : "WB:[cgc4349]:fem-3:A", + "channelNum" : 1 + }, + "sampleLocations" : [ + { + "whereExpressedStatement" : "trunk", + "anatomicalStructureTermId" : "ANAT:001", + "anatomicalSubStructureTermId" : "ANAT:002", + "cellularComponentTermId" : "GOTEST:0012", + "anatomicalStructureQualifierTermId": "UBERON:002", + "anatomicalSubStructureQualifierTermId": "UBERON:003", + "cellularComponentQualifierTermId": "FBCV:001", + "anatomicalStructureUberonSlimTermIds" : [ + {"uberonTerm" : "UBERON:004"}, + {"uberonTerm" : "UBERON:005"} + ], + "anatomicalSubStructureUberonSlimTermIds" : [ + {"uberonTerm" : "UBERON:006"}, + {"uberonTerm" : "UBERON:007"} + ] + } + ] + } + ], + "metaData": { + "dataProvider": { + "crossReference": { + "id": "FB", + "pages": [ + "homepage" + ] + }, + "type": "curated" + }, + "dateProduced": "2024-07-12T12:36:21-04:00", + "release": "2024_03" + } +} \ No newline at end of file diff --git a/src/test/resources/bulk/fms/12_htp_dataset_sample/MR_04_no_assay_type.json b/src/test/resources/bulk/fms/12_htp_dataset_sample/MR_04_no_assay_type.json new file mode 100644 index 000000000..f0768d72c --- /dev/null +++ 
b/src/test/resources/bulk/fms/12_htp_dataset_sample/MR_04_no_assay_type.json @@ -0,0 +1,68 @@ +{ + "data": [ + { + "datasetIds" : [ + "GEO:GSE38764" + ], + "sampleId" : { + "primaryId" : "GEO:GSE38764" + }, + "sampleType" : "HTPOBI:sample001", + "sampleAge" : { + "stage" : { + "stageName" : "stage1", + "stageTermId" : "ZFS:001", + "stageUberonSlimTerm" : { + "uberonTerm" : "UBERON:001" + } + } + }, + "abundance" : "abundance test", + "sampleTitle" : "TEST TITLE", + "taxonId" : "NCBITaxon:6239", + "genomicInformation" : { + "biosampleId" : "AGA:Allele0001", + "idType" : "fish" + }, + "sex" : "hermaphrodite", + "assemblyVersions" : [ "Zv9" ], + "sequencingFormat" : "single", + "microarraySampleDetails" : { + "channelId" : "WB:[cgc4349]:fem-3:A", + "channelNum" : 1 + }, + "sampleLocations" : [ + { + "whereExpressedStatement" : "trunk", + "anatomicalStructureTermId" : "ANAT:001", + "anatomicalSubStructureTermId" : "ANAT:002", + "cellularComponentTermId" : "GOTEST:0012", + "anatomicalStructureQualifierTermId": "UBERON:002", + "anatomicalSubStructureQualifierTermId": "UBERON:003", + "cellularComponentQualifierTermId": "FBCV:001", + "anatomicalStructureUberonSlimTermIds" : [ + {"uberonTerm" : "UBERON:004"}, + {"uberonTerm" : "UBERON:005"} + ], + "anatomicalSubStructureUberonSlimTermIds" : [ + {"uberonTerm" : "UBERON:006"}, + {"uberonTerm" : "UBERON:007"} + ] + } + ] + } + ], + "metaData": { + "dataProvider": { + "crossReference": { + "id": "FB", + "pages": [ + "homepage" + ] + }, + "type": "curated" + }, + "dateProduced": "2024-07-12T12:36:21-04:00", + "release": "2024_03" + } +} \ No newline at end of file diff --git a/src/test/resources/bulk/fms/12_htp_dataset_sample/MR_05_no_dataset_ids.json b/src/test/resources/bulk/fms/12_htp_dataset_sample/MR_05_no_dataset_ids.json new file mode 100644 index 000000000..7f91b8efd --- /dev/null +++ b/src/test/resources/bulk/fms/12_htp_dataset_sample/MR_05_no_dataset_ids.json @@ -0,0 +1,66 @@ +{ + "data": [ + { + "sampleId" : { + 
"primaryId" : "GEO:GSE38764" + }, + "assayType" : "HTPMMO:assay001", + "sampleType" : "HTPOBI:sample001", + "sampleAge" : { + "stage" : { + "stageName" : "stage1", + "stageTermId" : "ZFS:001", + "stageUberonSlimTerm" : { + "uberonTerm" : "UBERON:001" + } + } + }, + "abundance" : "abundance test", + "sampleTitle" : "TEST TITLE", + "taxonId" : "NCBITaxon:6239", + "genomicInformation" : { + "biosampleId" : "AGA:Allele0001", + "idType" : "fish" + }, + "sex" : "hermaphrodite", + "assemblyVersions" : [ "Zv9" ], + "sequencingFormat" : "single", + "microarraySampleDetails" : { + "channelId" : "WB:[cgc4349]:fem-3:A", + "channelNum" : 1 + }, + "sampleLocations" : [ + { + "whereExpressedStatement" : "trunk", + "anatomicalStructureTermId" : "ANAT:001", + "anatomicalSubStructureTermId" : "ANAT:002", + "cellularComponentTermId" : "GOTEST:0012", + "anatomicalStructureQualifierTermId": "UBERON:002", + "anatomicalSubStructureQualifierTermId": "UBERON:003", + "cellularComponentQualifierTermId": "FBCV:001", + "anatomicalStructureUberonSlimTermIds" : [ + {"uberonTerm" : "UBERON:004"}, + {"uberonTerm" : "UBERON:005"} + ], + "anatomicalSubStructureUberonSlimTermIds" : [ + {"uberonTerm" : "UBERON:006"}, + {"uberonTerm" : "UBERON:007"} + ] + } + ] + } + ], + "metaData": { + "dataProvider": { + "crossReference": { + "id": "FB", + "pages": [ + "homepage" + ] + }, + "type": "curated" + }, + "dateProduced": "2024-07-12T12:36:21-04:00", + "release": "2024_03" + } +} \ No newline at end of file diff --git a/src/test/resources/bulk/fms/12_htp_dataset_sample/UE_01_update_empty_non_required_fields.json b/src/test/resources/bulk/fms/12_htp_dataset_sample/UE_01_update_empty_non_required_fields.json new file mode 100644 index 000000000..4353b46b3 --- /dev/null +++ b/src/test/resources/bulk/fms/12_htp_dataset_sample/UE_01_update_empty_non_required_fields.json @@ -0,0 +1,39 @@ +{ + "data": [ + { + "datasetIds" : [ + "GEO:GSE38764" + ], + "sampleId" : { + "primaryId" : "GEO:GSE38764" + }, + "assayType" 
: "HTPMMO:assay001", + "sampleType" : "HTPOBI:sample001", + "abundance" : "", + "sampleTitle" : "", + "taxonId" : "", + "genomicInformation" : { + "biosampleId" : "AGA:Allele0001", + "idType" : "fish" + }, + "sex" : "", + "sequencingFormat" : "", + "microarraySampleDetails" : { + }, + "sampleLocations" : [] + } + ], + "metaData": { + "dataProvider": { + "crossReference": { + "id": "FB", + "pages": [ + "homepage" + ] + }, + "type": "curated" + }, + "dateProduced": "2024-07-12T12:36:21-04:00", + "release": "2024_03" + } +} \ No newline at end of file diff --git a/src/test/resources/bulk/fms/12_htp_dataset_sample/UM_01_update_no_non_required_fields.json b/src/test/resources/bulk/fms/12_htp_dataset_sample/UM_01_update_no_non_required_fields.json new file mode 100644 index 000000000..cc85fa5f6 --- /dev/null +++ b/src/test/resources/bulk/fms/12_htp_dataset_sample/UM_01_update_no_non_required_fields.json @@ -0,0 +1,31 @@ +{ + "data": [ + { + "datasetIds" : [ + "GEO:GSE38764" + ], + "sampleId" : { + "primaryId" : "GEO:GSE38764" + }, + "assayType" : "HTPMMO:assay001", + "sampleType" : "HTPOBI:sample001", + "genomicInformation" : { + "biosampleId" : "AGA:Allele0001", + "idType" : "fish" + } + } + ], + "metaData": { + "dataProvider": { + "crossReference": { + "id": "FB", + "pages": [ + "homepage" + ] + }, + "type": "curated" + }, + "dateProduced": "2024-07-12T12:36:21-04:00", + "release": "2024_03" + } +} \ No newline at end of file From 1ee0a7a6adc124dbbbfbe179d383174a1e33f110 Mon Sep 17 00:00:00 2001 From: VarunReddy1111 Date: Wed, 20 Nov 2024 22:30:15 -0600 Subject: [PATCH 091/118] Changes in test cases --- ...ssionDatasetSampleAnnotationFmsITCase.java | 38 +++++++++---------- ...E_01_update_empty_non_required_fields.json | 4 +- .../UM_01_update_no_non_required_fields.json | 4 +- 3 files changed, 23 insertions(+), 23 deletions(-) diff --git a/src/test/java/org/alliancegenome/curation_api/HTPExpressionDatasetSampleAnnotationFmsITCase.java 
b/src/test/java/org/alliancegenome/curation_api/HTPExpressionDatasetSampleAnnotationFmsITCase.java index e7458c4f8..74827885f 100644 --- a/src/test/java/org/alliancegenome/curation_api/HTPExpressionDatasetSampleAnnotationFmsITCase.java +++ b/src/test/java/org/alliancegenome/curation_api/HTPExpressionDatasetSampleAnnotationFmsITCase.java @@ -115,18 +115,18 @@ public void htpDatasetSampleBulkUploadInvalidFields() throws Exception { @Test @Order(5) public void htpDatasetSampleBulkUploadUpdateMissingNonRequiredFields() throws Exception { + checkSuccessfulBulkLoad(htpDatasetSampleBulkPostEndpoint, htpDatasetSampleTestFilePath + "UM_01_update_no_non_required_fields.json"); RestAssured.given().when().header("Content-Type", "application/json").body("{}") - .post(htpDatasetSampleFindEndpoint).then().statusCode(200).body("totalResults", is(1)) - .body("results", hasSize(1)) - .body("results[0]", not(hasKey("abundance"))) - .body("results[0]", not(hasKey("htpExpressionSampleTitle"))) - .body("results[0]", not(hasKey("taxon"))) - .body("results[0]", not(hasKey("geneticSex"))) - .body("results[0]", not(hasKey("sequencingFormat"))) - .body("results[0]", not(hasKey("microarraySampleDetails"))) - .body("results[0]", not(hasKey("htpExpressionSampleLocations"))); + .post(htpDatasetSampleFindEndpoint).then().statusCode(200).body("totalResults", is(2)) + .body("results[2]", not(hasKey("abundance"))) + .body("results[2]", not(hasKey("htpExpressionSampleTitle"))) + .body("results[2]", not(hasKey("taxon"))) + .body("results[2]", not(hasKey("geneticSex"))) + .body("results[2]", not(hasKey("sequencingFormat"))) + .body("results[2]", not(hasKey("microarraySampleDetails"))) + .body("results[2]", not(hasKey("htpExpressionSampleLocations"))); } @Test @@ -136,16 +136,16 @@ public void htpDatasetSampleBulkUploadUpdateEmptyNonRequiredFields() throws Exce checkSuccessfulBulkLoad(htpDatasetSampleBulkPostEndpoint, htpDatasetSampleTestFilePath + "UE_01_update_empty_non_required_fields.json"); 
RestAssured.given().when().header("Content-Type", "application/json").body("{}") - .post(htpDatasetSampleFindEndpoint).then().statusCode(200).body("totalResults", is(1)) - .body("results", hasSize(1)) - .body("results[0]", not(hasKey("htpExpressionSampleAge"))) - .body("results[0]", not(hasKey("abundance"))) - .body("results[0]", not(hasKey("htpExpressionSampleTitle"))) - .body("results[0]", not(hasKey("taxon"))) - .body("results[0]", not(hasKey("geneticSex"))) - .body("results[0]", not(hasKey("sequencingFormat"))) - .body("results[0]", not(hasKey("microarraySampleDetails"))) - .body("results[0]", not(hasKey("htpExpressionSampleLocations"))); + .post(htpDatasetSampleFindEndpoint).then().statusCode(200).body("totalResults", is(3)) + .body("results", hasSize(3)) + .body("results[2]", not(hasKey("htpExpressionSampleAge"))) + .body("results[2]", not(hasKey("abundance"))) + .body("results[2]", not(hasKey("htpExpressionSampleTitle"))) + .body("results[2]", not(hasKey("taxon"))) + .body("results[2]", not(hasKey("geneticSex"))) + .body("results[2]", not(hasKey("sequencingFormat"))) + .body("results[2]", not(hasKey("microarraySampleDetails"))) + .body("results[2]", not(hasKey("htpExpressionSampleLocations"))); } diff --git a/src/test/resources/bulk/fms/12_htp_dataset_sample/UE_01_update_empty_non_required_fields.json b/src/test/resources/bulk/fms/12_htp_dataset_sample/UE_01_update_empty_non_required_fields.json index 4353b46b3..eb074dd42 100644 --- a/src/test/resources/bulk/fms/12_htp_dataset_sample/UE_01_update_empty_non_required_fields.json +++ b/src/test/resources/bulk/fms/12_htp_dataset_sample/UE_01_update_empty_non_required_fields.json @@ -2,10 +2,10 @@ "data": [ { "datasetIds" : [ - "GEO:GSE38764" + "GEO:TEST2" ], "sampleId" : { - "primaryId" : "GEO:GSE38764" + "primaryId" : "GEO:TEST2" }, "assayType" : "HTPMMO:assay001", "sampleType" : "HTPOBI:sample001", diff --git a/src/test/resources/bulk/fms/12_htp_dataset_sample/UM_01_update_no_non_required_fields.json 
b/src/test/resources/bulk/fms/12_htp_dataset_sample/UM_01_update_no_non_required_fields.json index cc85fa5f6..6dc5ccbe4 100644 --- a/src/test/resources/bulk/fms/12_htp_dataset_sample/UM_01_update_no_non_required_fields.json +++ b/src/test/resources/bulk/fms/12_htp_dataset_sample/UM_01_update_no_non_required_fields.json @@ -2,10 +2,10 @@ "data": [ { "datasetIds" : [ - "GEO:GSE38764" + "GEO:TEST1" ], "sampleId" : { - "primaryId" : "GEO:GSE38764" + "primaryId" : "GEO:TEST1" }, "assayType" : "HTPMMO:assay001", "sampleType" : "HTPOBI:sample001", From a4f6a74dc4817fb9f8b8c624b1e218514194b5cb Mon Sep 17 00:00:00 2001 From: VarunReddy1111 Date: Thu, 21 Nov 2024 10:02:24 -0600 Subject: [PATCH 092/118] Changes in validation --- .../HTPExpressionDatasetSampleAnnotationFmsDTOValidator.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/HTPExpressionDatasetSampleAnnotationFmsDTOValidator.java b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/HTPExpressionDatasetSampleAnnotationFmsDTOValidator.java index e5eabcb9b..be5cbcf1c 100644 --- a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/HTPExpressionDatasetSampleAnnotationFmsDTOValidator.java +++ b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/HTPExpressionDatasetSampleAnnotationFmsDTOValidator.java @@ -242,7 +242,7 @@ public HTPExpressionDatasetSampleAnnotation validateHTPExpressionDatasetSampleAn if (StringUtils.isNotEmpty(dto.getTaxonId())) { ObjectResponse taxonResponse = ncbiTaxonTermService.getByCurie(dto.getTaxonId()); - if (backendBulkDataProvider != null && (backendBulkDataProvider.name().equals("RGD") || backendBulkDataProvider.name().equals("HUMAN")) && !taxonResponse.getEntity().getCurie().equals(backendBulkDataProvider.canonicalTaxonCurie) || taxonResponse.getEntity() == null) { + if (taxonResponse.getEntity() == null || backendBulkDataProvider != 
null && (backendBulkDataProvider.name().equals("RGD") || backendBulkDataProvider.name().equals("HUMAN")) && !taxonResponse.getEntity().getCurie().equals(backendBulkDataProvider.canonicalTaxonCurie)) { htpSampleAnnotationResponse.addErrorMessage("taxonId", ValidationConstants.INVALID_MESSAGE + " (" + dto.getTaxonId() + ") for " + backendBulkDataProvider.name() + " load"); } htpSampleAnnotation.setTaxon(taxonResponse.getEntity()); From cebb90e65a007921d26b873138480b9837c25227 Mon Sep 17 00:00:00 2001 From: Christian Pich Date: Thu, 21 Nov 2024 17:34:53 +0100 Subject: [PATCH 093/118] undo removal --- .../curation_api/jobs/processors/BulkLoadProcessor.java | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/processors/BulkLoadProcessor.java b/src/main/java/org/alliancegenome/curation_api/jobs/processors/BulkLoadProcessor.java index ec73003b2..e61b1a910 100644 --- a/src/main/java/org/alliancegenome/curation_api/jobs/processors/BulkLoadProcessor.java +++ b/src/main/java/org/alliancegenome/curation_api/jobs/processors/BulkLoadProcessor.java @@ -208,6 +208,7 @@ protected void startLoad(BulkLoadFileHistory bulkLoadFileHistory) { protected void endLoad(BulkLoadFileHistory bulkLoadFileHistory, String message, JobStatus status) { if (bulkLoadFileHistory.getBulkLoadFile().getLocalFilePath() != null) { Log.info("Removing old input file: " + bulkLoadFileHistory.getBulkLoadFile().getLocalFilePath()); + new File(bulkLoadFileHistory.getBulkLoadFile().getLocalFilePath()).delete(); bulkLoadFileHistory.getBulkLoadFile().setLocalFilePath(null); bulkLoadFileDAO.merge(bulkLoadFileHistory.getBulkLoadFile()); } From a2def385fef5424a79ecdd28f0164bbbab47850e Mon Sep 17 00:00:00 2001 From: VarunReddy1111 Date: Thu, 21 Nov 2024 16:17:30 -0600 Subject: [PATCH 094/118] Added new columns to controlled vocabulary terms: --- .../ControlledVocabularyTable.js | 31 +++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git 
a/src/main/cliapp/src/containers/controlledVocabularyPage/ControlledVocabularyTable.js b/src/main/cliapp/src/containers/controlledVocabularyPage/ControlledVocabularyTable.js index 383b1177b..6cba730c7 100644 --- a/src/main/cliapp/src/containers/controlledVocabularyPage/ControlledVocabularyTable.js +++ b/src/main/cliapp/src/containers/controlledVocabularyPage/ControlledVocabularyTable.js @@ -233,6 +233,37 @@ export const ControlledVocabularyTable = () => { editor: (props) => definitionEditorTemplate(props), body: (rowData) => , }, + { + field: 'updatedBy.uniqueId', + header: 'Updated By', + sortable: true, + body: (rowData) => , + filterConfig: FILTER_CONFIGS.updatedByFilterConfig, + }, + { + field: 'dateUpdated', + header: 'Date Updated', + sortable: true, + filter: true, + body: (rowData) => , + filterConfig: FILTER_CONFIGS.dateUpdatedFilterConfig, + }, + { + field: 'createdBy.uniqueId', + header: 'Created By', + sortable: true, + filter: true, + body: (rowData) => , + filterConfig: FILTER_CONFIGS.createdByFilterConfig, + }, + { + field: 'dateCreated', + header: 'Date Created', + sortable: true, + filter: true, + body: (rowData) => , + filterConfig: FILTER_CONFIGS.dataCreatedFilterConfig, + }, { field: 'obsolete', header: 'Obsolete', From 84578848482b4768c55b8a4bdc66a194f57653e1 Mon Sep 17 00:00:00 2001 From: Andres Becerra Date: Fri, 22 Nov 2024 09:14:43 +0000 Subject: [PATCH 095/118] SCRUM-3953: fix loadType for expression annotations --- .../curation_api/jobs/executors/GeneExpressionExecutor.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/executors/GeneExpressionExecutor.java b/src/main/java/org/alliancegenome/curation_api/jobs/executors/GeneExpressionExecutor.java index fee8b5d52..ca6baa9fa 100644 --- a/src/main/java/org/alliancegenome/curation_api/jobs/executors/GeneExpressionExecutor.java +++ b/src/main/java/org/alliancegenome/curation_api/jobs/executors/GeneExpressionExecutor.java @@ 
-57,7 +57,7 @@ public void execLoad(BulkLoadFileHistory bulkLoadFileHistory) { List experimentIdsLoaded = new ArrayList<>(); List experimentIdsBefore = geneExpressionExperimentService.getExperimentIdsByDataProvider(dataProvider); - boolean success = runLoad(geneExpressionAnnotationService, bulkLoadFileHistory, dataProvider, geneExpressionIngestFmsDTO.getData(), annotationIdsLoaded); + boolean success = runLoad(geneExpressionAnnotationService, bulkLoadFileHistory, dataProvider, geneExpressionIngestFmsDTO.getData(), annotationIdsLoaded, ANNOTATIONS); if (success) { runCleanup(geneExpressionAnnotationService, bulkLoadFileHistory, dataProvider.name(), annotationIdsBefore, annotationIdsLoaded, ANNOTATIONS); From d86f3b9a0ba81030a2dd7e3288f351a5ee21bb3e Mon Sep 17 00:00:00 2001 From: Christian Pich Date: Fri, 22 Nov 2024 15:43:30 +0100 Subject: [PATCH 096/118] remove unused method, add logic to create new reference --- .../ConstructGenomicEntityAssociation.java | 2 +- .../InformationContentEntityService.java | 3 --- .../curation_api/services/ReferenceService.java | 16 +++++++--------- 3 files changed, 8 insertions(+), 13 deletions(-) diff --git a/src/main/java/org/alliancegenome/curation_api/model/entities/associations/constructAssociations/ConstructGenomicEntityAssociation.java b/src/main/java/org/alliancegenome/curation_api/model/entities/associations/constructAssociations/ConstructGenomicEntityAssociation.java index 90bc0ec44..67b208014 100644 --- a/src/main/java/org/alliancegenome/curation_api/model/entities/associations/constructAssociations/ConstructGenomicEntityAssociation.java +++ b/src/main/java/org/alliancegenome/curation_api/model/entities/associations/constructAssociations/ConstructGenomicEntityAssociation.java @@ -22,7 +22,7 @@ @Entity @Data @EqualsAndHashCode(onlyExplicitlyIncluded = true, callSuper = true) -@ToString(callSuper = false) +@ToString(callSuper = true) @AGRCurationSchemaVersion(min = "2.2.0", max = LinkMLSchemaConstants.LATEST_RELEASE, 
dependencies = {EvidenceAssociation.class}) @Schema(name = "ConstructGenomicEntityAssociation", description = "POJO representing an association between a construct and a genomic entity") diff --git a/src/main/java/org/alliancegenome/curation_api/services/InformationContentEntityService.java b/src/main/java/org/alliancegenome/curation_api/services/InformationContentEntityService.java index c76141f38..2f74097e0 100644 --- a/src/main/java/org/alliancegenome/curation_api/services/InformationContentEntityService.java +++ b/src/main/java/org/alliancegenome/curation_api/services/InformationContentEntityService.java @@ -29,7 +29,4 @@ public InformationContentEntity retrieveFromDbOrLiteratureService(String curieOr return ice; } - public InformationContentEntity retrieveFromDbOrLiteratureServicea(String pmid) { - return referenceService.retrieveShallowReferenceFromDbOrLiteratureService(pmid); - } } diff --git a/src/main/java/org/alliancegenome/curation_api/services/ReferenceService.java b/src/main/java/org/alliancegenome/curation_api/services/ReferenceService.java index 20128b092..66adf9041 100644 --- a/src/main/java/org/alliancegenome/curation_api/services/ReferenceService.java +++ b/src/main/java/org/alliancegenome/curation_api/services/ReferenceService.java @@ -74,20 +74,18 @@ public Reference retrieveFromDbOrLiteratureService(String curieOrXref) { } @Transactional - public Reference retrieveShallowReferenceFromDbOrLiteratureService(String pmid) { + public Reference retrieveShallowReferenceFromDbOrLiteratureService(String curieOrXref) { Reference reference = null; - if (shallowReferenceCacheMap.containsKey(pmid)) { - reference = shallowReferenceCacheMap.get(pmid); + if (shallowReferenceCacheMap.containsKey(curieOrXref)) { + reference = shallowReferenceCacheMap.get(curieOrXref); } else { - Log.debug("Reference not cached, caching reference: (" + pmid + ")"); + Log.debug("Reference not cached, caching reference: (" + curieOrXref + ")"); if 
(shallowReferenceCacheMap.isEmpty()) { shallowReferenceCacheMap = referenceDAO.getShallowReferenceMap(); - reference = shallowReferenceCacheMap.get(pmid); + reference = shallowReferenceCacheMap.get(curieOrXref); } else { -/* - reference = findOrCreateReference(curieOrXref); - referenceCacheMap.put(curieOrXref, reference); -*/ + reference = findOrCreateReference(curieOrXref); + referenceCacheMap.put(curieOrXref, reference); } } return reference; From 8202bf0f0936943e4bc96737f6bac81a5f5673f0 Mon Sep 17 00:00:00 2001 From: Adam Gibson Date: Thu, 7 Nov 2024 09:03:17 -0600 Subject: [PATCH 097/118] save at second structural change --- .../curation_api/dao/CrossReferenceDAO.java | 21 +++ .../jobs/executors/BiogridOrcExecutor.java | 124 +++++++++++++++--- .../services/BioGridOrcsService.java | 52 ++++++++ .../services/CrossReferenceService.java | 5 + .../dto/fms/BioGridOrcsFmsDTOValidator.java | 32 +++++ 5 files changed, 213 insertions(+), 21 deletions(-) create mode 100644 src/main/java/org/alliancegenome/curation_api/services/BioGridOrcsService.java create mode 100644 src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/BioGridOrcsFmsDTOValidator.java diff --git a/src/main/java/org/alliancegenome/curation_api/dao/CrossReferenceDAO.java b/src/main/java/org/alliancegenome/curation_api/dao/CrossReferenceDAO.java index 162d95391..a7fda812a 100644 --- a/src/main/java/org/alliancegenome/curation_api/dao/CrossReferenceDAO.java +++ b/src/main/java/org/alliancegenome/curation_api/dao/CrossReferenceDAO.java @@ -4,11 +4,16 @@ import jakarta.persistence.Query; import org.alliancegenome.curation_api.dao.base.BaseSQLDAO; import org.alliancegenome.curation_api.model.entities.CrossReference; +import org.alliancegenome.curation_api.model.entities.Gene; import org.alliancegenome.curation_api.model.entities.ResourceDescriptorPage; +import io.quarkus.logging.Log; + +import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; 
+import java.util.Set; @ApplicationScoped public class CrossReferenceDAO extends BaseSQLDAO { @@ -32,6 +37,22 @@ public Map getGenesWithCrossRefs(ResourceDescriptorPage page) { return ensemblGeneMap; } + public Map getGenesWithCrossRefs(Set referencedCuries) { + String sql = """ + select gc.genomicentity_id, cr.referencedcurie from genomicentity_crossreference as gc, crossreference as cr + where gc.crossreferences_id = cr.id AND cr.referencedCurie IN (:referencedCuries) + """; + Query query = entityManager.createNativeQuery(sql); + query.setParameter("referencedCuries", referencedCuries); + List objects = query.getResultList(); + Map idCurieMap = new HashMap<>(); + objects.forEach(object -> { + idCurieMap.put((String) object[1], (Long) object[0]); + }); + return idCurieMap; + + } + public Integer persistAccessionGeneAssociated(Long crossReferenceID, Long geneID) { String sql = """ insert into genomicentity_crossreference (crossreferences_id,genomicentity_id) diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/executors/BiogridOrcExecutor.java b/src/main/java/org/alliancegenome/curation_api/jobs/executors/BiogridOrcExecutor.java index b90f3f015..df6d3fb4f 100644 --- a/src/main/java/org/alliancegenome/curation_api/jobs/executors/BiogridOrcExecutor.java +++ b/src/main/java/org/alliancegenome/curation_api/jobs/executors/BiogridOrcExecutor.java @@ -1,17 +1,28 @@ package org.alliancegenome.curation_api.jobs.executors; import java.io.FileInputStream; +import java.util.HashMap; import java.util.HashSet; import java.util.List; +import java.util.Map; import java.util.Set; import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.zip.GZIPInputStream; +import org.alliancegenome.curation_api.dao.CrossReferenceDAO; +import org.alliancegenome.curation_api.dao.GeneDAO; +import org.alliancegenome.curation_api.dao.ResourceDescriptorPageDAO; import org.alliancegenome.curation_api.exceptions.ObjectUpdateException.ObjectUpdateExceptionData; import 
org.alliancegenome.curation_api.jobs.util.CsvSchemaBuilder; +import org.alliancegenome.curation_api.model.entities.CrossReference; +import org.alliancegenome.curation_api.model.entities.Gene; +import org.alliancegenome.curation_api.model.entities.ResourceDescriptorPage; import org.alliancegenome.curation_api.model.entities.bulkloads.BulkLoadFileHistory; import org.alliancegenome.curation_api.model.ingest.dto.fms.BiogridOrcFmsDTO; +import org.alliancegenome.curation_api.response.SearchResponse; +import org.alliancegenome.curation_api.services.CrossReferenceService; +import org.alliancegenome.curation_api.services.DataProviderService; import org.alliancegenome.curation_api.util.ProcessDisplayHelper; import org.apache.commons.collections.CollectionUtils; import org.apache.commons.compress.archivers.tar.TarArchiveEntry; @@ -22,19 +33,50 @@ import com.fasterxml.jackson.dataformat.csv.CsvParser; import com.fasterxml.jackson.dataformat.csv.CsvSchema; +import io.quarkus.logging.Log; import jakarta.enterprise.context.ApplicationScoped; +import jakarta.inject.Inject; +import jakarta.transaction.Transactional; @ApplicationScoped public class BiogridOrcExecutor extends LoadFileExecutor { + @Inject + ResourceDescriptorPageDAO resourceDescriptorPageDAO; + + @Inject + GeneDAO geneDAO; + + @Inject + CrossReferenceDAO crossRefDAO; + + @Inject + CrossReferenceService crossReferenceService; + + //Todo: remove this and add to the service method once it's created + @Transactional public void execLoad(BulkLoadFileHistory bulkLoadFileHistory) { try (TarArchiveInputStream tarInputStream = new TarArchiveInputStream( - new GZIPInputStream(new FileInputStream(bulkLoadFileHistory.getBulkLoadFile().getLocalFilePath())))) { + new GZIPInputStream(new FileInputStream(bulkLoadFileHistory.getBulkLoadFile().getLocalFilePath())))) { TarArchiveEntry tarEntry; Set biogridIds = new HashSet<>(); + HashMap rdpParams = new HashMap<>(); + rdpParams.put("name", "biogrid/orcs"); + ResourceDescriptorPage 
resourceDescriptorPage = resourceDescriptorPageDAO.findByParams(rdpParams) + .getSingleResult(); + + int index = 0; + while ((tarEntry = tarInputStream.getNextEntry()) != null) { + if (tarEntry.getName().equals("BIOGRID-ORCS-SCREEN_1558-1.1.16.screen.tab.txt")) { + Log.debug("----------Starting file: -----------------------"); + Log.debug(tarEntry.getName()); + } + Log.debug("----------------on loop number:----------"); + Log.debug(index); + index++; CsvMapper csvMapper = new CsvMapper(); CsvSchema biogridOrcFmsSchema = CsvSchemaBuilder.biogridOrcFmsSchema(); @@ -54,7 +96,7 @@ public void execLoad(BulkLoadFileHistory bulkLoadFileHistory) { .readValues(tarInputStream.readAllBytes()); List biogridData = it.readAll(); - runLoad(bulkLoadFileHistory, biogridData, biogridIds); + runLoad(bulkLoadFileHistory, biogridData, resourceDescriptorPage); } } catch (Exception e) { @@ -63,32 +105,50 @@ public void execLoad(BulkLoadFileHistory bulkLoadFileHistory) { } } - private boolean runLoad(BulkLoadFileHistory history, List biogridList, Set biogridIds) { + private boolean runLoad(BulkLoadFileHistory history, List biogridList, ResourceDescriptorPage resourceDescriptorPage) { ProcessDisplayHelper ph = new ProcessDisplayHelper(); ph.addDisplayHandler(loadProcessDisplayService); if (CollectionUtils.isNotEmpty(biogridList)) { - String loadMessage = biogridList.get(0).getClass().getSimpleName() + " update"; - ph.startProcess(loadMessage, biogridList.size()); + try { + String loadMessage = "BioGrid update"; + Set referencedCuries = populateEntrezIdsFromFiles(biogridList, history); + ph.startProcess(loadMessage, referencedCuries.size()); + updateHistory(history); + + Map genomicEntityCrossRefMap = crossReferenceService.getGenomicEntityCrossRefMap(referencedCuries); + + for (String referencedCurie : genomicEntityCrossRefMap.keySet()) { + + HashMap crossRefParams = new HashMap<>(); + crossRefParams.put("referencedCurie", referencedCurie); + crossRefParams.put("displayName", 
referencedCurie); + crossRefParams.put("resourceDescriptorPage.id", resourceDescriptorPage.getId()); + + SearchResponse crossRefDupSearch = + crossRefDAO.findByParams(crossRefParams); + + if(!crossRefDupSearch.getResults().isEmpty()) continue; + + CrossReference newCrossRef = new CrossReference(); + newCrossRef.setReferencedCurie(referencedCurie); + newCrossRef.setDisplayName("BioGRID CRISPR Screen Cell Line Phenotypes"); + newCrossRef.setResourceDescriptorPage(resourceDescriptorPage); + + crossRefDAO.persist(newCrossRef); + + crossRefDAO.persistAccessionGeneAssociated(newCrossRef.getId(), genomicEntityCrossRefMap.get(referencedCurie)); + + + history.incrementCompleted(); - updateHistory(history); - for (BiogridOrcFmsDTO biogridOrcFmsDTO : biogridList) { - try { - if (biogridOrcFmsDTO.getIdentifierType().equals("ENTREZ_GENE")) { - String identifier = "NCBI_Gene:" + biogridOrcFmsDTO.getIdentifierId(); - biogridIds.add(identifier); - history.incrementCompleted(); - } else { - history.incrementSkipped(); - - } - } catch (Exception e) { - e.printStackTrace(); - history.incrementFailed(); - addException(history, - new ObjectUpdateExceptionData(biogridOrcFmsDTO, e.getMessage(), e.getStackTrace())); } + } catch (Exception e) { + e.printStackTrace(); + history.incrementFailed(); + addException(history, new ObjectUpdateExceptionData(biogridOrcFmsDTO, e.getMessage(), e.getStackTrace())); ph.progressProcess(); } + history.incrementCompleted(); updateHistory(history); updateExceptions(history); ph.finishProcess(); @@ -96,4 +156,26 @@ private boolean runLoad(BulkLoadFileHistory history, List biog return true; } + + private Set populateEntrezIdsFromFiles(List biogridList, BulkLoadFileHistory history) { + Set biogridIds = new HashSet<>(); + + for (BiogridOrcFmsDTO biogridOrcFmsDTO : biogridList) { + try { + if (!biogridOrcFmsDTO.getIdentifierType().equals("ENTREZ_GENE")){ + history.incrementSkipped(); + continue; + } + + String identifier = "NCBI_Gene:" + 
biogridOrcFmsDTO.getIdentifierId(); + biogridIds.add(identifier); + + } catch (Exception e) { + e.printStackTrace(); + } + } + + return biogridIds; + + } } \ No newline at end of file diff --git a/src/main/java/org/alliancegenome/curation_api/services/BioGridOrcsService.java b/src/main/java/org/alliancegenome/curation_api/services/BioGridOrcsService.java new file mode 100644 index 000000000..4c401f886 --- /dev/null +++ b/src/main/java/org/alliancegenome/curation_api/services/BioGridOrcsService.java @@ -0,0 +1,52 @@ +package org.alliancegenome.curation_api.services; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Objects; + +import org.alliancegenome.curation_api.constants.EntityFieldConstants; +import org.alliancegenome.curation_api.dao.CrossReferenceDAO; +import org.alliancegenome.curation_api.dao.SequenceTargetingReagentDAO; +import org.alliancegenome.curation_api.enums.BackendBulkDataProvider; +import org.alliancegenome.curation_api.exceptions.ValidationException; +import org.alliancegenome.curation_api.interfaces.crud.BaseUpsertServiceInterface; +import org.alliancegenome.curation_api.model.entities.CrossReference; +import org.alliancegenome.curation_api.model.entities.SequenceTargetingReagent; +import org.alliancegenome.curation_api.model.ingest.dto.fms.SequenceTargetingReagentFmsDTO; +import org.alliancegenome.curation_api.services.base.SubmittedObjectCrudService; +import org.alliancegenome.curation_api.services.validation.dto.fms.BioGridOrcsFmsDTOValidator; +import org.alliancegenome.curation_api.services.validation.dto.fms.SequenceTargetingReagentFmsDTOValidator; + +import jakarta.annotation.PostConstruct; +import jakarta.enterprise.context.RequestScoped; +import jakarta.inject.Inject; +import jakarta.transaction.Transactional; + +@RequestScoped +public class BioGridOrcsService { + + @Inject BioGridOrcsFmsDTOValidator bioGridOrcsFmsDTOValidator; + @Inject CrossReferenceDAO crossReferenceDAO; + + // @Override + 
// @PostConstruct + // protected void init() { + // setSQLDao(crossReferenceDAO); + // } + + @Transactional + public CrossReference insert(BioGridOrcsFmsDTOValidator dto, BackendBulkDataProvider dataProvider) throws ValidationException { + // CrossReference crossReference = bioGridOrcsFmsDTOValidator.validateSQTRFmsDTO(dto, dataProvider); + return new CrossReference(); + // return crossReferenceDAO.persist(sqtr); + } + + // public List getIdsByDataProvider(String dataProvider) { + // Map params = new HashMap<>(); + // params.put(EntityFieldConstants.DATA_PROVIDER, dataProvider); + // List ids = sqtrDAO.findIdsByParams(params); + // ids.removeIf(Objects::isNull); + // return ids; + // } +} diff --git a/src/main/java/org/alliancegenome/curation_api/services/CrossReferenceService.java b/src/main/java/org/alliancegenome/curation_api/services/CrossReferenceService.java index eff5ba8d7..87a1644bc 100644 --- a/src/main/java/org/alliancegenome/curation_api/services/CrossReferenceService.java +++ b/src/main/java/org/alliancegenome/curation_api/services/CrossReferenceService.java @@ -16,6 +16,7 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Set; @RequestScoped public class CrossReferenceService extends BaseEntityCrudService { @@ -109,4 +110,8 @@ public String getCrossReferenceUniqueId(CrossReference xref) { public Map getGenomicEntityCrossRefMap(ResourceDescriptorPage page) { return crossReferenceDAO.getGenesWithCrossRefs(page); } + + public Map getGenomicEntityCrossRefMap(Set referencedCuries) { + return crossReferenceDAO.getGenesWithCrossRefs(referencedCuries); + } } diff --git a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/BioGridOrcsFmsDTOValidator.java b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/BioGridOrcsFmsDTOValidator.java new file mode 100644 index 000000000..92c6e9a2e --- /dev/null +++ 
b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/BioGridOrcsFmsDTOValidator.java @@ -0,0 +1,32 @@ +package org.alliancegenome.curation_api.services.validation.dto.fms; + +import java.util.HashMap; + +import org.alliancegenome.curation_api.enums.BackendBulkDataProvider; +import org.alliancegenome.curation_api.model.entities.CrossReference; +import org.alliancegenome.curation_api.model.ingest.dto.fms.BiogridOrcFmsDTO; +import org.alliancegenome.curation_api.response.SearchResponse; + +import jakarta.enterprise.context.RequestScoped; + +@RequestScoped +public class BioGridOrcsFmsDTOValidator { + + //Todo: rename? + public CrossReference validateBioGridOrcsFmsDTO(String referencedCurie){ + // HashMap crossRefParams = new HashMap<>(); + // crossRefParams.put("referencedCurie", referencedCurie); + // crossRefParams.put("displayName", referencedCurie); + // crossRefParams.put("resourceDescriptorPage.id", resourceDescriptorPage.getId()); + + // // Log.debug("--------------crossRefDupSearch----------------"); + // SearchResponse crossRefDupSearch = + // crossRefDAO.findByParams(crossRefParams); + // Log.debug(crossRefDupSearch.getResults().isEmpty()); + + // if(!crossRefDupSearch.getResults().isEmpty()) continue; + + return new CrossReference(); + } + +} From c4786bcc0a8221968fd41d59278ddfc309788f78 Mon Sep 17 00:00:00 2001 From: Adam Gibson Date: Tue, 12 Nov 2024 15:23:56 -0600 Subject: [PATCH 098/118] SCRUM-4513 add cleanup code --- .../curation_api/dao/CrossReferenceDAO.java | 4 - .../jobs/executors/BiogridOrcExecutor.java | 94 +++++++++++-------- .../services/DataProviderService.java | 15 +++ 3 files changed, 69 insertions(+), 44 deletions(-) diff --git a/src/main/java/org/alliancegenome/curation_api/dao/CrossReferenceDAO.java b/src/main/java/org/alliancegenome/curation_api/dao/CrossReferenceDAO.java index a7fda812a..02724376d 100644 --- a/src/main/java/org/alliancegenome/curation_api/dao/CrossReferenceDAO.java +++ 
b/src/main/java/org/alliancegenome/curation_api/dao/CrossReferenceDAO.java @@ -4,12 +4,8 @@ import jakarta.persistence.Query; import org.alliancegenome.curation_api.dao.base.BaseSQLDAO; import org.alliancegenome.curation_api.model.entities.CrossReference; -import org.alliancegenome.curation_api.model.entities.Gene; import org.alliancegenome.curation_api.model.entities.ResourceDescriptorPage; -import io.quarkus.logging.Log; - -import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/executors/BiogridOrcExecutor.java b/src/main/java/org/alliancegenome/curation_api/jobs/executors/BiogridOrcExecutor.java index df6d3fb4f..2d5f8fa6a 100644 --- a/src/main/java/org/alliancegenome/curation_api/jobs/executors/BiogridOrcExecutor.java +++ b/src/main/java/org/alliancegenome/curation_api/jobs/executors/BiogridOrcExecutor.java @@ -1,10 +1,12 @@ package org.alliancegenome.curation_api.jobs.executors; import java.io.FileInputStream; +import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Objects; import java.util.Set; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -13,16 +15,16 @@ import org.alliancegenome.curation_api.dao.CrossReferenceDAO; import org.alliancegenome.curation_api.dao.GeneDAO; import org.alliancegenome.curation_api.dao.ResourceDescriptorPageDAO; -import org.alliancegenome.curation_api.exceptions.ObjectUpdateException.ObjectUpdateExceptionData; import org.alliancegenome.curation_api.jobs.util.CsvSchemaBuilder; import org.alliancegenome.curation_api.model.entities.CrossReference; -import org.alliancegenome.curation_api.model.entities.Gene; +import org.alliancegenome.curation_api.model.entities.DataProvider; +import org.alliancegenome.curation_api.model.entities.Organization; import org.alliancegenome.curation_api.model.entities.ResourceDescriptorPage; import 
org.alliancegenome.curation_api.model.entities.bulkloads.BulkLoadFileHistory; import org.alliancegenome.curation_api.model.ingest.dto.fms.BiogridOrcFmsDTO; -import org.alliancegenome.curation_api.response.SearchResponse; import org.alliancegenome.curation_api.services.CrossReferenceService; import org.alliancegenome.curation_api.services.DataProviderService; +import org.alliancegenome.curation_api.services.OrganizationService; import org.alliancegenome.curation_api.util.ProcessDisplayHelper; import org.apache.commons.collections.CollectionUtils; import org.apache.commons.compress.archivers.tar.TarArchiveEntry; @@ -33,10 +35,8 @@ import com.fasterxml.jackson.dataformat.csv.CsvParser; import com.fasterxml.jackson.dataformat.csv.CsvSchema; -import io.quarkus.logging.Log; import jakarta.enterprise.context.ApplicationScoped; import jakarta.inject.Inject; -import jakarta.transaction.Transactional; @ApplicationScoped public class BiogridOrcExecutor extends LoadFileExecutor { @@ -53,30 +53,40 @@ public class BiogridOrcExecutor extends LoadFileExecutor { @Inject CrossReferenceService crossReferenceService; - //Todo: remove this and add to the service method once it's created - @Transactional + @Inject + OrganizationService organizationService; + + @Inject + DataProviderService dataProviderService; + public void execLoad(BulkLoadFileHistory bulkLoadFileHistory) { try (TarArchiveInputStream tarInputStream = new TarArchiveInputStream( new GZIPInputStream(new FileInputStream(bulkLoadFileHistory.getBulkLoadFile().getLocalFilePath())))) { TarArchiveEntry tarEntry; - Set biogridIds = new HashSet<>(); + List biogridData = new ArrayList<>(); + String name = bulkLoadFileHistory.getBulkLoad().getName(); + String dataProviderName = name.substring(0, name.indexOf(" ")); + + Organization organization = organizationService.getByAbbr(dataProviderName).getEntity(); HashMap rdpParams = new HashMap<>(); rdpParams.put("name", "biogrid/orcs"); ResourceDescriptorPage resourceDescriptorPage = 
resourceDescriptorPageDAO.findByParams(rdpParams) .getSingleResult(); - int index = 0; + List dataProviderIdsBefore = new ArrayList<>( + dataProviderService.getDataProviderMap(organization, resourceDescriptorPage) + .values() + .stream() + .map(DataProvider::getId) + .toList()); + + dataProviderIdsBefore.removeIf(Objects::isNull); + + List dataProviderIdsLoaded = new ArrayList<>(); while ((tarEntry = tarInputStream.getNextEntry()) != null) { - if (tarEntry.getName().equals("BIOGRID-ORCS-SCREEN_1558-1.1.16.screen.tab.txt")) { - Log.debug("----------Starting file: -----------------------"); - Log.debug(tarEntry.getName()); - } - Log.debug("----------------on loop number:----------"); - Log.debug(index); - index++; CsvMapper csvMapper = new CsvMapper(); CsvSchema biogridOrcFmsSchema = CsvSchemaBuilder.biogridOrcFmsSchema(); @@ -95,17 +105,23 @@ public void execLoad(BulkLoadFileHistory bulkLoadFileHistory) { .with(biogridOrcFmsSchema) .readValues(tarInputStream.readAllBytes()); - List biogridData = it.readAll(); - runLoad(bulkLoadFileHistory, biogridData, resourceDescriptorPage); + biogridData.addAll(it.readAll()); } + runLoad(bulkLoadFileHistory, biogridData, resourceDescriptorPage, organization, dataProviderService, + dataProviderIdsLoaded); + + runCleanup(dataProviderService, bulkLoadFileHistory, dataProviderName, dataProviderIdsBefore, + dataProviderIdsLoaded, "Biogrid Orc Load Type"); } catch (Exception e) { failLoad(bulkLoadFileHistory, e); e.printStackTrace(); } } - private boolean runLoad(BulkLoadFileHistory history, List biogridList, ResourceDescriptorPage resourceDescriptorPage) { + private boolean runLoad(BulkLoadFileHistory history, List biogridList, + ResourceDescriptorPage resourceDescriptorPage, Organization organization, + DataProviderService dataProviderService, List dataProviderIdsLoaded) { ProcessDisplayHelper ph = new ProcessDisplayHelper(); ph.addDisplayHandler(loadProcessDisplayService); if (CollectionUtils.isNotEmpty(biogridList)) { @@ -115,37 
+131,35 @@ private boolean runLoad(BulkLoadFileHistory history, List biog ph.startProcess(loadMessage, referencedCuries.size()); updateHistory(history); - Map genomicEntityCrossRefMap = crossReferenceService.getGenomicEntityCrossRefMap(referencedCuries); - - for (String referencedCurie : genomicEntityCrossRefMap.keySet()) { - - HashMap crossRefParams = new HashMap<>(); - crossRefParams.put("referencedCurie", referencedCurie); - crossRefParams.put("displayName", referencedCurie); - crossRefParams.put("resourceDescriptorPage.id", resourceDescriptorPage.getId()); - - SearchResponse crossRefDupSearch = - crossRefDAO.findByParams(crossRefParams); + Map genomicEntityCrossRefMap = crossReferenceService + .getGenomicEntityCrossRefMap(referencedCuries); - if(!crossRefDupSearch.getResults().isEmpty()) continue; + for (String referencedCurie : referencedCuries) { CrossReference newCrossRef = new CrossReference(); newCrossRef.setReferencedCurie(referencedCurie); newCrossRef.setDisplayName("BioGRID CRISPR Screen Cell Line Phenotypes"); newCrossRef.setResourceDescriptorPage(resourceDescriptorPage); - - crossRefDAO.persist(newCrossRef); - - crossRefDAO.persistAccessionGeneAssociated(newCrossRef.getId(), genomicEntityCrossRefMap.get(referencedCurie)); - - - history.incrementCompleted(); + + DataProvider provider = new DataProvider(); + provider.setSourceOrganization(organization); + provider.setCrossReference(newCrossRef); + + DataProvider entity = dataProviderService + .insertBioGridOrcDataProvider(provider, genomicEntityCrossRefMap.get(referencedCurie)) + .getEntity(); + + if (entity != null) { + dataProviderIdsLoaded.add(entity.getId()); + history.incrementCompleted(); + } else { + history.incrementSkipped(); + } } } catch (Exception e) { e.printStackTrace(); history.incrementFailed(); - addException(history, new ObjectUpdateExceptionData(biogridOrcFmsDTO, e.getMessage(), e.getStackTrace())); ph.progressProcess(); } history.incrementCompleted(); @@ -162,7 +176,7 @@ private Set 
populateEntrezIdsFromFiles(List biogridLis for (BiogridOrcFmsDTO biogridOrcFmsDTO : biogridList) { try { - if (!biogridOrcFmsDTO.getIdentifierType().equals("ENTREZ_GENE")){ + if (!biogridOrcFmsDTO.getIdentifierType().equals("ENTREZ_GENE")) { history.incrementSkipped(); continue; } diff --git a/src/main/java/org/alliancegenome/curation_api/services/DataProviderService.java b/src/main/java/org/alliancegenome/curation_api/services/DataProviderService.java index f19f2e68f..7feebaa4f 100644 --- a/src/main/java/org/alliancegenome/curation_api/services/DataProviderService.java +++ b/src/main/java/org/alliancegenome/curation_api/services/DataProviderService.java @@ -92,6 +92,21 @@ public ObjectResponse insertExpressionAtlasDataProvider(DataProvid return new ObjectResponse<>(dbEntity); } + @Transactional + public ObjectResponse insertBioGridOrcDataProvider(DataProvider entity, Long geneticEntityId) { + String referencedCurie = entity.getCrossReference().getReferencedCurie(); + + DataProvider dbEntity = getDataProvider(entity.getSourceOrganization(), referencedCurie, entity.getCrossReference().getResourceDescriptorPage()); + + // we only create new records, no updates + if (dbEntity == null) { + dataProviderDAO.persist(entity); + crossReferenceDAO.persistAccessionGeneAssociated(entity.getCrossReference().getId(), geneticEntityId); + return new ObjectResponse<>(entity); + } + return new ObjectResponse<>(dbEntity); + } + @NotNull public static String getFullReferencedCurie(String localReferencedCurie) { return RESOURCE_DESCRIPTOR_PREFIX + ":" + localReferencedCurie; From 6a4ae09dfd9ebd79bb58c408852702905b82122d Mon Sep 17 00:00:00 2001 From: Adam Gibson Date: Wed, 13 Nov 2024 14:53:55 -0600 Subject: [PATCH 099/118] SCRUM-4513 began adding integration tests --- .../crud/CrossReferenceCrudController.java | 11 ++++ .../crud/CrossReferenceCrudInterface.java | 12 ++++ .../jobs/executors/BiogridOrcExecutor.java | 23 ++++++- .../ingest/dto/fms/BiogridOrcFmsDTO.java | 4 -- 
.../dto/fms/BiogridOrcIngestFmsDTO.java | 16 +++++ .../services/BioGridOrcsService.java | 52 ---------------- .../dto/fms/BioGridOrcsFmsDTOValidator.java | 32 ---------- .../BiogridOrcBulkUploadFmsITCase.java | 61 +++++++++++++++++++ .../bulk/fms/12_biogrid/AF_01_all_fields.json | 32 ++++++++++ 9 files changed, 152 insertions(+), 91 deletions(-) create mode 100644 src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/BiogridOrcIngestFmsDTO.java delete mode 100644 src/main/java/org/alliancegenome/curation_api/services/BioGridOrcsService.java delete mode 100644 src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/BioGridOrcsFmsDTOValidator.java create mode 100644 src/test/java/org/alliancegenome/curation_api/BiogridOrcBulkUploadFmsITCase.java create mode 100644 src/test/resources/bulk/fms/12_biogrid/AF_01_all_fields.json diff --git a/src/main/java/org/alliancegenome/curation_api/controllers/crud/CrossReferenceCrudController.java b/src/main/java/org/alliancegenome/curation_api/controllers/crud/CrossReferenceCrudController.java index 48de548e8..c9704a1e6 100644 --- a/src/main/java/org/alliancegenome/curation_api/controllers/crud/CrossReferenceCrudController.java +++ b/src/main/java/org/alliancegenome/curation_api/controllers/crud/CrossReferenceCrudController.java @@ -3,7 +3,10 @@ import org.alliancegenome.curation_api.controllers.base.BaseEntityCrudController; import org.alliancegenome.curation_api.dao.CrossReferenceDAO; import org.alliancegenome.curation_api.interfaces.crud.CrossReferenceCrudInterface; +import org.alliancegenome.curation_api.jobs.executors.BiogridOrcExecutor; import org.alliancegenome.curation_api.model.entities.CrossReference; +import org.alliancegenome.curation_api.model.ingest.dto.fms.BiogridOrcIngestFmsDTO; +import org.alliancegenome.curation_api.response.APIResponse; import org.alliancegenome.curation_api.services.CrossReferenceService; import jakarta.annotation.PostConstruct; @@ -16,9 +19,17 @@ public class 
CrossReferenceCrudController extends BaseEntityCrudController { + + @POST + @Path("/bulk/{dataProvider}/biogridfile") + @JsonView(View.FieldsAndLists.class) + APIResponse updateBiogridOrc(@PathParam("dataProvider") String dataProvider, BiogridOrcIngestFmsDTO biogridOrcData); } \ No newline at end of file diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/executors/BiogridOrcExecutor.java b/src/main/java/org/alliancegenome/curation_api/jobs/executors/BiogridOrcExecutor.java index 2d5f8fa6a..c431187de 100644 --- a/src/main/java/org/alliancegenome/curation_api/jobs/executors/BiogridOrcExecutor.java +++ b/src/main/java/org/alliancegenome/curation_api/jobs/executors/BiogridOrcExecutor.java @@ -22,6 +22,8 @@ import org.alliancegenome.curation_api.model.entities.ResourceDescriptorPage; import org.alliancegenome.curation_api.model.entities.bulkloads.BulkLoadFileHistory; import org.alliancegenome.curation_api.model.ingest.dto.fms.BiogridOrcFmsDTO; +import org.alliancegenome.curation_api.response.APIResponse; +import org.alliancegenome.curation_api.response.LoadHistoryResponce; import org.alliancegenome.curation_api.services.CrossReferenceService; import org.alliancegenome.curation_api.services.DataProviderService; import org.alliancegenome.curation_api.services.OrganizationService; @@ -72,8 +74,7 @@ public void execLoad(BulkLoadFileHistory bulkLoadFileHistory) { HashMap rdpParams = new HashMap<>(); rdpParams.put("name", "biogrid/orcs"); - ResourceDescriptorPage resourceDescriptorPage = resourceDescriptorPageDAO.findByParams(rdpParams) - .getSingleResult(); + ResourceDescriptorPage resourceDescriptorPage = resourceDescriptorPageDAO.findByParams(rdpParams).getSingleResult(); List dataProviderIdsBefore = new ArrayList<>( dataProviderService.getDataProviderMap(organization, resourceDescriptorPage) @@ -140,7 +141,7 @@ private boolean runLoad(BulkLoadFileHistory history, List biog newCrossRef.setReferencedCurie(referencedCurie); newCrossRef.setDisplayName("BioGRID 
CRISPR Screen Cell Line Phenotypes"); newCrossRef.setResourceDescriptorPage(resourceDescriptorPage); - + DataProvider provider = new DataProvider(); provider.setSourceOrganization(organization); provider.setCrossReference(newCrossRef); @@ -171,6 +172,22 @@ private boolean runLoad(BulkLoadFileHistory history, List biog return true; } + public APIResponse runLoadApi(String dataProviderName, List biogridDTOs) { + List dataProviderIdsLoaded = new ArrayList<>(); + Organization organization = organizationService.getByAbbr(dataProviderName).getEntity(); + + HashMap rdpParams = new HashMap<>(); + rdpParams.put("name", "biogrid/orcs"); + ResourceDescriptorPage resourceDescriptorPage = resourceDescriptorPageDAO.findByParams(rdpParams).getSingleResult(); + + BulkLoadFileHistory history = new BulkLoadFileHistory(biogridDTOs.size()); + history = bulkLoadFileHistoryDAO.persist(history); + runLoad(history, biogridDTOs, resourceDescriptorPage, organization, dataProviderService, dataProviderIdsLoaded); + history.finishLoad(); + + return new LoadHistoryResponce(history); + } + private Set populateEntrezIdsFromFiles(List biogridList, BulkLoadFileHistory history) { Set biogridIds = new HashSet<>(); diff --git a/src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/BiogridOrcFmsDTO.java b/src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/BiogridOrcFmsDTO.java index 6742ac634..10ef0c806 100644 --- a/src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/BiogridOrcFmsDTO.java +++ b/src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/BiogridOrcFmsDTO.java @@ -1,8 +1,5 @@ package org.alliancegenome.curation_api.model.ingest.dto.fms; -import java.util.List; - -import org.alliancegenome.curation_api.model.ingest.dto.CrossReferenceDTO; import org.alliancegenome.curation_api.model.ingest.dto.base.BaseDTO; import lombok.Data; @@ -25,5 +22,4 @@ public class BiogridOrcFmsDTO extends BaseDTO { private Double score5; private String hit; 
private String source; - private List crossReferenceDtos; } diff --git a/src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/BiogridOrcIngestFmsDTO.java b/src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/BiogridOrcIngestFmsDTO.java new file mode 100644 index 000000000..45b40347d --- /dev/null +++ b/src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/BiogridOrcIngestFmsDTO.java @@ -0,0 +1,16 @@ +package org.alliancegenome.curation_api.model.ingest.dto.fms; + +import java.util.List; + +import org.alliancegenome.curation_api.model.ingest.dto.base.BaseDTO; + +import lombok.Data; +import lombok.EqualsAndHashCode; + +@Data +@EqualsAndHashCode(callSuper = true) +public class BiogridOrcIngestFmsDTO extends BaseDTO { + + private MetaDataFmsDTO metaData; + private List data; +} diff --git a/src/main/java/org/alliancegenome/curation_api/services/BioGridOrcsService.java b/src/main/java/org/alliancegenome/curation_api/services/BioGridOrcsService.java deleted file mode 100644 index 4c401f886..000000000 --- a/src/main/java/org/alliancegenome/curation_api/services/BioGridOrcsService.java +++ /dev/null @@ -1,52 +0,0 @@ -package org.alliancegenome.curation_api.services; - -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Objects; - -import org.alliancegenome.curation_api.constants.EntityFieldConstants; -import org.alliancegenome.curation_api.dao.CrossReferenceDAO; -import org.alliancegenome.curation_api.dao.SequenceTargetingReagentDAO; -import org.alliancegenome.curation_api.enums.BackendBulkDataProvider; -import org.alliancegenome.curation_api.exceptions.ValidationException; -import org.alliancegenome.curation_api.interfaces.crud.BaseUpsertServiceInterface; -import org.alliancegenome.curation_api.model.entities.CrossReference; -import org.alliancegenome.curation_api.model.entities.SequenceTargetingReagent; -import 
org.alliancegenome.curation_api.model.ingest.dto.fms.SequenceTargetingReagentFmsDTO; -import org.alliancegenome.curation_api.services.base.SubmittedObjectCrudService; -import org.alliancegenome.curation_api.services.validation.dto.fms.BioGridOrcsFmsDTOValidator; -import org.alliancegenome.curation_api.services.validation.dto.fms.SequenceTargetingReagentFmsDTOValidator; - -import jakarta.annotation.PostConstruct; -import jakarta.enterprise.context.RequestScoped; -import jakarta.inject.Inject; -import jakarta.transaction.Transactional; - -@RequestScoped -public class BioGridOrcsService { - - @Inject BioGridOrcsFmsDTOValidator bioGridOrcsFmsDTOValidator; - @Inject CrossReferenceDAO crossReferenceDAO; - - // @Override - // @PostConstruct - // protected void init() { - // setSQLDao(crossReferenceDAO); - // } - - @Transactional - public CrossReference insert(BioGridOrcsFmsDTOValidator dto, BackendBulkDataProvider dataProvider) throws ValidationException { - // CrossReference crossReference = bioGridOrcsFmsDTOValidator.validateSQTRFmsDTO(dto, dataProvider); - return new CrossReference(); - // return crossReferenceDAO.persist(sqtr); - } - - // public List getIdsByDataProvider(String dataProvider) { - // Map params = new HashMap<>(); - // params.put(EntityFieldConstants.DATA_PROVIDER, dataProvider); - // List ids = sqtrDAO.findIdsByParams(params); - // ids.removeIf(Objects::isNull); - // return ids; - // } -} diff --git a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/BioGridOrcsFmsDTOValidator.java b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/BioGridOrcsFmsDTOValidator.java deleted file mode 100644 index 92c6e9a2e..000000000 --- a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/BioGridOrcsFmsDTOValidator.java +++ /dev/null @@ -1,32 +0,0 @@ -package org.alliancegenome.curation_api.services.validation.dto.fms; - -import java.util.HashMap; - -import 
org.alliancegenome.curation_api.enums.BackendBulkDataProvider; -import org.alliancegenome.curation_api.model.entities.CrossReference; -import org.alliancegenome.curation_api.model.ingest.dto.fms.BiogridOrcFmsDTO; -import org.alliancegenome.curation_api.response.SearchResponse; - -import jakarta.enterprise.context.RequestScoped; - -@RequestScoped -public class BioGridOrcsFmsDTOValidator { - - //Todo: rename? - public CrossReference validateBioGridOrcsFmsDTO(String referencedCurie){ - // HashMap crossRefParams = new HashMap<>(); - // crossRefParams.put("referencedCurie", referencedCurie); - // crossRefParams.put("displayName", referencedCurie); - // crossRefParams.put("resourceDescriptorPage.id", resourceDescriptorPage.getId()); - - // // Log.debug("--------------crossRefDupSearch----------------"); - // SearchResponse crossRefDupSearch = - // crossRefDAO.findByParams(crossRefParams); - // Log.debug(crossRefDupSearch.getResults().isEmpty()); - - // if(!crossRefDupSearch.getResults().isEmpty()) continue; - - return new CrossReference(); - } - -} diff --git a/src/test/java/org/alliancegenome/curation_api/BiogridOrcBulkUploadFmsITCase.java b/src/test/java/org/alliancegenome/curation_api/BiogridOrcBulkUploadFmsITCase.java new file mode 100644 index 000000000..0c52d02b8 --- /dev/null +++ b/src/test/java/org/alliancegenome/curation_api/BiogridOrcBulkUploadFmsITCase.java @@ -0,0 +1,61 @@ +package org.alliancegenome.curation_api; + +import static org.hamcrest.Matchers.hasSize; +import static org.hamcrest.Matchers.is; + +import org.alliancegenome.curation_api.base.BaseITCase; +import org.alliancegenome.curation_api.resources.TestContainerResource; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.MethodOrderer; +import org.junit.jupiter.api.Order; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.TestInstance; +import org.junit.jupiter.api.TestMethodOrder; + +import 
io.quarkus.test.common.QuarkusTestResource; +import io.quarkus.test.junit.QuarkusIntegrationTest; +import io.restassured.RestAssured; +import io.restassured.config.HttpClientConfig; +import io.restassured.config.RestAssuredConfig; + +@QuarkusIntegrationTest +@QuarkusTestResource(TestContainerResource.Initializer.class) +@TestMethodOrder(MethodOrderer.OrderAnnotation.class) +@TestInstance(TestInstance.Lifecycle.PER_CLASS) +@DisplayName("612 - Biogrid Orc bulk upload - FMS") +@Order(612) +public class BiogridOrcBulkUploadFmsITCase extends BaseITCase { + + @BeforeEach + public void init() { + RestAssured.config = RestAssuredConfig.config() + .httpClient(HttpClientConfig.httpClientConfig() + .setParam("http.socket.timeout", 100000) + .setParam("http.connection.timeout", 100000)); + } + + private final String biogridOrcBulkPostEndpoint = "/api/cross-reference/bulk/FB/biogridfile"; + private final String biogridOrcTestFilePath = "src/test/resources/bulk/fms/12_biogrid/"; + private final String biogridOrcFindEndpoint = "/api/cross-reference/find?limit=100&page=0"; + + @Test + @Order(1) + public void biogridOrcBulkUploadCheckFields() throws Exception { + + checkSuccessfulBulkLoad(biogridOrcBulkPostEndpoint, biogridOrcTestFilePath + "AF_01_all_fields.json", 1); + + RestAssured.given(). + when(). + header("Content-Type", "application/json"). + body("{}"). + post(biogridOrcFindEndpoint). + then(). + statusCode(200). + body("totalResults", is(1)). + body("results", hasSize(1)). + body("results[0].referencedCurie", is("NCBI_Gene:108101")). 
+ body("results[0].displayName", is("BioGRID CRISPR Screen Cell Line Phenotypes")); + } + +} diff --git a/src/test/resources/bulk/fms/12_biogrid/AF_01_all_fields.json b/src/test/resources/bulk/fms/12_biogrid/AF_01_all_fields.json new file mode 100644 index 000000000..44ffa6cb6 --- /dev/null +++ b/src/test/resources/bulk/fms/12_biogrid/AF_01_all_fields.json @@ -0,0 +1,32 @@ +{ + "data": [ + { + "screenId": 1888, + "identifierId": "108101", + "identifierType": "ENTREZ_GENE", + "officialSymbol": "Fermt3", + "aliases": "C79673|Kindlin3", + "organismId": 10090, + "organismOfficial": "Mus musculus", + "score1": 205.0, + "score2": -2.2, + "score3": 1e-05, + "score4": null, + "score5": null, + "hit": "YES", + "source": "BioGRID ORCS" + } + ], + "metaData": { + "dateProduced": "2024-04-18T20:10:35-07:00", + "dataProvider": { + "crossReference": { + "id": "FB", + "pages": [ + "biogrid/orc" + ] + }, + "type": "curated" + } + } +} From 158f00ffb7585bae9a7485c5259f0b4a95c7f0a9 Mon Sep 17 00:00:00 2001 From: Adam Gibson Date: Wed, 13 Nov 2024 15:39:23 -0600 Subject: [PATCH 100/118] SCRUM-4513 create biogrid controller --- .../crud/BiogridOrcCrudController.java | 35 +++++++++++++++++++ .../crud/CrossReferenceCrudController.java | 7 ---- .../crud/BiogridOrcCrudInterface.java | 26 ++++++++++++++ .../crud/CrossReferenceCrudInterface.java | 12 ------- 4 files changed, 61 insertions(+), 19 deletions(-) create mode 100644 src/main/java/org/alliancegenome/curation_api/controllers/crud/BiogridOrcCrudController.java create mode 100644 src/main/java/org/alliancegenome/curation_api/interfaces/crud/BiogridOrcCrudInterface.java diff --git a/src/main/java/org/alliancegenome/curation_api/controllers/crud/BiogridOrcCrudController.java b/src/main/java/org/alliancegenome/curation_api/controllers/crud/BiogridOrcCrudController.java new file mode 100644 index 000000000..c7011821c --- /dev/null +++ b/src/main/java/org/alliancegenome/curation_api/controllers/crud/BiogridOrcCrudController.java @@ -0,0 
+1,35 @@ +package org.alliancegenome.curation_api.controllers.crud; + +import org.alliancegenome.curation_api.controllers.base.BaseEntityCrudController; +import org.alliancegenome.curation_api.dao.CrossReferenceDAO; +import org.alliancegenome.curation_api.interfaces.crud.BiogridOrcCrudInterface; +import org.alliancegenome.curation_api.jobs.executors.BiogridOrcExecutor; +import org.alliancegenome.curation_api.model.entities.CrossReference; +import org.alliancegenome.curation_api.model.ingest.dto.fms.BiogridOrcIngestFmsDTO; +import org.alliancegenome.curation_api.response.APIResponse; +import org.alliancegenome.curation_api.services.CrossReferenceService; + +import jakarta.annotation.PostConstruct; +import jakarta.enterprise.context.RequestScoped; +import jakarta.inject.Inject; + +@RequestScoped +public class BiogridOrcCrudController extends BaseEntityCrudController implements BiogridOrcCrudInterface { + + @Inject + CrossReferenceService crossReferenceService; + + @Inject + BiogridOrcExecutor biogridOrcExecutor; + + @Override + @PostConstruct + protected void init() { + setService(crossReferenceService); + } + + @Override + public APIResponse updateBiogridOrc(String dataProvider, BiogridOrcIngestFmsDTO biogridOrcData) { + return biogridOrcExecutor.runLoadApi(dataProvider, biogridOrcData.getData()); + } +} diff --git a/src/main/java/org/alliancegenome/curation_api/controllers/crud/CrossReferenceCrudController.java b/src/main/java/org/alliancegenome/curation_api/controllers/crud/CrossReferenceCrudController.java index c9704a1e6..b45cf1ae2 100644 --- a/src/main/java/org/alliancegenome/curation_api/controllers/crud/CrossReferenceCrudController.java +++ b/src/main/java/org/alliancegenome/curation_api/controllers/crud/CrossReferenceCrudController.java @@ -5,8 +5,6 @@ import org.alliancegenome.curation_api.interfaces.crud.CrossReferenceCrudInterface; import org.alliancegenome.curation_api.jobs.executors.BiogridOrcExecutor; import 
org.alliancegenome.curation_api.model.entities.CrossReference; -import org.alliancegenome.curation_api.model.ingest.dto.fms.BiogridOrcIngestFmsDTO; -import org.alliancegenome.curation_api.response.APIResponse; import org.alliancegenome.curation_api.services.CrossReferenceService; import jakarta.annotation.PostConstruct; @@ -27,9 +25,4 @@ public class CrossReferenceCrudController extends BaseEntityCrudController { + + + @POST + @Path("/bulk/{dataProvider}/biogridfile") + @JsonView(View.FieldsAndLists.class) + APIResponse updateBiogridOrc(@PathParam("dataProvider") String dataProvider, BiogridOrcIngestFmsDTO biogridOrcData); + +} diff --git a/src/main/java/org/alliancegenome/curation_api/interfaces/crud/CrossReferenceCrudInterface.java b/src/main/java/org/alliancegenome/curation_api/interfaces/crud/CrossReferenceCrudInterface.java index f57f7a0a0..5680e1211 100644 --- a/src/main/java/org/alliancegenome/curation_api/interfaces/crud/CrossReferenceCrudInterface.java +++ b/src/main/java/org/alliancegenome/curation_api/interfaces/crud/CrossReferenceCrudInterface.java @@ -2,16 +2,9 @@ import org.alliancegenome.curation_api.interfaces.base.BaseIdCrudInterface; import org.alliancegenome.curation_api.model.entities.CrossReference; -import org.alliancegenome.curation_api.model.ingest.dto.fms.BiogridOrcIngestFmsDTO; -import org.alliancegenome.curation_api.response.APIResponse; -import org.alliancegenome.curation_api.view.View; import org.eclipse.microprofile.openapi.annotations.tags.Tag; -import com.fasterxml.jackson.annotation.JsonView; - -import jakarta.websocket.server.PathParam; import jakarta.ws.rs.Consumes; -import jakarta.ws.rs.POST; import jakarta.ws.rs.Path; import jakarta.ws.rs.Produces; import jakarta.ws.rs.core.MediaType; @@ -21,9 +14,4 @@ @Produces(MediaType.APPLICATION_JSON) @Consumes(MediaType.APPLICATION_JSON) public interface CrossReferenceCrudInterface extends BaseIdCrudInterface { - - @POST - @Path("/bulk/{dataProvider}/biogridfile") - 
@JsonView(View.FieldsAndLists.class) - APIResponse updateBiogridOrc(@PathParam("dataProvider") String dataProvider, BiogridOrcIngestFmsDTO biogridOrcData); } \ No newline at end of file From 8b2d562be84e0c53d95b3f48741ddfd4279bcf62 Mon Sep 17 00:00:00 2001 From: Adam Gibson Date: Wed, 13 Nov 2024 15:55:21 -0600 Subject: [PATCH 101/118] SCRUM-4513 update biogridOrcBulkPostEndpoint --- .../jobs/executors/BiogridOrcExecutor.java | 1 - .../BiogridOrcBulkUploadFmsITCase.java | 33 +++++++++++-------- 2 files changed, 20 insertions(+), 14 deletions(-) diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/executors/BiogridOrcExecutor.java b/src/main/java/org/alliancegenome/curation_api/jobs/executors/BiogridOrcExecutor.java index c431187de..bd4c42b51 100644 --- a/src/main/java/org/alliancegenome/curation_api/jobs/executors/BiogridOrcExecutor.java +++ b/src/main/java/org/alliancegenome/curation_api/jobs/executors/BiogridOrcExecutor.java @@ -163,7 +163,6 @@ private boolean runLoad(BulkLoadFileHistory history, List biog history.incrementFailed(); ph.progressProcess(); } - history.incrementCompleted(); updateHistory(history); updateExceptions(history); ph.finishProcess(); diff --git a/src/test/java/org/alliancegenome/curation_api/BiogridOrcBulkUploadFmsITCase.java b/src/test/java/org/alliancegenome/curation_api/BiogridOrcBulkUploadFmsITCase.java index 0c52d02b8..f8624b2ce 100644 --- a/src/test/java/org/alliancegenome/curation_api/BiogridOrcBulkUploadFmsITCase.java +++ b/src/test/java/org/alliancegenome/curation_api/BiogridOrcBulkUploadFmsITCase.java @@ -4,6 +4,8 @@ import static org.hamcrest.Matchers.is; import org.alliancegenome.curation_api.base.BaseITCase; +import org.alliancegenome.curation_api.model.entities.ResourceDescriptor; +import org.alliancegenome.curation_api.model.entities.ResourceDescriptorPage; import org.alliancegenome.curation_api.resources.TestContainerResource; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.DisplayName; @@ 
-35,27 +37,32 @@ public void init() { .setParam("http.connection.timeout", 100000)); } - private final String biogridOrcBulkPostEndpoint = "/api/cross-reference/bulk/FB/biogridfile"; + ResourceDescriptorPage resourceDescriptorPage; + + private final String biogridOrcBulkPostEndpoint = "/api/biogrid-orc/bulk/FB/biogridfile"; private final String biogridOrcTestFilePath = "src/test/resources/bulk/fms/12_biogrid/"; private final String biogridOrcFindEndpoint = "/api/cross-reference/find?limit=100&page=0"; + private void loadRequiredEntities() throws Exception { + ResourceDescriptor rd = createResourceDescriptor("FB"); + createResourceDescriptorPage("biogrid/orcs", "http://test.org", rd); + } + @Test @Order(1) public void biogridOrcBulkUploadCheckFields() throws Exception { - + loadRequiredEntities(); checkSuccessfulBulkLoad(biogridOrcBulkPostEndpoint, biogridOrcTestFilePath + "AF_01_all_fields.json", 1); - RestAssured.given(). - when(). - header("Content-Type", "application/json"). - body("{}"). - post(biogridOrcFindEndpoint). - then(). - statusCode(200). - body("totalResults", is(1)). - body("results", hasSize(1)). - body("results[0].referencedCurie", is("NCBI_Gene:108101")). 
- body("results[0].displayName", is("BioGRID CRISPR Screen Cell Line Phenotypes")); + RestAssured.given() + .when() + .header("Content-Type", "application/json") + .body("{\"referencedCurie\": \"NCBI_Gene:108101\"}").post(biogridOrcFindEndpoint) + .then().statusCode(200) + .body("totalResults", is(1)) + .body("results", hasSize(1)) + .body("results[0].referencedCurie", is("NCBI_Gene:108101")) + .body("results[0].displayName", is("BioGRID CRISPR Screen Cell Line Phenotypes")); } } From b88acf6219b76e002ada44f668989fe4ecf22e1c Mon Sep 17 00:00:00 2001 From: Adam Gibson Date: Thu, 14 Nov 2024 12:11:54 -0600 Subject: [PATCH 102/118] SCRUM-4513 add test for duplicate entries --- .../BiogridOrcBulkUploadFmsITCase.java | 33 ++++++++++--- .../12_biogrid/DE_01_duplicate_entries.json | 49 +++++++++++++++++++ 2 files changed, 74 insertions(+), 8 deletions(-) create mode 100644 src/test/resources/bulk/fms/12_biogrid/DE_01_duplicate_entries.json diff --git a/src/test/java/org/alliancegenome/curation_api/BiogridOrcBulkUploadFmsITCase.java b/src/test/java/org/alliancegenome/curation_api/BiogridOrcBulkUploadFmsITCase.java index f8624b2ce..54e1a1f4b 100644 --- a/src/test/java/org/alliancegenome/curation_api/BiogridOrcBulkUploadFmsITCase.java +++ b/src/test/java/org/alliancegenome/curation_api/BiogridOrcBulkUploadFmsITCase.java @@ -55,14 +55,31 @@ public void biogridOrcBulkUploadCheckFields() throws Exception { checkSuccessfulBulkLoad(biogridOrcBulkPostEndpoint, biogridOrcTestFilePath + "AF_01_all_fields.json", 1); RestAssured.given() - .when() - .header("Content-Type", "application/json") - .body("{\"referencedCurie\": \"NCBI_Gene:108101\"}").post(biogridOrcFindEndpoint) - .then().statusCode(200) - .body("totalResults", is(1)) - .body("results", hasSize(1)) - .body("results[0].referencedCurie", is("NCBI_Gene:108101")) - .body("results[0].displayName", is("BioGRID CRISPR Screen Cell Line Phenotypes")); + .when() + .header("Content-Type", "application/json") + 
.body("{\"referencedCurie\": \"NCBI_Gene:108101\"}").post(biogridOrcFindEndpoint) + .then().statusCode(200) + .body("totalResults", is(1)) + .body("results", hasSize(1)) + .body("results[0].referencedCurie", is("NCBI_Gene:108101")) + .body("results[0].displayName", is("BioGRID CRISPR Screen Cell Line Phenotypes")); + } + + @Test + @Order(2) + public void biogridOrcBulkUploadDuplicateEntries() throws Exception { + checkBulkLoadRecordCounts(biogridOrcBulkPostEndpoint, biogridOrcTestFilePath + "DE_01_duplicate_entries.json", + "Records", 2, 0, 1, 0); + + RestAssured.given() + .when() + .header("Content-Type", "application/json") + .body("{\"referencedCurie\": \"NCBI_Gene:100001\"}").post(biogridOrcFindEndpoint) + .then().statusCode(200) + .body("totalResults", is(1)) + .body("results", hasSize(1)) + .body("results[0].referencedCurie", is("NCBI_Gene:100001")) + .body("results[0].displayName", is("BioGRID CRISPR Screen Cell Line Phenotypes")); } } diff --git a/src/test/resources/bulk/fms/12_biogrid/DE_01_duplicate_entries.json b/src/test/resources/bulk/fms/12_biogrid/DE_01_duplicate_entries.json new file mode 100644 index 000000000..677cb545b --- /dev/null +++ b/src/test/resources/bulk/fms/12_biogrid/DE_01_duplicate_entries.json @@ -0,0 +1,49 @@ +{ + "data": [ + { + "screenId": 1888, + "identifierId": "100001", + "identifierType": "ENTREZ_GENE", + "officialSymbol": "Fermt3", + "aliases": "C79673|Kindlin3", + "organismId": 10090, + "organismOfficial": "Mus musculus", + "score1": 205.0, + "score2": -2.2, + "score3": 1e-05, + "score4": null, + "score5": null, + "hit": "YES", + "source": "BioGRID ORCS" + }, + { + "screenId": 1888, + "identifierId": "100001", + "identifierType": "ENTREZ_GENE", + "officialSymbol": "Fermt3", + "aliases": "C79673|Kindlin3", + "organismId": 10090, + "organismOfficial": "Mus musculus", + "score1": 205.0, + "score2": -2.2, + "score3": 1e-05, + "score4": null, + "score5": null, + "hit": "YES", + "source": "BioGRID ORCS" + } + ], + "metaData": { + 
"dateProduced": "2024-04-18T20:10:35-07:00", + "dataProvider": { + "crossReference": { + "id": "FB", + "pages": [ + "biogrid/orc" + ] + }, + "type": "curated" + } + } + } + \ No newline at end of file From 396dd9624d8d45279e1bbebdf7a3a962ead1e2c3 Mon Sep 17 00:00:00 2001 From: Adam Gibson Date: Thu, 14 Nov 2024 16:18:44 -0600 Subject: [PATCH 103/118] SCRUM-4513 remove unused import --- .../controllers/crud/CrossReferenceCrudController.java | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/main/java/org/alliancegenome/curation_api/controllers/crud/CrossReferenceCrudController.java b/src/main/java/org/alliancegenome/curation_api/controllers/crud/CrossReferenceCrudController.java index b45cf1ae2..48de548e8 100644 --- a/src/main/java/org/alliancegenome/curation_api/controllers/crud/CrossReferenceCrudController.java +++ b/src/main/java/org/alliancegenome/curation_api/controllers/crud/CrossReferenceCrudController.java @@ -3,7 +3,6 @@ import org.alliancegenome.curation_api.controllers.base.BaseEntityCrudController; import org.alliancegenome.curation_api.dao.CrossReferenceDAO; import org.alliancegenome.curation_api.interfaces.crud.CrossReferenceCrudInterface; -import org.alliancegenome.curation_api.jobs.executors.BiogridOrcExecutor; import org.alliancegenome.curation_api.model.entities.CrossReference; import org.alliancegenome.curation_api.services.CrossReferenceService; @@ -17,9 +16,6 @@ public class CrossReferenceCrudController extends BaseEntityCrudController Date: Fri, 22 Nov 2024 09:15:26 -0600 Subject: [PATCH 104/118] SCRUM-4513 remove DataProvider from biogrid load --- .../curation_api/dao/CrossReferenceDAO.java | 11 ++++++ .../jobs/executors/BiogridOrcExecutor.java | 38 +++++-------------- .../services/CrossReferenceService.java | 35 +++++++++++++++++ .../services/DataProviderService.java | 15 -------- 4 files changed, 55 insertions(+), 44 deletions(-) diff --git a/src/main/java/org/alliancegenome/curation_api/dao/CrossReferenceDAO.java 
b/src/main/java/org/alliancegenome/curation_api/dao/CrossReferenceDAO.java index 02724376d..5908c9952 100644 --- a/src/main/java/org/alliancegenome/curation_api/dao/CrossReferenceDAO.java +++ b/src/main/java/org/alliancegenome/curation_api/dao/CrossReferenceDAO.java @@ -5,6 +5,8 @@ import org.alliancegenome.curation_api.dao.base.BaseSQLDAO; import org.alliancegenome.curation_api.model.entities.CrossReference; import org.alliancegenome.curation_api.model.entities.ResourceDescriptorPage; +import org.alliancegenome.curation_api.model.input.Pagination; +import org.alliancegenome.curation_api.response.SearchResponse; import java.util.HashMap; import java.util.List; @@ -49,6 +51,15 @@ public Map getGenesWithCrossRefs(Set referencedCuries) { } + public List getAllCrossRefsByPage(ResourceDescriptorPage page) { + HashMap params = new HashMap<>(); + params.put("resourceDescriptorPage.name", page.getName()); + Pagination pagination = new Pagination(); + pagination.setLimit(10_000_000); + SearchResponse crossReferenceResponse = findByParams(pagination, params); + return crossReferenceResponse.getResults(); + } + public Integer persistAccessionGeneAssociated(Long crossReferenceID, Long geneID) { String sql = """ insert into genomicentity_crossreference (crossreferences_id,genomicentity_id) diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/executors/BiogridOrcExecutor.java b/src/main/java/org/alliancegenome/curation_api/jobs/executors/BiogridOrcExecutor.java index bd4c42b51..f47f472bd 100644 --- a/src/main/java/org/alliancegenome/curation_api/jobs/executors/BiogridOrcExecutor.java +++ b/src/main/java/org/alliancegenome/curation_api/jobs/executors/BiogridOrcExecutor.java @@ -6,7 +6,6 @@ import java.util.HashSet; import java.util.List; import java.util.Map; -import java.util.Objects; import java.util.Set; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -17,7 +16,6 @@ import org.alliancegenome.curation_api.dao.ResourceDescriptorPageDAO; import 
org.alliancegenome.curation_api.jobs.util.CsvSchemaBuilder; import org.alliancegenome.curation_api.model.entities.CrossReference; -import org.alliancegenome.curation_api.model.entities.DataProvider; import org.alliancegenome.curation_api.model.entities.Organization; import org.alliancegenome.curation_api.model.entities.ResourceDescriptorPage; import org.alliancegenome.curation_api.model.entities.bulkloads.BulkLoadFileHistory; @@ -74,18 +72,8 @@ public void execLoad(BulkLoadFileHistory bulkLoadFileHistory) { HashMap rdpParams = new HashMap<>(); rdpParams.put("name", "biogrid/orcs"); - ResourceDescriptorPage resourceDescriptorPage = resourceDescriptorPageDAO.findByParams(rdpParams).getSingleResult(); - - List dataProviderIdsBefore = new ArrayList<>( - dataProviderService.getDataProviderMap(organization, resourceDescriptorPage) - .values() - .stream() - .map(DataProvider::getId) - .toList()); - - dataProviderIdsBefore.removeIf(Objects::isNull); - - List dataProviderIdsLoaded = new ArrayList<>(); + ResourceDescriptorPage resourceDescriptorPage = resourceDescriptorPageDAO.findByParams(rdpParams) + .getSingleResult(); while ((tarEntry = tarInputStream.getNextEntry()) != null) { @@ -109,11 +97,8 @@ public void execLoad(BulkLoadFileHistory bulkLoadFileHistory) { biogridData.addAll(it.readAll()); } - runLoad(bulkLoadFileHistory, biogridData, resourceDescriptorPage, organization, dataProviderService, - dataProviderIdsLoaded); + runLoad(bulkLoadFileHistory, biogridData, resourceDescriptorPage, organization, dataProviderService); - runCleanup(dataProviderService, bulkLoadFileHistory, dataProviderName, dataProviderIdsBefore, - dataProviderIdsLoaded, "Biogrid Orc Load Type"); } catch (Exception e) { failLoad(bulkLoadFileHistory, e); e.printStackTrace(); @@ -122,7 +107,7 @@ public void execLoad(BulkLoadFileHistory bulkLoadFileHistory) { private boolean runLoad(BulkLoadFileHistory history, List biogridList, ResourceDescriptorPage resourceDescriptorPage, Organization organization, 
- DataProviderService dataProviderService, List dataProviderIdsLoaded) { + DataProviderService dataProviderService) { ProcessDisplayHelper ph = new ProcessDisplayHelper(); ph.addDisplayHandler(loadProcessDisplayService); if (CollectionUtils.isNotEmpty(biogridList)) { @@ -142,16 +127,11 @@ private boolean runLoad(BulkLoadFileHistory history, List biog newCrossRef.setDisplayName("BioGRID CRISPR Screen Cell Line Phenotypes"); newCrossRef.setResourceDescriptorPage(resourceDescriptorPage); - DataProvider provider = new DataProvider(); - provider.setSourceOrganization(organization); - provider.setCrossReference(newCrossRef); - - DataProvider entity = dataProviderService - .insertBioGridOrcDataProvider(provider, genomicEntityCrossRefMap.get(referencedCurie)) + CrossReference entity = crossReferenceService + .insertBioGridOrcCrossReference(newCrossRef, genomicEntityCrossRefMap.get(referencedCurie)) .getEntity(); if (entity != null) { - dataProviderIdsLoaded.add(entity.getId()); history.incrementCompleted(); } else { history.incrementSkipped(); @@ -172,16 +152,16 @@ private boolean runLoad(BulkLoadFileHistory history, List biog } public APIResponse runLoadApi(String dataProviderName, List biogridDTOs) { - List dataProviderIdsLoaded = new ArrayList<>(); Organization organization = organizationService.getByAbbr(dataProviderName).getEntity(); HashMap rdpParams = new HashMap<>(); rdpParams.put("name", "biogrid/orcs"); - ResourceDescriptorPage resourceDescriptorPage = resourceDescriptorPageDAO.findByParams(rdpParams).getSingleResult(); + ResourceDescriptorPage resourceDescriptorPage = resourceDescriptorPageDAO.findByParams(rdpParams) + .getSingleResult(); BulkLoadFileHistory history = new BulkLoadFileHistory(biogridDTOs.size()); history = bulkLoadFileHistoryDAO.persist(history); - runLoad(history, biogridDTOs, resourceDescriptorPage, organization, dataProviderService, dataProviderIdsLoaded); + runLoad(history, biogridDTOs, resourceDescriptorPage, organization, 
dataProviderService); history.finishLoad(); return new LoadHistoryResponce(history); diff --git a/src/main/java/org/alliancegenome/curation_api/services/CrossReferenceService.java b/src/main/java/org/alliancegenome/curation_api/services/CrossReferenceService.java index 87a1644bc..670bddd20 100644 --- a/src/main/java/org/alliancegenome/curation_api/services/CrossReferenceService.java +++ b/src/main/java/org/alliancegenome/curation_api/services/CrossReferenceService.java @@ -8,6 +8,7 @@ import org.alliancegenome.curation_api.model.entities.CrossReference; import org.alliancegenome.curation_api.model.entities.ResourceDescriptorPage; import org.alliancegenome.curation_api.model.ingest.dto.fms.CrossReferenceFmsDTO; +import org.alliancegenome.curation_api.response.ObjectResponse; import org.alliancegenome.curation_api.services.base.BaseEntityCrudService; import org.apache.commons.collections.CollectionUtils; import org.apache.commons.lang3.StringUtils; @@ -16,10 +17,12 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Objects; import java.util.Set; @RequestScoped public class CrossReferenceService extends BaseEntityCrudService { + HashMap crossReferenceCache = new HashMap<>(); @Inject CrossReferenceDAO crossReferenceDAO; @@ -114,4 +117,36 @@ public Map getGenomicEntityCrossRefMap(ResourceDescriptorPage page public Map getGenomicEntityCrossRefMap(Set referencedCuries) { return crossReferenceDAO.getGenesWithCrossRefs(referencedCuries); } + + @Transactional + public ObjectResponse insertBioGridOrcCrossReference(CrossReference crossReference, Long geneticEntityId) { + String referencedCurie = crossReference.getReferencedCurie(); + + CrossReference dbEntity = getCrossReference(referencedCurie, crossReference.getResourceDescriptorPage()); + + // we only create new records, no updates + if (dbEntity == null) { + crossReferenceDAO.persist(crossReference); + crossReferenceDAO.persistAccessionGeneAssociated(crossReference.getId(), 
geneticEntityId); + return new ObjectResponse<>(crossReference); + } + return new ObjectResponse<>(dbEntity); + } + + private CrossReference getCrossReference(String crossReferenceCurie, ResourceDescriptorPage page) { + if (crossReferenceCache.size() > 0) { + return crossReferenceCache.get(crossReferenceCurie); + } + populateCrossReferenceCache(page); + return crossReferenceCache.get(crossReferenceCurie); + } + + private void populateCrossReferenceCache(ResourceDescriptorPage page) { + List allCrossRefs = crossReferenceDAO.getAllCrossRefsByPage(page); + allCrossRefs.stream() + .filter(crossRef -> Objects.equals(crossRef.getResourceDescriptorPage().getId(), page.getId())) + .forEach(crossRef -> { + crossReferenceCache.put(crossRef.getReferencedCurie(), crossRef); + }); + } } diff --git a/src/main/java/org/alliancegenome/curation_api/services/DataProviderService.java b/src/main/java/org/alliancegenome/curation_api/services/DataProviderService.java index 7feebaa4f..f19f2e68f 100644 --- a/src/main/java/org/alliancegenome/curation_api/services/DataProviderService.java +++ b/src/main/java/org/alliancegenome/curation_api/services/DataProviderService.java @@ -92,21 +92,6 @@ public ObjectResponse insertExpressionAtlasDataProvider(DataProvid return new ObjectResponse<>(dbEntity); } - @Transactional - public ObjectResponse insertBioGridOrcDataProvider(DataProvider entity, Long geneticEntityId) { - String referencedCurie = entity.getCrossReference().getReferencedCurie(); - - DataProvider dbEntity = getDataProvider(entity.getSourceOrganization(), referencedCurie, entity.getCrossReference().getResourceDescriptorPage()); - - // we only create new records, no updates - if (dbEntity == null) { - dataProviderDAO.persist(entity); - crossReferenceDAO.persistAccessionGeneAssociated(entity.getCrossReference().getId(), geneticEntityId); - return new ObjectResponse<>(entity); - } - return new ObjectResponse<>(dbEntity); - } - @NotNull public static String getFullReferencedCurie(String 
localReferencedCurie) { return RESOURCE_DESCRIPTOR_PREFIX + ":" + localReferencedCurie; From 57165c97174c228f8cf44f63dea313065ff828fd Mon Sep 17 00:00:00 2001 From: Christian Pich Date: Fri, 22 Nov 2024 19:36:01 +0100 Subject: [PATCH 105/118] SCRUM-4603 add convenience method for relation plus negation (#1738) * SCRUM-4603 add convenience method for relation plus negation * add braces * handle null relation --- .../model/entities/DiseaseAnnotation.java | 117 +++++++++--------- 1 file changed, 61 insertions(+), 56 deletions(-) diff --git a/src/main/java/org/alliancegenome/curation_api/model/entities/DiseaseAnnotation.java b/src/main/java/org/alliancegenome/curation_api/model/entities/DiseaseAnnotation.java index 96b84b062..c0db7c337 100644 --- a/src/main/java/org/alliancegenome/curation_api/model/entities/DiseaseAnnotation.java +++ b/src/main/java/org/alliancegenome/curation_api/model/entities/DiseaseAnnotation.java @@ -1,7 +1,13 @@ package org.alliancegenome.curation_api.model.entities; -import java.util.List; - +import com.fasterxml.jackson.annotation.JsonIgnore; +import com.fasterxml.jackson.annotation.JsonSubTypes; +import com.fasterxml.jackson.annotation.JsonSubTypes.Type; +import com.fasterxml.jackson.annotation.JsonTypeInfo; +import com.fasterxml.jackson.annotation.JsonView; +import jakarta.persistence.*; +import lombok.Data; +import lombok.EqualsAndHashCode; import org.alliancegenome.curation_api.constants.LinkMLSchemaConstants; import org.alliancegenome.curation_api.interfaces.AGRCurationSchemaVersion; import org.alliancegenome.curation_api.model.bridges.BooleanValueBridge; @@ -19,25 +25,7 @@ import org.hibernate.search.mapper.pojo.mapping.definition.annotation.IndexingDependency; import org.hibernate.search.mapper.pojo.mapping.definition.annotation.KeywordField; -import com.fasterxml.jackson.annotation.JsonIgnore; -import com.fasterxml.jackson.annotation.JsonSubTypes; -import com.fasterxml.jackson.annotation.JsonSubTypes.Type; -import 
com.fasterxml.jackson.annotation.JsonTypeInfo; -import com.fasterxml.jackson.annotation.JsonView; - -import jakarta.persistence.Column; -import jakarta.persistence.Entity; -import jakarta.persistence.Index; -import jakarta.persistence.Inheritance; -import jakarta.persistence.InheritanceType; -import jakarta.persistence.JoinColumn; -import jakarta.persistence.JoinTable; -import jakarta.persistence.ManyToMany; -import jakarta.persistence.ManyToOne; -import jakarta.persistence.Table; -import jakarta.persistence.Transient; -import lombok.Data; -import lombok.EqualsAndHashCode; +import java.util.List; @Inheritance(strategy = InheritanceType.JOINED) @JsonTypeInfo(use = JsonTypeInfo.Id.NAME, include = JsonTypeInfo.As.PROPERTY, property = "type") @@ -49,7 +37,7 @@ @Entity @Data @EqualsAndHashCode(onlyExplicitlyIncluded = true, callSuper = true) -@AGRCurationSchemaVersion(min = "2.8.0", max = LinkMLSchemaConstants.LATEST_RELEASE, dependencies = { Annotation.class }) +@AGRCurationSchemaVersion(min = "2.8.0", max = LinkMLSchemaConstants.LATEST_RELEASE, dependencies = {Annotation.class}) @Schema(name = "Disease_Annotation", description = "Annotation class representing a disease annotation") @Table(indexes = { @Index(name = "DiseaseAnnotation_internal_index", columnList = "internal"), @@ -73,29 +61,29 @@ public abstract class DiseaseAnnotation extends Annotation { @IndexedEmbedded(includePaths = {"curie", "name", "secondaryIdentifiers", "synonyms.name", "namespace", - "curie_keyword", "name_keyword", "secondaryIdentifiers_keyword", "synonyms.name_keyword", "namespace_keyword" }) + "curie_keyword", "name_keyword", "secondaryIdentifiers_keyword", "synonyms.name_keyword", "namespace_keyword"}) @IndexingDependency(reindexOnUpdate = ReindexOnUpdate.SHALLOW) @ManyToOne - @JsonView({ View.FieldsOnly.class, View.ForPublic.class }) + @JsonView({View.FieldsOnly.class, View.ForPublic.class}) private DOTerm diseaseAnnotationObject; @FullTextField(analyzer = "autocompleteAnalyzer", 
searchAnalyzer = "autocompleteSearchAnalyzer", valueBridge = @ValueBridgeRef(type = BooleanValueBridge.class)) @KeywordField(name = "negated_keyword", aggregable = Aggregable.YES, sortable = Sortable.YES, searchable = Searchable.YES, valueBridge = @ValueBridgeRef(type = BooleanValueBridge.class)) - @JsonView({ View.FieldsOnly.class, View.ForPublic.class }) + @JsonView({View.FieldsOnly.class, View.ForPublic.class}) @Column(columnDefinition = "boolean default false", nullable = false) private Boolean negated = false; @IndexedEmbedded(includePaths = {"name", "name_keyword"}) @IndexingDependency(reindexOnUpdate = ReindexOnUpdate.SHALLOW) @ManyToOne - @JsonView({ View.FieldsOnly.class, View.ForPublic.class }) + @JsonView({View.FieldsOnly.class, View.ForPublic.class}) private VocabularyTerm relation; @IndexedEmbedded(includePaths = {"curie", "name", "secondaryIdentifiers", "synonyms.name", "abbreviation", - "curie_keyword", "name_keyword", "secondaryIdentifiers_keyword", "synonyms.name_keyword", "abbreviation_keyword" }) + "curie_keyword", "name_keyword", "secondaryIdentifiers_keyword", "synonyms.name_keyword", "abbreviation_keyword"}) @IndexingDependency(reindexOnUpdate = ReindexOnUpdate.SHALLOW) @ManyToMany - @JsonView({ View.FieldsAndLists.class, View.DiseaseAnnotation.class, View.ForPublic.class }) + @JsonView({View.FieldsAndLists.class, View.DiseaseAnnotation.class, View.ForPublic.class}) @JoinTable( joinColumns = @JoinColumn(name = "diseaseannotation_id"), inverseJoinColumns = @JoinColumn(name = "evidencecodes_id"), @@ -107,16 +95,16 @@ public abstract class DiseaseAnnotation extends Annotation { private List evidenceCodes; @IndexedEmbedded(includePaths = { - "curie", "modEntityId", "modInternalId", "curie_keyword", "modEntityId_keyword", "modInternalId_keyword", - "geneSymbol.formatText", "geneSymbol.displayText", "geneSymbol.formatText_keyword", "geneSymbol.displayText_keyword", - "geneFullName.formatText", "geneFullName.displayText", 
"geneFullName.formatText_keyword", "geneFullName.displayText_keyword", - "geneSystematicName.formatText", "geneSystematicName.displayText", "geneSystematicName.formatText_keyword", "geneSystematicName.displayText_keyword", - "geneSynonyms.formatText", "geneSynonyms.displayText", "geneSynonyms.formatText_keyword", "geneSynonyms.displayText_keyword", - "geneSecondaryIds.secondaryId", "geneSecondaryIds.secondaryId_keyword", "name", "name_keyword", "symbol", "symbol_keyword" + "curie", "modEntityId", "modInternalId", "curie_keyword", "modEntityId_keyword", "modInternalId_keyword", + "geneSymbol.formatText", "geneSymbol.displayText", "geneSymbol.formatText_keyword", "geneSymbol.displayText_keyword", + "geneFullName.formatText", "geneFullName.displayText", "geneFullName.formatText_keyword", "geneFullName.displayText_keyword", + "geneSystematicName.formatText", "geneSystematicName.displayText", "geneSystematicName.formatText_keyword", "geneSystematicName.displayText_keyword", + "geneSynonyms.formatText", "geneSynonyms.displayText", "geneSynonyms.formatText_keyword", "geneSynonyms.displayText_keyword", + "geneSecondaryIds.secondaryId", "geneSecondaryIds.secondaryId_keyword", "name", "name_keyword", "symbol", "symbol_keyword" }) @IndexingDependency(reindexOnUpdate = ReindexOnUpdate.SHALLOW) @ManyToMany - @JsonView({ View.FieldsAndLists.class, View.DiseaseAnnotation.class, View.ForPublic.class }) + @JsonView({View.FieldsAndLists.class, View.DiseaseAnnotation.class, View.ForPublic.class}) @JoinTable( joinColumns = @JoinColumn(name = "diseaseannotation_id"), inverseJoinColumns = @JoinColumn(name = "with_id"), @@ -130,13 +118,13 @@ public abstract class DiseaseAnnotation extends Annotation { @IndexedEmbedded(includePaths = {"name", "name_keyword"}) @IndexingDependency(reindexOnUpdate = ReindexOnUpdate.SHALLOW) @ManyToOne - @JsonView({ View.FieldsOnly.class, View.ForPublic.class }) + @JsonView({View.FieldsOnly.class, View.ForPublic.class}) private VocabularyTerm annotationType; 
@IndexedEmbedded(includePaths = {"name", "name_keyword"}) @IndexingDependency(reindexOnUpdate = ReindexOnUpdate.SHALLOW) @ManyToMany - @JsonView({ View.FieldsAndLists.class, View.DiseaseAnnotation.class, View.ForPublic.class }) + @JsonView({View.FieldsAndLists.class, View.DiseaseAnnotation.class, View.ForPublic.class}) @JoinTable( joinColumns = @JoinColumn(name = "diseaseannotation_id"), inverseJoinColumns = @JoinColumn(name = "diseasequalifiers_id"), @@ -150,27 +138,27 @@ public abstract class DiseaseAnnotation extends Annotation { @IndexedEmbedded(includePaths = {"name", "name_keyword"}) @IndexingDependency(reindexOnUpdate = ReindexOnUpdate.SHALLOW) @ManyToOne - @JsonView({ View.FieldsOnly.class, View.ForPublic.class }) + @JsonView({View.FieldsOnly.class, View.ForPublic.class}) private VocabularyTerm geneticSex; @IndexedEmbedded(includePaths = {"sourceOrganization.abbreviation", "sourceOrganization.fullName", "sourceOrganization.shortName", "crossReference.displayName", "crossReference.referencedCurie", - "sourceOrganization.abbreviation_keyword", "sourceOrganization.fullName_keyword", "sourceOrganization.shortName_keyword", "crossReference.displayName_keyword", "crossReference.referencedCurie_keyword"}) + "sourceOrganization.abbreviation_keyword", "sourceOrganization.fullName_keyword", "sourceOrganization.shortName_keyword", "crossReference.displayName_keyword", "crossReference.referencedCurie_keyword"}) @IndexingDependency(reindexOnUpdate = ReindexOnUpdate.SHALLOW) @ManyToOne - @JsonView({ View.FieldsOnly.class, View.ForPublic.class }) + @JsonView({View.FieldsOnly.class, View.ForPublic.class}) private DataProvider secondaryDataProvider; @IndexedEmbedded(includePaths = { - "curie", "modEntityId", "modInternalId", "curie_keyword", "modEntityId_keyword", "modInternalId_keyword", - "geneSymbol.formatText", "geneSymbol.displayText", "geneSymbol.formatText_keyword", "geneSymbol.displayText_keyword", - "geneFullName.formatText", "geneFullName.displayText", 
"geneFullName.formatText_keyword", "geneFullName.displayText_keyword", - "geneSystematicName.formatText", "geneSystematicName.displayText", "geneSystematicName.formatText_keyword", "geneSystematicName.displayText_keyword", - "geneSynonyms.formatText", "geneSynonyms.displayText", "geneSynonyms.formatText_keyword", "geneSynonyms.displayText_keyword", - "geneSecondaryIds.secondaryId", "geneSecondaryIds.secondaryId_keyword", "name", "name_keyword", "symbol", "symbol_keyword" + "curie", "modEntityId", "modInternalId", "curie_keyword", "modEntityId_keyword", "modInternalId_keyword", + "geneSymbol.formatText", "geneSymbol.displayText", "geneSymbol.formatText_keyword", "geneSymbol.displayText_keyword", + "geneFullName.formatText", "geneFullName.displayText", "geneFullName.formatText_keyword", "geneFullName.displayText_keyword", + "geneSystematicName.formatText", "geneSystematicName.displayText", "geneSystematicName.formatText_keyword", "geneSystematicName.displayText_keyword", + "geneSynonyms.formatText", "geneSynonyms.displayText", "geneSynonyms.formatText_keyword", "geneSynonyms.displayText_keyword", + "geneSecondaryIds.secondaryId", "geneSecondaryIds.secondaryId_keyword", "name", "name_keyword", "symbol", "symbol_keyword" }) @IndexingDependency(reindexOnUpdate = ReindexOnUpdate.SHALLOW) @ManyToMany - @JsonView({ View.FieldsAndLists.class, View.DiseaseAnnotation.class, View.ForPublic.class }) + @JsonView({View.FieldsAndLists.class, View.DiseaseAnnotation.class, View.ForPublic.class}) @JoinTable( name = "diseaseannotation_modifiergene", joinColumns = @JoinColumn(name = "diseaseannotation_id"), @@ -183,15 +171,15 @@ public abstract class DiseaseAnnotation extends Annotation { private List diseaseGeneticModifierGenes; @IndexedEmbedded(includePaths = { - "curie", "modEntityId", "modInternalId", "curie_keyword", "modEntityId_keyword", "modInternalId_keyword", - "alleleSymbol.formatText", "alleleSymbol.displayText", "alleleSymbol.formatText_keyword", 
"alleleSymbol.displayText_keyword", - "alleleFullName.formatText", "alleleFullName.displayText", "alleleFullName.formatText_keyword", "alleleFullName.displayText_keyword", - "alleleSynonyms.formatText", "alleleSynonyms.displayText", "alleleSynonyms.formatText_keyword", "alleleSynonyms.displayText_keyword", - "alleleSecondaryIds.secondaryId", "alleleSecondaryIds.secondaryId_keyword" + "curie", "modEntityId", "modInternalId", "curie_keyword", "modEntityId_keyword", "modInternalId_keyword", + "alleleSymbol.formatText", "alleleSymbol.displayText", "alleleSymbol.formatText_keyword", "alleleSymbol.displayText_keyword", + "alleleFullName.formatText", "alleleFullName.displayText", "alleleFullName.formatText_keyword", "alleleFullName.displayText_keyword", + "alleleSynonyms.formatText", "alleleSynonyms.displayText", "alleleSynonyms.formatText_keyword", "alleleSynonyms.displayText_keyword", + "alleleSecondaryIds.secondaryId", "alleleSecondaryIds.secondaryId_keyword" }) @IndexingDependency(reindexOnUpdate = ReindexOnUpdate.SHALLOW) @ManyToMany - @JsonView({ View.FieldsAndLists.class, View.DiseaseAnnotation.class, View.ForPublic.class }) + @JsonView({View.FieldsAndLists.class, View.DiseaseAnnotation.class, View.ForPublic.class}) @JoinTable( name = "diseaseannotation_modifierallele", joinColumns = @JoinColumn(name = "diseaseannotation_id"), @@ -206,7 +194,7 @@ public abstract class DiseaseAnnotation extends Annotation { @IndexedEmbedded(includePaths = {"name", "name_keyword", "curie", "curie_keyword", "modEntityId", "modEntityId_keyword", "modInternalId", "modInternalId_keyword"}) @IndexingDependency(reindexOnUpdate = ReindexOnUpdate.SHALLOW) @ManyToMany - @JsonView({ View.FieldsAndLists.class, View.DiseaseAnnotation.class, View.ForPublic.class }) + @JsonView({View.FieldsAndLists.class, View.DiseaseAnnotation.class, View.ForPublic.class}) @JoinTable( name = "diseaseannotation_modifieragm", joinColumns = @JoinColumn(name = "diseaseannotation_id"), @@ -221,7 +209,7 @@ public 
abstract class DiseaseAnnotation extends Annotation { @IndexedEmbedded(includePaths = {"name", "name_keyword"}) @IndexingDependency(reindexOnUpdate = ReindexOnUpdate.SHALLOW) @ManyToOne - @JsonView({ View.FieldsOnly.class, View.ForPublic.class }) + @JsonView({View.FieldsOnly.class, View.ForPublic.class}) private VocabularyTerm diseaseGeneticModifierRelation; @Transient @@ -232,7 +220,7 @@ public abstract class DiseaseAnnotation extends Annotation { @Transient public abstract String getSubjectSpeciesName(); - + @Transient public abstract String getSubjectIdentifier(); @@ -246,4 +234,21 @@ public String getDataProviderString() { } return builder.toString(); } + + @Transient + public String getFullRelationString() { + if (relation == null) { + return null; + } + if (!negated) { + return relation.getName(); + } + + if (relation.getName().equals("is_model_of")) { + return "does_not_model"; + } + return relation.getName().replaceFirst("_", "_not_"); + } + + } From fffd16c52eaf44f2953dfe17e931b0fc6f6bf03c Mon Sep 17 00:00:00 2001 From: markquintontulloch Date: Mon, 25 Nov 2024 11:56:31 +0000 Subject: [PATCH 106/118] Fix record counts --- .../curation_api/jobs/executors/GeneExpressionExecutor.java | 6 ++---- .../curation_api/jobs/executors/LoadFileExecutor.java | 3 ++- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/executors/GeneExpressionExecutor.java b/src/main/java/org/alliancegenome/curation_api/jobs/executors/GeneExpressionExecutor.java index ca6baa9fa..57150355b 100644 --- a/src/main/java/org/alliancegenome/curation_api/jobs/executors/GeneExpressionExecutor.java +++ b/src/main/java/org/alliancegenome/curation_api/jobs/executors/GeneExpressionExecutor.java @@ -48,9 +48,6 @@ public void execLoad(BulkLoadFileHistory bulkLoadFileHistory) { } bulkLoadFileDAO.merge(bulkLoadFileHistory.getBulkLoadFile()); - bulkLoadFileHistory.setCount(ANNOTATIONS, geneExpressionIngestFmsDTO.getData().size()); - 
updateHistory(bulkLoadFileHistory); - List annotationIdsLoaded = new ArrayList<>(); List annotationIdsBefore = geneExpressionAnnotationService.getAnnotationIdsByDataProvider(dataProvider); @@ -83,7 +80,7 @@ public APIResponse runLoadAPI(GeneExpressionAnnotationService service, String da if (dataProviderName != null) { dataProvider = BackendBulkDataProvider.valueOf(dataProviderName); } - boolean success = runLoad(service, history, dataProvider, objectList, idsLoaded, true, "Records"); + boolean success = runLoad(service, history, dataProvider, objectList, idsLoaded, true, ANNOTATIONS); if (success) { loadExperiments(history, dataProvider, new ArrayList<>()); } @@ -114,5 +111,6 @@ private void loadExperiments(BulkLoadFileHistory history, BackendBulkDataProvide ph.progressProcess(); } updateHistory(history); + ph.finishProcess(); } } diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/executors/LoadFileExecutor.java b/src/main/java/org/alliancegenome/curation_api/jobs/executors/LoadFileExecutor.java index 7f619c3d0..ef1034f1d 100644 --- a/src/main/java/org/alliancegenome/curation_api/jobs/executors/LoadFileExecutor.java +++ b/src/main/java/org/alliancegenome/curation_api/jobs/executors/LoadFileExecutor.java @@ -211,6 +211,7 @@ protected boolean runLoad(BaseUpser } ph.startProcess(loadMessage, objectList.size()); + history.setCount(countType, objectList.size()); updateHistory(history); for (T dtoObject : objectList) { try { @@ -230,7 +231,7 @@ protected boolean runLoad(BaseUpser history.incrementFailed(countType); addException(history, new ObjectUpdateExceptionData(dtoObject, e.getMessage(), e.getStackTrace())); } - if (terminateFailing && history.getErrorRate() > 0.25) { + if (terminateFailing && history.getErrorRate(countType) > 0.25) { Log.error("Failure Rate > 25% aborting load"); updateHistory(history); updateExceptions(history); From 1d21b06563976c56590caddd1c9aacfe44985941 Mon Sep 17 00:00:00 2001 From: markquintontulloch Date: Mon, 25 Nov 2024 
12:14:44 +0000 Subject: [PATCH 107/118] Update records counts for tests --- .../ExpressionBulkUploadFmsITCase.java | 73 +++++++++++-------- 1 file changed, 44 insertions(+), 29 deletions(-) diff --git a/src/test/java/org/alliancegenome/curation_api/ExpressionBulkUploadFmsITCase.java b/src/test/java/org/alliancegenome/curation_api/ExpressionBulkUploadFmsITCase.java index c02becbf1..7b4430edf 100644 --- a/src/test/java/org/alliancegenome/curation_api/ExpressionBulkUploadFmsITCase.java +++ b/src/test/java/org/alliancegenome/curation_api/ExpressionBulkUploadFmsITCase.java @@ -13,6 +13,7 @@ import org.junit.jupiter.api.*; import java.util.ArrayList; +import java.util.HashMap; import java.util.List; import static org.hamcrest.Matchers.hasSize; @@ -68,7 +69,12 @@ public void init() { @Order(1) public void expressionBulkUploadAllFields() throws Exception { loadRequiredEntities(); - checkSuccessfulBulkLoad(expressionBulkPostEndpoint, expressionTestFilePath + "AF_01_all_fields.json"); + + HashMap> params = new HashMap<>(); + params.put("Annotations", createCountParams(1, 0, 1, 0)); + params.put("Experiments", createCountParams(1, 0, 1, 0)); + + checkBulkLoadRecordCounts(expressionBulkPostEndpoint, expressionTestFilePath + "AF_01_all_fields.json", params); RestAssured.given().when() .header("Content-Type", "application/json") @@ -123,43 +129,52 @@ public void expressionBulkUploadAllFields() throws Exception { @Test @Order(2) public void expressionBulkUploadMissingRequiredFields() throws Exception { - checkFailedBulkLoad(expressionBulkPostEndpoint, expressionTestFilePath + "MR_01_no_geneId.json"); - checkFailedBulkLoad(expressionBulkPostEndpoint, expressionTestFilePath + "MR_02_no_dateAssigned.json"); - checkFailedBulkLoad(expressionBulkPostEndpoint, expressionTestFilePath + "MR_03_no_evidence.json"); - checkFailedBulkLoad(expressionBulkPostEndpoint, expressionTestFilePath + "MR_04_no_assay.json"); - checkFailedBulkLoad(expressionBulkPostEndpoint, expressionTestFilePath + 
"MR_05_no_whenExpressed.json"); - checkFailedBulkLoad(expressionBulkPostEndpoint, expressionTestFilePath + "MR_06_no_whereExpressed.json"); - checkFailedBulkLoad(expressionBulkPostEndpoint, expressionTestFilePath + "MR_07_nowhenExpressedStageName.json"); - checkFailedBulkLoad(expressionBulkPostEndpoint, expressionTestFilePath + "MR_08_nowhereExpressedStatement.json"); - checkFailedBulkLoad(expressionBulkPostEndpoint, expressionTestFilePath + "MR_09_norCellComponentNORanatStructure.json"); + HashMap> params = new HashMap<>(); + params.put("Annotations", createCountParams(1, 1, 0, 0)); + + checkBulkLoadRecordCounts(expressionBulkPostEndpoint, expressionTestFilePath + "MR_01_no_geneId.json", params); + checkBulkLoadRecordCounts(expressionBulkPostEndpoint, expressionTestFilePath + "MR_02_no_dateAssigned.json", params); + checkBulkLoadRecordCounts(expressionBulkPostEndpoint, expressionTestFilePath + "MR_03_no_evidence.json", params); + checkBulkLoadRecordCounts(expressionBulkPostEndpoint, expressionTestFilePath + "MR_04_no_assay.json", params); + checkBulkLoadRecordCounts(expressionBulkPostEndpoint, expressionTestFilePath + "MR_05_no_whenExpressed.json", params); + checkBulkLoadRecordCounts(expressionBulkPostEndpoint, expressionTestFilePath + "MR_06_no_whereExpressed.json", params); + checkBulkLoadRecordCounts(expressionBulkPostEndpoint, expressionTestFilePath + "MR_07_nowhenExpressedStageName.json", params); + checkBulkLoadRecordCounts(expressionBulkPostEndpoint, expressionTestFilePath + "MR_08_nowhereExpressedStatement.json", params); + checkBulkLoadRecordCounts(expressionBulkPostEndpoint, expressionTestFilePath + "MR_09_norCellComponentNORanatStructure.json", params); } @Test @Order(3) public void expressionBulkUploadEmptyRequiredFields() throws Exception { - checkFailedBulkLoad(expressionBulkPostEndpoint, expressionTestFilePath + "ER_01_empty_geneId.json"); - checkFailedBulkLoad(expressionBulkPostEndpoint, expressionTestFilePath + "ER_02_empty_dateAssigned.json"); - 
checkFailedBulkLoad(expressionBulkPostEndpoint, expressionTestFilePath + "ER_03_empty_assay.json"); - checkFailedBulkLoad(expressionBulkPostEndpoint, expressionTestFilePath + "ER_04_empty_whenExpressedStageName.json"); - checkFailedBulkLoad(expressionBulkPostEndpoint, expressionTestFilePath + "ER_05_empty_whereExpressedStatement.json"); + HashMap> params = new HashMap<>(); + params.put("Annotations", createCountParams(1, 1, 0, 0)); + + checkBulkLoadRecordCounts(expressionBulkPostEndpoint, expressionTestFilePath + "ER_01_empty_geneId.json", params); + checkBulkLoadRecordCounts(expressionBulkPostEndpoint, expressionTestFilePath + "ER_02_empty_dateAssigned.json", params); + checkBulkLoadRecordCounts(expressionBulkPostEndpoint, expressionTestFilePath + "ER_03_empty_assay.json", params); + checkBulkLoadRecordCounts(expressionBulkPostEndpoint, expressionTestFilePath + "ER_04_empty_whenExpressedStageName.json", params); + checkBulkLoadRecordCounts(expressionBulkPostEndpoint, expressionTestFilePath + "ER_05_empty_whereExpressedStatement.json", params); } @Test @Order(4) public void expressionBulkUploadInvalidFields() throws Exception { - checkFailedBulkLoad(expressionBulkPostEndpoint, expressionTestFilePath + "IV_01_invalid_geneId.json"); - checkFailedBulkLoad(expressionBulkPostEndpoint, expressionTestFilePath + "IV_02_invalid_dateAssigned.json"); - checkFailedBulkLoad(expressionBulkPostEndpoint, expressionTestFilePath + "IV_03_invalid_assay.json"); - checkFailedBulkLoad(expressionBulkPostEndpoint, expressionTestFilePath + "IV_04_invalid_publicationId.json"); - checkFailedBulkLoad(expressionBulkPostEndpoint, expressionTestFilePath + "IV_05_invalid_stageterm.json"); - checkFailedBulkLoad(expressionBulkPostEndpoint, expressionTestFilePath + "IV_06_invalid_anatomical_structure.json"); - checkFailedBulkLoad(expressionBulkPostEndpoint, expressionTestFilePath + "IV_07_invalid_anatomical_substructure.json"); - checkFailedBulkLoad(expressionBulkPostEndpoint, expressionTestFilePath 
+ "IV_08_invalid_cellularcomponent.json"); - checkFailedBulkLoad(expressionBulkPostEndpoint, expressionTestFilePath + "IV_09_invalid_anatomicalstructurequalifier.json"); - checkFailedBulkLoad(expressionBulkPostEndpoint, expressionTestFilePath + "IV_10_invalid_anatomicalsubstructurequalifier.json"); - checkFailedBulkLoad(expressionBulkPostEndpoint, expressionTestFilePath + "IV_11_invalid_cellularcomponentqualifier.json"); - checkFailedBulkLoad(expressionBulkPostEndpoint, expressionTestFilePath + "IV_12_invalid_anatomicalstructureuberonslimterms.json"); - checkFailedBulkLoad(expressionBulkPostEndpoint, expressionTestFilePath + "IV_13_invalid_anatomicalsubstructureuberonslimterms.json"); + HashMap> params = new HashMap<>(); + params.put("Annotations", createCountParams(1, 1, 0, 0)); + + checkBulkLoadRecordCounts(expressionBulkPostEndpoint, expressionTestFilePath + "IV_01_invalid_geneId.json", params); + checkBulkLoadRecordCounts(expressionBulkPostEndpoint, expressionTestFilePath + "IV_02_invalid_dateAssigned.json", params); + checkBulkLoadRecordCounts(expressionBulkPostEndpoint, expressionTestFilePath + "IV_03_invalid_assay.json", params); + checkBulkLoadRecordCounts(expressionBulkPostEndpoint, expressionTestFilePath + "IV_04_invalid_publicationId.json", params); + checkBulkLoadRecordCounts(expressionBulkPostEndpoint, expressionTestFilePath + "IV_05_invalid_stageterm.json", params); + checkBulkLoadRecordCounts(expressionBulkPostEndpoint, expressionTestFilePath + "IV_06_invalid_anatomical_structure.json", params); + checkBulkLoadRecordCounts(expressionBulkPostEndpoint, expressionTestFilePath + "IV_07_invalid_anatomical_substructure.json", params); + checkBulkLoadRecordCounts(expressionBulkPostEndpoint, expressionTestFilePath + "IV_08_invalid_cellularcomponent.json", params); + checkBulkLoadRecordCounts(expressionBulkPostEndpoint, expressionTestFilePath + "IV_09_invalid_anatomicalstructurequalifier.json", params); + checkBulkLoadRecordCounts(expressionBulkPostEndpoint, 
expressionTestFilePath + "IV_10_invalid_anatomicalsubstructurequalifier.json", params); + checkBulkLoadRecordCounts(expressionBulkPostEndpoint, expressionTestFilePath + "IV_11_invalid_cellularcomponentqualifier.json", params); + checkBulkLoadRecordCounts(expressionBulkPostEndpoint, expressionTestFilePath + "IV_12_invalid_anatomicalstructureuberonslimterms.json", params); + checkBulkLoadRecordCounts(expressionBulkPostEndpoint, expressionTestFilePath + "IV_13_invalid_anatomicalsubstructureuberonslimterms.json", params); } private void loadRequiredEntities() throws Exception { @@ -176,7 +191,7 @@ private void loadRequiredEntities() throws Exception { Vocabulary vocabulary2 = createVocabulary(VocabularyConstants.GENE_EXPRESSION_VOCABULARY, false); createVocabularyTerm(vocabulary2, VocabularyConstants.GENE_EXPRESSION_RELATION_TERM, false); Vocabulary stageUberonTermVocabulary = getVocabulary(VocabularyConstants.STAGE_UBERON_SLIM_TERMS); - Vocabulary spatialExpressionQualififerVocabulary = getVocabulary(VocabularyConstants.SPATIAL_EXPRESSION_QUALIFIERS); + Vocabulary spatialExpressionQualifierVocabulary = getVocabulary(VocabularyConstants.SPATIAL_EXPRESSION_QUALIFIERS); VocabularyTermSet anatatomicalStructureQualifierTermset = getVocabularyTermSet(VocabularyConstants.ANATOMICAL_STRUCTURE_QUALIFIER); VocabularyTermSet anatatomicalSubstructureQualifierTermset = getVocabularyTermSet(VocabularyConstants.ANATOMICAL_SUBSTRUCTURE_QUALIFIER); VocabularyTermSet cellularComponentQualifierTermset = getVocabularyTermSet(VocabularyConstants.CELLULAR_COMPONENT_QUALIFIER); From 7b3e44fec215412b8afb235fa49d52e37fdfc963 Mon Sep 17 00:00:00 2001 From: markquintontulloch Date: Mon, 25 Nov 2024 14:03:58 +0000 Subject: [PATCH 108/118] Update count logic for DAs --- .../jobs/executors/AgmDiseaseAnnotationExecutor.java | 8 +++----- .../jobs/executors/AlleleDiseaseAnnotationExecutor.java | 9 +++------ .../jobs/executors/GeneDiseaseAnnotationExecutor.java | 8 +++----- 3 files changed, 9 
insertions(+), 16 deletions(-) diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/executors/AgmDiseaseAnnotationExecutor.java b/src/main/java/org/alliancegenome/curation_api/jobs/executors/AgmDiseaseAnnotationExecutor.java index 3e3cf0656..de4318c49 100644 --- a/src/main/java/org/alliancegenome/curation_api/jobs/executors/AgmDiseaseAnnotationExecutor.java +++ b/src/main/java/org/alliancegenome/curation_api/jobs/executors/AgmDiseaseAnnotationExecutor.java @@ -51,12 +51,10 @@ public void execLoad(BulkLoadFileHistory bulkLoadFileHistory, Boolean cleanUp) { bulkLoadFileHistory.getBulkLoadFile().setRecordCount(annotations.size() + bulkLoadFileHistory.getBulkLoadFile().getRecordCount()); bulkLoadFileDAO.merge(bulkLoadFileHistory.getBulkLoadFile()); - bulkLoadFileHistory.setCount(annotations.size()); - updateHistory(bulkLoadFileHistory); - - boolean success = runLoad(agmDiseaseAnnotationService, bulkLoadFileHistory, dataProvider, annotations, annotationIdsLoaded); + String countType = "AGM Disease Annotations"; + boolean success = runLoad(agmDiseaseAnnotationService, bulkLoadFileHistory, dataProvider, annotations, annotationIdsLoaded, countType); if (success && cleanUp) { - runCleanup(diseaseAnnotationService, bulkLoadFileHistory, dataProvider.name(), annotationIdsBefore, annotationIdsLoaded, "AGM disease annotation"); + runCleanup(diseaseAnnotationService, bulkLoadFileHistory, dataProvider.name(), annotationIdsBefore, annotationIdsLoaded, countType); } bulkLoadFileHistory.finishLoad(); updateHistory(bulkLoadFileHistory); diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/executors/AlleleDiseaseAnnotationExecutor.java b/src/main/java/org/alliancegenome/curation_api/jobs/executors/AlleleDiseaseAnnotationExecutor.java index 324f1e17e..eb3c6bc5a 100644 --- a/src/main/java/org/alliancegenome/curation_api/jobs/executors/AlleleDiseaseAnnotationExecutor.java +++ 
b/src/main/java/org/alliancegenome/curation_api/jobs/executors/AlleleDiseaseAnnotationExecutor.java @@ -50,14 +50,11 @@ public void execLoad(BulkLoadFileHistory bulkLoadFileHistory, Boolean cleanUp) { bulkLoadFileHistory.getBulkLoadFile().setRecordCount(annotations.size() + bulkLoadFileHistory.getBulkLoadFile().getRecordCount()); bulkLoadFileDAO.merge(bulkLoadFileHistory.getBulkLoadFile()); - - bulkLoadFileHistory.setCount(annotations.size()); - - updateHistory(bulkLoadFileHistory); - boolean success = runLoad(alleleDiseaseAnnotationService, bulkLoadFileHistory, dataProvider, annotations, annotationIdsLoaded); + String countType = "Allele Disease Annotations"; + boolean success = runLoad(alleleDiseaseAnnotationService, bulkLoadFileHistory, dataProvider, annotations, annotationIdsLoaded, countType); if (success && cleanUp) { - runCleanup(diseaseAnnotationService, bulkLoadFileHistory, dataProvider.name(), annotationIdsBefore, annotationIdsLoaded, "allele disease annotation"); + runCleanup(diseaseAnnotationService, bulkLoadFileHistory, dataProvider.name(), annotationIdsBefore, annotationIdsLoaded, countType); } bulkLoadFileHistory.finishLoad(); updateHistory(bulkLoadFileHistory); diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/executors/GeneDiseaseAnnotationExecutor.java b/src/main/java/org/alliancegenome/curation_api/jobs/executors/GeneDiseaseAnnotationExecutor.java index 07a68d88e..2e0c8c5a1 100644 --- a/src/main/java/org/alliancegenome/curation_api/jobs/executors/GeneDiseaseAnnotationExecutor.java +++ b/src/main/java/org/alliancegenome/curation_api/jobs/executors/GeneDiseaseAnnotationExecutor.java @@ -51,12 +51,10 @@ public void execLoad(BulkLoadFileHistory bulkLoadFileHistory, Boolean cleanUp) { bulkLoadFileHistory.getBulkLoadFile().setRecordCount(annotations.size() + bulkLoadFileHistory.getBulkLoadFile().getRecordCount()); bulkLoadFileDAO.merge(bulkLoadFileHistory.getBulkLoadFile()); - bulkLoadFileHistory.setCount(annotations.size()); - 
updateHistory(bulkLoadFileHistory); - - boolean success = runLoad(geneDiseaseAnnotationService, bulkLoadFileHistory, dataProvider, annotations, annotationIdsLoaded); + String countType = "Gene Disease Annotations"; + boolean success = runLoad(geneDiseaseAnnotationService, bulkLoadFileHistory, dataProvider, annotations, annotationIdsLoaded, countType); if (success && cleanUp) { - runCleanup(diseaseAnnotationService, bulkLoadFileHistory, dataProvider.name(), annotationIdsBefore, annotationIdsLoaded, "gene disease annotation"); + runCleanup(diseaseAnnotationService, bulkLoadFileHistory, dataProvider.name(), annotationIdsBefore, annotationIdsLoaded, countType); } bulkLoadFileHistory.finishLoad(); updateHistory(bulkLoadFileHistory); From 0cccde749412424d69f4b108262f585956fc3f4c Mon Sep 17 00:00:00 2001 From: Adam Gibson Date: Mon, 25 Nov 2024 10:57:49 -0600 Subject: [PATCH 109/118] SCRUM-4607 add sticky header to data loads page --- .../dataLoadsPage/DataLoadsComponent.js | 34 ++++++++++++------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/src/main/cliapp/src/containers/dataLoadsPage/DataLoadsComponent.js b/src/main/cliapp/src/containers/dataLoadsPage/DataLoadsComponent.js index 4a6cfa014..9095911eb 100644 --- a/src/main/cliapp/src/containers/dataLoadsPage/DataLoadsComponent.js +++ b/src/main/cliapp/src/containers/dataLoadsPage/DataLoadsComponent.js @@ -22,6 +22,8 @@ import { SiteContext } from '../layout/SiteContext'; import { LoadingOverlay } from '../../components/LoadingOverlay'; import moment from 'moment-timezone'; import { NumberTemplate } from '../../components/Templates/NumberTemplate'; +import { StickyHeader } from '../../components/StickyHeader'; +import { Splitter, SplitterPanel } from 'primereact/splitter'; export const DataLoadsComponent = () => { const { authState } = useOktaAuth(); @@ -852,20 +854,26 @@ export const DataLoadsComponent = () => { <> + + + +