Skip to content

Commit

Permalink
analysis: add the parameter resourcesDir for QC analysis, and update …
Browse files Browse the repository at this point in the history
…the code according to this change, #TASK-6772, #TASK-6766
  • Loading branch information
jtarraga committed Sep 11, 2024
1 parent 8766ea1 commit a479682
Show file tree
Hide file tree
Showing 10 changed files with 146 additions and 349 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,8 @@ protected void check() throws Exception {
super.check();
checkParameters(analysisParams, getStudy(), catalogManager, token);

// Update paths from relatedness external files
updateRelatednessFilePaths(analysisParams.getRelatednessParams());
// Prepare relatedness resource files
prepareRelatednessResources(analysisParams.getResourcesDir());
}

@Override
Expand Down Expand Up @@ -217,8 +217,8 @@ public static void checkParameters(FamilyQcAnalysisParams params, String studyId
e -> "Family ID " + e.getKey() + ": " + e.getValue()).collect(Collectors.toList()), ","));
}

// Check relatedness files: pop. freq. file, pop. exclude var. file and threshold file
checkRelatednessParameters(params.getRelatednessParams(), studyId, catalogManager, token);
// Check resources dir
checkResourcesDir(params.getResourcesDir(), studyId, catalogManager, token);
}

private void updateFamilyQualityControl(List<Family> families) throws ToolException {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ protected void check() throws Exception {
super.check();
checkParameters(analysisParams, getStudy(), catalogManager, token);

// Check for the presence of trios to compute relatedness
// Check for the presence of trios to compute relatedness, and then prepare relatedness resource files
for (String individualId : analysisParams.getIndividuals()) {
// Get individual
Individual individual = catalogManager.getIndividualManager().get(study, individualId, QueryOptions.empty(), token).first();
Expand All @@ -87,10 +87,11 @@ protected void check() throws Exception {
}
}
if (CollectionUtils.isNotEmpty(trios)) {
updateRelatednessFilePaths(analysisParams.getRelatednessParams());
prepareRelatednessResources(analysisParams.getResourcesDir());
}

updateInferredSexFilePaths(analysisParams.getInferredSexParams());
// Prepare inferred sex resource files
prepareInferredSexResources(analysisParams.getResourcesDir());
}

@Override
Expand Down Expand Up @@ -321,7 +322,7 @@ public static void checkParameters(IndividualQcAnalysisParams params, String stu
e -> "Individual ID " + e.getKey() + ": " + e.getValue()).collect(Collectors.toList()), ","));
}

// Check relatedness files: pop. freq. file, pop. exclude var. file and threshold file
checkRelatednessParameters(params.getRelatednessParams(), studyId, catalogManager, token);
// Check resources dir
checkResourcesDir(params.getResourcesDir(), studyId, catalogManager, token);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,12 @@
import org.opencb.commons.datastore.core.QueryOptions;
import org.opencb.opencga.analysis.ResourceUtils;
import org.opencb.opencga.analysis.tools.OpenCgaToolScopeStudy;
import org.opencb.opencga.analysis.variant.relatedness.RelatednessAnalysis;
import org.opencb.opencga.catalog.db.api.FileDBAdaptor;
import org.opencb.opencga.catalog.db.api.IndividualDBAdaptor;
import org.opencb.opencga.catalog.exceptions.CatalogException;
import org.opencb.opencga.catalog.managers.CatalogManager;
import org.opencb.opencga.catalog.utils.CatalogFqn;
import org.opencb.opencga.core.exceptions.ToolException;
import org.opencb.opencga.core.exceptions.ToolExecutorException;
import org.opencb.opencga.core.models.JwtPayload;
import org.opencb.opencga.core.models.common.QualityControlStatus;
import org.opencb.opencga.core.models.family.Family;
Expand All @@ -41,8 +40,6 @@
import org.opencb.opencga.core.models.sample.Sample;
import org.opencb.opencga.core.models.study.Study;
import org.opencb.opencga.core.models.study.StudyPermissions;
import org.opencb.opencga.core.models.variant.QcInferredSexAnalysisParams;
import org.opencb.opencga.core.models.variant.QcRelatednessAnalysisParams;
import org.opencb.opencga.core.response.OpenCGAResult;

import java.io.IOException;
Expand Down Expand Up @@ -148,91 +145,50 @@ protected static void checkPermissions(StudyPermissions.Permissions permissions,
}
}

protected static void checkRelatednessParameters(QcRelatednessAnalysisParams relatednessParams, String studyId,
CatalogManager catalogManager, String token) throws ToolException {
if (StringUtils.isNotEmpty(relatednessParams.getPopulationFrequencyFile())) {
checkFileParameter(relatednessParams.getPopulationFrequencyFile(), RELATEDNESS_POP_FREQ_FILE_MSG, studyId, catalogManager,
token);
}
if (StringUtils.isNotEmpty(relatednessParams.getPopulationExcludeVariantsFile())) {
checkFileParameter(relatednessParams.getPopulationExcludeVariantsFile(), RELATEDNESS_POP_EXCLUDE_VAR_FILE_MSG, studyId,
catalogManager, token);
}
if (StringUtils.isNotEmpty(relatednessParams.getThresholdsFile())) {
checkFileParameter(relatednessParams.getThresholdsFile(), RELATEDNESS_THRESHOLDS_FILE_MSG, studyId, catalogManager, token);
/**
 * Resolves an OpenCGA catalog path to the directory that holds custom QC resource files.
 *
 * @param resourcesDir   OpenCGA catalog path of the resources directory; may be null or empty
 * @param studyId        Study in which the catalog path is searched
 * @param catalogManager Catalog manager used to run the file search
 * @param token          Token passed to the catalog search
 * @return The path built from the URI of the matching catalog entry, or {@code null} when {@code resourcesDir} is
 * null or empty
 * @throws ToolException If the catalog search fails, if it does not return exactly one entry, or if the resolved
 *                       path does not exist
 */
protected static Path checkResourcesDir(String resourcesDir, String studyId, CatalogManager catalogManager, String token)
        throws ToolException {
    // No custom resources directory was provided: there is nothing to resolve
    if (StringUtils.isEmpty(resourcesDir)) {
        return null;
    }

    // Search the catalog for the entry whose path matches the given resources directory
    Query pathQuery = new Query(FileDBAdaptor.QueryParams.PATH.key(), resourcesDir);
    OpenCGAResult<File> searchResult;
    try {
        searchResult = catalogManager.getFileManager().search(studyId, pathQuery, QueryOptions.empty(), token);
    } catch (CatalogException e) {
        throw new ToolException("Error searching the OpenCGA catalog path '" + resourcesDir + "'", e);
    }

    // Exactly one catalog entry is expected
    if (searchResult.getNumResults() == 0) {
        throw new ToolException("Could not find the resources path '" + resourcesDir + "' in OpenCGA catalog");
    }
    if (searchResult.getNumResults() > 1) {
        throw new ToolException("Multiple results found (" + searchResult.getNumResults() + ") for resources path '"
                + resourcesDir + "' in OpenCGA catalog");
    }

    // The catalog entry must point to an existing location
    Path resolvedPath = Paths.get(searchResult.first().getUri());
    if (!Files.exists(resolvedPath)) {
        throw new ToolException("Resources path '" + resolvedPath + "' does not exist (OpenCGA path: " + resourcesDir + ")");
    }
    return resolvedPath;
}

protected void updateRelatednessFilePaths(QcRelatednessAnalysisParams relatednessParams) throws ToolException {
// Sanity check
if (relatednessParams == null) {
throw new ToolException("Internal error input parameter is null");
}
/**
 * Stages the resource files needed by the relatedness analysis: the population frequency file, the population
 * exclude variants file and the thresholds file.
 * <p>
 * The resources directory is first resolved with {@code checkResourcesDir}, which returns {@code null} when no
 * directory was given. Each file is then handed to {@code copyQcResourceFile(Path, String)}, which uses the file
 * found in that directory if it exists there and the default resource otherwise.
 *
 * @param resourcesDir OpenCGA catalog path of the directory with custom resource files; may be null or empty
 * @throws ToolException If the resources directory cannot be resolved or a resource file cannot be staged
 */
protected void prepareRelatednessResources(String resourcesDir) throws ToolException {
    Path path = checkResourcesDir(resourcesDir, getStudy(), getCatalogManager(), getToken());

    // Copy relatedness population frequency file
    copyQcResourceFile(path, RELATEDNESS_POP_FREQ_FILENAME);

    // Copy relatedness population exclude variant file
    // (this step used to copy the population frequency file a second time, so the exclude variants file was
    // never staged)
    copyQcResourceFile(path, RELATEDNESS_POP_EXCLUDE_VAR_FILENAME);

    // Copy relatedness thresholds file
    copyQcResourceFile(path, RELATEDNESS_THRESHOLDS_FILENAME);
}

protected void updateInferredSexFilePaths(QcInferredSexAnalysisParams inferredSexParams) throws ToolException {
// Sanity check
if (inferredSexParams == null) {
throw new ToolException("Internal error input parameter is null");
}

// Get inferred sex thresholds
if (StringUtils.isNotEmpty(inferredSexParams.getThresholdsFile())) {
Path path = checkFileParameter(inferredSexParams.getThresholdsFile(), INFERRED_SEX_THRESHOLDS_FILE_MSG, getStudy(),
catalogManager, getToken());
inferredSexParams.setThresholdsFile(path.toAbsolutePath().toString());
} else {
Path path = getExternalFilePath(INFERRED_SEX_ANALYSIS_ID, INFERRED_SEX_THRESHOLDS_FILENAME);
inferredSexParams.setThresholdsFile(path.toAbsolutePath().toString());
}
}
protected void prepareInferredSexResources(String resourcesDir) throws ToolException {
Path path = checkResourcesDir(resourcesDir, getStudy(), getCatalogManager(), getToken());

protected static Path checkFileParameter(String fileId, String msg, String studyId, CatalogManager catalogManager, String token)
throws ToolException {
if (StringUtils.isEmpty(fileId)) {
throw new ToolException(msg + " ID is empty");
}
File file;
try {
file = catalogManager.getFileManager().get(studyId, fileId, QueryOptions.empty(), token).first();
} catch (CatalogException e) {
throw new ToolExecutorException(msg + " ID '" + fileId + "' not found in OpenCGA catalog", e);
}
Path path = Paths.get(file.getUri());
if (!Files.exists(path)) {
throw new ToolExecutorException(msg + " '" + path + "' does not exist (file ID: " + fileId + ")");
}
return path;
// Copy inferred sex thresholds file
copyQcResourceFile(path, INFERRED_SEX_THRESHOLDS_FILENAME);
}

protected boolean setQualityControlStatus(QualityControlStatus qcStatus, String id, String qcType) throws ToolException {
Expand Down Expand Up @@ -311,83 +267,75 @@ protected static List<String> getNoSomaticSampleIds(Individual individual) {
return sampleIds;
}

protected Path getExternalFilePath(String analysisId, String resourceName) throws ToolException {
Path resourcesPath = getOutDir().resolve(RESOURCES_FOLDER);
if (!Files.exists(resourcesPath)) {
/**
 * Copies a QC resource file, preferring the one found in the given directory over the default one.
 *
 * @param path         Directory that may contain a custom version of the resource; may be {@code null}
 * @param resourceName File name of the resource
 * @throws ToolException If the delegated copy fails
 */
protected void copyQcResourceFile(Path path, String resourceName) throws ToolException {
    Path customResource = (path != null) ? path.resolve(resourceName) : null;
    if (customResource != null && Files.exists(customResource)) {
        // Use the custom resource file
        copyQcResourceFile(customResource);
        return;
    }
    // Use the default resource file
    copyQcResourceFile(resourceName);
}

protected void copyQcResourceFile(String resourceName) throws ToolException {
Path srcResourcesPath = getOpencgaHome().resolve(ANALYSIS_RESOURCES_FOLDER).resolve(QC_FOLDER);
Path destResourcesPath = checkResourcesPath(getOutDir().resolve(RESOURCES_FOLDER));
if (Files.exists(srcResourcesPath.resolve(resourceName))) {
// Copy resource file
copyQcResourceFile(srcResourcesPath.resolve(resourceName));
} else {
// Download directly into the job dir
// It can be improved by downloading once (the first time) in the analysis resources folder
URL url = null;
try {
Files.createDirectories(resourcesPath);
if (!Files.exists(resourcesPath)) {
throw new ToolException("Something wrong happened when creating the resources folder at " + resourcesPath);
}
url = new URL(ResourceUtils.URL + ANALYSIS_FOLDER + QC_FOLDER + "/" + resourceName);
ResourceUtils.downloadThirdParty(url, destResourcesPath);
} catch (IOException e) {
throw new ToolException("Error creating the resources folder at " + resourcesPath, e);
throw new ToolException("Something wrong happened when downloading the resource '" + resourceName + "' from '"
+ url + "'", e);
}

if (!Files.exists(destResourcesPath.resolve(resourceName))) {
throw new ToolException("Error downloading the resource '" + resourceName + "', it does not exist at " + destResourcesPath);
}
}
switch (resourceName) {
case RELATEDNESS_THRESHOLDS_FILENAME:
case INFERRED_SEX_THRESHOLDS_FILENAME:
return copyExternalFile(getOpencgaHome().resolve(ANALYSIS_FOLDER).resolve(QC_RESOURCES_FOLDER).resolve(resourceName));
default:
return downloadExternalFile(analysisId, resourceName);
}
}

protected Path copyExternalFile(Path source) throws ToolException {
Path dest = getOutDir().resolve(RESOURCES_FOLDER).resolve(source.getFileName());
protected void copyQcResourceFile(Path srcResourcesPath) throws ToolException {
String resourceName = srcResourcesPath.getFileName().toString();
Path destResourcesPath = checkResourcesPath(getOutDir().resolve(RESOURCES_FOLDER));

String msg = "Error copying resource file '" + resourceName + "'";

// Copy resource file
try {
Files.copy(source, dest);
Files.copy(srcResourcesPath, destResourcesPath.resolve(resourceName));
} catch (IOException e) {
String msg = "Error copying resource file '" + source.getFileName() + "'";
if (!Files.exists(dest) || source.toFile().length() != dest.toFile().length()) {
if (!Files.exists(destResourcesPath.resolve(resourceName))
|| srcResourcesPath.toFile().length() != destResourcesPath.resolve(resourceName).toFile().length()) {
throw new ToolException(msg, e);
}
logger.warn(msg, e);
}
return dest;
}

protected Path downloadExternalFile(String analysisId, String resourceName) throws ToolException {
URL url = null;
Path resourcesPath = getOutDir().resolve(RESOURCES_FOLDER);
try {
url = new URL(ResourceUtils.URL + ANALYSIS_FOLDER + analysisId + "/" + resourceName);
ResourceUtils.downloadThirdParty(url, resourcesPath);
} catch (IOException e) {
throw new ToolException("Something wrong happened when downloading the resource '" + resourceName + "' from '" + url + "'", e);
if (!Files.exists(destResourcesPath.resolve(resourceName))) {
throw new ToolException(msg + ", it does not exist at " + destResourcesPath);
}

if (!Files.exists(resourcesPath.resolve(resourceName))) {
throw new ToolException("After downloading the resource '" + resourceName + "', it does not exist at " + resourcesPath);
}
return resourcesPath.resolve(resourceName);
}

protected Path downloadExternalFileAtResources(String analysisId, String resourceName) throws ToolException {
// Check if the resource has been downloaded previously
Path resourcePath = getOpencgaHome().resolve(ANALYSIS_RESOURCES_FOLDER + analysisId);
if (!Files.exists(resourcePath)) {
// Create the resource path if it does not exist yet
try {
Files.createDirectories(resourcePath);
} catch (IOException e) {
throw new ToolException("It could not create the resource path '" + resourcePath + "'", e);
}
}
if (!Files.exists(resourcePath.resolve(resourceName))) {
// Otherwise, download it from the resource repository
URL url = null;
protected Path checkResourcesPath(Path resourcesPath) throws ToolException {
if (!Files.exists(resourcesPath)) {
try {
url = new URL(ResourceUtils.URL + ANALYSIS_FOLDER + analysisId + "/" + resourceName);
ResourceUtils.downloadThirdParty(url, resourcePath);
Files.createDirectories(resourcesPath);
if (!Files.exists(resourcesPath)) {
throw new ToolException("Something wrong happened when creating the resources folder at " + resourcesPath);
}
} catch (IOException e) {
throw new ToolException("Something wrong happened downloading the resource '" + resourceName + "' from '" + url + "'", e);
}

if (!Files.exists(resourcePath.resolve(resourceName))) {
throw new ToolException("After downloading the resource '" + resourceName + "', it does not exist at " + resourcePath);
throw new ToolException("Error creating the resources folder at " + resourcesPath, e);
}
}
return resourcePath.resolve(resourceName);
return resourcesPath;
}

protected boolean performQualityControl(QualityControlStatus qcStatus, Boolean overwrite) {
Expand Down
Loading

0 comments on commit a479682

Please sign in to comment.