Skip to content

Commit

Permalink
analysis: improve family QC by downloading external files for related…
Browse files Browse the repository at this point in the history
…ness analysis, #TASK-6722, #TASK-6766
  • Loading branch information
jtarraga committed Sep 9, 2024
1 parent 139d631 commit ea2d888
Show file tree
Hide file tree
Showing 14 changed files with 269 additions and 37 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import org.opencb.commons.datastore.core.Query;
import org.opencb.commons.datastore.core.QueryOptions;
import org.opencb.opencga.analysis.variant.qc.VariantQcAnalysis;
import org.opencb.opencga.analysis.variant.relatedness.RelatednessAnalysis;
import org.opencb.opencga.catalog.db.api.IndividualDBAdaptor;
import org.opencb.opencga.catalog.exceptions.CatalogException;
import org.opencb.opencga.catalog.managers.CatalogManager;
Expand All @@ -40,6 +41,7 @@
import org.opencb.opencga.core.models.individual.Individual;
import org.opencb.opencga.core.models.sample.Sample;
import org.opencb.opencga.core.models.variant.FamilyQcAnalysisParams;
import org.opencb.opencga.core.models.variant.FamilyQcRelatednessAnalysisParams;
import org.opencb.opencga.core.response.OpenCGAResult;
import org.opencb.opencga.core.tools.annotations.Tool;
import org.opencb.opencga.core.tools.annotations.ToolParams;
Expand Down Expand Up @@ -68,13 +70,54 @@ public class FamilyVariantQcAnalysis extends VariantQcAnalysis {
public static final String DESCRIPTION = "Run quality control (QC) for a given family. It computes the relatedness scores among the"
+ " family members";

public static final String RELATEDNESS_POP_FREQ_FILENAME = "autosomes_1000G_QC_prune_in.frq";
public static final String RELATEDNESS_POP_EXCLUDE_VAR_FILENAME = "autosomes_1000G_QC.prune.out";
public static final String RELATEDNESS_THRESHOLDS_FILENAME = "relatedness_thresholds.tsv";

private static final String RELATEDNESS_POP_FREQ_FILE_MSG = "Population frequency file";
private static final String RELATEDNESS_POP_EXCLUDE_VAR_FILE_MSG = "Population exclude variant file";
private static final String RELATEDNESS_THRESHOLDS_FILE_MSG = "Thresholds file";

@ToolParams
protected final FamilyQcAnalysisParams analysisParams = new FamilyQcAnalysisParams();

@Override
protected void check() throws Exception {
super.check();
checkParameters(analysisParams, getStudy(), catalogManager, token);

// Get paths from external files
FamilyQcRelatednessAnalysisParams relatednessParams = analysisParams.getRelatednessParams();

// Get relatedness population frequency
if (relatednessParams != null && StringUtils.isNotEmpty(relatednessParams.getPopulationFrequencyFile())) {
Path path = checkFileParameter(relatednessParams.getPopulationFrequencyFile(), RELATEDNESS_POP_FREQ_FILE_MSG, getStudy(),
catalogManager, getToken());
analysisParams.getRelatednessParams().setPopulationFrequencyFile(path.toAbsolutePath().toString());
} else {
Path path = getExternalFilePath(RelatednessAnalysis.ID, RELATEDNESS_POP_FREQ_FILENAME);
analysisParams.getRelatednessParams().setPopulationFrequencyFile(path.toAbsolutePath().toString());
}

// Get relatedness population exclude variant
if (relatednessParams != null && StringUtils.isNotEmpty(relatednessParams.getPopulationExcludeVariantsFile())) {
Path path = checkFileParameter(relatednessParams.getPopulationExcludeVariantsFile(), RELATEDNESS_POP_EXCLUDE_VAR_FILE_MSG,
getStudy(), catalogManager, getToken());
analysisParams.getRelatednessParams().setPopulationExcludeVariantsFile(path.toAbsolutePath().toString());
} else {
Path path = getExternalFilePath(RelatednessAnalysis.ID, RELATEDNESS_POP_EXCLUDE_VAR_FILENAME);
analysisParams.getRelatednessParams().setPopulationExcludeVariantsFile(path.toAbsolutePath().toString());
}

// Get relatedness thresholds
if (relatednessParams != null && StringUtils.isNotEmpty(relatednessParams.getPopulationFrequencyFile())) {
Path path = checkFileParameter(relatednessParams.getThresholdsFile(), RELATEDNESS_THRESHOLDS_FILE_MSG, getStudy(),
catalogManager, getToken());
analysisParams.getRelatednessParams().setThresholdsFile(path.toAbsolutePath().toString());
} else {
Path path = getExternalFilePath(RelatednessAnalysis.ID, RELATEDNESS_THRESHOLDS_FILENAME);
analysisParams.getRelatednessParams().setThresholdsFile(path.toAbsolutePath().toString());
}
}

@Override
Expand Down Expand Up @@ -229,6 +272,22 @@ public static void checkParameters(FamilyQcAnalysisParams params, String studyId
throw new ToolException("Found the following error for family IDs: " + StringUtils.join(errors.entrySet().stream().map(
e -> "Family ID " + e.getKey() + ": " + e.getValue()).collect(Collectors.toList()), ","));
}

// Check external files: pop. freq. file, pop. exclude var. file and threadshold file
if (params.getRelatednessParams() != null) {
FamilyQcRelatednessAnalysisParams relatednessParams = params.getRelatednessParams();
if (StringUtils.isNotEmpty(relatednessParams.getPopulationFrequencyFile())) {
checkFileParameter(relatednessParams.getPopulationFrequencyFile(), RELATEDNESS_POP_FREQ_FILE_MSG, studyId, catalogManager,
token);
}
if (StringUtils.isNotEmpty(relatednessParams.getPopulationExcludeVariantsFile())) {
checkFileParameter(relatednessParams.getPopulationExcludeVariantsFile(), RELATEDNESS_POP_EXCLUDE_VAR_FILE_MSG, studyId,
catalogManager, token);
}
if (StringUtils.isNotEmpty(relatednessParams.getThresholdsFile())) {
checkFileParameter(relatednessParams.getThresholdsFile(), RELATEDNESS_THRESHOLDS_FILE_MSG, studyId, catalogManager, token);
}
}
}

private static List<String> getSampleIds(Family family, String studyId, CatalogManager catalogManager, String token)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ public void run() throws ToolExecutorException {
// Execute Pythong script in docker
String dockerImage = "opencb/opencga-ext-tools:" + GitRepositoryState.getInstance().getBuildVersion();

//DockerUtils.run(dockerImage, inputBindings, outputBinding, params, null);
DockerUtils.run(dockerImage, inputBindings, outputBinding, params, null);
} catch (IOException e) {
throw new ToolExecutorException(e);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,16 +18,25 @@

import org.apache.commons.lang3.StringUtils;
import org.opencb.commons.datastore.core.QueryOptions;
import org.opencb.opencga.analysis.ResourceUtils;
import org.opencb.opencga.analysis.tools.OpenCgaToolScopeStudy;
import org.opencb.opencga.catalog.exceptions.CatalogException;
import org.opencb.opencga.catalog.managers.CatalogManager;
import org.opencb.opencga.catalog.utils.CatalogFqn;
import org.opencb.opencga.core.exceptions.ToolException;
import org.opencb.opencga.core.exceptions.ToolExecutorException;
import org.opencb.opencga.core.models.JwtPayload;
import org.opencb.opencga.core.models.file.File;
import org.opencb.opencga.core.models.study.Study;
import org.opencb.opencga.core.models.study.StudyPermissions;
import org.opencb.opencga.core.tools.ToolParams;

import java.io.IOException;
import java.net.URL;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;

import static org.opencb.opencga.core.models.study.StudyPermissions.Permissions.WRITE_SAMPLES;

public class VariantQcAnalysis extends OpenCgaToolScopeStudy {
Expand Down Expand Up @@ -78,4 +87,65 @@ protected static void checkPermissions(StudyPermissions.Permissions permissions,
throw new ToolException(e);
}
}

protected static Path checkFileParameter(String fileId, String msg, String studyId, CatalogManager catalogManager, String token)
throws ToolException {
if (StringUtils.isEmpty(fileId)) {
throw new ToolException(msg + " ID is empty");
}
File file;
try {
file = catalogManager.getFileManager().get(studyId, fileId, QueryOptions.empty(), token).first();
} catch (CatalogException e) {
throw new ToolExecutorException(msg + " ID '" + fileId + "' not found in OpenCGA catalog", e);
}
Path path = Paths.get(file.getUri());
if (!Files.exists(path)) {
throw new ToolExecutorException(msg + " '" + path + "' does not exist (file ID: " + fileId + ")");
}
return path;
}

protected Path getExternalFilePath(String analysisId, String resourceName) throws ToolException {
URL url = null;
try {
url = new URL(ResourceUtils.URL + "analysis/" + analysisId + "/" + resourceName);
ResourceUtils.downloadThirdParty(url, getOutDir());
} catch (IOException e) {
throw new ToolException("Something wrong happened downloading the resource '" + resourceName + "' from '" + url + "'", e);
}

if (!Files.exists(getOutDir().resolve(resourceName))) {
throw new ToolException("After downloading the resource '" + resourceName + "', it does not exist at " + getOutDir());
}
return getOutDir().resolve(resourceName);
}

protected Path downloadExternalFileAtResources(String analysisId, String resourceName) throws ToolException {
// Check if the resource has been downloaded previously
Path resourcePath = getOpencgaHome().resolve("analysis/resources/" + analysisId);
if (!Files.exists(resourcePath)) {
// Create the resource path if it does not exist yet
try {
Files.createDirectories(resourcePath);
} catch (IOException e) {
throw new ToolException("It could not create the resource path '" + resourcePath + "'", e);
}
}
if (!Files.exists(resourcePath.resolve(resourceName))) {
// Otherwise, download it from the resource repository
URL url = null;
try {
url = new URL(ResourceUtils.URL + "analysis/" + analysisId + "/" + resourceName);
ResourceUtils.downloadThirdParty(url, resourcePath);
} catch (IOException e) {
throw new ToolException("Something wrong happened downloading the resource '" + resourceName + "' from '" + url + "'", e);
}

if (!Files.exists(resourcePath.resolve(resourceName))) {
throw new ToolException("After downloading the resource '" + resourceName + "', it does not exist at " + resourcePath);
}
}
return resourcePath.resolve(resourceName);
}
}
23 changes: 15 additions & 8 deletions opencga-app/app/analysis/qc/family_qc/family_qc.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,6 @@
import logging
import gzip
import json
import subprocess
#import pandas

from utils import create_output_dir, execute_bash_command

Expand Down Expand Up @@ -184,12 +182,12 @@ def relatedness_results_data_model(method):
# Return relatedness json data model with method info filled in.
return relatedness_json

def relatedness_plink(self,filtered_vcf_fpath,pop_freq_fpath,pop_exclude_var_fpath,outdir_fpath,method="PLINK/IBD"):
def relatedness_plink(self,filtered_vcf_fpath,pop_freq_fpath,pop_exclude_var_fpath,outdir_fpath,plink_path,method="PLINK/IBD"):
LOGGER.info('Method: {}'.format(method))
plink_outdir_fpath = create_output_dir(path_elements=[str(outdir_fpath),'plink_IBD'])
sex_info_fpath, parent_offspring_fpath = self.generate_files_for_plink_fam_file(outdir_fpath=plink_outdir_fpath)

plink_path = "path/to/plink"
# Preparing PLINK commands
plink_path = str(plink_path)
files_prefix = self.id_ + "_plink_relatedness_results"
plink_output_folder_files_prefix = os.path.join(plink_outdir_fpath,files_prefix)
cmd_plink_files = ' '.join([plink_path,
Expand Down Expand Up @@ -248,8 +246,18 @@ def relatedness_validation(reported_result,inferred_result):
def relatedness_inference(self,relatedness_thresholds_fpath,method,plink_genome_fpath):
# Reading relatedness thresholds file (.tsv)
LOGGER.debug('Getting relatedness thresholds from file: "{}"'.format(relatedness_thresholds_fpath))
relatedness_thresholds_fhand = pandas.read_csv(str(relatedness_thresholds_fpath),header=0,sep='\t').set_index('relationship')
relationship_groups_thresholds_dict = relatedness_thresholds_fhand.to_dict("index")
relatedness_thresholds_fhand = open(str(relatedness_thresholds_fpath))
relationship_groups_thresholds_dict = {}
for index,line in enumerate(relatedness_thresholds_fhand):
relatedness_thresholds_row_values = line.strip().split()
if index == 0:
relatedness_thresholds_file_header = relatedness_thresholds_row_values
continue
for column,value in enumerate(relatedness_thresholds_row_values):
if relatedness_thresholds_file_header[column] == 'relationship':
relationship_groups_thresholds_dict[value] == {}
else:
relationship_groups_thresholds_dict[relatedness_thresholds_file_header[column]] == value

# Reading plink genome file (.genome)
LOGGER.debug('Getting PLINK results from file: "{}"'.format(plink_genome_fpath))
Expand All @@ -260,7 +268,6 @@ def relatedness_inference(self,relatedness_thresholds_fpath,method,plink_genome_
for index,line in enumerate(input_genome_file_fhand):
genome_file_row_values = line.strip().split()
if index == 0:
genome_file_header = genome_file_row_values
continue
# Getting values from PLINK .genome file block
score = relatedness_results["scores"][0]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
import org.opencb.opencga.core.models.variant.CircosAnalysisParams;
import org.opencb.opencga.core.models.variant.CohortVariantStatsAnalysisParams;
import org.opencb.opencga.core.models.variant.FamilyQcAnalysisParams;
import org.opencb.opencga.core.models.variant.FamilyQcRelatednessAnalysisParams;
import org.opencb.opencga.core.models.variant.GatkWrapperParams;
import org.opencb.opencga.core.models.variant.GenomePlotAnalysisParams;
import org.opencb.opencga.core.models.variant.GwasAnalysisParams;
Expand Down Expand Up @@ -623,6 +624,9 @@ private RestResponse<Job> runFamilyQc() throws Exception {
putNestedIfNotEmpty(beanParams, "family",commandOptions.family, true);
putNestedIfNotEmpty(beanParams, "relatednessMethod",commandOptions.relatednessMethod, true);
putNestedIfNotEmpty(beanParams, "relatednessMaf",commandOptions.relatednessMaf, true);
putNestedIfNotEmpty(beanParams, "relatednessParams.populationFrequencyFile",commandOptions.relatednessParamsPopulationFrequencyFile, true);
putNestedIfNotEmpty(beanParams, "relatednessParams.populationExcludeVariantsFile",commandOptions.relatednessParamsPopulationExcludeVariantsFile, true);
putNestedIfNotEmpty(beanParams, "relatednessParams.thresholdsFile",commandOptions.relatednessParamsThresholdsFile, true);
putNestedIfNotNull(beanParams, "skipIndex",commandOptions.skipIndex, true);
putNestedIfNotNull(beanParams, "overwrite",commandOptions.overwrite, true);
putNestedIfNotEmpty(beanParams, "outdir",commandOptions.outdir, true);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -821,6 +821,15 @@ public class RunFamilyQcCommandOptions {
@Parameter(names = {"--relatedness-maf"}, description = "Minor allele frequence (MAF) is used to filter variants before computing relatedness, e.g.: 1000G:CEU>0.35 or cohort:ALL>0.05", required = false, arity = 1)
public String relatednessMaf;

@Parameter(names = {"--relatedness-params-population-frequency-file"}, description = "Population frequencies file ID for relatedness analysis", required = false, arity = 1)
public String relatednessParamsPopulationFrequencyFile;

@Parameter(names = {"--relatedness-params-population-exclude-variants-file"}, description = "Population exclude variants file ID for relatedness analysis", required = false, arity = 1)
public String relatednessParamsPopulationExcludeVariantsFile;

@Parameter(names = {"--relatedness-params-thresholds-file"}, description = "Threshold file ID for relatedness analysis", required = false, arity = 1)
public String relatednessParamsThresholdsFile;

@Parameter(names = {"--skip-index"}, description = "Do not save the computed quality control in catalog", required = false, arity = 1)
public Boolean skipIndex;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -450,7 +450,7 @@ public class UpdateCommandOptions {
@Parameter(names = {"--expected-size"}, description = "The body web service expectedSize parameter", required = false, arity = 1)
public Integer expectedSize;

@Parameter(names = {"--quality-control-files"}, description = "File IDs related to the quality control.", required = false, arity = 1)
@Parameter(names = {"--quality-control-files"}, description = "File IDs related to the quality control", required = false, arity = 1)
public String qualityControlFiles;

@Parameter(names = {"--quality-control-status-id"}, description = "The body web service id parameter", required = false, arity = 1)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -659,7 +659,7 @@ public class UpdateCommandOptions {
@Parameter(names = {"--status-description"}, description = "The body web service description parameter", required = false, arity = 1)
public String statusDescription;

@Parameter(names = {"--quality-control-files"}, description = "File IDs related to the quality control.", required = false, arity = 1)
@Parameter(names = {"--quality-control-files"}, description = "File IDs related to the quality control", required = false, arity = 1)
public String qualityControlFiles;

@DynamicParameter(names = {"--attributes"}, description = "The body web service attributes parameter. Use: --attributes key=value", required = false)
Expand Down
Loading

0 comments on commit ea2d888

Please sign in to comment.