Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

TASK-4158 - Add functionality for indexing, querying and aggregating to the CVDB Engine #2379

Merged
merged 34 commits into from
Feb 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
d6f754a
storage: minor improvement in variant search converter, #TASK-4158
jtarraga Apr 5, 2023
40e5787
Remove unused files, #TASK-4158
jtarraga Apr 18, 2023
5e87324
app: implement CLI to load clinical analyses from a file, #TASK-4610,…
jtarraga Jun 20, 2023
e91e58f
client: upload file, #TASK-4610, #TASK-4158
jtarraga Jun 21, 2023
d81287e
Merge develop, #TASK-4158
jtarraga Jun 21, 2023
a48ae49
Merge branch 'develop' into TASK-4158
jtarraga Aug 30, 2023
43f933c
Resolve conflicts, #TASK-4158
jtarraga Sep 14, 2023
fc73d9b
analysis: add clinical analysis manager test for loading clinical ana…
jtarraga Sep 15, 2023
f9c2e30
analysis: add clinical analysis load task test, #TASK-4610, #TASK-TAS…
jtarraga Sep 15, 2023
5ac1346
analysis: add a data model for the clinical analysis load result, #TA…
jtarraga Sep 18, 2023
6aae84a
Merge branch 'develop' into TASK-4158
jtarraga Sep 19, 2023
d926948
Merge branch 'develop' into TASK-4158
jtarraga Sep 21, 2023
16bbfda
Rename CVA to CVDB and generate clients, #TASK-4158
jtarraga Sep 21, 2023
a16d6d8
catalog: fix clinical analysis loading, #TASK-4610, #TASK-4158
jtarraga Oct 16, 2023
f966697
Merge branch 'develop' into TASK-4158
jtarraga Oct 19, 2023
7d1793f
Resolve conflicts, #TASK-4158
jtarraga Nov 2, 2023
abdc630
client: generate clients, #TASK-4158
jtarraga Nov 6, 2023
2daade7
analysis: update tool factory, runner and execution daemon to execute…
jtarraga Nov 9, 2023
266b371
core: add ignore annotation for packages in configuration, #TASK-4158
jtarraga Nov 13, 2023
4c25393
core: fix configuration, #TASK-4158
jtarraga Nov 14, 2023
adfa52c
core: remove unused imports, #TASK-4158
jtarraga Nov 14, 2023
a2de5e2
catalog: generate method to get effective permissions, #TASK-4158
pfurio Nov 15, 2023
2394c31
Merge branch 'TASK-4158' of github.com:opencb/opencga into TASK-4158
pfurio Nov 15, 2023
6af436b
catalog: test effective permission implementation, #TASK-4158
pfurio Nov 15, 2023
2045340
catalog: test with simplifyPermissions configuration, #TASK-4158
pfurio Nov 16, 2023
8c54a86
catalog: improve clinical analysis loading, #TASK-4610, #TASK-TASK-4158
jtarraga Nov 16, 2023
4a54bc0
Merge branch 'TASK-4158' of https://github.com/opencb/opencga into TA…
jtarraga Nov 16, 2023
7757fab
client: generate clients after merging develop, #TASK-4158
jtarraga Nov 16, 2023
41f3ca0
client: generate clients, #TASK-4158
jtarraga Nov 17, 2023
5c27325
Merge branch 'develop' into TASK-4158
jtarraga Nov 27, 2023
615aba1
Merge branch 'develop' into TASK-4158
jtarraga Dec 7, 2023
b24f92d
Resolve conflicts, #TASK-4158
jtarraga Jan 19, 2024
3c256ba
Merge branch 'develop' into TASK-4158
imedina Feb 1, 2024
946755f
Merge branch 'develop' into TASK-4158
jtarraga Feb 6, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
package org.opencb.opencga.analysis.clinical;

import org.apache.commons.lang3.StringUtils;
import org.opencb.opencga.analysis.tools.OpenCgaToolScopeStudy;
import org.opencb.opencga.catalog.managers.FileManager;
import org.opencb.opencga.catalog.models.ClinicalAnalysisLoadResult;
import org.opencb.opencga.core.exceptions.ToolException;
import org.opencb.opencga.core.models.clinical.ClinicalAnalysisLoadParams;
import org.opencb.opencga.core.models.common.Enums;
import org.opencb.opencga.core.models.file.File;
import org.opencb.opencga.core.tools.annotations.Tool;
import org.opencb.opencga.core.tools.annotations.ToolParams;

import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Map;

@Tool(id = ClinicalAnalysisLoadTask.ID, resource = Enums.Resource.CLINICAL_ANALYSIS, description = ClinicalAnalysisLoadTask.DESCRIPTION)
public class ClinicalAnalysisLoadTask extends OpenCgaToolScopeStudy {
public final static String ID = "load";
public static final String DESCRIPTION = "Load clinical analyses from a file";

private Path filePath;

@ToolParams
protected ClinicalAnalysisLoadParams params = new ClinicalAnalysisLoadParams();

@Override
protected void check() throws Exception {
super.check();

String fileStr = params.getFile();
if (StringUtils.isEmpty(fileStr)) {
throw new ToolException("Missing input file when loading clinical analyses.");
}

File file = catalogManager.getFileManager().get(getStudy(), fileStr, FileManager.INCLUDE_FILE_URI_PATH, token).first();
filePath = Paths.get(file.getUri());
if (!filePath.toFile().exists()) {
throw new ToolException("Input file '" + fileStr + "' does not exist: " + filePath);
}
}

@Override
protected void run() throws Exception {
step(() -> {
ClinicalAnalysisLoadResult loadResult = catalogManager.getClinicalAnalysisManager().load(getStudy(), filePath, token);

// Add results as attributes
addAttribute("Num. clinical analyses loaded", loadResult.getNumLoaded());
addAttribute("Num. clinical analyses not loaded", loadResult.getFailures().size());
addAttribute("Loading time (in sec.)", loadResult.getTime());
addAttribute("Clinical analyses file name", loadResult.getFilename());

// Add warnings with the not loaded clinical analysis
if (loadResult.getFailures().size() > 0) {
for (Map.Entry<String, String> entry : loadResult.getFailures().entrySet()) {
addWarning("Clinical analysis " + entry.getKey() + " could not be loaded due to error: " + entry.getValue());
}
}
});
}
}

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -37,14 +37,16 @@ public class ToolFactory {
private static Map<String, Set<Class<? extends OpenCgaTool>>> duplicatedTools;
private static List<Class<? extends OpenCgaTool>> toolsList;

private static synchronized Map<String, Class<? extends OpenCgaTool>> loadTools() {
public static final String DEFAULT_PACKAGE = "org.opencb.opencga";

private static synchronized Map<String, Class<? extends OpenCgaTool>> loadTools(List<String> packages) {
if (toolsCache == null) {
Reflections reflections = new Reflections(new ConfigurationBuilder()
.setScanners(
new SubTypesScanner(),
new TypeAnnotationsScanner().filterResultsBy(s -> StringUtils.equals(s, Tool.class.getName()))
)
.addUrls(getUrls())
.addUrls(getUrlsFromPackages(packages))
.filterInputsBy(input -> input != null && input.endsWith(".class"))
);

Expand Down Expand Up @@ -85,9 +87,20 @@ private static synchronized Map<String, Class<? extends OpenCgaTool>> loadTools(
}
return toolsCache;
}
/**
 * Collects the classpath URLs to scan for the given packages.
 *
 * For every package, the URLs returned by {@code ClasspathHelper.forPackage} are filtered by the last segment of
 * their path (the text after the final '/'): a URL is kept when that segment is empty, or when it contains
 * "opencga" but not "opencga-storage-hadoop-deps".
 *
 * @param packages Package names to look up; must not be null.
 * @return The accepted URLs, in encounter order.
 */
static Collection<URL> getUrlsFromPackages(List<String> packages) {
    Collection<URL> accepted = new LinkedList<>();
    for (String packageName : packages) {
        for (URL candidate : ClasspathHelper.forPackage(packageName)) {
            String path = candidate.getPath();
            String lastSegment = path.substring(path.lastIndexOf('/') + 1);

            boolean noFileName = lastSegment.isEmpty();
            boolean opencgaArtifact = lastSegment.contains("opencga")
                    && !lastSegment.contains("opencga-storage-hadoop-deps");
            if (noFileName || opencgaArtifact) {
                accepted.add(candidate);
            }
        }
    }
    return accepted;
}

static Collection<URL> getUrls() {
// TODO: What if there are third party libraries that implement Tools?
// Currently they must contain "opencga" in the jar name.
// e.g. acme-rockets-opencga-5.4.0.jar
Collection<URL> urls = new LinkedList<>();
Expand All @@ -101,6 +114,10 @@ static Collection<URL> getUrls() {
}

/**
 * Resolves the class of a tool using only {@link #DEFAULT_PACKAGE} as the package list.
 *
 * @param toolId Tool identifier, forwarded unchanged to {@link #getToolClass(String, List)}.
 * @return The class returned by {@link #getToolClass(String, List)} for the default package list.
 * @throws ToolException propagated from {@link #getToolClass(String, List)}.
 */
public final Class<? extends OpenCgaTool> getToolClass(String toolId) throws ToolException {
return getToolClass(toolId, Collections.singletonList(DEFAULT_PACKAGE));
}

public final Class<? extends OpenCgaTool> getToolClass(String toolId, List<String> packages) throws ToolException {
Objects.requireNonNull(toolId);

Class<? extends OpenCgaTool> aClass = null;
Expand All @@ -112,7 +129,7 @@ public final Class<? extends OpenCgaTool> getToolClass(String toolId) throws Too
} catch (ClassNotFoundException ignore) {
}
if (aClass == null) {
aClass = loadTools().get(toolId);
aClass = loadTools(packages).get(toolId);
}
if (aClass == null) {
throw new ToolException("Tool '" + toolId + "' not found");
Expand All @@ -121,11 +138,19 @@ public final Class<? extends OpenCgaTool> getToolClass(String toolId) throws Too
}

public Tool getTool(String toolId) throws ToolException {
return getToolClass(toolId).getAnnotation(Tool.class);
return getTool(toolId, Collections.singletonList(DEFAULT_PACKAGE));
}

/**
 * Returns the {@link Tool} annotation of the class resolved for the given tool id.
 *
 * @param toolId   Tool identifier, forwarded to {@link #getToolClass(String, List)}.
 * @param packages Package list forwarded to {@link #getToolClass(String, List)}.
 * @return The {@link Tool} annotation of the resolved class; {@code Class.getAnnotation} yields null when the
 *         class does not carry it.
 * @throws ToolException propagated from {@link #getToolClass(String, List)}.
 */
public Tool getTool(String toolId, List<String> packages) throws ToolException {
return getToolClass(toolId, packages).getAnnotation(Tool.class);
}

public final OpenCgaTool createTool(String toolId) throws ToolException {
return createTool(getToolClass(toolId));
return createTool(toolId, Collections.singletonList(DEFAULT_PACKAGE));
}

/**
 * Creates a tool instance from its id: the class is resolved with {@link #getToolClass(String, List)} and then
 * passed to {@link #createTool(Class)}.
 *
 * @param toolId   Tool identifier.
 * @param packages Package list forwarded to {@link #getToolClass(String, List)}.
 * @return The tool created by {@link #createTool(Class)}.
 * @throws ToolException propagated from the class lookup or from the instantiation.
 */
public final OpenCgaTool createTool(String toolId, List<String> packages) throws ToolException {
return createTool(getToolClass(toolId, packages));
}

public final OpenCgaTool createTool(Class<? extends OpenCgaTool> aClass) throws ToolException {
Expand All @@ -141,12 +166,22 @@ public final OpenCgaTool createTool(Class<? extends OpenCgaTool> aClass) throws
}

public Collection<Class<? extends OpenCgaTool>> getTools() {
loadTools();
loadTools(Collections.singletonList(DEFAULT_PACKAGE));
return toolsList;
}

/**
 * Triggers {@code loadTools(packages)} and returns the static {@code toolsList}.
 *
 * NOTE(review): loadTools appears to scan only while the static toolsCache is null, so once any caller has
 * populated the cache the {@code packages} argument passed here is presumably ignored — confirm against the
 * full loadTools body.
 *
 * @param packages Package list forwarded to {@code loadTools}.
 * @return The shared static list of tool classes (not a copy).
 */
public Collection<Class<? extends OpenCgaTool>> getTools(List<String> packages) {
loadTools(packages);
return toolsList;
}

public Map<String, Set<Class<? extends OpenCgaTool>>> getDuplicatedTools() {
loadTools();
loadTools(Collections.singletonList(DEFAULT_PACKAGE));
return duplicatedTools;
}

/**
 * Triggers {@code loadTools(packages)} and returns the static {@code duplicatedTools} map.
 *
 * NOTE(review): loadTools appears to scan only while the static toolsCache is null, so once the cache has been
 * populated the {@code packages} argument passed here is presumably ignored — confirm against the full
 * loadTools body.
 *
 * @param packages Package list forwarded to {@code loadTools}.
 * @return The shared static map of duplicated tools (not a copy).
 */
public Map<String, Set<Class<? extends OpenCgaTool>>> getDuplicatedTools(List<String> packages) {
loadTools(packages);
return duplicatedTools;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,14 @@

package org.opencb.opencga.analysis.tools;

import org.apache.commons.collections4.CollectionUtils;
import org.opencb.commons.datastore.core.ObjectMap;
import org.opencb.commons.datastore.core.QueryOptions;
import org.opencb.opencga.analysis.variant.manager.VariantStorageManager;
import org.opencb.opencga.catalog.exceptions.CatalogException;
import org.opencb.opencga.catalog.managers.CatalogManager;
import org.opencb.opencga.core.api.ParamConstants;
import org.opencb.opencga.core.config.Configuration;
import org.opencb.opencga.core.exceptions.ToolException;
import org.opencb.opencga.core.models.job.Job;
import org.opencb.opencga.core.tools.ToolParams;
Expand All @@ -32,6 +34,7 @@

import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Collections;

public class ToolRunner {

Expand All @@ -42,16 +45,30 @@ public class ToolRunner {
private final String opencgaHome;
private final ToolFactory toolFactory;

private final Configuration configuration;

/**
 * Creates a runner from a storage engine factory: a new {@link VariantStorageManager} is built from
 * {@code catalogManager} and {@code storageEngineFactory}, and construction is delegated to the
 * constructor taking that manager (no configuration is supplied on this path).
 *
 * @param opencgaHome          OpenCGA home, stored as given.
 * @param catalogManager       Catalog manager, also used to build the variant storage manager.
 * @param storageEngineFactory Factory used to build the variant storage manager.
 */
public ToolRunner(String opencgaHome, CatalogManager catalogManager, StorageEngineFactory storageEngineFactory) {
this(opencgaHome, catalogManager, new VariantStorageManager(catalogManager, storageEngineFactory));
}

/**
 * Creates a runner from a storage engine factory and a configuration: a new {@link VariantStorageManager} is
 * built from {@code catalogManager} and {@code storageEngineFactory}, and construction is delegated to the
 * constructor taking that manager and the configuration.
 *
 * @param opencgaHome          OpenCGA home, stored as given.
 * @param catalogManager       Catalog manager, also used to build the variant storage manager.
 * @param storageEngineFactory Factory used to build the variant storage manager.
 * @param configuration        Configuration stored in the runner.
 */
public ToolRunner(String opencgaHome, CatalogManager catalogManager, StorageEngineFactory storageEngineFactory,
Configuration configuration) {
this(opencgaHome, catalogManager, new VariantStorageManager(catalogManager, storageEngineFactory), configuration);
}

/**
 * Creates a runner without a configuration: delegates to the four-argument constructor passing
 * {@code null} as the configuration.
 *
 * @param opencgaHome           OpenCGA home, stored as given.
 * @param catalogManager        Catalog manager.
 * @param variantStorageManager Variant storage manager.
 */
public ToolRunner(String opencgaHome, CatalogManager catalogManager, VariantStorageManager variantStorageManager) {
this(opencgaHome, catalogManager, variantStorageManager, null);
}

/**
 * Main constructor: stores the given collaborators and creates a new {@link ToolFactory}.
 *
 * @param opencgaHome           OpenCGA home, stored as given.
 * @param catalogManager        Catalog manager.
 * @param variantStorageManager Variant storage manager.
 * @param configuration         Configuration; may be null (the three-argument constructor passes null).
 */
public ToolRunner(String opencgaHome, CatalogManager catalogManager, VariantStorageManager variantStorageManager,
Configuration configuration) {
this.opencgaHome = opencgaHome;
this.catalogManager = catalogManager;

this.variantStorageManager = variantStorageManager;
this.toolFactory = new ToolFactory();

// Stored without validation; null is an accepted value here
this.configuration = configuration;
}

/**
Expand Down Expand Up @@ -102,9 +119,14 @@ public ExecutionResult execute(Job job, Path outDir, String token) throws Catalo
* @throws ToolException if the execution fails
*/
public ExecutionResult execute(String toolId, ObjectMap params, Path outDir, String jobId, String token) throws ToolException {
return toolFactory
.createTool(toolId)
.setUp(opencgaHome, catalogManager, variantStorageManager, params, outDir, jobId, token)
OpenCgaTool tool;
if (configuration != null && configuration.getAnalysis() != null
&& CollectionUtils.isNotEmpty(configuration.getAnalysis().getPackages())) {
tool = toolFactory.createTool(toolId, configuration.getAnalysis().getPackages());
} else {
tool = toolFactory.createTool(toolId);
}
return tool.setUp(opencgaHome, catalogManager, variantStorageManager, params, outDir, jobId, token)
.start();
}

Expand Down Expand Up @@ -177,6 +199,7 @@ public ExecutionResult execute(Class<? extends OpenCgaTool> tool, ToolParams too
* @throws ToolException if the execution fails
*/
public ExecutionResult execute(Class<? extends OpenCgaTool> tool, ObjectMap params, Path outDir, String jobId, String token) throws ToolException {

return toolFactory
.createTool(tool)
.setUp(opencgaHome, catalogManager, variantStorageManager, params, outDir, jobId, token)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
import org.opencb.commons.datastore.core.Query;
import org.opencb.commons.datastore.core.QueryOptions;
import org.opencb.opencga.TestParamConstants;
import org.opencb.opencga.analysis.clinical.ClinicalAnalysisLoadTask;
import org.opencb.opencga.analysis.tools.ToolRunner;
import org.opencb.opencga.analysis.variant.gwas.GwasAnalysis;
import org.opencb.opencga.analysis.variant.hrdetect.HRDetectAnalysis;
Expand All @@ -62,12 +63,15 @@
import org.opencb.opencga.core.config.storage.CellBaseConfiguration;
import org.opencb.opencga.core.config.storage.StorageConfiguration;
import org.opencb.opencga.core.exceptions.ToolException;
import org.opencb.opencga.core.models.clinical.ClinicalAnalysis;
import org.opencb.opencga.core.models.clinical.ClinicalAnalysisLoadParams;
import org.opencb.opencga.core.models.cohort.Cohort;
import org.opencb.opencga.core.models.cohort.CohortCreateParams;
import org.opencb.opencga.core.models.cohort.CohortUpdateParams;
import org.opencb.opencga.core.models.common.AnnotationSet;
import org.opencb.opencga.core.models.family.Family;
import org.opencb.opencga.core.models.file.File;
import org.opencb.opencga.core.models.file.FileLinkParams;
import org.opencb.opencga.core.models.individual.Individual;
import org.opencb.opencga.core.models.individual.IndividualInternal;
import org.opencb.opencga.core.models.individual.Location;
Expand Down Expand Up @@ -1063,6 +1067,46 @@ public void testPedigreeGraph() throws CatalogException {
assertEquals(base64, family.getPedigreeGraph().getBase64());
}

/**
 * Links the gzipped clinical analyses test resource into the cancer study, runs {@link ClinicalAnalysisLoadTask}
 * on it and verifies that two known clinical analyses can be found afterwards, both in an unfiltered search and
 * when searching by id.
 */
@Test
public void testClinicalAnalysisLoading() throws IOException, ToolException, CatalogException {
    String fileStr = "clinical_analyses.json.gz";

    // Link the test resource into the study under the "ca" path
    String gzFile = getClass().getResource("/biofiles/" + fileStr).getFile();
    FileLinkParams linkParams = new FileLinkParams(gzFile, "ca", "", "", null, null, null, null, null);
    File file = catalogManager.getFileManager().link(CANCER_STUDY, linkParams, true, token).first();
    System.out.println("file ID = " + file.getId());
    System.out.println("file name = " + file.getName());

    // Run clinical analysis load task
    Path loadingOutDir = Paths.get(opencga.createTmpOutdir("_clinical_analysis_outdir"));
    System.out.println("Clinical analysis load task out dir = " + loadingOutDir);

    ClinicalAnalysisLoadParams params = new ClinicalAnalysisLoadParams();
    params.setFile(file.getId());

    ObjectMap studyParams = new ObjectMap(ParamConstants.STUDY_PARAM, CANCER_STUDY);
    toolRunner.execute(ClinicalAnalysisLoadTask.class, params, studyParams, loadingOutDir, null, token);

    String ca1Id = "SAP-45016-1";
    String ca2Id = "OPA-6607-1";

    // Unfiltered search: both ids must be among the results
    Query query = new Query();
    OpenCGAResult<ClinicalAnalysis> result = catalogManager.getClinicalAnalysisManager()
            .search(CANCER_STUDY, query, QueryOptions.empty(), token);
    Assert.assertTrue(result.getResults().stream().map(ClinicalAnalysis::getId).anyMatch(ca1Id::equals));
    Assert.assertTrue(result.getResults().stream().map(ClinicalAnalysis::getId).anyMatch(ca2Id::equals));

    // Search by id: the first result must be the requested clinical analysis
    query.put("id", ca1Id);
    ClinicalAnalysis clinicalAnalysis = catalogManager.getClinicalAnalysisManager()
            .search(CANCER_STUDY, query, QueryOptions.empty(), token)
            .first();
    Assert.assertEquals(ca1Id, clinicalAnalysis.getId());

    query.put("id", ca2Id);
    clinicalAnalysis = catalogManager.getClinicalAnalysisManager()
            .search(CANCER_STUDY, query, QueryOptions.empty(), token)
            .first();
    Assert.assertEquals(ca2Id, clinicalAnalysis.getId());
}

@Test
public void testCellbaseConfigure() throws Exception {
String project = "Project_test_cellbase_configure";
Expand Down
2 changes: 2 additions & 0 deletions opencga-app/app/cloud/docker/compose/conf/configuration.yml
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,8 @@ healthCheck:


analysis:
packages: # List of packages where to find analysis tools
- "org.opencb.opencga"
scratchDir: "" # Scratch folder for the analysis.
execution:
# Accepted values are "local", "SGE", "azure-batch", "k8s"
Expand Down
2 changes: 1 addition & 1 deletion opencga-app/app/misc/clients/r_client_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def __init__(self, server_url, output_dir):
'Analysis - Clinical': 'Clinical',
'Operations - Variant Storage': 'Operation',
'Meta': 'Meta',
'Cva': 'Cva',
'Cvdb': 'Cvdb',
'GA4GH': 'GA4GH',
'Admin': 'Admin'
}
Expand Down
2 changes: 1 addition & 1 deletion opencga-app/app/misc/clients/rest_client_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def __init__(self, rest_api_file, output_dir):
'Analysis - Clinical': 'ClinicalAnalysis',
'Operations - Variant Storage': 'VariantOperation',
'Meta': 'Meta',
'Cva': 'Cva',
'Cvdb': 'Cvdb',
'GA4GH': 'GA4GH',
'Admin': 'Admin'
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
import static org.opencb.opencga.app.cli.internal.options.AlignmentCommandOptions.SamtoolsCommandOptions.SAMTOOLS_RUN_COMMAND;
import static org.opencb.opencga.app.cli.internal.options.ClinicalCommandOptions.CancerTieringCommandOptions.CANCER_TIERING_INTERPRETATION_RUN_COMMAND;
import static org.opencb.opencga.app.cli.internal.options.ClinicalCommandOptions.ExomiserInterpretationCommandOptions.EXOMISER_INTERPRETATION_RUN_COMMAND;
import static org.opencb.opencga.app.cli.internal.options.ClinicalCommandOptions.ImportClinicalAnalysesCommandOptions.IMPORT_COMMAND;
import static org.opencb.opencga.app.cli.internal.options.ClinicalCommandOptions.RgaAuxiliarSecondaryIndexCommandOptions.RGA_AUX_INDEX_RUN_COMMAND;
import static org.opencb.opencga.app.cli.internal.options.ClinicalCommandOptions.RgaSecondaryIndexCommandOptions.RGA_INDEX_RUN_COMMAND;
import static org.opencb.opencga.app.cli.internal.options.ClinicalCommandOptions.TeamCommandOptions.TEAM_INTERPRETATION_RUN_COMMAND;
Expand Down Expand Up @@ -226,6 +227,7 @@ public InternalCliOptionsParser() {
clinicalSubCommands.addCommand(RGA_INDEX_RUN_COMMAND, clinicalCommandOptions.rgaSecondaryIndexCommandOptions);
clinicalSubCommands.addCommand(RGA_AUX_INDEX_RUN_COMMAND, clinicalCommandOptions.rgaAuxiliarSecondaryIndexCommandOptions);
clinicalSubCommands.addCommand(EXOMISER_INTERPRETATION_RUN_COMMAND, clinicalCommandOptions.exomiserInterpretationCommandOptions);
clinicalSubCommands.addCommand(IMPORT_COMMAND, clinicalCommandOptions.importClinicalAnalysesCommandOptions);
clinicalSubCommands.addCommand("tsv-load", clinicalCommandOptions.tsvLoad);

fileCommandOptions = new FileCommandOptions(commonCommandOptions, jCommander);
Expand Down
Loading
Loading