Skip to content

Commit

Permalink
liftover: minor improvements
Browse files Browse the repository at this point in the history
  • Loading branch information
imedina committed Oct 7, 2024
1 parent 2f2c96e commit c48ce4e
Show file tree
Hide file tree
Showing 6 changed files with 43 additions and 29 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@

public abstract class DockerWrapperAnalysisExecutor extends OpenCgaToolExecutor {

// protected String study;
protected Logger logger;

public final static String DOCKER_INPUT_PATH = "/data/input";
public final static String DOCKER_OUTPUT_PATH = "/data/output";

Expand All @@ -31,6 +34,10 @@ public abstract class DockerWrapperAnalysisExecutor extends OpenCgaToolExecutor

public static final String DOCKER_CLI_MSG = "Docker CLI: ";

/**
 * Creates the executor and initialises {@code logger} with a logger obtained
 * for the runtime class of this instance.
 */
public DockerWrapperAnalysisExecutor() {
    this.logger = LoggerFactory.getLogger(getClass());
}

/**
 * Returns the name of the Docker image, without any tag.
 *
 * @return the literal {@code "opencb/opencga-ext-tools"}
 */
public String getDockerImageName() {
return "opencb/opencga-ext-tools";
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -102,24 +102,31 @@ protected List<String> getSteps() {
return Arrays.asList(PREPARE_RESOURCES_STEP, ID);
}

/**
 * Runs the tool as two consecutive steps: {@code PREPARE_RESOURCES_STEP}, bound to
 * {@code prepareResources}, followed by {@code ID}, bound to {@code runLiftover}.
 *
 * NOTE(review): the only visible code that deals with {@code IOException} is the
 * commented-out cleanup below; confirm whether the declaration is still required.
 */
protected void run() throws ToolException {
protected void run() throws ToolException, IOException {
// Download and copy liftover resource files in the job dir
step(PREPARE_RESOURCES_STEP, this::prepareResources);

// Run liftover script
step(ID, this::runLiftover);

// TODO: decide whether the liftover resource folder has to be cleaned once the liftover has finished
// Files.newDirectoryStream(resourcePath).forEach(file -> {
// try {
// Files.delete(file);
// } catch (IOException e) {
// logger.warn("Error deleting file '{}': {}", file, e.getMessage());
// }
// });
}


protected void prepareResources() throws IOException, ToolException {
private void prepareResources() throws IOException, ToolException {
// Create folder where the liftover resources will be saved (within the job dir, aka outdir)
resourcePath = Files.createDirectories(getOutDir().resolve(RESOURCES_FOLDER));

// Identify Liftover resources to download only the required ones
Map<String, List<String>> mapResources = new HashMap<>();
switch (analysisParams.getTargetAssembly()) {
switch (analysisParams.getTargetAssembly().toUpperCase()) {
case LIFTOVER_GRCH38: {
mapResources.put(ID, Collections.singletonList("GRCh37_to_GRCh38.chain.gz"));
mapResources.put("reference-genome", Arrays.asList("Homo_sapiens.GRCh37.dna.primary_assembly.fa.gz",
Expand Down Expand Up @@ -148,7 +155,7 @@ protected void prepareResources() throws IOException, ToolException {
}
}

protected void runLiftover() throws Exception {
private void runLiftover() throws Exception {
// Get executor
LiftoverWrapperAnalysisExecutor executor = getToolExecutor(LiftoverWrapperAnalysisExecutor.class);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ public class LiftoverWrapperAnalysisExecutor extends DockerWrapperAnalysisExecut
private String vcfDest;
private Path resourcePath;

private Logger logger = LoggerFactory.getLogger(this.getClass());
// private Logger logger = LoggerFactory.getLogger(this.getClass());

@Override
protected void run() throws Exception {
Expand Down Expand Up @@ -91,7 +91,7 @@ private void runLiftover(File file, Path outPath) throws ToolExecutorException {
+ " " + VIRTUAL_RESOURCES_FOLDER;

// Execute the liftover script in docker
String dockerImage = "opencb/opencga-ext-tools:" + GitRepositoryState.getInstance().getBuildVersion();
// A Docker image reference has the form <name>:<tag>; getDockerImageName() returns the bare name,
// so the ':' separator must be added before the build version.
String dockerImage = getDockerImageName() + ":" + GitRepositoryState.getInstance().getBuildVersion();

String dockerCli = buildCommandLine(dockerImage, inputBindings, outputBinding, params, null);
addEvent(Event.Type.INFO, "Docker command line: " + dockerCli);
Expand Down
38 changes: 19 additions & 19 deletions opencga-app/app/analysis/liftover/liftover.sh
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
INPUT_FILE=$1
TARGET_ASSEMBLY=$2 ## Values accepted are: GRCh38, hg38
OUTPUT_DIR=$3
RESOURCES_DIR=$4
LOCAL_RESOURCES_DIR=$4

# All four positional arguments are mandatory; the fourth one is stored in LOCAL_RESOURCES_DIR
if [ -z "$INPUT_FILE" ] || [ -z "$TARGET_ASSEMBLY" ] || [ -z "$OUTPUT_DIR" ] || [ -z "$LOCAL_RESOURCES_DIR" ]; then
echo "Usage: $0 <vcf_file> <target_assembly> <output_dir> <resources_dir>"
Expand All @@ -39,26 +39,26 @@ fi

if [ $TARGET_ASSEMBLY == "GRCh38" ]; then
echo "Liftover from GRCh37 to $TARGET_ASSEMBLY"
#
# ## Prepare GRCh37 and GRCh38 reference genomes
# if [ ! -f Homo_sapiens.GRCh37.dna.primary_assembly.fa.gz ]; then

## Prepare GRCh37 and GRCh38 reference genomes
# if [ ! -f "${LOCAL_RESOURCES_DIR}/Homo_sapiens.GRCh37.dna.primary_assembly.fa.gz" ]; then
# wget --no-check-certificate https://resources.opencb.org/opencb/opencga/analysis/commons/reference-genomes/Homo_sapiens.GRCh37.dna.primary_assembly.fa.gz
# fi
#
# if [ ! -f Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz ]; then
# wget --no-check-certificate https://resources.opencb.org/opencb/opencga/analysis/commons/reference-genomes/Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz
# fi
#
echo "gunzip ${RESOURCES_DIR}/Homo_sapiens.GRCh37.dna.primary_assembly.fa.gz"
gunzip ${RESOURCES_DIR}/Homo_sapiens.GRCh37.dna.primary_assembly.fa.gz
SOURCE_REFERENCE_FILE="${RESOURCES_DIR}/Homo_sapiens.GRCh37.dna.primary_assembly.fa"

echo "gunzip ${RESOURCES_DIR}/Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz"
gunzip ${RESOURCES_DIR}/Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz
TARGET_REFERENCE_FILE="${RESOURCES_DIR}/Homo_sapiens.GRCh38.dna.primary_assembly.fa"
echo "gunzip ${LOCAL_RESOURCES_DIR}/Homo_sapiens.GRCh37.dna.primary_assembly.fa.gz"
gunzip ${LOCAL_RESOURCES_DIR}/Homo_sapiens.GRCh37.dna.primary_assembly.fa.gz
SOURCE_REFERENCE_FILE="${LOCAL_RESOURCES_DIR}/Homo_sapiens.GRCh37.dna.primary_assembly.fa"

echo "gunzip ${LOCAL_RESOURCES_DIR}/Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz"
gunzip ${LOCAL_RESOURCES_DIR}/Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz
TARGET_REFERENCE_FILE="${LOCAL_RESOURCES_DIR}/Homo_sapiens.GRCh38.dna.primary_assembly.fa"

# wget http://ftp.ensembl.org/pub/assembly_mapping/homo_sapiens/GRCh37_to_GRCh38.chain.gz
CHAIN_FILE="${RESOURCES_DIR}/GRCh37_to_GRCh38.chain.gz"
CHAIN_FILE="${LOCAL_RESOURCES_DIR}/GRCh37_to_GRCh38.chain.gz"
elif [ $TARGET_ASSEMBLY == "hg38" ]; then
echo "Liftover from hg19 to $TARGET_ASSEMBLY"
#
Expand All @@ -71,16 +71,16 @@ elif [ $TARGET_ASSEMBLY == "hg38" ]; then
# wget https://hgdownload.soe.ucsc.edu/goldenPath/hg38/bigZips/hg38.fa.gz
# fi
#
echo "gunzip ${RESOURCES_DIR}/hg19.fa.gz"
gunzip ${RESOURCES_DIR}/hg19.fa.gz
SOURCE_REFERENCE_FILE="${RESOURCES_DIR}/hg19.fa"
echo "gunzip ${LOCAL_RESOURCES_DIR}/hg19.fa.gz"
gunzip ${LOCAL_RESOURCES_DIR}/hg19.fa.gz
SOURCE_REFERENCE_FILE="${LOCAL_RESOURCES_DIR}/hg19.fa"

echo "gunzip ${RESOURCES_DIR}/hg38.fa.gz"
gunzip ${RESOURCES_DIR}/hg38.fa.gz
TARGET_REFERENCE_FILE="${RESOURCES_DIR}/hg38.fa"
echo "gunzip ${LOCAL_RESOURCES_DIR}/hg38.fa.gz"
gunzip ${LOCAL_RESOURCES_DIR}/hg38.fa.gz
TARGET_REFERENCE_FILE="${LOCAL_RESOURCES_DIR}/hg38.fa"

# wget http://hgdownload.cse.ucsc.edu/goldenpath/hg19/liftOver/hg19ToHg38.over.chain.gz
CHAIN_FILE="${RESOURCES_DIR}/hg19ToHg38.over.chain.gz"
CHAIN_FILE="${LOCAL_RESOURCES_DIR}/hg19ToHg38.over.chain.gz"
else
echo "Unsupported target assembly $TARGET_ASSEMBLY"
exit 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -567,14 +567,14 @@ public class FieldConstants {
public static final String EXOMISER_CLINICAL_ANALYSIS_TYPE_DESCRIPTION = "Clinical analysis type: SINGLE or FAMILY.";

// Liftover
public static final String LIFTOVER_GRCH38 = "GRCh38";
public static final String LIFTOVER_HG38 = "hg38";
public static final String LIFTOVER_GRCH38 = "GRCH38";
public static final String LIFTOVER_HG38 = "HG38";
public static final String LIFTOVER_VCF_INPUT_FOLDER = "OPENCGA_VCF_INPUT_FOLDER";
public static final String LIFTOVER_FILES_DESCRIPTION = "List of VCF files to lift over";
// hg19/hg38 are the UCSC assembly names; GRCh37/GRCh38 are the names used by Ensembl.
public static final String LIFTOVER_TARGET_ASSEMBLY_DESCRIPTION = "Target assembly for lift over. Valid values: " + LIFTOVER_GRCH38
+ " (for Ensembl) or " + LIFTOVER_HG38 + " (for UCSC)";
public static final String LIFTOVER_VCF_DESTINATION_DESCRIPTION = "Destination path where the lifted-over VCF files will be stored."
+ " If left empty, the VCF files will be stored in the job folder. If the keyword " + LIFTOVER_VCF_INPUT_FOLDER
+ " is used, they will be stored in the same folder as the input VCF files. Otherwise, they will be stored in the specified"
+ " is used, then the VCF files will be stored in the same folder as the input VCF files. Otherwise, they will be stored in the specified"
+ " destination path";
}
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@

public class LiftoverWrapperParams extends ToolParams {

public static final String DESCRIPTION = "BCFtools +liftover plugin parameterss";
public static final String DESCRIPTION = "BCFtools +liftover plugin parameters";

@DataField(id = "files", description = FieldConstants.LIFTOVER_FILES_DESCRIPTION, required = true)
private List<String> files;
Expand Down

0 comments on commit c48ce4e

Please sign in to comment.