Skip to content

Commit

Permalink
remove dependency on htslib (bgzip)
Browse files Browse the repository at this point in the history
  • Loading branch information
jorenretel committed Sep 20, 2024
1 parent 51ce65c commit cc32e10
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 4 deletions.
5 changes: 4 additions & 1 deletion MANIFEST.in
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,12 @@ exclude .egg-info/*
exclude benchmark/*
exclude docs/*
exclude examples/*
exclude example_data/*
exclude example_data/
exclude images/*
exclude .flake8
exclude .pre-commit-config.yaml
exclude environment.yml
exclude tmp/*

include example_data/some_intervals.tsv
include example_data/some_positions.tsv
16 changes: 13 additions & 3 deletions bigwig_loader/download_example_data.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import gzip
import hashlib
import logging
import subprocess
import shutil
import urllib.request
from pathlib import Path
from typing import BinaryIO
Expand All @@ -21,15 +22,24 @@ def get_reference_genome(reference_genome_path: Path = config.reference_genome)
if reference_genome_path.exists():
return reference_genome_path
elif compressed_file.exists():
subprocess.run(["bgzip", "-d", compressed_file])
# subprocess.run(["bgzip", "-d", compressed_file])
unzip_gz_file(compressed_file, reference_genome_path)
else:
LOGGER.info("Need reference genome for tests. Downloading it from ENCODE.")
url = "https://www.encodeproject.org/files/GRCh38_no_alt_analysis_set_GCA_000001405.15/@@download/GRCh38_no_alt_analysis_set_GCA_000001405.15.fasta.gz"
urllib.request.urlretrieve(url, compressed_file)
subprocess.run(["bgzip", "-d", compressed_file])
# subprocess.run(["bgzip", "-d", compressed_file])
unzip_gz_file(compressed_file, reference_genome_path)
return reference_genome_path


def unzip_gz_file(compressed_file_path: Path, output_file_path: Path) -> Path:
    """Decompress a gzip file to ``output_file_path``.

    The data is first streamed into a sibling ``<name>.part`` file and only
    renamed to ``output_file_path`` once decompression has finished. A
    corrupt or truncated archive (or an interrupted run) therefore never
    leaves a partial file at the final location, where a later
    "does the output already exist?" check would mistake it for a complete
    file.

    Args:
        compressed_file_path: Path of the gzip-compressed input file. It is
            left in place after decompression.
        output_file_path: Path the decompressed content is written to. An
            existing file at this path is replaced.

    Returns:
        ``output_file_path``, unchanged.

    Raises:
        OSError: If the input cannot be read or is not valid gzip data
            (``gzip.BadGzipFile`` is an ``OSError`` subclass), or if the
            output cannot be written.
        EOFError: If the gzip stream ends prematurely.
    """
    target_path = Path(output_file_path)
    partial_path = target_path.with_name(target_path.name + ".part")
    try:
        with gzip.open(compressed_file_path, "rb") as gz_file:
            with open(partial_path, "wb") as output_file:
                # Stream in chunks; the reference genome is far too large
                # to hold in memory at once.
                shutil.copyfileobj(gz_file, output_file)
        # Only expose the result under its final name once it is complete.
        partial_path.replace(target_path)
    except BaseException:
        # Also covers KeyboardInterrupt: never leave a partial file behind.
        try:
            partial_path.unlink()
        except FileNotFoundError:
            pass
        raise
    return output_file_path


EXAMPLE_FILES = {
"ENCFF270YCY.bigWig": (
"https://www.encodeproject.org/files/ENCFF270YCY/@@download/ENCFF270YCY.bigWig",
Expand Down

0 comments on commit cc32e10

Please sign in to comment.