Skip to content

Commit

Permalink
Revisions (#59)
Browse files Browse the repository at this point in the history
* Added QC flags
* Added per-read GC content histograms
* Added % read length vs. base modification probability scatter plot
* Added base modification probability histogram
  • Loading branch information
jonperdomo authored Jan 21, 2025
1 parent 4a610e4 commit 47f1310
Show file tree
Hide file tree
Showing 26 changed files with 1,542 additions and 845 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/build-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ jobs:

- name: Build LongReadSum
shell: bash --login {0} # --login enables PATH variable access
run: make
run: make -d

- name: Run tests
shell: bash --login {0}
Expand Down
8 changes: 8 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@ SRC_DIR := $(CURDIR)/src
LIB_DIR := $(CURDIR)/lib

# Set the library paths for the compiler
# CONDA_PREFIX ?= $(shell echo $$CONDA_PREFIX)
# LIBRARY_PATHS := -L$(LIB_DIR) -L$(CONDA_PREFIX)/lib
# INCLUDE_PATHS := -I$(INCL_DIR) -I$(CONDA_PREFIX)/include
LIBRARY_PATHS := -L$(LIB_DIR) -L/usr/share/miniconda/envs/longreadsum/lib
INCLUDE_PATHS := -I$(INCL_DIR) -I/usr/share/miniconda/envs/longreadsum/include

Expand All @@ -11,9 +14,14 @@ all: swig_build compile

# Generate the SWIG Python/C++ wrappers
swig_build:
mkdir -p $(LIB_DIR)
swig -c++ -python -outdir $(LIB_DIR) -I$(INCL_DIR) -o $(SRC_DIR)/lrst_wrap.cpp $(SRC_DIR)/lrst.i

# Compile the C++ shared libraries into lib/
compile:
LD_LIBRARY_PATH=$(LD_LIBRARY_PATH):/usr/share/miniconda/envs/longreadsum/lib \
CXXFLAGS="$(INCLUDE_PATHS)" LDFLAGS="$(LIBRARY_PATHS)" python3 setup.py build_ext --build-lib $(LIB_DIR)

# Clean the build directory
clean:
$(RM) -r $(LIB_DIR)/*.so $(LIB_DIR)/*.py $(SRC_DIR)/lrst_wrap.cpp build/
9 changes: 7 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ MinION R9.4.1 from https://labs.epi2me.io/gm24385-5mc/)

## General usage
```
longreadsum bam -i $INPUT_FILE -o $OUTPUT_DIRECTORY --ref $REF_GENOME --modprob 0.8
longreadsum bam -i $INPUT_FILE -o $OUTPUT_DIRECTORY --mod --modprob 0.8 --ref $REF_GENOME
```

# RRMS BAM
Expand Down Expand Up @@ -258,7 +258,12 @@ longreadsum bam -i $INPUT_FILE -o $OUTPUT_DIRECTORY
# ONT POD5

This section describes how to generate QC reports for ONT POD5 (signal) files and their corresponding basecalled BAM files (data shown is HG002 using ONT
R10.4.1 and LSK114 downloaded from the tutorial https://github.com/epi2me-labs/wf-basecalling).
R10.4.1 and LSK114 downloaded from the tutorial
https://github.com/epi2me-labs/wf-basecalling).

> [!NOTE]
> This requires generating basecalled BAM files with the move table output. For
> example, for [dorado](https://github.com/nanoporetech/dorado), the parameter is `--emit-moves`
![image](https://github.com/user-attachments/assets/62c3c810-5c1a-4124-816b-74245af8b57c)

Expand Down
9 changes: 9 additions & 0 deletions conda/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,27 @@
# Add the library path to the LD_LIBRARY_PATH
export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:${PREFIX}/lib

# Ensure the lib directory exists
mkdir -p "${SRC_DIR}"/lib

# Generate the SWIG files
echo "Generating SWIG files..."
swig -c++ -python -outdir "${SRC_DIR}"/lib -I"${SRC_DIR}"/include -I"${PREFIX}"/include -o "${SRC_DIR}"/src/lrst_wrap.cpp "${SRC_DIR}"/src/lrst.i

# Generate the shared library
echo "Building the shared library..."
$PYTHON setup.py -I"${PREFIX}"/include -L"${PREFIX}"/lib install

# Create the src directory
mkdir -p "${PREFIX}"/src

# Copy source files to the bin directory
echo "Copying source files..."
cp -r "${SRC_DIR}"/src/*.py "${PREFIX}"/bin

# Copy the SWIG generated library to the lib directory
echo "Copying SWIG generated library..."
cp -r "${SRC_DIR}"/lib/*.py "${PREFIX}"/lib
cp -r "${SRC_DIR}"/lib/*.so "${PREFIX}"/lib

echo "Build complete."
15 changes: 7 additions & 8 deletions conda/meta.yaml
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
{% set version = "1.4.0" %}
# {% set revision = "b06670513616fd6342233c1c77e6d0bcf138b3bc" %}
{% set revision = "c257da611a4ae2cfcb4c6c42fcb504f808d644f9" %}

package:
name: longreadsum
version: {{ version }}

source:
path: ../
# git_url: https://github.com/WGLab/LongReadSum.git
# git_rev: {{ revision }}
git_url: https://github.com/WGLab/LongReadSum.git
git_rev: {{ revision }}
# path: ../

channels:
- conda-forge
Expand All @@ -29,18 +29,17 @@ requirements:
host:
- python=3.9
- swig
- hdf5
- htslib=1.20
- ont_vbz_hdf_plugin # Contains HDF5 as a dependency as well
# - jannessp::pod5
# - jannessp::lib-pod5
run:
- python=3.9
- numpy
- hdf5
- ont_vbz_hdf_plugin
- htslib=1.20
- bioconda::htslib=1.20
- plotly
- janessp::pod5
- jannessp::pod5
- pyarrow
# - janessp::lib-pod5

Expand Down
12 changes: 6 additions & 6 deletions environment.yml
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
name: longreadsum
channels:
- conda-forge
- jannessp # for pod5
- bioconda
- defaults
- jannessp # for pod5

dependencies:
- python=3.9
- python
- numpy
- hdf5
- ont_vbz_hdf_plugin
- htslib=1.20
- bioconda::htslib=1.20
- swig
- plotly
- pytest
- pod5
- pyarrow
- jannessp::pod5
- pyarrow
9 changes: 6 additions & 3 deletions include/hts_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ class HTSReader {
bool reading_complete = false;

// Update read and base counts
int updateReadAndBaseCounts(bam1_t* record, Basic_Seq_Statistics* basic_qc, uint64_t *base_quality_distribution);
int updateReadAndBaseCounts(bam1_t* record, Basic_Seq_Statistics& basic_qc, Basic_Seq_Quality_Statistics& seq_quality_info, bool is_primary);

// Read the next batch of records from the BAM file
int readNextRecords(int batch_size, Output_BAM & output_data, std::mutex & read_mutex, std::unordered_set<std::string>& read_ids, double base_mod_threshold);
Expand All @@ -47,9 +47,12 @@ class HTSReader {
bool hasNextRecord();

// Return the number of records in the BAM file using the BAM index
int64_t getNumRecords(const std::string &bam_file_name, Output_BAM &final_output, bool mod_analysis, double base_mod_threshold);
int getNumRecords(const std::string &bam_file_name, int thread_count);

std::map<int, int> getQueryToRefMap(bam1_t *record);
// Run base modification analysis
void runBaseModificationAnalysis(const std::string &bam_filename, Output_BAM& final_output, double base_mod_threshold, int read_count, int sample_count, int thread_count);

std::map<int, int> getQueryToRefMap(bam1_t* record);

// Add a modification to the base modification map
void addModificationToQueryMap(std::map<int32_t, std::tuple<char, char, double, int>> &base_modifications, int32_t pos, char mod_type, char canonical_base, double likelihood, int strand);
Expand Down
Loading

0 comments on commit 47f1310

Please sign in to comment.