Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add command to update with GFA #115

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions fixtures/path-diff.gfa
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
S 3.2 CGA *
S 4.0 AAAAAAAA *
S 3.0 AT *
S 3.5 TCGATCGATCGATCGGGAACACACAGAGA *
L 3.2 + 3.5 + 0M
L 3.0 + 4.0 + 0M
L 4.0 + 3.5 + 0M
L 3.0 + 3.2 + 0M
P M-123 3.0+,3.2+,3.5+ *
P Child.m-123-Start-2-End-5-Node-4 3.0+,4.0+,3.5+ *
10 changes: 10 additions & 0 deletions fixtures/walk-diff.gfa
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
S 3.2 CGA *
S 3.5 TCGATCGATCGATCGGGAACACACAGAGA *
S 3.0 AT *
S 4.0 AAAAAAAA *
L 3.2 + 3.5 + 0M
L 3.0 + 3.2 + 0M
L 3.0 + 4.0 + 0M
L 4.0 + 3.5 + 0M
W m123 0 123 0 100 >3.0>3.2>3.5 *
W M-234 0 M-234 0 100 >3.0>4.0>3.5 *
8 changes: 8 additions & 0 deletions src/gfa.rs
Original file line number Diff line number Diff line change
Expand Up @@ -80,3 +80,11 @@ pub fn write_links(writer: &mut BufWriter<File>, links: &Vec<Link>) {
});
}
}

/// Converts a boolean orientation flag into a `Strand`.
///
/// `true` yields `Strand::Forward`; `false` yields `Strand::Reverse`.
pub fn bool_to_strand(direction: bool) -> Strand {
    match direction {
        true => Strand::Forward,
        false => Strand::Reverse,
    }
}
22 changes: 11 additions & 11 deletions src/gfa_reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -158,8 +158,8 @@ pub struct Link<T: SampleType, S: Opt, U: Opt> {
#[derive(Debug, Clone, Ord, Eq, PartialOrd, PartialEq)]
pub struct Path<T: SampleType, S: Opt, U: Opt> {
pub name: String,
pub dir: Vec<bool>,
pub nodes: Vec<T>,
pub strands: Vec<bool>,
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Felt like renaming some of these fields to be clearer

pub segments: Vec<T>,
pub overlap: U,
pub opt: S,
}
Expand All @@ -172,8 +172,8 @@ pub struct Walk<T: SampleType, S: Opt> {
pub seq_id: String,
pub seq_start: i32,
pub seq_end: i32,
pub walk_dir: Vec<bool>,
pub walk_id: Vec<T>,
pub strands: Vec<bool>,
pub segments: Vec<T>,
pub opt: S,
}

Expand Down Expand Up @@ -300,8 +300,8 @@ impl<T: SampleType + Ord + Clone, S: Opt + Ord + Clone, U: Opt> Gfa<T, S, U> {
let overlap = split_line.next();
z.paths.push(Path {
name,
dir: dirs,
nodes: node_id,
strands: dirs,
segments: node_id,
overlap: U::parse1(overlap, &mut z.sequence),
opt: S::parse1(split_line.next(), &mut z.sequence),
});
Expand All @@ -320,8 +320,8 @@ impl<T: SampleType + Ord + Clone, S: Opt + Ord + Clone, U: Opt> Gfa<T, S, U> {
seq_id,
seq_start,
seq_end,
walk_dir: w1,
walk_id: w2,
strands: w1,
segments: w2,
opt: S::parse1(opt, &mut z.sequence),
});
}
Expand Down Expand Up @@ -381,8 +381,8 @@ impl<T: SampleType + Ord + Clone, S: Opt + Ord + Clone, U: Opt> Gfa<T, S, U> {
+ &walk.seq_start.to_string()
+ "-"
+ &walk.seq_end.to_string(),
dir: walk.walk_dir.clone(),
nodes: walk.walk_id.to_vec(),
strands: walk.strands.clone(),
segments: walk.segments.to_vec(),
overlap: U::parse1(None, &mut self.sequence),
opt: walk.opt.clone(),
});
Expand Down Expand Up @@ -506,7 +506,7 @@ fn walk_parser<T: SampleType>(walk: &str, s1: &mut String) -> (Vec<bool>, Vec<T>
(dirs, node_id)
}

pub fn fill_nodes(graph: &mut Gfa<u32, (), ()>) {
pub fn fill_segments(graph: &mut Gfa<u32, (), ()>) {
graph.segments.sort();

let mut filled_vec = Vec::new();
Expand Down
25 changes: 9 additions & 16 deletions src/imports/gfa.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ use rusqlite::Connection;
use std::collections::{HashMap, HashSet};
use std::path::Path as FilePath;

use crate::gfa::bool_to_strand;
use crate::gfa_reader::Gfa;
use crate::models::sample::Sample;
use crate::models::{
Expand All @@ -17,14 +18,6 @@ use crate::models::{
};
use crate::progress_bar::{get_handler, get_progress_bar, get_time_elapsed_bar};

/// Maps a boolean direction flag to a `Strand`: `true` becomes
/// `Strand::Forward`, `false` becomes `Strand::Reverse`.
fn bool_to_strand(direction: bool) -> Strand {
    match direction {
        true => Strand::Forward,
        false => Strand::Reverse,
    }
}

pub fn import_gfa<'a>(
gfa_path: &FilePath,
collection_name: &str,
Expand Down Expand Up @@ -84,10 +77,10 @@ pub fn import_gfa<'a>(
let mut source_node_id = PATH_START_NODE_ID;
let mut source_coordinate = 0;
let mut source_strand = Strand::Forward;
for (index, segment_id) in input_path.nodes.iter().enumerate() {
for (index, segment_id) in input_path.segments.iter().enumerate() {
let target = sequences_by_segment_id.get(segment_id).unwrap();
let target_node_id = *node_ids_by_segment_id.get(segment_id).unwrap();
let target_strand = bool_to_strand(input_path.dir[index]);
let target_strand = bool_to_strand(input_path.strands[index]);
edges.insert(edge_data_from_fields(
source_node_id,
source_coordinate,
Expand Down Expand Up @@ -116,10 +109,10 @@ pub fn import_gfa<'a>(
let mut source_node_id = PATH_START_NODE_ID;
let mut source_coordinate = 0;
let mut source_strand = Strand::Forward;
for (index, segment_id) in input_walk.walk_id.iter().enumerate() {
for (index, segment_id) in input_walk.segments.iter().enumerate() {
let target = sequences_by_segment_id.get(segment_id).unwrap();
let target_node_id = *node_ids_by_segment_id.get(segment_id).unwrap();
let target_strand = bool_to_strand(input_walk.walk_dir[index]);
let target_strand = bool_to_strand(input_walk.strands[index]);
edges.insert(edge_data_from_fields(
source_node_id,
source_coordinate,
Expand Down Expand Up @@ -176,10 +169,10 @@ pub fn import_gfa<'a>(
let mut source_coordinate = 0;
let mut source_strand = Strand::Forward;
let mut path_edge_ids = vec![];
for (index, segment_id) in input_path.nodes.iter().enumerate() {
for (index, segment_id) in input_path.segments.iter().enumerate() {
let target = sequences_by_segment_id.get(segment_id).unwrap();
let target_node_id = *node_ids_by_segment_id.get(segment_id).unwrap();
let target_strand = bool_to_strand(input_path.dir[index]);
let target_strand = bool_to_strand(input_path.strands[index]);
let key = edge_data_from_fields(
source_node_id,
source_coordinate,
Expand Down Expand Up @@ -211,10 +204,10 @@ pub fn import_gfa<'a>(
let mut source_coordinate = 0;
let mut source_strand = Strand::Forward;
let mut path_edge_ids = vec![];
for (index, segment_id) in input_walk.walk_id.iter().enumerate() {
for (index, segment_id) in input_walk.segments.iter().enumerate() {
let target = sequences_by_segment_id.get(segment_id).unwrap();
let target_node_id = *node_ids_by_segment_id.get(segment_id).unwrap();
let target_strand = bool_to_strand(input_walk.walk_dir[index]);
let target_strand = bool_to_strand(input_walk.strands[index]);
let key = edge_data_from_fields(
source_node_id,
source_coordinate,
Expand Down
17 changes: 17 additions & 0 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ use gen::patch;
use gen::updates::fasta::update_with_fasta;
use gen::updates::gaf::{transform_csv_to_fasta, update_with_gaf};
use gen::updates::genbank::update_with_genbank;
use gen::updates::gfa::update_with_gfa;
use gen::updates::library::update_with_library;
use gen::updates::vcf::{update_with_vcf, VcfError};
use gen::views::patch::view_patches;
Expand Down Expand Up @@ -133,6 +134,9 @@ enum Commands {
/// If a new entity is found, create it as a normal import
#[arg(long, action, alias = "cm")]
create_missing: bool,
/// A GFA file to update from
#[arg(long)]
gfa: Option<String>,
},
/// Update a sequence collection using GAF results.
#[command(name = "update-gaf", arg_required_else_help(true))]
Expand Down Expand Up @@ -468,6 +472,7 @@ fn main() {
end,
coordinate_frame,
create_missing,
gfa,
}) => {
conn.execute("BEGIN TRANSACTION", []).unwrap();
operation_conn.execute("BEGIN TRANSACTION", []).unwrap();
Expand Down Expand Up @@ -534,6 +539,18 @@ fn main() {
Ok(_) => {}
Err(e) => panic!("Failed to update. Error is: {e}"),
}
} else if let Some(gfa_path) = gfa {
match update_with_gfa(
&conn,
&operation_conn,
name,
sample.clone().as_deref(),
&new_sample.clone().unwrap(),
gfa_path,
) {
Ok(_) => {}
Err(e) => panic!("Failed to update. Error is: {e}"),
}
} else {
panic!("Unknown file type provided for update.");
}
Expand Down
1 change: 1 addition & 0 deletions src/updates.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
pub mod fasta;
pub mod gaf;
pub mod genbank;
pub mod gfa;
pub mod library;
pub mod vcf;
Loading
Loading