Skip to content

Commit

Permalink
Merge pull request #32 from Eco-Flow/nf-test-dev
Browse files Browse the repository at this point in the history
Nf test dev
  • Loading branch information
SimonDMurray authored Feb 19, 2024
2 parents aff25a3 + 4d38bd3 commit 87488d8
Show file tree
Hide file tree
Showing 190 changed files with 626,706 additions and 259,948 deletions.
Empty file modified .gitattributes
100644 → 100755
Empty file.
16 changes: 16 additions & 0 deletions .github/workflows/test-pipeline.yml
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@ on:
env:
NEXTFLOW_VERSION: 23.10.1
NF_TEST_VERSION: 0.8.3
AWS_CLI_VERSION: 2.15.19
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_PUBLIC_KEY }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_KEY }}
AWS_DEFAULT_REGION: 'us-east-1'

jobs:

Expand All @@ -31,6 +35,9 @@ jobs:
- name: Install nf-test
run: sudo bash; cd /opt; wget "https://github.com/askimed/nf-test/releases/download/v${NF_TEST_VERSION}/nf-test-${NF_TEST_VERSION}.tar.gz"; tar -xvf "nf-test-${NF_TEST_VERSION}.tar.gz"; chmod +x nf-test; rm "/opt/nf-test-${NF_TEST_VERSION}.tar.gz"

- name: Install aws cli
run: curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64-${AWS_CLI_VERSION}.zip" -o "awscliv2.zip"; unzip awscliv2.zip; sudo ./aws/install --update

- name: Add software to path
run: echo "/opt" >> $GITHUB_PATH;

Expand All @@ -43,6 +50,12 @@ jobs:
- name: Run jcvi test
run: nf-test test tests/modules/jcvi.nf.test

- name: Download synteny module test cds files
run: aws s3 cp s3://synteny-test-data/data/synteny_input/Drosophila_santomea.cds data/synteny_input/Drosophila_santomea.cds; aws s3 cp s3://synteny-test-data/data/synteny_input/Drosophila_simulans.cds data/synteny_input/Drosophila_simulans.cds

- name: Run synteny test
run: nf-test test tests/modules/synteny.nf.test

- name: Run chromopaint test
run: nf-test test tests/modules/chromo.nf.test

Expand All @@ -55,5 +68,8 @@ jobs:
- name: Run go test
run: nf-test test tests/modules/go.nf.test

- name: Run go summarise test
run: nf-test test tests/modules/go_summarise.nf.test

- name: Clean workspace after finish
run: rm -rf $GITHUB_WORKSPACE/*
6 changes: 3 additions & 3 deletions .gitignore
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,6 @@ Results
Go
.nextflow*
.DS_Store
.nf-test
.nf-test.log
test-results
.nf-test*
nextflow
nf-test
Empty file modified .gitpod.yml
100644 → 100755
Empty file.
1 change: 1 addition & 0 deletions .nf-core.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
repository_type: pipeline
185 changes: 105 additions & 80 deletions README.md

Large diffs are not rendered by default.

181 changes: 181 additions & 0 deletions bin/Best_synteny_classifier_v6.classify.pl
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
#!/usr/bin/perl
use warnings;
use strict;
use List::Util qw(max);
use List::Util qw(min);

# Collect every per-comparison break-junction file in the working directory.
# glob() is used rather than shelling out to `ls`: no subshell is spawned,
# the names come back without trailing newlines, and unusual file names are
# not split across list entries. Sorting keeps the summary rows in a stable
# order from run to run.
my @breaks = sort glob('*Break_junction_information.txt');
warn "No *Break_junction_information.txt files found in the current directory\n"
    unless @breaks;

#Summary of all runs: the per-comparison loop below adds one row per file.
my $outname="Trans_Inversion_junction_count.txt";
open(my $OUT, ">", $outname) or die "Could not open $outname: $!\n";
#Print header of the summary file. The columns line up with the row that is
#printed to this handle at the end of each iteration of the loop below.
print $OUT "Comparison\tTranslocation_junctions\tInversion_junctions\tSame_direction_duplication_junctions\tLoop_direction_duplication_junctions\n";


# Classify the break junctions of every species pair listed in @breaks.
#
# For each "<sp1>.<sp2>.Break_junction_information.txt" this reads the matching
# "<sp1>.<sp2>.Sp2_synteny_order.txt", writes one row per junction to
# "<sp1>.<sp2>.Classification_summary.tsv", adds one row of counts to the
# run-wide summary ($OUT) and prints the same counts to STDOUT.
foreach my $file (@breaks){
    chomp $file;    # a no-op when the names carry no trailing newline

    #Sort out which name comparison we are making: the first two
    #dot-separated fields of the file name are taken as the two species.
    my @split_name=split(/\./, $file);
    my $species1=$split_name[0];
    my $species2=$split_name[1];
    my $combName="$species1\.$species2";

    #Initiate out file handle
    my $outfile="$combName\.Classification_summary.tsv";
    open(my $out, ">", $outfile) or die "Could not open $outfile: $!\n";

    #Read in the break junction file for this comparison
    #(original note: derived from syntenic anchors produced by MCScanX via jcvi).
    my $in_break_file="$combName\.Break_junction_information.txt";
    open(my $in_break, "<", $in_break_file) or die "Could not open $in_break_file: $!\n";

    #Gene order in sp2 for each syntenic block.
    my $in_sp2_order="$combName\.Sp2_synteny_order.txt";
    open(my $in_order, "<", $in_sp2_order) or die "Could not open $in_sp2_order: $!\n";

    #First read the synteny order file: for every chromosome / syntenic block
    #remember the smallest and largest gene-order position in the block.
    #Columns used: chromosome, block id, comma-separated list of positions.
    my %gene_order_max;
    my %gene_order_min;
    while ( my $line1 = <$in_order> ){
        chomp $line1;
        #The header line carries no positions; skip it completely.
        next if $line1 =~ m/Synteny_block_number/;

        my ($chr, $syn, $list) = split("\t", $line1);
        next unless defined $list;    #blank or truncated line

        my @array_list = split(/,/, $list);
        next unless @array_list;

        $gene_order_max{$chr}{$syn} = max(@array_list);
        $gene_order_min{$chr}{$syn} = min(@array_list);
    }

    #Historical note: an attempt to check whether the positions between a
    #block's min and max belong to another block on the same chromosome was
    #abandoned, because many syntenic blocks contain genes from other (or the
    #same) chromosomes - possibly TEs or indels.

    my $removeheader=<$in_break>;    #discard the header line of the break file
    my $odd_junction=0;
    my $same_direction=0;
    my $inversions=0;
    my $translocations=0;

    #For each junction between two apparent syntenic blocks.
    #Columns used: chromosome, block 1, block 2, start/end of block 1,
    #start/end of block 2, junction type ("INVER" or anything else).
    while ( my $line2 = <$in_break> ){
        chomp $line2;
        #A blank line is not a junction; without this it would be counted as
        #a translocation.
        next unless $line2 =~ /\S/;

        my ($chr, $syn1, $syn2, $sta_1, $end_1, $sta_2, $end_2, $type_original)
            = split("\t", $line2);

        #Extent of block 1 in the sp2 gene order (undef if the block is not
        #present in the order file).
        my $min_block1=$gene_order_min{$chr}{$syn1};
        my $max_block1=$gene_order_max{$chr}{$syn1};

        if (defined $type_original && $type_original eq "INVER"){
            #Direction of each block: 1 = start before end, -1 = start after
            #end, 0 = start and end are the same position.
            my $direction_1 = $end_1 <=> $sta_1;
            my $direction_2 = $end_2 <=> $sta_2;

            #Signed offset from the far edge of block 2 to the near edge of
            #block 1. The original code took the smaller of this value and
            #max(block 1) - min(block 2); this value can never be the larger
            #of the two, so the result is identical.
            #NOTE(review): the value is negative whenever block 1 starts
            #before block 2 ends - confirm whether an absolute distance
            #between the blocks was intended.
            my $gap = min($sta_1,$end_1) - max($sta_2,$end_2);

            my $junction_type;
            if ($direction_1==0 || $direction_2==0){
                #One of the blocks starts and ends on the same gene.
                $odd_junction++;
                $junction_type="Odd-duplicate";
            }
            elsif ($direction_1 == $direction_2){
                #Both blocks run the same way (both forward or both reverse),
                #so this is not an inversion. The reverse/reverse case used to
                #be tested as "0 && 0", which could never match here, and such
                #junctions were wrongly reported as inversions.
                $same_direction++;
                $junction_type="Same_direction";
            }
            else{
                #rest (opposite directions) must be inversions:
                $junction_type="Inversion";
                $inversions++;
            }
            print $out "$chr\t$syn1\t$syn2\t$min_block1\t$max_block1\t$sta_1\t$end_1\t$sta_2\t$end_2\t$gap\t$junction_type\n";
        }
        else{
            #Then it is a translocation, we cannot calculate a gap
            print $out "$chr\t$syn1\t$syn2\t$min_block1\t$max_block1\t$sta_1\t$end_1\t$sta_2\t$end_2\tNA\tTranslocation\n";
            $translocations++;
        }
    }

    print $OUT "$combName\t$translocations\t$inversions\t$same_direction\t$odd_junction\n";
    print "Translocations: $translocations\nInversions: $inversions\nSame_direction_likely_duplicate: $same_direction\nOdd_direction_likely_duplicates : $odd_junction \n\#(likely duplicated, one of the syntenic block starts and ends with the same gene)\nSame_direction $same_direction \n\#Gene order in same direction (so not an inversion [or translocation]), could be caused by duplications\n\n";

    #Buffered write errors only surface at close, so check the output handle.
    close $out or die "Could not close $outfile: $!\n";
    close $in_break;
    close $in_order;
}

#Flush and close the run-wide summary once every comparison has been written.
close $OUT or die "Could not close $outname: $!\n";



Loading

0 comments on commit 87488d8

Please sign in to comment.