@@ -669,20 +669,17 @@ int mm_idx_alt_read(mm_idx_t *mi, const char *fn)
669
669
return n_alt ;
670
670
}
671
671
672
- /*******************
673
- * Known junctions *
674
- ******************* /
672
+ /***************
673
+ * BED reading *
674
+ ***************/
675
675
676
676
#define sort_key_bed (a ) ((a).st)
677
677
KRADIX_SORT_INIT (bed , mm_idx_intv1_t , sort_key_bed , 4 )
678
678
679
679
#define sort_key_end (a ) ((a).en)
680
680
KRADIX_SORT_INIT (end , mm_idx_intv1_t , sort_key_end , 4 )
681
681
682
- #define sort_key_jj (a ) ((a).off)
683
- KRADIX_SORT_INIT (jj , mm_idx_jjump1_t , sort_key_jj , 4 )
684
-
685
- static mm_idx_intv_t * mm_idx_bed_read_core (const mm_idx_t * mi , const char * fn , int read_junc , int is_pass1 )
682
+ static mm_idx_intv_t * mm_idx_bed_read_core (const mm_idx_t * mi , const char * fn , int read_junc , int min_sc )
686
683
{
687
684
gzFile fp ;
688
685
kstream_t * ks ;
@@ -729,7 +726,7 @@ static mm_idx_intv_t *mm_idx_bed_read_core(const mm_idx_t *mi, const char *fn, i
729
726
}
730
727
}
731
728
if (id < 0 || t .st < 0 || t .st >= t .en ) continue ; // contig ID not found, or other problems
732
- if (is_pass1 && t .score < 5 ) continue ; // for pass-1 BED, ignore junctions with weak signals; NB: paired with pass-1!
729
+ if (min_sc > 0 && t .score < min_sc ) continue ;
733
730
r = & I [id ];
734
731
if (i >= 11 && read_junc ) { // BED12
735
732
int32_t st , sz , en ;
@@ -762,12 +759,12 @@ static mm_idx_intv_t *mm_idx_bed_read_core(const mm_idx_t *mi, const char *fn, i
762
759
return I ;
763
760
}
764
761
765
- static mm_idx_intv_t * mm_idx_bed_read_merged (const mm_idx_t * mi , const char * fn , int read_junc , int is_pass1 )
762
+ static mm_idx_intv_t * mm_idx_bed_read_merge (const mm_idx_t * mi , const char * fn , int read_junc , int min_sc )
766
763
{
767
764
long n = 0 , n0 = 0 ;
768
765
int32_t i ;
769
766
mm_idx_intv_t * I ;
770
- I = mm_idx_bed_read_core (mi , fn , read_junc , is_pass1 );
767
+ I = mm_idx_bed_read_core (mi , fn , read_junc , min_sc );
771
768
if (I == 0 ) return 0 ;
772
769
for (i = 0 ; i < mi -> n_seq ; ++ i ) {
773
770
int32_t j , j0 , k ;
@@ -796,48 +793,13 @@ static mm_idx_intv_t *mm_idx_bed_read_merged(const mm_idx_t *mi, const char *fn,
796
793
return I ;
797
794
}
798
795
799
- static mm_idx_jjump_t * mm_idx_bed2jjump (const mm_idx_t * mi , const mm_idx_intv_t * I )
800
- {
801
- int32_t i ;
802
- mm_idx_jjump_t * J ;
803
- J = CALLOC (mm_idx_jjump_t , mi -> n_seq );
804
- for (i = 0 ; i < mi -> n_seq ; ++ i ) {
805
- int32_t j , k ;
806
- const mm_idx_intv_t * intv = & I [i ];
807
- mm_idx_jjump_t * jj = & J [i ];
808
- jj -> n = intv -> n * 2 ;
809
- jj -> a = CALLOC (mm_idx_jjump1_t , jj -> n );
810
- for (j = k = 0 ; j < intv -> n ; ++ j ) {
811
- jj -> a [k ].off = intv -> a [j ].st , jj -> a [k ].off2 = intv -> a [j ].en , jj -> a [k ].cnt = intv -> a [j ].cnt , jj -> a [k ].strand = intv -> a [j ].strand , ++ k ;
812
- jj -> a [k ].off = intv -> a [j ].en , jj -> a [k ].off2 = intv -> a [j ].st , jj -> a [k ].cnt = intv -> a [j ].cnt , jj -> a [k ].strand = intv -> a [j ].strand , ++ k ;
813
- }
814
- radix_sort_jj (jj -> a , jj -> a + jj -> n );
815
- }
816
- return J ;
817
- }
818
-
819
- int mm_idx_bed_read2 (mm_idx_t * mi , const char * fn , int read_junc , int for_score , int for_jump )
796
+ int mm_idx_bed_read (mm_idx_t * mi , const char * fn , int read_junc )
820
797
{
821
- int32_t i ;
822
- mm_idx_intv_t * I ;
823
798
if (mi -> h == 0 ) mm_idx_index_name (mi );
824
- I = mm_idx_bed_read_merged (mi , fn , read_junc , 0 );
825
- if (I == 0 ) return 0 ;
826
- if (for_jump )
827
- mi -> J = mm_idx_bed2jjump (mi , I );
828
- if (!for_score ) {
829
- for (i = 0 ; i < mi -> n_seq ; ++ i )
830
- free (I [i ].a );
831
- free (I );
832
- } else mi -> I = I ;
799
+ mi -> I = mm_idx_bed_read_merge (mi , fn , read_junc , -1 );
833
800
return 0 ;
834
801
}
835
802
836
- int mm_idx_bed_read (mm_idx_t * mi , const char * fn , int read_junc )
837
- {
838
- return mm_idx_bed_read2 (mi , fn , read_junc , 1 , 0 );
839
- }
840
-
841
803
int mm_idx_bed_junc (const mm_idx_t * mi , int32_t ctg , int32_t st , int32_t en , uint8_t * s )
842
804
{
843
805
int32_t i , left , right ;
@@ -863,6 +825,96 @@ int mm_idx_bed_junc(const mm_idx_t *mi, int32_t ctg, int32_t st, int32_t en, uin
863
825
return left ;
864
826
}
865
827
828
+ /*********************************
829
+ * Reading junctions for jumping *
830
+ *********************************/
831
+
832
+ #define sort_key_jj (a ) ((a).off)
833
+ KRADIX_SORT_INIT (jj , mm_idx_jjump1_t , sort_key_jj , 4 )
834
+
835
+ #define sort_key_jj2 (a ) ((a).off2)
836
+ KRADIX_SORT_INIT (jj2 , mm_idx_jjump1_t , sort_key_jj2 , 4 )
837
+
838
+ static mm_idx_jjump_t * mm_idx_bed2jjump (const mm_idx_t * mi , const mm_idx_intv_t * I , uint16_t flag )
839
+ {
840
+ int32_t i ;
841
+ mm_idx_jjump_t * J ;
842
+ J = CALLOC (mm_idx_jjump_t , mi -> n_seq );
843
+ for (i = 0 ; i < mi -> n_seq ; ++ i ) {
844
+ int32_t j , k ;
845
+ const mm_idx_intv_t * intv = & I [i ];
846
+ mm_idx_jjump_t * jj = & J [i ];
847
+ jj -> n = intv -> n * 2 ;
848
+ jj -> a = CALLOC (mm_idx_jjump1_t , jj -> n );
849
+ for (j = k = 0 ; j < intv -> n ; ++ j ) {
850
+ jj -> a [k ].off = intv -> a [j ].st , jj -> a [k ].off2 = intv -> a [j ].en , jj -> a [k ].cnt = intv -> a [j ].cnt , jj -> a [k ].strand = intv -> a [j ].strand , jj -> a [k ++ ].flag = flag ;
851
+ jj -> a [k ].off = intv -> a [j ].en , jj -> a [k ].off2 = intv -> a [j ].st , jj -> a [k ].cnt = intv -> a [j ].cnt , jj -> a [k ].strand = intv -> a [j ].strand , jj -> a [k ++ ].flag = flag ;
852
+ }
853
+ radix_sort_jj (jj -> a , jj -> a + jj -> n );
854
+ }
855
+ return J ;
856
+ }
857
+
858
+ static mm_idx_jjump_t * mm_idx_jjump_merge (const mm_idx_t * mi , const mm_idx_jjump_t * J0 , const mm_idx_jjump_t * J1 )
859
+ {
860
+ int32_t i ;
861
+ mm_idx_jjump_t * J2 ;
862
+ J2 = CALLOC (mm_idx_jjump_t , mi -> n_seq );
863
+ for (i = 0 ; i < mi -> n_seq ; ++ i ) {
864
+ int32_t j , j0 , k ;
865
+ const mm_idx_jjump_t * jj0 = & J0 [i ], * jj1 = & J1 [i ];
866
+ mm_idx_jjump_t * jj2 = & J2 [i ];
867
+ // merge jj0 and jj1 into jj2; faster with sorted merge but the performance difference should be negligible
868
+ jj2 -> n = jj0 -> n + jj1 -> n ;
869
+ jj2 -> a = CALLOC (mm_idx_jjump1_t , jj2 -> n );
870
+ for (j = k = 0 ; j < jj0 -> n ; ++ j ) jj2 -> a [k ++ ] = jj0 -> a [j ];
871
+ for (j = k = 0 ; j < jj1 -> n ; ++ j ) jj2 -> a [k ++ ] = jj1 -> a [j ];
872
+ radix_sort_jj (jj2 -> a , jj2 -> a + jj2 -> n ); // sort by a[].off
873
+ // sort by a[].off and then by a[].off2 such that they can be merged later
874
+ for (j0 = 0 , j = 1 ; j <= jj2 -> n ; ++ j ) {
875
+ if (j == jj2 -> n || jj2 -> a [j0 ].off != jj2 -> a [j ].off ) {
876
+ radix_sort_jj2 (jj2 -> a + j0 , jj2 -> a + j );
877
+ j0 = j ;
878
+ }
879
+ }
880
+ // the actual merge
881
+ for (j0 = 0 , j = 1 , k = 0 ; j <= jj2 -> n ; ++ j ) {
882
+ if (j == jj2 -> n || jj2 -> a [j0 ].off != jj2 -> a [j ].off || jj2 -> a [j0 ].off2 != jj2 -> a [j ].off2 ) {
883
+ int32_t t , cnt = 0 ;
884
+ uint16_t flag = 0 ;
885
+ for (t = j0 ; t < j ; ++ t ) cnt += jj2 -> a [t ].cnt , flag |= jj2 -> a [t ].flag ;
886
+ jj2 -> a [k ] = jj2 -> a [j0 ];
887
+ jj2 -> a [k ].cnt = cnt ;
888
+ jj2 -> a [k ++ ].flag = flag ;
889
+ j0 = j ;
890
+ }
891
+ }
892
+ }
893
+ return J2 ;
894
+ }
895
+
896
+ int mm_idx_jjump_read (mm_idx_t * mi , const char * fn , int flag , int min_sc )
897
+ {
898
+ int32_t i ;
899
+ mm_idx_intv_t * I ;
900
+ mm_idx_jjump_t * J ;
901
+ if (mi -> h == 0 ) mm_idx_index_name (mi );
902
+ I = mm_idx_bed_read_merge (mi , fn , 1 , min_sc );
903
+ J = mm_idx_bed2jjump (mi , I , flag );
904
+ for (i = 0 ; i < mi -> n_seq ; ++ i ) free (I [i ].a );
905
+ free (I );
906
+ if (mi -> J ) {
907
+ mm_idx_jjump_t * J2 ;
908
+ J2 = mm_idx_jjump_merge (mi , mi -> J , J2 );
909
+ for (i = 0 ; i < mi -> n_seq ; ++ i ) {
910
+ free (mi -> J [i ].a ); free (J [i ].a );
911
+ }
912
+ free (mi -> J ); free (J );
913
+ mi -> J = J2 ;
914
+ } else mi -> J = J ;
915
+ return 0 ;
916
+ }
917
+
866
918
static int32_t mm_idx_jump_get_core (int32_t n , const mm_idx_jjump1_t * a , int32_t x ) // similar to mm_idx_find_intv()
867
919
{
868
920
int32_t s = 0 , e = n ;
0 commit comments