1
1
import Long from 'long'
2
- import { Buffer } from 'buffer'
3
2
import { unzip } from '@gmod/bgzf-filehandle'
4
3
5
4
import VirtualOffset , { fromBytes } from './virtualOffset'
@@ -11,6 +10,12 @@ import IndexFile, { Options } from './indexFile'
11
10
const CSI1_MAGIC = 21582659 // CSI\1
12
11
const CSI2_MAGIC = 38359875 // CSI\2
13
12
13
/**
 * Names of the preset formats, keyed by the value of the low four bits of the
 * format flags read from the index aux data.
 *
 * Declared `as const` so the table is read-only at the type level and a
 * lookup yields the literal names rather than a plain `string`.
 */
const formats = {
  0: 'generic',
  1: 'SAM',
  2: 'VCF',
} as const
18
+
14
19
/**
 * Arithmetic left shift: scales `num` by two raised to the power `bits`.
 *
 * Done with multiplication rather than the `<<` operator, which converts its
 * operands to 32-bit integers.
 *
 * @param num - value to shift
 * @param bits - number of bit positions to shift by
 * @returns `num` multiplied by 2 to the power of `bits`
 */
function lshift(num: number, bits: number) {
  const scale = Math.pow(2, bits)
  return num * scale
}
@@ -49,26 +54,27 @@ export default class CSI extends IndexFile {
49
54
throw new Error ( 'CSI indexes do not support indexcov' )
50
55
}
51
56
52
- parseAuxData ( bytes : Buffer , offset : number ) {
53
- const formatFlags = bytes . readInt32LE ( offset )
57
+ parseAuxData ( bytes : Uint8Array , offset : number ) {
58
+ const dataView = new DataView ( bytes . buffer )
59
+ const formatFlags = dataView . getInt32 ( offset , true )
54
60
const coordinateType =
55
61
formatFlags & 0x10000 ? 'zero-based-half-open' : '1-based-closed'
56
- const format = { 0 : 'generic' , 1 : 'SAM' , 2 : 'VCF' } [ formatFlags & 0xf ]
62
+ const format = formats [ ( formatFlags & 0xf ) as 0 | 1 | 2 ]
57
63
if ( ! format ) {
58
64
throw new Error ( `invalid Tabix preset format flags ${ formatFlags } ` )
59
65
}
60
66
const columnNumbers = {
61
- ref : bytes . readInt32LE ( offset + 4 ) ,
62
- start : bytes . readInt32LE ( offset + 8 ) ,
63
- end : bytes . readInt32LE ( offset + 12 ) ,
67
+ ref : dataView . getInt32 ( offset + 4 , true ) ,
68
+ start : dataView . getInt32 ( offset + 8 , true ) ,
69
+ end : dataView . getInt32 ( offset + 12 , true ) ,
64
70
}
65
- const metaValue = bytes . readInt32LE ( offset + 16 )
71
+ const metaValue = dataView . getInt32 ( offset + 16 , true )
66
72
const metaChar = metaValue ? String . fromCharCode ( metaValue ) : null
67
- const skipLines = bytes . readInt32LE ( offset + 20 )
68
- const nameSectionLength = bytes . readInt32LE ( offset + 24 )
73
+ const skipLines = dataView . getInt32 ( offset + 20 , true )
74
+ const nameSectionLength = dataView . getInt32 ( offset + 24 , true )
69
75
70
76
const { refIdToName, refNameToId } = this . _parseNameBytes (
71
- bytes . slice ( offset + 28 , offset + 28 + nameSectionLength ) ,
77
+ bytes . subarray ( offset + 28 , offset + 28 + nameSectionLength ) ,
72
78
)
73
79
74
80
return {
@@ -82,47 +88,52 @@ export default class CSI extends IndexFile {
82
88
}
83
89
}
84
90
85
/**
 * Build the reference-name lookup tables from a block of zero-terminated
 * names.
 *
 * Every zero byte ends one entry and advances the reference id. A non-empty
 * entry is decoded as UTF-8, passed through `this.renameRefSeq`, and recorded
 * in both tables; an empty entry still consumes an id but records nothing.
 * Bytes after the last zero byte are not recorded.
 *
 * @param namesBytes - the concatenated, zero-terminated names
 * @returns `refIdToName` (indexed by id) and `refNameToId` (keyed by the
 *   renamed name)
 */
_parseNameBytes(namesBytes: Uint8Array) {
  const refIdToName = []
  const refNameToId: Record<string, number> = {}
  const utf8 = new TextDecoder('utf8')

  let refId = 0
  let nameStart = 0
  // jump from terminator to terminator instead of inspecting every byte
  let terminator = namesBytes.indexOf(0)
  while (terminator !== -1) {
    if (terminator > nameStart) {
      const rawName = utf8.decode(namesBytes.subarray(nameStart, terminator))
      const refName = this.renameRefSeq(rawName)
      refIdToName[refId] = refName
      refNameToId[refName] = refId
    }
    refId += 1
    nameStart = terminator + 1
    terminator = namesBytes.indexOf(0, nameStart)
  }

  return { refNameToId, refIdToName }
}
104
115
105
116
// fetch and parse the index
106
117
107
118
async _parse ( opts : Options = { } ) {
108
119
const bytes = await unzip ( await this . filehandle . readFile ( opts ) )
120
+ const dataView = new DataView ( bytes . buffer )
109
121
110
122
// check CSI magic numbers
111
123
let csiVersion
112
- if ( bytes . readUInt32LE ( 0 ) === CSI1_MAGIC ) {
124
+ if ( dataView . getUint32 ( 0 , true ) === CSI1_MAGIC ) {
113
125
csiVersion = 1
114
- } else if ( bytes . readUInt32LE ( 0 ) === CSI2_MAGIC ) {
126
+ } else if ( dataView . getUint32 ( 0 , true ) === CSI2_MAGIC ) {
115
127
csiVersion = 2
116
128
} else {
117
129
throw new Error ( 'Not a CSI file' )
118
- // TODO: do we need to support big-endian CSI files?
119
130
}
120
131
121
- this . minShift = bytes . readInt32LE ( 4 )
122
- this . depth = bytes . readInt32LE ( 8 )
132
+ this . minShift = dataView . getInt32 ( 4 , true )
133
+ this . depth = dataView . getInt32 ( 8 , true )
123
134
this . maxBinNumber = ( ( 1 << ( ( this . depth + 1 ) * 3 ) ) - 1 ) / 7
124
135
const maxRefLength = 2 ** ( this . minShift + this . depth * 3 )
125
- const auxLength = bytes . readInt32LE ( 12 )
136
+ const auxLength = dataView . getInt32 ( 12 , true )
126
137
const aux =
127
138
auxLength && auxLength >= 30
128
139
? this . parseAuxData ( bytes , 16 )
@@ -134,35 +145,33 @@ export default class CSI extends IndexFile {
134
145
coordinateType : 'zero-based-half-open' ,
135
146
format : 'generic' ,
136
147
}
137
- const refCount = bytes . readInt32LE ( 16 + auxLength )
148
+ const refCount = dataView . getInt32 ( 16 + auxLength , true )
138
149
139
150
// read the indexes for each reference sequence
140
151
let firstDataLine : VirtualOffset | undefined
141
152
let currOffset = 16 + auxLength + 4
142
153
const indices = new Array ( refCount ) . fill ( 0 ) . map ( ( ) => {
143
- // the binning index
144
- const binCount = bytes . readInt32LE ( currOffset )
154
+ const binCount = dataView . getInt32 ( currOffset , true )
145
155
currOffset += 4
146
156
const binIndex : Record < string , Chunk [ ] > = { }
147
- let stats // < provided by parsing a pseudo-bin, if present
157
+ let stats
148
158
for ( let j = 0 ; j < binCount ; j += 1 ) {
149
- const bin = bytes . readUInt32LE ( currOffset )
159
+ const bin = dataView . getUint32 ( currOffset , true )
150
160
if ( bin > this . maxBinNumber ) {
151
- // this is a fake bin that actually has stats information
152
- // about the reference sequence in it
161
+ // this is a fake bin that actually has stats information about the
162
+ // reference sequence in it
153
163
stats = this . parsePseudoBin ( bytes , currOffset + 4 )
154
164
currOffset += 4 + 8 + 4 + 16 + 16
155
165
} else {
156
166
const loffset = fromBytes ( bytes , currOffset + 4 )
157
167
firstDataLine = this . _findFirstData ( firstDataLine , loffset )
158
- const chunkCount = bytes . readInt32LE ( currOffset + 12 )
168
+ const chunkCount = dataView . getInt32 ( currOffset + 12 , true )
159
169
currOffset += 16
160
170
const chunks = new Array ( chunkCount )
161
171
for ( let k = 0 ; k < chunkCount ; k += 1 ) {
162
172
const u = fromBytes ( bytes , currOffset )
163
173
const v = fromBytes ( bytes , currOffset + 8 )
164
174
currOffset += 16
165
- // this._findFirstData(data, u)
166
175
chunks [ k ] = new Chunk ( u , v , bin )
167
176
}
168
177
binIndex [ bin ] = chunks
@@ -186,14 +195,15 @@ export default class CSI extends IndexFile {
186
195
}
187
196
}
188
197
189
/**
 * Read the statistics carried by a pseudo-bin.
 *
 * @param bytes - index bytes that contain the pseudo-bin
 * @param offset - position in `bytes` from which the field offset below is
 *   measured
 * @returns an object whose `lineCount` is the unsigned little-endian 64-bit
 *   integer stored at `offset + 28`, converted by `longToNumber`
 */
parsePseudoBin(bytes: Uint8Array, offset: number) {
  // NOTE(review): presumably this field is the record count of the
  // pseudo-bin; confirm the layout against the CSI specification.
  //
  // Long.fromBytesLE is typed to take number[]; copy the eight bytes into a
  // real array rather than asserting the typed-array view through `unknown`.
  const countBytes = Array.from(bytes.subarray(offset + 28, offset + 36))
  return {
    lineCount: longToNumber(Long.fromBytesLE(countBytes, true)),
  }
}
198
208
199
209
async blocksForRange (
@@ -216,9 +226,8 @@ export default class CSI extends IndexFile {
216
226
return [ ]
217
227
}
218
228
219
- // const { linearIndex, binIndex } = indexes
220
-
221
- const overlappingBins = this . reg2bins ( min , max ) // List of bin #s that overlap min, max
229
+ // List of bin #s that overlap min, max
230
+ const overlappingBins = this . reg2bins ( min , max )
222
231
const chunks : Chunk [ ] = [ ]
223
232
224
233
// Find chunks in overlapping bins. Leaf bins (< 4681) are not pruned
0 commit comments