# models.yaml
#
# Each top-level key names a model type and maps to a list of cases. A case
# has an optional `name`, an `in` object, and an `out` mapping holding the
# expected `ga4gh_digest`, `ga4gh_identify`, and `ga4gh_serialize` values
# (some cases also carry `ga4gh_1_3_*` values).
---
# SequenceReference: the expected identifier and digest are both null; the
# expected serialization carries only `refgetAccession` and `type`.
SequenceReference:
  - in:
      type: SequenceReference
      refgetAccession: SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul
    out:
      ga4gh_identify: null
      ga4gh_digest: null
      ga4gh_serialize: '{"refgetAccession":"SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul","type":"SequenceReference"}'
# LengthExpression: the expected identifier and digest are both null; the
# expected serialization carries `length` and `type`.
LengthExpression:
  - in:
      type: LengthExpression
      length: 20000
    out:
      ga4gh_identify: null
      ga4gh_digest: null
      ga4gh_serialize: '{"length":20000,"type":"LengthExpression"}'
# LiteralSequenceExpression: the expected identifier and digest are both
# null; the expected serialization carries `sequence` and `type`.
LiteralSequenceExpression:
  - in:
      sequence: ACGT
      type: LiteralSequenceExpression
    out:
      ga4gh_identify: null
      ga4gh_digest: null
      ga4gh_serialize: '{"sequence":"ACGT","type":"LiteralSequenceExpression"}'
# ReferenceLengthExpression: the expected identifier and digest are both
# null. The input supplies `sequence`, but the expected serialization
# contains only `length`, `repeatSubunitLength`, and `type`.
ReferenceLengthExpression:
  - in:
      type: ReferenceLengthExpression
      length: 11
      repeatSubunitLength: 3
      sequence: CTCCTCCTCCT
    out:
      ga4gh_identify: null
      ga4gh_digest: null
      ga4gh_serialize: '{"length":11,"repeatSubunitLength":3,"type":"ReferenceLengthExpression"}'
# SequenceLocation: four cases that differ only in how `start` and `end` are
# written (single integers, two-element ranges, and ranges with a null bound).
# Every input sequenceReference carries an `id`, which does not appear in any
# expected serialization.
SequenceLocation:
  # Single integer coordinates; also carries a 1.3-style serialization.
  - name: "SequenceLocation w/ SequenceReference"
    in:
      end: 44908822
      start: 44908821
      sequenceReference:
        id: NC_000007.14
        type: SequenceReference
        refgetAccession: SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul
      type: SequenceLocation
    out:
      ga4gh_digest: 4t6JnYWqHwYw9WzBT_lmWBb3tLQNalkT
      ga4gh_identify: ga4gh:SL.4t6JnYWqHwYw9WzBT_lmWBb3tLQNalkT
      ga4gh_serialize: '{"end":44908822,"sequenceReference":{"refgetAccession":"SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul","type":"SequenceReference"},"start":44908821,"type":"SequenceLocation"}'
      ga4gh_1_3_serialize: '{"interval":{"end":{"type":"Number","value":44908822},"start":{"type":"Number","value":44908821},"type":"SequenceInterval"},"sequence_id":"F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul","type":"SequenceLocation"}'
  # Both coordinates given as two-element ranges with no null bound.
  - name: "SequenceLocation w/ SequenceReference and Ranges"
    in:
      end: [44908822, 44908922]
      start: [44908721, 44908821]
      sequenceReference:
        id: NC_000007.14
        type: SequenceReference
        refgetAccession: SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul
      type: SequenceLocation
    out:
      ga4gh_digest: 8-sGv9AY7GJT6QVgqbxhMXFNamnWcFJu
      ga4gh_identify: ga4gh:SL.8-sGv9AY7GJT6QVgqbxhMXFNamnWcFJu
      ga4gh_serialize: '{"end":[44908822,44908922],"sequenceReference":{"refgetAccession":"SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul","type":"SequenceReference"},"start":[44908721,44908821],"type":"SequenceLocation"}'
  # `end` has a null upper bound; the 1.3-style serialization expresses it as
  # an IndefiniteRange with comparator ">=" and `start` as a DefiniteRange.
  - name: "SequenceLocation w/Definite and Indefinite Ranges"
    in:
      end: [44908822, null]
      start: [44908721, 44908821]
      sequenceReference:
        id: NC_000007.14
        type: SequenceReference
        refgetAccession: SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul
      type: SequenceLocation
    out:
      ga4gh_digest: XQAXpesghmuDHziAcDCAmESBOPKTBhwD
      ga4gh_identify: ga4gh:SL.XQAXpesghmuDHziAcDCAmESBOPKTBhwD
      ga4gh_1_3_identify: ga4gh:VSL.zGh9Zy42Zu9R0sbyB1rXsxd33BIyiORk
      ga4gh_serialize: '{"end":[44908822,null],"sequenceReference":{"refgetAccession":"SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul","type":"SequenceReference"},"start":[44908721,44908821],"type":"SequenceLocation"}'
      ga4gh_1_3_serialize: '{"interval":{"end":{"comparator":">=","type":"IndefiniteRange","value":44908822},"start":{"max":44908821,"min":44908721,"type":"DefiniteRange"},"type":"SequenceInterval"},"sequence_id":"F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul","type":"SequenceLocation"}'
  # `end` has a null lower bound.
  - name: "SequenceLocation w/more Definite and Indefinite Ranges"
    in:
      end: [null, 44908822]
      start: [44908721, 44908821]
      sequenceReference:
        id: NC_000007.14
        type: SequenceReference
        refgetAccession: SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul
      type: SequenceLocation
    out:
      ga4gh_digest: OYplG0vkUojmK2hDejylSykx-np3HPFP
      ga4gh_identify: ga4gh:SL.OYplG0vkUojmK2hDejylSykx-np3HPFP
      ga4gh_serialize: '{"end":[null,44908822],"sequenceReference":{"refgetAccession":"SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul","type":"SequenceReference"},"start":[44908721,44908821],"type":"SequenceLocation"}'
# Adjacency: the two cases list the same pair of locations in opposite order
# and expect different digests. Neither input has a `linker` key, while both
# expected serializations contain "linker":null.
Adjacency:
  - name: "Ambiguous linker (order 1)"  # Expect different digest for different order
    in:
      type: Adjacency
      adjoinedSequences:
        - type: SequenceLocation
          sequenceReference:
            type: SequenceReference
            refgetAccession: SQ.9KdcA9ZpY1Cpvxvg8bMSLYDUpsX6GDLO
            residueAlphabet: na
            id: NC_000002.11
          start: 456
        - type: SequenceLocation
          sequenceReference:
            type: SequenceReference
            refgetAccession: SQ.S_KjnFVz-FE7M0W6yoaUDgYxLPc1jyWU
            residueAlphabet: na
            id: NC_000001.10
          end: 123
    out:
      ga4gh_digest: O0IbSYyhnBAtUsR51bpdoqeSo4YaDMFo
      ga4gh_identify: ga4gh:AJ.O0IbSYyhnBAtUsR51bpdoqeSo4YaDMFo
      ga4gh_serialize: '{"adjoinedSequences":["elmvUghL59i1XrD-Y7cwS__tBR6EEA98","bv1nX0Bsy9udUKaOKAJ-SlrysigguPre"],"linker":null,"type":"Adjacency"}'
  - name: "Ambiguous linker (order 2)"  # Expect different digest for different order
    in:
      type: Adjacency
      adjoinedSequences:
        - type: SequenceLocation
          sequenceReference:
            type: SequenceReference
            refgetAccession: SQ.S_KjnFVz-FE7M0W6yoaUDgYxLPc1jyWU
            residueAlphabet: na
            id: NC_000001.10
          end: 123
        - type: SequenceLocation
          sequenceReference:
            type: SequenceReference
            refgetAccession: SQ.9KdcA9ZpY1Cpvxvg8bMSLYDUpsX6GDLO
            residueAlphabet: na
            id: NC_000002.11
          start: 456
    out:
      ga4gh_digest: 8nZpyyB_ZRzvXIxWVdhTMRhxz0GlgkNU
      ga4gh_identify: ga4gh:AJ.8nZpyyB_ZRzvXIxWVdhTMRhxz0GlgkNU
      ga4gh_serialize: '{"adjoinedSequences":["bv1nX0Bsy9udUKaOKAJ-SlrysigguPre","elmvUghL59i1XrD-Y7cwS__tBR6EEA98"],"linker":null,"type":"Adjacency"}'
# Allele: one case with a LiteralSequenceExpression state and one with a
# ReferenceLengthExpression state. In both expected serializations the
# location appears as a digest string rather than an inlined object.
Allele:
  # Carries both current and 1.3-style expected identifiers/serializations.
  - name: "rs7412@GRCh38>T w/LiteralSequenceExpression"
    in:
      location:
        end: 44908822
        start: 44908821
        sequenceReference:
          id: NC_000019.10
          type: SequenceReference
          refgetAccession: SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl
        type: SequenceLocation
      state:
        sequence: T
        type: LiteralSequenceExpression
      type: Allele
    out:
      ga4gh_digest: 0AePZIWZUNsUlQTamyLrjm2HWUw2opLt
      ga4gh_identify: ga4gh:VA.0AePZIWZUNsUlQTamyLrjm2HWUw2opLt
      ga4gh_1_3_identify: ga4gh:VA.CxiA_hvYbkD8Vqwjhx5AYuyul4mtlkpD
      ga4gh_serialize: '{"location":"wIlaGykfwHIpPY2Fcxtbx4TINbbODFVz","state":{"sequence":"T","type":"LiteralSequenceExpression"},"type":"Allele"}'
      ga4gh_1_3_serialize: '{"location":"QrRSuBj-VScAGV_gEdxNgsnh41jYH1Kg","state":{"sequence":"T","type":"LiteralSequenceExpression"},"type":"Allele"}'
  # The input also carries an `expressions` list (SPDI), which does not
  # appear in the expected serialization.
  # NOTE(review): this sequenceReference has no `type` key, unlike the other
  # sequenceReference objects in this file - confirm that is intended.
  - name: "NC_000001.11:40819438:CTCCTCCT:CTCCTCCTCCT w/ReferenceLengthExpression"
    in:
      type: Allele
      expressions:
        - syntax: spdi
          value: NC_000001.11:40819438:CTCCTCCT:CTCCTCCTCCT
      location:
        type: SequenceLocation
        sequenceReference:
          refgetAccession: SQ.Ya6Rs7DHhDeg7YaOSg1EoNi3U_nQ9SvO
          residueAlphabet: na
          id: NC_000001.11
        start: 40819438
        end: 40819446
      state:
        type: ReferenceLengthExpression
        length: 11
        repeatSubunitLength: 3
    out:
      ga4gh_digest: Oop4kjdTtKcg1kiZjIJAAR3bp7qi4aNT
      ga4gh_identify: ga4gh:VA.Oop4kjdTtKcg1kiZjIJAAR3bp7qi4aNT
      ga4gh_serialize: '{"location":"nQGBuvRQOLEboA5TYtcz975fp_GulxbZ","state":{"length":11,"repeatSubunitLength":3,"type":"ReferenceLengthExpression"},"type":"Allele"}'
# CisPhasedBlock: the two cases list the same two member alleles in opposite
# order and expect the same digest and the same serialized `members` order.
# The member locations carry no sequenceReference of their own; the input's
# sequenceReference sits on the block and is absent from the expected
# serialization.
CisPhasedBlock:
  - name: "Simple CisPhasedBlock (order 1)"  # Expect same digest for different order
    in:
      members:
        - location:
            end: 602
            start: 601
            type: SequenceLocation
          state:
            sequence: C
            type: LiteralSequenceExpression
          type: Allele
        - location:
            end: 702
            start: 701
            type: SequenceLocation
          state:
            sequence: C
            type: LiteralSequenceExpression
          type: Allele
      sequenceReference:
        type: SequenceReference
        refgetAccession: SQ.S_KjnFVz-FE7M0W6yoaUDgYxLPc1jyWU
        residueAlphabet: na
        id: NC_000001.10
      type: CisPhasedBlock
    out:
      ga4gh_digest: YAWwnFF0e-T7fnuT4wRzZW4Lzg7jc-zQ
      ga4gh_identify: ga4gh:CPB.YAWwnFF0e-T7fnuT4wRzZW4Lzg7jc-zQ
      ga4gh_serialize: '{"members":["VJIUKfuj7QCxPI-bplNjh5bv2Y8nkvW7","aYfm-2xhlRwkQdgcnJi8Wd0ILCuvsevm"],"type":"CisPhasedBlock"}'
  - name: "Simple CisPhasedBlock (order 2)"  # Expect same digest for different order
    in:
      members:
        - location:
            end: 702
            start: 701
            type: SequenceLocation
          state:
            sequence: C
            type: LiteralSequenceExpression
          type: Allele
        - location:
            end: 602
            start: 601
            type: SequenceLocation
          state:
            sequence: C
            type: LiteralSequenceExpression
          type: Allele
      sequenceReference:
        type: SequenceReference
        refgetAccession: SQ.S_KjnFVz-FE7M0W6yoaUDgYxLPc1jyWU
        residueAlphabet: na
        id: NC_000001.10
      type: CisPhasedBlock
    out:
      ga4gh_digest: YAWwnFF0e-T7fnuT4wRzZW4Lzg7jc-zQ
      ga4gh_identify: ga4gh:CPB.YAWwnFF0e-T7fnuT4wRzZW4Lzg7jc-zQ
      ga4gh_serialize: '{"members":["VJIUKfuj7QCxPI-bplNjh5bv2Y8nkvW7","aYfm-2xhlRwkQdgcnJi8Wd0ILCuvsevm"],"type":"CisPhasedBlock"}'
# DerivativeMolecule: the two cases list the same two TraversalBlock
# components in opposite order and expect different digests. Each component
# wraps an Adjacency; only one of the two Adjacency objects has a `linker`.
# The expected `ga4gh_serialize` values are present but commented out.
# NOTE(review): in the linker-less Adjacency, the location that has only
# `end: 15000` carries no sequenceReference - confirm that is intended.
DerivativeMolecule:
  - name: "DerivativeMolecule (order 1)"  # Expect different digest for different order
    in:
      components:
        - type: TraversalBlock
          orientation: forward
          component:
            type: Adjacency
            linker:
              type: LiteralSequenceExpression
              sequence: GTC
            adjoinedSequences:
              - type: SequenceLocation
                sequenceReference:
                  type: SequenceReference
                  refgetAccession: SQ.S_KjnFVz-FE7M0W6yoaUDgYxLPc1jyWU
                  residueAlphabet: na
                  id: NC_000001.10
                end: 123
              - type: SequenceLocation
                sequenceReference:
                  type: SequenceReference
                  refgetAccession: SQ.9KdcA9ZpY1Cpvxvg8bMSLYDUpsX6GDLO
                  residueAlphabet: na
                  id: NC_000002.11
                start: 500
        - type: TraversalBlock
          orientation: forward
          component:
            type: Adjacency
            adjoinedSequences:
              - type: SequenceLocation
                end: 15000
              - type: SequenceLocation
                sequenceReference:
                  type: SequenceReference
                  refgetAccession: SQ.S_KjnFVz-FE7M0W6yoaUDgYxLPc1jyWU
                  residueAlphabet: na
                  id: NC_000001.10
                start: 10000
      type: DerivativeMolecule
    out:
      ga4gh_digest: wthhP9ryLEU1ueo6V25gqbSVJC8H4z-M
      ga4gh_identify: ga4gh:DM.wthhP9ryLEU1ueo6V25gqbSVJC8H4z-M
      # ga4gh_serialize: '{"components":["OSbl1_-TZ08ggYKXSQMxAyN5kfj64Axu","lqoM5i-DPwNrC3cJxdTe0YXbzTFVvdgQ"],"type":"DerivativeMolecule"}'
  - name: "DerivativeMolecule (order 2)"  # Expect different digest for different order
    in:
      components:
        - type: TraversalBlock
          orientation: forward
          component:
            type: Adjacency
            adjoinedSequences:
              - type: SequenceLocation
                end: 15000
              - type: SequenceLocation
                sequenceReference:
                  type: SequenceReference
                  refgetAccession: SQ.S_KjnFVz-FE7M0W6yoaUDgYxLPc1jyWU
                  residueAlphabet: na
                  id: NC_000001.10
                start: 10000
        - type: TraversalBlock
          orientation: forward
          component:
            type: Adjacency
            linker:
              type: LiteralSequenceExpression
              sequence: GTC
            adjoinedSequences:
              - type: SequenceLocation
                sequenceReference:
                  type: SequenceReference
                  refgetAccession: SQ.S_KjnFVz-FE7M0W6yoaUDgYxLPc1jyWU
                  residueAlphabet: na
                  id: NC_000001.10
                end: 123
              - type: SequenceLocation
                sequenceReference:
                  type: SequenceReference
                  refgetAccession: SQ.9KdcA9ZpY1Cpvxvg8bMSLYDUpsX6GDLO
                  residueAlphabet: na
                  id: NC_000002.11
                start: 500
      type: DerivativeMolecule
    out:
      ga4gh_digest: NWTt9cZmyKf_tYn9vSJ-8PsAA-xChO4a
      ga4gh_identify: ga4gh:DM.NWTt9cZmyKf_tYn9vSJ-8PsAA-xChO4a
      # ga4gh_serialize: '{"components":["lqoM5i-DPwNrC3cJxdTe0YXbzTFVvdgQ","OSbl1_-TZ08ggYKXSQMxAyN5kfj64Axu"],"type":"DerivativeMolecule"}'
# Terminus: a single unnamed case wrapping one SequenceLocation. In the
# expected serialization the location appears as a digest string.
Terminus:
  - in:
      location:
        end: 44908822
        start: 44908821
        sequenceReference:
          type: SequenceReference
          refgetAccession: SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul
        type: SequenceLocation
      type: Terminus
    out:
      ga4gh_digest: 8xpg7Q826fQJJ_6rImuqufhTXj0mh5gV
      ga4gh_identify: ga4gh:TM.8xpg7Q826fQJJ_6rImuqufhTXj0mh5gV
      ga4gh_serialize: '{"location":"4t6JnYWqHwYw9WzBT_lmWBb3tLQNalkT","type":"Terminus"}'
# CopyNumberCount: `copies` is a two-element range with a null upper bound
# and is serialized as-is; the location is serialized as a digest string.
CopyNumberCount:
  - name: ">=3 copies APOE"
    in:
      copies: [3, null]
      location:
        sequenceReference:
          type: SequenceReference
          refgetAccession: SQ.jdEWLvLvT8827O59m1Agh5H3n6kTzBsJ
        end: 44909393
        start: 44905795
        type: SequenceLocation
      type: CopyNumberCount
    out:
      ga4gh_digest: ezEUXykQvIhX8jHADILwC9f8k-jp8tZC
      ga4gh_identify: ga4gh:CN.ezEUXykQvIhX8jHADILwC9f8k-jp8tZC
      ga4gh_serialize: '{"copies":[3,null],"location":"d9h3FkfTWFkJSH56L1A26y-N2oq_SSuB","type":"CopyNumberCount"}'
# CopyNumberChange: `copyChange` is a CURIE-style string that is serialized
# unchanged; the location is serialized as a digest string.
# NOTE(review): the case name mentions BRCA1, but the location (accession,
# start, end) and its serialized digest are identical to the
# ">=3 copies APOE" CopyNumberCount case - confirm which gene is intended.
CopyNumberChange:
  - name: "Low-level copy gain of BRCA1"
    in:
      copyChange: EFO:0030071
      location:
        sequenceReference:
          type: SequenceReference
          refgetAccession: SQ.jdEWLvLvT8827O59m1Agh5H3n6kTzBsJ
        end: 44909393
        start: 44905795
        type: SequenceLocation
      type: CopyNumberChange
    out:
      ga4gh_digest: 2_fT_6-IpUm5aS0wp8ZAkJ01MCE569L2
      ga4gh_identify: ga4gh:CX.2_fT_6-IpUm5aS0wp8ZAkJ01MCE569L2
      ga4gh_serialize: '{"copyChange":"EFO:0030071","location":"d9h3FkfTWFkJSH56L1A26y-N2oq_SSuB","type":"CopyNumberChange"}'