-
Notifications
You must be signed in to change notification settings - Fork 4
/
KBaseStructure.spec
173 lines (146 loc) · 4.25 KB
/
KBaseStructure.spec
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
/*
Data types for protein structures and for the per-protein records that link a
structure to other KBase objects (genomes, metagenomes, features, feature sets).
@author chenry jmc jjeffryes tgu2 qzhang
*/
module KBaseStructure {
/*
Boolean flag represented as an int.
NOTE(review): presumably 0 means false and 1 means true; confirm with the producers of this data.
*/
typedef int bool;
/*
Reference to a KBase object (a genome, genome annotation, or feature, per the id annotation below).
@id ws KBaseGenomes.Genome KBaseGenomeAnnotations.GenomeAnnotation KBaseGenomes.Feature
*/
typedef string object_ref;
/*
Type of a KBase object, stored as a string (e.g., the type of a KBaseGenomes.Feature object).
*/
typedef string object_type;
/*
Reference to a KBase genome (a Genome or a GenomeAnnotation, per the id annotation below).
@id ws KBaseGenomes.Genome KBaseGenomeAnnotations.GenomeAnnotation
*/
typedef string genome_ref;
/*
Reference to KBaseStructure objects.
NOTE(review): the id annotation below names ExperimentalProteinStructure and ModelProteinStructure,
which are not defined in this module; the merged ProteinStructure type is not listed. Confirm
whether the annotation should be updated. This typedef is not used by any structure in this module.
@id ws KBaseStructure.ExperimentalProteinStructure KBaseStructure.ModelProteinStructure
*/
typedef string structure_ref;
/*
Reference to a KBase metagenome (an AnnotatedMetagenomeAssembly or an Assembly, per the id annotation below).
@id ws KBaseMetagenomes.AnnotatedMetagenomeAssembly KBaseGenomeAnnotations.Assembly
*/
typedef string metagenome_ref;
/*
Reference to a KBaseCollections.FeatureSet object.
@id ws KBaseCollections.FeatureSet
*/
typedef string feature_set_ref;
/*
CDS ID, stored as a string.
@id kb
*/
typedef string cds_id;
/*
Molecule ID, stored as a string. Used in ProteinData for both the protein id and the chain id.
@id external
*/
typedef string mol_id;
/*
Model ID, stored as an int (unlike the other ID typedefs in this module, which are strings).
NOTE(review): this typedef carries an id annotation on an int; confirm the type compiler
accepts and honours id annotations on non-string types.
@id external
*/
typedef int mod_id;
/*
Uniref ID, stored as a string.
@id external
*/
typedef string uniref_id;
/*
Reference to a file handle in shock.
@id handle
*/
typedef string handle_ref;
/*
ProteinData - description of a single protein; used as the element type of ProteinStructure.proteins.
Only sequence and md5 are required; every other field is declared optional below.
mol_id id: ID for the protein
string sequence: amino acid sequence
string md5: hash of the amino acid sequence
uniref_id uniref_id: from uniprot
genome_ref genome_ref: from a KBase genome
object_ref feature_ref: from a KBase feature
object_type feature_type: from a KBase feature
cds_id cds_id: from a KBase genome
metagenome_ref metagenome_ref: reference to a KBase metagenome
feature_set_ref feature_set_ref: reference to a KBaseCollections.FeatureSet
mod_id model_id: from PDB file
mol_id chain_id: from PDB file
float seq_identity: computed by comparing with KBase feature sequence
bool exact_match: computed according to seq_identity
@optional id uniref_id cds_id model_id chain_id seq_identity exact_match
@optional genome_ref metagenome_ref feature_ref feature_set_ref feature_type
*/
typedef structure {
mol_id id;
string sequence;
string md5;
uniref_id uniref_id;
genome_ref genome_ref;
object_ref feature_ref;
object_type feature_type;
cds_id cds_id;
metagenome_ref metagenome_ref;
feature_set_ref feature_set_ref;
/* The four fields below are parsed from PDB data and computed by comparing to the KBase feature sequence */
mod_id model_id;
mol_id chain_id;
float seq_identity;
bool exact_match;
} ProteinData;
/*
ProteinStructure - merged from previous ModelProteinStructure and ExperimentalProteinStructure
Required fields: name, num_chains, num_residues, num_atoms, proteins, pdb_handle, is_model.
All other fields are declared optional below.
compound: a compound dict with keys in ['molecule', 'chain', 'synonym', 'misc', ...]
source: a source dict with keys in ['organism_scientific', 'organism_taxid', 'other_details', 'organ', 'misc',...]
proteins: list of ProteinData records (the protein links of this structure)
pdb_handle, mmcif_handle, xml_handle: file links, each a handle_ref (reference to a file handle in shock)
is_model: label provided by the user; bool is a typedef of int in this module
NOTE(review): is_model presumably distinguishes model structures from experimental ones; confirm.
@optional compound source
@optional user_data num_models num_het_atoms num_water_atoms num_disordered_atoms num_disordered_residues
@optional rcsb_id deposition_date head release_date structure_method resolution author
@optional mmcif_handle xml_handle
*/
typedef structure {
string name;
string user_data;
int num_chains;
int num_residues;
int num_atoms;
/* Experimental header from .cif file (all fields in this group are optional) */
string rcsb_id;
string deposition_date;
string head;
string release_date;
string structure_method;
float resolution;
string author;
/* Structure metadata from .cif file (all fields in this group are optional) */
int num_models;
int num_het_atoms;
int num_water_atoms;
int num_disordered_atoms;
int num_disordered_residues;
mapping<string, string> compound;
mapping<string, string> source;
/* Protein links */
list<ProteinData> proteins;
/* File links; only pdb_handle is required */
handle_ref pdb_handle;
handle_ref mmcif_handle;
handle_ref xml_handle;
/* Label provided by the user; bool is an int */
bool is_model;
} ProteinStructure;
/*
ProteinStructures - a collection built on the merged ProteinStructure
protein_structures: a list of ProteinStructure values (declared as list<ProteinStructure>)
total_structures: total count of protein structures
description: description/remarks
NOTE(review): protein_structures holds full ProteinStructure values, not structure_ref strings;
confirm that embedding rather than referencing is intended.
@optional description
*/
typedef structure {
list<ProteinStructure> protein_structures;
int total_structures;
string description;
} ProteinStructures;
};