-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmain.go
187 lines (146 loc) · 7.05 KB
/
main.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
package main
import (
"fmt"
)
func main() {
/*cifReader := GenerateCIFReader("cif_files/1fez.cif")
test, _ := ReadCIFToFasta(cifReader) */
//fmt.Println(GORMethodInput("GorParams/alpha_helix.txt"))
// Read the parameters file
parameters := ReadParameters("CFparameters.txt")
// Read the AAIndex file
aaIndexMap := ReadAAIndexMap("aa_index_map.txt")
var algorithm int
// Determine which algorithm to use
fmt.Println("Which algorithm do you want to use? 1)Chou-Fasman OR 2)GOR. Type 1 or 2.")
fmt.Scanln(&algorithm)
// Determine input. Then either translate or read from FASTA.
// There will three types of input: an array of Ensembls IDs, a FASTA file, or a DNA sequence.
fmt.Println("Are you i)supplying an 'array' of Ensembls OR ii)Reading from 'FASTA' OR iii)Reading from 'DNA'?")
var fileType string
fmt.Scanln(&fileType)
var proteins []Protein
var outputNames []string
var uniprotIDs []string
// NOTE: THE 3D VISUALIZATION IS ONLY AVAILABLE FOR THE ARRAY INPUT, AS ONLY THEN CAN WE GET THE PDB FILE
// Make the proteins slice according to the input
if fileType == "array" {
fmt.Println("Enter the Ensembl IDs separated by commas:")
var ensemblInput string
// ask the user to enter the uniprot IDs
fmt.Scanln(&ensemblInput)
// make a slice of uniprot IDs using the uniportInput string
ensemblIDs := MakeEnsemblArray(ensemblInput)
// make a slice of proteins using the ensembl IDs
proteins = EnsemblToProteinSlice(ensemblIDs)
// convert the ensembl IDs to uniprot IDs
uniprotIDs = EnsemblToUniprotSlice(ensemblIDs)
// need to download the pdb files for the uniprot IDs
DownloadPDBFiles(uniprotIDs)
// also make a outputNames array which will store the outputNames of the output files
outputNames = ensemblIDs
} else if fileType == "FASTA" {
// ask the user to enter the name of the fasta file
fmt.Println("Enter the name of the FASTA file:")
var fastaInput string
fmt.Scanln(&fastaInput)
// make a slice of proteins using the fasta file
reader := GenerateFASTAReader(fastaInput)
proteins = ReadProteinsFASTA(reader)
reader.file.Close() // closing the file after calling the ReadProteins function
// make a outputNames array which will store the outputNames of the output files
outputNames = MakeOutputNames(proteins)
} else if fileType == "DNA" {
// ask the user to enter the name of the DNA file
fmt.Println("Enter the name of the DNA file:")
var dnaInput string
fmt.Scanln(&dnaInput)
// make a slice of proteins using the dna file
reader := GenerateDNAReader(dnaInput)
proteins = append(proteins, TranslateDNA(reader))
reader.file.Close() // closing the file after calling the ReadProteins function
// make a outputNames array (specific to DNA type run) which will store the outputNames of the output files
outputNames = MakeDNAOutputNames(proteins)
} else {
panic("Incorrect reading source entered.")
}
if algorithm == 1 {
// made a slice of slice of CFScore, for each protein in fasta file
ProteinPredArray := make([][]CFScore, len(proteins))
for itr, protein := range proteins {
ProteinPredArray[itr] = ChouFasman(protein, parameters, aaIndexMap)
fmt.Println(protein, parameters, aaIndexMap)
// predict helices
helices := IdentifyHelicies(ProteinPredArray[itr])
fmt.Println("\n\nFound Helicies:", helices)
// predict beta sheets
betaSheets := IdentifySheets2(ProteinPredArray[itr])
fmt.Println("\n\nFound Beta Sheets:", betaSheets)
// reassign the helices and sheets as appropriate
reassignedABHelixSheet := AHelicalBSheetAssignment(append(helices, betaSheets...))
reassignedABHelixSheet = FillGapsInSequence(len(ProteinPredArray), reassignedABHelixSheet)
//fmt.Println("\n\nFound after Reassignment:", reassignedABHelixSheet)
reassignedABHelixSheet = IdentifyTurns(protein, parameters, aaIndexMap, reassignedABHelixSheet)
fmt.Println("\n\nFound after Reassignment:", reassignedABHelixSheet)
fmt.Println("Completed secondary structure assignment using CF!")
// VISUALIZATION CODE BELOW
// make int slice, which would store the information about the secondary structure of each position
// (1 = helix, 2 = sheet, 3 = loop).
aaSecStruct := make([]int, len(protein.Sequence))
// It should be initialized with 3s (as if not helix or sheet, then loop) and we assign helix and sheet below
for i := 0; i < len(aaSecStruct); i++ {
aaSecStruct[i] = 3
}
// convert the reassignedABHelixSheet slice to aaSecStruct slice
aaSecStruct = ConvertABHelixSheetToAASecStruct(reassignedABHelixSheet, aaSecStruct)
// 2D VISUALIZATION
// name according to the outputNames array
Make2DPlot(aaSecStruct, "outputs/2d_plots/2DPlot_"+outputNames[itr]+".png")
// 3D VISUALIZATION (only if filetype is 'array')
if fileType == "array" {
// name output according to the outputNames array and also the pdb will be existing in the
// 3d_visualization_resources folder with the same name
Make3DPlot(aaSecStruct, "3d_visualization_resources/"+uniprotIDs[itr]+".pdb", "outputs/3d_htmls/3DPlot_"+outputNames[itr]+".html")
}
}
} else if algorithm == 2 {
ProteinPredGorArray := make([][]CFScore, len(proteins))
for itr, protein := range proteins {
alphaParam := GORMethodInput("GorParams/alpha_helix.txt")
sheetParam := GORMethodInput("GorParams/beta_strand.txt")
turnParam := GORMethodInput("GorParams/beta_turn.txt")
coilParam := GORMethodInput("GorParams/coil.txt")
params := make([][][]float64, 4)
params[0], params[1], params[2], params[3] = alphaParam, sheetParam, turnParam, coilParam
aaIndexMap := ReadAAIndexMap("aa_index_map_gor.txt")
ProteinPredGorArray[itr] = GORPrediction(protein, params, aaIndexMap)
ssStruc := GorPredictionConv(ConvertPredToArr(ProteinPredGorArray[itr]))
fmt.Println("Completed secondary structure assignment using GOR!")
// VISUALIZATION CODE BELOW
// make int slice, which would store the information about the secondary structure of each position
// (1 = helix, 2 = sheet, 3 = loop).
aaSecStruct := make([]int, len(protein.Sequence))
// It should be initialized with 3s (as if not helix or sheet, then loop) and we assign helix and sheet below
for i := 0; i < len(aaSecStruct); i++ {
aaSecStruct[i] = 3
}
// convert the reassignedABHelixSheet slice to aaSecStruct slice
aaSecStruct = ConvertABHelixSheetToAASecStruct(ssStruc, aaSecStruct)
fmt.Println(aaSecStruct)
// 2D VISUALIZATION
// name according to the outputNames array
Make2DPlot(aaSecStruct, "outputs/2d_plots/2DPlot_"+outputNames[itr]+"GOR.png")
// 3D VISUALIZATION (only if filetype is 'array')
if fileType == "array" {
// name output according to the outputNames array and also the pdb will be existing in the
// 3d_visualization_resources folder with the same name
Make3DPlot(aaSecStruct, "3d_visualization_resources/"+uniprotIDs[itr]+".pdb", "outputs/3d_htmls/3DPlot_"+outputNames[itr]+"GOR.html")
}
}
} else {
panic("Incorrect algorithm identifier entered.")
}
//fmt.Println("Testing")
// below line is for testing purposes
TestGorAndFasman("results/ExpectedValues", parameters, aaIndexMap)
}