-
Notifications
You must be signed in to change notification settings - Fork 1
/
dna-is-synthesizable.go
175 lines (145 loc) · 5.06 KB
/
dna-is-synthesizable.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
package main
import (
"encoding/json"
"fmt"
"io/ioutil"
"log"
"os"
"regexp"
"strings"
"github.com/Open-Science-Global/idtapi"
"github.com/Open-Science-Global/poly/io/genbank"
"github.com/spf13/cobra"
)
// main is the program entry point; it only delegates to Execute.
func main() {
Execute()
}
// SeqInfo is one entry of the report built by sequencesAreSynthesizable and
// serialized to JSON by writeJsonFile.
type SeqInfo struct {
// Name is the name carried by the idtapi.Sequence that was scored.
Name string `json:"name"`
// IsSynthesizable is true when the summed complexity score is below 10.0 and
// the sequence string is no longer than MaxIDTSequenceLength.
IsSynthesizable bool `json:"isSynthesizable"`
// ComplexityScore is the sum of the ComplexityScore of every entry in Problems.
ComplexityScore float64 `json:"complexityScore"`
// Sequence is the sequence string that was submitted for scoring.
Sequence string `json:"sequence"`
// Problems holds the entries returned by idtapi.GetComplexityScore for this sequence.
Problems []idtapi.Problem `json:"problems"`
}
// Command-line flag values. They are bound to the flags registered in init and
// forwarded to Script by rootCmd's Run handler.
var (
// input is the directory the input files are listed from.
input string
// output is the directory the JSON report is written to.
output string
// pattern is the regular expression used to select file names inside input.
pattern string
// IDT account and API credentials, passed through to idtapi.GetComplexityScore.
idtUsername string
idtPassword string
idtClientId string
idtClientSecret string
// isAlert makes Script call checkAndAlert after the report has been written.
isAlert bool
)
// TokenURL is passed to idtapi.GetComplexityScore as its final argument
// (presumably the OAuth token endpoint — confirm against the idtapi package).
var TokenURL = "https://www.idtdna.com/Identityserver/connect/token"
// ComplexityScoreURL is passed to idtapi.GetComplexityScore just before TokenURL
// (presumably the scoring endpoint — confirm against the idtapi package).
var ComplexityScoreURL = "https://www.idtdna.com/api/complexities/screengBlockSequences"
// MaxIDTSequenceLength is the longest sequence string, as measured by len, that
// sequencesAreSynthesizable still reports as synthesizable.
var MaxIDTSequenceLength = 3000
// rootCmd is the only cobra command of this tool. Its Run handler ignores the
// positional arguments and calls Script with the package-level flag variables.
var rootCmd = &cobra.Command{
Use: "dna-is-synthesizable",
Short: "A github action to check if a part is synthesizable.",
Long: "A github action to check if a part is synthesizable from a given Genbank file.",
Run: func(cmd *cobra.Command, args []string) {
Script(input, output, pattern, isAlert, idtUsername, idtPassword, idtClientId, idtClientSecret)
},
}
// Execute runs rootCmd. When the command returns an error, the error is
// printed to standard error and the process exits with status 1.
func Execute() {
	err := rootCmd.Execute()
	if err == nil {
		return
	}

	fmt.Fprintln(os.Stderr, err)
	os.Exit(1)
}
// init registers the command-line flags on rootCmd and marks every flag except
// --alert as required.
func init() {
	flags := rootCmd.PersistentFlags()

	flags.StringVarP(&input, "input", "i", "", "Directory where all the input genbank files will be read")
	// The long name is spelled "ouput" (sic). It is kept unchanged because it is
	// part of the command-line interface existing callers already use.
	flags.StringVarP(&output, "ouput", "o", "", "Directory where all the output genbank files wil be written")
	flags.StringVarP(&pattern, "pattern", "r", "", "Regex to filter files in the input directory")
	flags.BoolVarP(&isAlert, "alert", "a", false, "Display an error when a non-synthesizable sequence is found")
	flags.StringVarP(&idtUsername, "username", "u", "", "IDT account username")
	flags.StringVarP(&idtPassword, "password", "p", "", "IDT account password")
	flags.StringVarP(&idtClientId, "clientId", "c", "", "IDT API ClientId")
	flags.StringVarP(&idtClientSecret, "clientSecret", "s", "", "IDT API ClientSecret")

	// The flags above are persistent flags, so they have to be marked with
	// MarkPersistentFlagRequired. MarkFlagRequired looks the name up in the
	// command's Flags() set, which does not contain the persistent flags yet at
	// this point; it therefore returns a "no such flag" error, and because that
	// error used to be discarded none of the flags was actually enforced.
	required := []string{
		"input",
		"ouput",
		"pattern",
		"username",
		"password",
		"clientId",
		"clientSecret",
	}
	for _, name := range required {
		if err := rootCmd.MarkPersistentFlagRequired(name); err != nil {
			// Only reachable when a name in the list does not match a registered flag.
			log.Fatalf("marking flag %q as required: %v", name, err)
		}
	}
}
func Script(inputDir string, outputDir string, pattern string, isAlert bool, username string, password string, clientId string, clientSecret string) {
filesPath := getListFilesByPattern(inputDir, pattern)
var sequences []idtapi.Sequence
for _, filePath := range filesPath {
sequence := genbank.Read(filePath)
sequences = append(sequences, idtapi.Sequence{sequence.Meta.Locus.Name, strings.ToUpper(sequence.Sequence)})
}
infos := sequencesAreSynthesizable(sequences, username, password, clientId, clientSecret)
writeJsonFile(infos, "synthesizable.json", outputDir)
if isAlert {
checkAndAlert(infos)
}
}
// checkAndAlert prints a message for every entry of infos that is not
// synthesizable. If at least one such entry exists it then terminates the
// program through log.Fatalln.
func checkAndAlert(infos []SeqInfo) {
	failures := 0
	for i := range infos {
		if infos[i].IsSynthesizable {
			continue
		}
		failures++
		fmt.Printf("The sequence %s can't be synthesized.\nSequence: %s\n\n", infos[i].Name, infos[i].Sequence)
	}

	if failures > 0 {
		log.Fatalln("WARNING: fatal error. Check the problems in the output file and try again.")
	}
}
// writeJsonFile encodes data as JSON and writes it to fileName inside outputDir.
//
// outputDir is created, including any missing parent directories, when it does
// not exist yet. An empty outputDir means the current directory; previously it
// resolved to the filesystem root. Any failure (directory creation, JSON
// encoding, file write) terminates the program through log.Fatalf.
func writeJsonFile(data []SeqInfo, fileName string, outputDir string) {
	dir := outputDir
	if dir == "" {
		dir = "."
	}

	// MkdirAll is a no-op for an existing directory and also creates parents,
	// which a plain Mkdir cannot do.
	if err := os.MkdirAll(dir, 0755); err != nil {
		log.Fatalf("creating output directory %q: %v", dir, err)
	}

	// Encoding can fail, for example on a NaN or infinite score, so the error
	// must not be dropped: otherwise an empty file would be written silently.
	payload, err := json.Marshal(data)
	if err != nil {
		log.Fatalf("encoding %s: %v", fileName, err)
	}

	filePath := dir + "/" + fileName
	if err := ioutil.WriteFile(filePath, payload, 0644); err != nil {
		log.Fatalf("writing %q: %v", filePath, err)
	}
}
// getListFilesByPattern returns the paths ("inputDir/name") of the
// non-directory entries directly inside inputDir whose names match the regular
// expression pattern, in the order reported by ioutil.ReadDir.
//
// The result is nil when nothing matches. An invalid pattern or an unreadable
// directory terminates the program through the log package.
func getListFilesByPattern(inputDir string, pattern string) []string {
	// Compile once instead of once per directory entry, and report a bad
	// user-supplied pattern as a regular fatal error rather than a panic.
	validFile, err := regexp.Compile(pattern)
	if err != nil {
		log.Fatalf("invalid file pattern %q: %v", pattern, err)
	}

	files, err := ioutil.ReadDir(inputDir)
	if err != nil {
		log.Fatal(err)
	}

	var filesPath []string
	for _, f := range files {
		// A directory whose name happens to match is not an input file.
		if f.IsDir() {
			continue
		}
		if validFile.MatchString(f.Name()) {
			filesPath = append(filesPath, inputDir+"/"+f.Name())
		}
	}
	return filesPath
}
// sequencesAreSynthesizable submits sequences to idtapi.GetComplexityScore and
// builds one SeqInfo per input sequence, in input order.
//
// A sequence is reported as synthesizable when the sum of its problem scores is
// below 10.0 and its sequence string is no longer than MaxIDTSequenceLength.
// The i-th element of the idtapi result is taken as the problems of the i-th
// input sequence.
func sequencesAreSynthesizable(sequences []idtapi.Sequence, username string, password string, clientId string, clientSecret string) []SeqInfo {
	problemsBySequence := idtapi.GetComplexityScore(sequences, username, password, clientId, clientSecret, ComplexityScoreURL, TokenURL)

	var infos []SeqInfo
	for i := range sequences {
		seq := sequences[i]
		problems := problemsBySequence[i]
		total := calculateComplexityScore(problems)

		infos = append(infos, SeqInfo{
			Name:            seq.Name,
			IsSynthesizable: total < 10.0 && len(seq.Sequence) <= MaxIDTSequenceLength,
			ComplexityScore: total,
			Sequence:        seq.Sequence,
			Problems:        problems,
		})
	}
	return infos
}
// calculateComplexityScore returns the sum of the ComplexityScore field over
// all problems; the result is 0 for an empty or nil slice.
func calculateComplexityScore(problems []idtapi.Problem) float64 {
	var total float64
	for i := range problems {
		total += problems[i].ComplexityScore
	}
	return total
}