Skip to content

Commit

Permalink
First iteration of synthesizable github actions
Browse files Browse the repository at this point in the history
  • Loading branch information
isaacguerreir committed Oct 14, 2021
0 parents commit b3c09df
Show file tree
Hide file tree
Showing 7 changed files with 853 additions and 0 deletions.
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
/input
/output
.vscode/
.github
12 changes: 12 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
FROM golang:1.12.0-alpine3.9

RUN apk add --no-cache git
WORKDIR /app
COPY go.mod ./
COPY go.sum ./
RUN go mod download
COPY *.go ./
RUN go build -o /annotator
COPY entrypoint.sh /entrypoint.sh
RUN chmod +x /entrypoint.sh
ENTRYPOINT ["/entrypoint.sh"]
46 changes: 46 additions & 0 deletions action.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
name: synthesizable
description: A Github action to annotate problematic sequences from given Genbank file.
inputs:
inputDir: # id of input
description: 'Directory with all genbank to be read and annotated'
required: true
default: 'input'
pattern:
description: 'User-specified pattern to select specific files from the input directory'
required: true
default: '.*\.\(gb\|gbk\)'
outputDir:
description: 'Directory where all genbank annotated will be written'
required: true
default: 'output'
username:
description: 'Username from an IDT account'
required: true
password:
description: 'Password from and IDT account'
required: true
clientId:
description: 'ClientId from an IDT account'
required: true
clientSecret:
description: 'Client Secret from and IDT account'
required: true
alert:
description: 'Show a error in the pipeline if found out a not synthesizable sequence'
default: false
runs:
using: 'docker'
image: 'Dockerfile'
args:
- ${{ inputs.inputDir }}
- ${{ inputs.outputDir }}
- ${{ inputs.pattern }}
- ${{ inputs.username }}
- ${{ inputs.password }}
- ${{ inputs.clientId }}
- ${{ inputs.clientSecret }}
- ${{ inputs.alert }}
author: Friendzymes Team
branding:
icon: 'layers'
color: 'purple'
4 changes: 4 additions & 0 deletions entrypoint.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#!/bin/sh -l

echo $1 $2 $3 $4 $5 $6 $7 $8
eval "/annotator -i $1 -o $2 -r $3 -u $4 -p $5 -c $6 -s $7 -a $8"
15 changes: 15 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
module github.com/Open-Science-Global/synthesizable

go 1.17

require (
github.com/Open-Science-Global/idtapi v0.2.3
github.com/Open-Science-Global/poly v0.15.5
github.com/spf13/cobra v1.2.1
)

require (
github.com/inconshreveable/mousetrap v1.0.0 // indirect
github.com/mitchellh/go-wordwrap v1.0.1 // indirect
github.com/spf13/pflag v1.0.5 // indirect
)
596 changes: 596 additions & 0 deletions go.sum

Large diffs are not rendered by default.

176 changes: 176 additions & 0 deletions synthesizable.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
package main

import (
"encoding/json"
"fmt"
"io/ioutil"
"log"
"os"
"regexp"
"strings"

"github.com/Open-Science-Global/idtapi"
"github.com/Open-Science-Global/poly/io/genbank"
"github.com/spf13/cobra"
)

func main() {
Execute()
}

type SeqInfo struct {
Name string `json:"name"`
IsSynthesizable bool `json:"isSynthesizable"`
ComplexityScore float64 `json:"complexityScore"`
Sequence string `json:"sequence"`
Problems []idtapi.Problem `json:"problems"`
}

var (
input string
output string
pattern string
idtUsername string
idtPassword string
idtClientId string
idtClientSecret string
isAlert bool
)

var TokenURL = "https://www.idtdna.com/Identityserver/connect/token"
var ComplexityScoreURL = "https://www.idtdna.com/api/complexities/screengBlockSequences"
var MaxIDTSequenceLength = 3000

var rootCmd = &cobra.Command{
Use: "synthesizable",
Short: "Synthesizable is a github action to check if a part is synthesizable.",
Long: "Synthesizable is a github action to check if a part is synthesizable from a given Genbank file.",
Run: func(cmd *cobra.Command, args []string) {
Script(input, output, pattern, isAlert, idtUsername, idtPassword, idtClientId, idtClientSecret)
},
}

func Execute() {
if err := rootCmd.Execute(); err != nil {
fmt.Fprintln(os.Stderr, err)
os.Exit(1)
}
}

func init() {
rootCmd.PersistentFlags().StringVarP(&input, "input", "i", "", "Directory where all the input genbank files will be read")
rootCmd.PersistentFlags().StringVarP(&output, "ouput", "o", "", "Directory where all the output genbank files wil be written")
rootCmd.PersistentFlags().StringVarP(&pattern, "pattern", "r", "", "Regex to selective filter specific files in the input folder")
rootCmd.PersistentFlags().BoolVarP(&isAlert, "alert", "a", false, "If a sequence is not synthesizable alert user by stopping generate a error code")

rootCmd.PersistentFlags().StringVarP(&idtUsername, "username", "u", "", "Username from the IDT account")
rootCmd.PersistentFlags().StringVarP(&idtPassword, "password", "p", "", "Password from the IDT account")
rootCmd.PersistentFlags().StringVarP(&idtClientId, "clientId", "c", "", "ClientId registered at IDT associated to the API key access")
rootCmd.PersistentFlags().StringVarP(&idtClientSecret, "clientSecret", "s", "", "ClientSecret provided by IDT associated to the clientId")

rootCmd.MarkFlagRequired("input")
rootCmd.MarkFlagRequired("ouput")
rootCmd.MarkFlagRequired("pattern")
rootCmd.MarkFlagRequired("username")
rootCmd.MarkFlagRequired("password")
rootCmd.MarkFlagRequired("clientId")
rootCmd.MarkFlagRequired("clientSecret")
}

func Script(inputDir string, outputDir string, pattern string, isAlert bool, username string, password string, clientId string, clientSecret string) {
filesPath := getListFilesByPattern(inputDir, pattern)
var sequences []idtapi.Sequence
for _, filePath := range filesPath {
sequence := genbank.Read(filePath)
sequences = append(sequences, idtapi.Sequence{sequence.Meta.Locus.Name, strings.ToUpper(sequence.Sequence)})
}

infos := sequencesAreSynthesizable(sequences, username, password, clientId, clientSecret)
writeJsonFile(infos, "synthesizable.json", outputDir)

if isAlert {
checkAndAlert(infos)
}
}

func checkAndAlert(infos []SeqInfo) {
haveProblems := false
for _, info := range infos {
fmt.Println(info)
if !info.IsSynthesizable {
haveProblems = true
fmt.Printf("The sequence %s can't be synthesized.\nSequence: %s\n\n", info.Name, info.Sequence)
}
}

if haveProblems {
log.Fatalln("WARNING: We recognize that some problems or are too long and can't be synthesizable according to IDT API. Check the problems in the output file and try again.")
}

}

func writeJsonFile(data []SeqInfo, fileName string, outputDir string) {
outputPath := outputDir + "/"

if _, err := os.Stat(outputPath); os.IsNotExist(err) {
os.Mkdir(outputDir, 0755)
}

json, _ := json.Marshal(data)

filePath := outputPath + fileName

err := ioutil.WriteFile(filePath, json, 0644)

if err != nil {
log.Fatal(err)
}
}

func getListFilesByPattern(inputDir string, pattern string) []string {
files, err := ioutil.ReadDir(inputDir)
if err != nil {
log.Fatal(err)
}
var filesPath []string
for _, f := range files {
var validFile = regexp.MustCompile(pattern)
if validFile.MatchString(f.Name()) {
file := inputDir + "/" + f.Name()
filesPath = append(filesPath, file)
}
}
return filesPath
}

func sequencesAreSynthesizable(sequences []idtapi.Sequence, username string, password string, clientId string, clientSecret string) []SeqInfo {

sequencesComplexityScore := idtapi.GetComplexityScore(sequences, username, password, clientId, clientSecret, ComplexityScoreURL, TokenURL)
var infos []SeqInfo
for index, sequence := range sequences {
isSynthesizable := false
score := calculateComplexityScore(sequencesComplexityScore[index])

if score < 10.0 && len(sequence.Sequence) <= MaxIDTSequenceLength {
isSynthesizable = true
}

infos = append(infos, SeqInfo{
sequence.Name,
isSynthesizable,
score,
sequence.Sequence,
sequencesComplexityScore[index],
})
}
return infos

}

func calculateComplexityScore(problems []idtapi.Problem) float64 {
score := 0.0
for _, problem := range problems {
score += problem.ComplexityScore
}
return score
}

0 comments on commit b3c09df

Please sign in to comment.