From 93077e5673f6f46b3561662c14df90d8b0b5c012 Mon Sep 17 00:00:00 2001 From: Brennan Brouillette Date: Mon, 5 Jun 2023 15:33:59 -0400 Subject: [PATCH 01/49] chore: remove middleware, routes --- src/api/main.go | 14 +- src/api/middleware/tableMiddleware.go | 35 --- src/api/mvc/tables/main.go | 324 -------------------------- 3 files changed, 2 insertions(+), 371 deletions(-) delete mode 100644 src/api/middleware/tableMiddleware.go delete mode 100644 src/api/mvc/tables/main.go diff --git a/src/api/main.go b/src/api/main.go index 7477f66f..9bf35377 100644 --- a/src/api/main.go +++ b/src/api/main.go @@ -8,7 +8,6 @@ import ( dataTypesMvc "gohan/api/mvc/data-types" genesMvc "gohan/api/mvc/genes" serviceInfoMvc "gohan/api/mvc/service-info" - tablesMvc "gohan/api/mvc/tables" variantsMvc "gohan/api/mvc/variants" workflowsMvc "gohan/api/mvc/workflows" "gohan/api/services" @@ -139,13 +138,6 @@ func main() { e.GET("/data-types/variant/schema", dataTypesMvc.GetVariantDataTypeSchema) e.GET("/data-types/variant/metadata_schema", dataTypesMvc.GetVariantDataTypeMetadataSchema) - // -- Tables - e.GET("/tables", tablesMvc.GetTables) - e.POST("/tables", tablesMvc.CreateTable) - e.GET("/tables/:id", tablesMvc.GetTables) - e.DELETE("/tables/:id", tablesMvc.DeleteTable) - e.GET("/tables/:id/summary", tablesMvc.GetTableSummary) - // -- Variants e.GET("/variants/overview", variantsMvc.GetVariantsOverview) @@ -183,15 +175,13 @@ func main() { // TODO: refactor (deduplicate) -- e.GET("/variants/ingestion/run", variantsMvc.VariantsIngest, // middleware - gam.MandateAssemblyIdAttribute, - gam.MandateTableIdAttribute) + gam.MandateAssemblyIdAttribute) e.GET("/variants/ingestion/requests", variantsMvc.GetAllVariantIngestionRequests) e.GET("/variants/ingestion/stats", variantsMvc.VariantsIngestionStats) e.GET("/private/variants/ingestion/run", variantsMvc.VariantsIngest, // middleware - gam.MandateAssemblyIdAttribute, - gam.MandateTableIdAttribute) + gam.MandateAssemblyIdAttribute) e.GET("/private/variants/ingestion/requests", variantsMvc.GetAllVariantIngestionRequests) // -- diff --git a/src/api/middleware/tableMiddleware.go b/src/api/middleware/tableMiddleware.go deleted file mode 100644 index 4b0218af..00000000 --- a/src/api/middleware/tableMiddleware.go +++ /dev/null @@ -1,35 +0,0 @@ -package middleware - -import ( - "fmt" - "gohan/api/models/dtos/errors" - "gohan/api/utils" - "net/http" - - "github.com/labstack/echo" -) - -/* -Echo middleware to ensure a valid `tableId` HTTP query parameter was provided -*/ -func MandateTableIdAttribute(next echo.HandlerFunc) echo.HandlerFunc { - return func(c echo.Context) error { - // check for tableId query parameter - tableId := c.QueryParam("tableId") - if len(tableId) == 0 { - // if no id was provided, or is invalid, return an error - return c.JSON(http.StatusBadRequest, errors.CreateSimpleBadRequest("missing table id")) - } - - // verify tableId is a valid UUID - // - assume it's a valid table id if it's a uuid, - // further verification is done later - if !utils.IsValidUUID(tableId) { - fmt.Printf("Invalid table id %s\n", tableId) - - return c.JSON(http.StatusBadRequest, errors.CreateSimpleBadRequest(fmt.Sprintf("invalid table id %s - please provide a valid uuid", tableId))) - } - - return next(c) - } -} diff --git a/src/api/mvc/tables/main.go b/src/api/mvc/tables/main.go deleted file mode 100644 index 828c4254..00000000 --- a/src/api/mvc/tables/main.go +++ /dev/null @@ -1,324 +0,0 @@ -package tables - -import ( - "encoding/json" - "fmt" - "net/http" - "time" - - "gohan/api/contexts" - "gohan/api/models/constants" - "gohan/api/models/dtos" - "gohan/api/models/dtos/errors" - "gohan/api/models/indexes" - "gohan/api/mvc" - esRepo "gohan/api/repositories/elasticsearch" - "gohan/api/utils" - - "github.com/labstack/echo" - "github.com/mitchellh/mapstructure" -) - -func CreateTable(c echo.Context) error { - fmt.Printf("[%s] - CreateTable hit!\n", time.Now()) - - cfg := c.(*contexts.GohanContext).Config - es := c.(*contexts.GohanContext).Es7Client - - decoder := json.NewDecoder(c.Request().Body) - var t dtos.CreateTableRequestDto - err := decoder.Decode(&t) - if err != nil { - return c.JSON(http.StatusBadRequest, map[string]interface{}{ - "error": err, - }) - } - - // TODO: improve verification - if t.Name == "" { - return c.JSON(http.StatusBadRequest, dtos.CreateTableResponseDto{ - Error: "'name' cannot be empty", - }) - } else if t.Dataset == "" { - return c.JSON(http.StatusBadRequest, dtos.CreateTableResponseDto{ - Error: "'dataset' cannot be empty", - }) - } else if t.DataType == "" { - return c.JSON(http.StatusBadRequest, dtos.CreateTableResponseDto{ - Error: "'data_type' cannot be empty", - }) - } - - // ensure data_type is valid ('variant', etc..) - if !utils.StringInSlice(t.DataType, constants.ValidTableDataTypes) { - return c.JSON(http.StatusBadRequest, dtos.CreateTableResponseDto{ - Error: fmt.Sprintf("Invalid data_type: %s -- Must be one of the following: %s", t.DataType, constants.ValidTableDataTypes), - }) - } - - // TODO: ensure dataset is a valid identifier (uuid ?) - - // avoid creating duplicate tables with the same name - existingTables, error := esRepo.GetTablesByName(cfg, es, c.Request().Context(), t.Name) - if error != nil { - return c.JSON(http.StatusInternalServerError, dtos.CreateTableResponseDto{ - Error: error.Error(), - }) - } - if len(existingTables) > 0 { - return c.JSON(http.StatusBadRequest, dtos.CreateTableResponseDto{ - Error: fmt.Sprintf("A table with the name '%s' already exists", t.Name), - }) - } - - // call repository - table, error := esRepo.CreateTable(es, c.Request().Context(), t) - if error != nil { - return c.JSON(http.StatusInternalServerError, dtos.CreateTableResponseDto{ - Error: error.Error(), - }) - } - - return c.JSON(http.StatusOK, dtos.CreateTableResponseDto{ - Message: "Success", - Table: table, - }) -} - -func GetTables(c echo.Context) error { - fmt.Printf("[%s] - GetTables hit!\n", time.Now()) - - cfg := c.(*contexts.GohanContext).Config - es := c.(*contexts.GohanContext).Es7Client - - // obtain tableId from the path - tableId := c.Param("id") - - // obtain dataTypes from query parameter - dataType := c.QueryParam("data-type") - - // at least one of these parameters must be present - if tableId == "" && dataType == "" { - return c.JSON(http.StatusBadRequest, errors.CreateSimpleBadRequest("Missing both id and data type - please provide at least one of them")) - } else if dataType != "" { - // ensure data_type is valid ('variant', etc..) - if !utils.StringInSlice(dataType, constants.ValidTableDataTypes) { - return c.JSON(http.StatusBadRequest, errors.CreateSimpleBadRequest(fmt.Sprintf("Invalid data_type: %s -- Must be one of the following: %s", dataType, constants.ValidTableDataTypes))) - } - } - - // call repository - results, _ := esRepo.GetTables(cfg, es, c.Request().Context(), tableId, dataType) - if results == nil { - // return empty result (assume there are no tables because the index doesn't exist) - return c.JSON(http.StatusOK, []map[string]interface{}{}) - } - // TODO: handle _ error better - - // gather data from "hits" - docsHits := results["hits"].(map[string]interface{})["hits"] - allDocHits := []map[string]interface{}{} - mapstructure.Decode(docsHits, &allDocHits) - - // grab _source for each hit - allSources := make([]indexes.Table, 0) - - for _, r := range allDocHits { - source := r["_source"] - byteSlice, _ := json.Marshal(source) - - // cast map[string]interface{} to table - var resultingTable indexes.Table - if err := json.Unmarshal(byteSlice, &resultingTable); err != nil { - fmt.Println("failed to unmarshal:", err) - } - - // accumulate structs - allSources = append(allSources, resultingTable) - } - - if tableId != "" && len(allSources) > 0 { - // assume there is only 1 document in the database with this `id` - // return a single object rather than the whole list - return c.JSON(http.StatusOK, allSources[0]) - } - - return c.JSON(http.StatusOK, allSources) -} - -func GetTableSummary(c echo.Context) error { - fmt.Printf("[%s] - GetTableSummary hit!\n", time.Now()) - - cfg := c.(*contexts.GohanContext).Config - - // obtain tableId from the path - tableId := c.Param("id") - // obtain other potentially relevant parameters from available query parameters - // (these should be empty, but utilizing this common function is convenient to set up - // the call to the variants index through the repository functions) - var es, chromosome, lowerBound, upperBound, reference, alternative, alleles, genotype, assemblyId, _ = mvc.RetrieveCommonElements(c) - // unused tableId from query parameter set to '_' - - // table id must be provided - if tableId == "" { - fmt.Println("Missing table id") - return c.JSON(http.StatusBadRequest, errors.CreateSimpleBadRequest("Missing table id - please try again")) - } - - // call repository - // - get the table by id - results, getTablesError := esRepo.GetTables(cfg, es, c.Request().Context(), tableId, "") - if getTablesError != nil { - fmt.Printf("Failed to get tables with ID %s\n", tableId) - return c.JSON(http.StatusInternalServerError, errors.CreateSimpleInternalServerError("Something went wrong.. Please try again later!")) - } - - // gather data from "hits" - docsHits := results["hits"].(map[string]interface{})["hits"] - if docsHits == nil { - fmt.Printf("No Tables with ID '%s' were found\n", tableId) - return c.JSON(http.StatusBadRequest, errors.CreateSimpleBadRequest(fmt.Sprintf("Table with ID %s not found", tableId))) - } - - // obtain hits (expecting 1) - allDocHits := []map[string]interface{}{} - mapstructure.Decode(docsHits, &allDocHits) - - // grab _source for each hit - allSources := make([]interface{}, 0) - // var allSources []indexes.Variant - - for _, r := range allDocHits { - source := r["_source"] - byteSlice, _ := json.Marshal(source) - - // cast map[string]interface{} to table - var resultingTable indexes.Table - if err := json.Unmarshal(byteSlice, &resultingTable); err != nil { - fmt.Println("failed to unmarshal:", err) - } - - // accumulate structs - allSources = append(allSources, resultingTable) - } - - if len(allSources) == 0 { - fmt.Printf("Failed to get table summary with ID '%s'\n", tableId) - return c.JSON(http.StatusBadRequest, errors.CreateSimpleBadRequest(fmt.Sprintf("Failed to get table summary with ID %s", tableId))) - } - - // obtain table id from the one expected hit - // and search for variants associated with it - - totalVariantsCount := 0.0 - - docs, countError := esRepo.CountDocumentsContainerVariantOrSampleIdInPositionRange(cfg, es, - chromosome, lowerBound, upperBound, - "", "", // note : both variantId and sampleId are deliberately set to "" - reference, alternative, alleles, genotype, assemblyId, tableId) - if countError != nil { - fmt.Printf("Failed to count variants with table ID %s\n", tableId) - return c.JSON(http.StatusInternalServerError, errors.CreateSimpleInternalServerError("Something went wrong.. Please try again later!")) - } - - totalVariantsCount = docs["count"].(float64) - - // obtain number of samples associated with this tableId - resultingBuckets, bucketsError := esRepo.GetVariantsBucketsByKeywordAndTableId(cfg, es, "sample.id.keyword", tableId) - if bucketsError != nil { - fmt.Println(resultingBuckets) - } - - // retrieve aggregations.items.buckets - // and count number of samples - bucketsMapped := []interface{}{} - if aggs, aggsOk := resultingBuckets["aggregations"]; aggsOk { - aggsMapped := aggs.(map[string]interface{}) - - if items, itemsOk := aggsMapped["items"]; itemsOk { - itemsMapped := items.(map[string]interface{}) - - if buckets, bucketsOk := itemsMapped["buckets"]; bucketsOk { - bucketsMapped = buckets.([]interface{}) - } - } - } - - fmt.Printf("Successfully Obtained Table ID '%s' Summary \n", tableId) - - return c.JSON(http.StatusOK, &dtos.TableSummaryResponseDto{ - Count: int(totalVariantsCount), - DataTypeSpecific: map[string]interface{}{ - "samples": len(bucketsMapped), - }, - }) -} - -func DeleteTable(c echo.Context) error { - fmt.Printf("[%s] - DeleteTable hit!\n", time.Now()) - - // obtain tableId from the path - tableId := c.Param("id") - - // at least one of these parameters must be present - if tableId == "" { - fmt.Println("Missing table id") - return c.JSON(http.StatusBadRequest, errors.CreateSimpleBadRequest("Missing table id - please try again")) - } - - // call repository - cfg := c.(*contexts.GohanContext).Config - es := c.(*contexts.GohanContext).Es7Client - results, deleteError := esRepo.DeleteTableById(cfg, es, c.Request().Context(), tableId) - if deleteError != nil { - fmt.Printf("Failed to delete tables with ID %s\n", tableId) - return c.JSON(http.StatusInternalServerError, errors.CreateSimpleInternalServerError("Something went wrong.. Please try again later!")) - } - - // gather 'deleted table' data from "deleted" - numDeleted := 0.0 - docsHits := results["deleted"] - if docsHits != nil { - numDeleted = docsHits.(float64) - } else { - fmt.Printf("No Tables with ID '%s' were deleted\n", tableId) - return c.JSON(http.StatusBadRequest, errors.CreateSimpleBadRequest(fmt.Sprintf("Failed to delete tables with ID %s", tableId))) - } - if numDeleted == 0 { - fmt.Printf("No Tables with ID '%s' were deleted\n", tableId) - return c.JSON(http.StatusNotFound, errors.CreateSimpleNotFound(fmt.Sprintf("No table with ID %s", tableId))) - } - - // spin off the deletion of variants associated with - // the tableId provided in a go routine if the table - // was successfully deleted and assume the variants - // deletion completes successfully in the background - go func(_tableId string) { - var message string - - // delete variants associated with this table id - deletedVariants, deleteVariantsError := esRepo.DeleteVariantsByTableId(es, cfg, _tableId) - if deleteVariantsError != nil { - fmt.Printf("Failed to delete variants associated with table ID %s\n", tableId) - - // "do nothing" - return - } - - // successfully attempted to delete variants (if any) - - // get deletion details (if any) - deletedVariantsResults := deletedVariants["deleted"] - if deletedVariantsResults == nil { - message = fmt.Sprintf("Failed to delete variants associated with table ID %s", _tableId) - } else { - numDeletedVariants := int(deletedVariantsResults.(float64)) - message = fmt.Sprintf("Successfully deleted %d variants associated with table ID %s", numDeletedVariants, _tableId) - } - fmt.Println(message) - }(tableId) - // TODO: ensure that no variants exist without a valid tableId - - fmt.Printf("Successfully Deleted Table(s) with ID '%s' . Variants will be deleted in the background!\n", tableId) //numDeletedVariants - return c.NoContent(204) -} From b9254f86d7f2ed51034cca1a0ce41e0671920680 Mon Sep 17 00:00:00 2001 From: Brennan Brouillette Date: Mon, 5 Jun 2023 15:34:52 -0400 Subject: [PATCH 02/49] chore: removed dtos, constants, index/pseudo-fk, --- src/api/models/constants/main.go | 1 - src/api/models/dtos/main.go | 20 -------------------- src/api/models/indexes/main.go | 11 ----------- 3 files changed, 32 deletions(-) diff --git a/src/api/models/constants/main.go b/src/api/models/constants/main.go index 74884d7f..ebadb64b 100644 --- a/src/api/models/constants/main.go +++ b/src/api/models/constants/main.go @@ -1,6 +1,5 @@ package constants -var ValidTableDataTypes = []string{"variant"} var VcfHeaders = []string{"chrom", "pos", "id", "ref", "alt", "qual", "filter", "info", "format"} /* diff --git a/src/api/models/dtos/main.go b/src/api/models/dtos/main.go index ee016f35..539c87df 100644 --- a/src/api/models/dtos/main.go +++ b/src/api/models/dtos/main.go @@ -67,26 +67,6 @@ type GenesResponseDTO struct { Results []indexes.Gene `json:"results"` // []Gene } -// -- Tables -type CreateTableRequestDto struct { - Name string `json:"name"` - DataType string `json:"data_type"` - Dataset string `json:"dataset"` - Metadata map[string]interface{} `json:"metadata"` // TODO: type-safety? -} -type CreateTableResponseDto struct { - // --- testing: combine dto with an index model - // - makes for a clean 'single-layer' json response object - indexes.Table - - Message string `json:"message,omitempty"` - Error string `json:"error,omitempty"` -} -type TableSummaryResponseDto struct { - Count int `json:"count"` - DataTypeSpecific map[string]interface{} `json:"data_type_specific"` // TODO: type-safety? -} - // -- Errors type GeneralErrorResponseDto struct { Status int `json:"status,omitempty"` diff --git a/src/api/models/indexes/main.go b/src/api/models/indexes/main.go index 12f94edd..8d6e2710 100644 --- a/src/api/models/indexes/main.go +++ b/src/api/models/indexes/main.go @@ -18,7 +18,6 @@ type Variant struct { Sample Sample `json:"sample"` FileId string `json:"fileId"` - TableId string `json:"tableId"` AssemblyId c.AssemblyId `json:"assemblyId"` } @@ -55,13 +54,3 @@ type Gene struct { End int `json:"end"` AssemblyId c.AssemblyId `json:"assemblyId"` } - -type Table struct { - Id string `json:"id,omitempty"` // TODO: UUID ? - Name string `json:"name,omitempty"` - DataType string `json:"data_type,omitempty"` - Dataset string `json:"dataset,omitempty"` - AssemblyIds []string `json:"assembly_ids,omitempty"` - Metadata map[string]interface{} `json:"metadata,omitempty"` // TODO: type-safety? - Schema map[string]interface{} `json:"schema,omitempty"` -} From c47d426eb64fe080577a06ef502d2da634b61fda Mon Sep 17 00:00:00 2001 From: Brennan Brouillette Date: Mon, 5 Jun 2023 15:37:17 -0400 Subject: [PATCH 03/49] patch: metadata schema --- src/api/models/schemas/schemas.go | 4 ++-- src/api/mvc/data-types/main.go | 3 +-- src/api/mvc/main.go | 10 ++-------- src/api/services/variants/main.go | 6 +----- 4 files changed, 6 insertions(+), 17 deletions(-) diff --git a/src/api/models/schemas/schemas.go b/src/api/models/schemas/schemas.go index 822c1226..7e520ea6 100644 --- a/src/api/models/schemas/schemas.go +++ b/src/api/models/schemas/schemas.go @@ -7,8 +7,8 @@ import ( type Schema map[string]interface{} -var VARIANT_TABLE_METADATA_SCHEMA Schema = map[string]interface{}{ - "$id": "variant:table_metadata", // TODO: Real ID +var VARIANT_METADATA_SCHEMA Schema = map[string]interface{}{ + "$id": "variant:metadata", // TODO: Real ID "$schema": "http://json-schema.org/draft-07/schema#", "description": "Bento variant data type metadata schema", "type": "object", diff --git a/src/api/mvc/data-types/main.go b/src/api/mvc/data-types/main.go index 16ca0230..299ee94f 100644 --- a/src/api/mvc/data-types/main.go +++ b/src/api/mvc/data-types/main.go @@ -18,7 +18,6 @@ var variantDataTypeJson = map[string]interface{}{ "schema": schemas.VARIANT_SCHEMA, } -// "metadata_schema": schemas.VARIANT_TABLE_METADATA_SCHEMA, func GetDataTypes(c echo.Context) error { es := c.(*contexts.GohanContext).Es7Client cfg := c.(*contexts.GohanContext).Config @@ -44,7 +43,7 @@ func GetVariantDataTypeSchema(c echo.Context) error { } func GetVariantDataTypeMetadataSchema(c echo.Context) error { - return c.JSON(http.StatusOK, schemas.VARIANT_TABLE_METADATA_SCHEMA) + return c.JSON(http.StatusOK, schemas.VARIANT_METADATA_SCHEMA) } // - helpers diff --git a/src/api/mvc/main.go b/src/api/mvc/main.go index 5d7736a1..0c61cf6b 100644 --- a/src/api/mvc/main.go +++ b/src/api/mvc/main.go @@ -13,7 +13,7 @@ import ( "github.com/labstack/echo" ) -func RetrieveCommonElements(c echo.Context) (*elasticsearch.Client, string, int, int, string, string, []string, constants.GenotypeQuery, constants.AssemblyId, string) { +func RetrieveCommonElements(c echo.Context) (*elasticsearch.Client, string, int, int, string, string, []string, constants.GenotypeQuery, constants.AssemblyId) { es := c.(*contexts.GohanContext).Es7Client chromosome := c.QueryParam("chromosome") @@ -79,11 +79,5 @@ func RetrieveCommonElements(c echo.Context) (*elasticsearch.Client, string, int, assemblyId = a.CastToAssemblyId(assemblyIdQP) } - tableId := c.QueryParam("tableId") - if len(tableId) == 0 { - // if no tableId is provided, assume "wildcard" search - tableId = "*" - } - - return es, chromosome, lowerBound, upperBound, reference, alternative, alleles, genotype, assemblyId, tableId + return es, chromosome, lowerBound, upperBound, reference, alternative, alleles, genotype, assemblyId } diff --git a/src/api/services/variants/main.go b/src/api/services/variants/main.go index 4ed9afce..eac9bfe7 100644 --- a/src/api/services/variants/main.go +++ b/src/api/services/variants/main.go @@ -31,7 +31,7 @@ func GetVariantsOverview(es *elasticsearch.Client, cfg *models.Config) map[strin callGetBucketsByKeyword := func(key string, keyword string, _wg *sync.WaitGroup) { defer _wg.Done() - results, bucketsError := esRepo.GetVariantsBucketsByKeywordAndTableId(cfg, es, keyword, "") + results, bucketsError := esRepo.GetVariantsBucketsByKeyword(cfg, es, keyword) if bucketsError != nil { resultsMux.Lock() defer resultsMux.Unlock() @@ -86,10 +86,6 @@ func GetVariantsOverview(es *elasticsearch.Client, cfg *models.Config) map[strin wg.Add(1) go callGetBucketsByKeyword("assemblyIDs", "assemblyId.keyword", &wg) - // get distribution of table IDs - wg.Add(1) - go callGetBucketsByKeyword("tableIDs", "tableId.keyword", &wg) - wg.Wait() return resultsMap From aeedbf5153c848a8304d02966229a387ab23ae66 Mon Sep 17 00:00:00 2001 From: Brennan Brouillette Date: Mon, 5 Jun 2023 15:38:39 -0400 Subject: [PATCH 04/49] chore: removed tables es layer --- src/api/repositories/elasticsearch/tables.go | 359 ------------------ .../repositories/elasticsearch/variants.go | 96 +---- 2 files changed, 3 insertions(+), 452 deletions(-) delete mode 100644 src/api/repositories/elasticsearch/tables.go diff --git a/src/api/repositories/elasticsearch/tables.go b/src/api/repositories/elasticsearch/tables.go deleted file mode 100644 index b2abc82a..00000000 --- a/src/api/repositories/elasticsearch/tables.go +++ /dev/null @@ -1,359 +0,0 @@ -package elasticsearch - -import ( - // "gohan/api/contexts" - "bytes" - "context" - "crypto/tls" - "encoding/json" - "errors" - "fmt" - "gohan/api/models" - "gohan/api/models/dtos" - "gohan/api/models/indexes" - "gohan/api/models/schemas" - "gohan/api/utils" - "log" - "net/http" - "reflect" - "strings" - "time" - - es7 "github.com/elastic/go-elasticsearch/v7" - - "github.com/elastic/go-elasticsearch/esapi" - "github.com/google/uuid" - "github.com/mitchellh/mapstructure" -) - -const tablesIndex = "tables" - -func CreateTable(es *es7.Client, ctxt context.Context, t dtos.CreateTableRequestDto) (indexes.Table, error) { - - now := time.Now() - - // TODO: improve checks and balances.. - - // merge inbound metadata if any - defaultMeta := map[string]interface{}{ - "created_at": now, - "updated_at": now, - "name": t.Name, - } - - defaultAssemblyIds := []string{ - "GRCh38", - "GRCh37", - "NCBI36", - "Other", - } - - // Create struct instance of the Elasticsearch fields struct object - docStruct := indexes.Table{ - Id: uuid.New().String(), - Name: t.Name, - DataType: t.DataType, - Dataset: t.Dataset, - AssemblyIds: defaultAssemblyIds, - Metadata: defaultMeta, - Schema: schemas.VARIANT_SCHEMA, - } - - fmt.Println("\ndocStruct:", docStruct) - fmt.Println("docStruct TYPE:", reflect.TypeOf(docStruct)) - - // Marshal the struct to JSON and check for errors - b, err := json.Marshal(docStruct) - if err != nil { - fmt.Println("json.Marshal ERROR:", err) - return docStruct, err - } - - // Instantiate a request object - req := esapi.IndexRequest{ - Index: tablesIndex, - Body: strings.NewReader(string(b)), - Refresh: "true", - } - fmt.Println(reflect.TypeOf(req)) - - // Return an API response object from request - res, err := req.Do(ctxt, es) - if err != nil { - fmt.Printf("IndexRequest ERROR: %s\n", err) - return docStruct, err - } - defer res.Body.Close() - - if res.IsError() { - msg := fmt.Sprintf("%s ERROR", res.Status()) - fmt.Println(msg) - return docStruct, errors.New(msg) - } else { - - // Deserialize the response into a map. - var resMap map[string]interface{} - if err := json.NewDecoder(res.Body).Decode(&resMap); err != nil { - log.Printf("Error parsing the response body: %s", err) - } else { - log.Printf("\nIndexRequest() RESPONSE:") - // Print the response status and indexed document version. - fmt.Println("Status:", res.Status()) - fmt.Println("Result:", resMap["result"]) - fmt.Println("Version:", int(resMap["_version"].(float64))) - fmt.Println("resMap:", resMap) - fmt.Println() - } - } - - return docStruct, nil -} - -func GetTables(cfg *models.Config, es *es7.Client, ctxt context.Context, tableId string, dataType string) (map[string]interface{}, error) { - - if cfg.Debug { - http.DefaultTransport.(*http.Transport).TLSClientConfig = &tls.Config{InsecureSkipVerify: true} - } - - // return GetTablesWithoutContext(es, tableId, dataType) - // get table by "any combination of any applicable parameter" query structure - filter := make([]map[string]interface{}, 0) - - if tableId != "" { - - filter = append(filter, map[string]interface{}{ - "term": map[string]string{ - "id.keyword": tableId, - }, - }) - } - if dataType != "" { - filter = append(filter, map[string]interface{}{ - "term": map[string]string{ - "data_type.keyword": dataType, - }, - }) - } - // if `filter` remains an empty array, this will effecetively act as a "wildcard" query - - var buf bytes.Buffer - query := map[string]interface{}{ - "query": map[string]interface{}{ - "bool": map[string]interface{}{ - "filter": filter, - }, - }, - } - - // encode the query - if err := json.NewEncoder(&buf).Encode(query); err != nil { - log.Fatalf("Error encoding query: %s\n", err) - return nil, err - } - // Perform the search request. - res, searchErr := es.Search( - es.Search.WithContext(context.Background()), - es.Search.WithIndex(tablesIndex), - es.Search.WithBody(&buf), - es.Search.WithTrackTotalHits(true), - es.Search.WithPretty(), - ) - if searchErr != nil { - fmt.Printf("Error getting response: %s\n", searchErr) - return nil, searchErr - } - - defer res.Body.Close() - - resultString := res.String() - if cfg.Debug { - fmt.Println(resultString) - } - - // Declared an empty interface - result := make(map[string]interface{}) - - // Unmarshal or Decode the JSON to the interface. - // Known bug: response comes back with a preceding '[200 OK] ' which needs trimming - bracketString, jsonBodyString := utils.GetLeadingStringInBetweenSquareBrackets(resultString) - if !strings.Contains(bracketString, "200") { - return nil, fmt.Errorf("failed to get documents by id : got '%s'", bracketString) - } - // umErr := json.Unmarshal([]byte(resultString[9:]), &result) - umErr := json.Unmarshal([]byte(jsonBodyString), &result) - if umErr != nil { - fmt.Printf("Error unmarshalling response: %s\n", umErr) - return nil, umErr - } - - fmt.Printf("Query End: %s\n", time.Now()) - - return result, nil - -} - -func GetTablesByName(cfg *models.Config, es *es7.Client, ctxt context.Context, tableName string) ([]indexes.Table, error) { - - allTables := make([]indexes.Table, 0) - - // overall query structure - var buf bytes.Buffer - query := map[string]interface{}{ - "query": map[string]interface{}{ - "bool": map[string]interface{}{ - "filter": []map[string]interface{}{{ - "term": map[string]interface{}{ - "name.keyword": tableName, - }, - }}, - }, - }, - } - - // encode the query - if err := json.NewEncoder(&buf).Encode(query); err != nil { - log.Fatalf("Error encoding query: %s\n", err) - return allTables, err - } - - if cfg.Debug { - // view the outbound elasticsearch query - myString := string(buf.Bytes()[:]) - fmt.Println(myString) - } - - fmt.Printf("Query Start: %s\n", time.Now()) - - if cfg.Debug { - http.DefaultTransport.(*http.Transport).TLSClientConfig = &tls.Config{InsecureSkipVerify: true} - } - // Perform the search request. - res, searchErr := es.Search( - es.Search.WithContext(context.Background()), - es.Search.WithIndex(tablesIndex), - es.Search.WithBody(&buf), - es.Search.WithTrackTotalHits(true), - es.Search.WithPretty(), - ) - if searchErr != nil { - fmt.Printf("Error getting response: %s\n", searchErr) - return allTables, searchErr - } - - defer res.Body.Close() - - resultString := res.String() - if cfg.Debug { - fmt.Println(resultString) - } - - // TODO: improve stability - // - check for 404 Not Found : assume index simply doesnt exist, return 0 results - if strings.Contains(resultString[0:15], "Not Found") { - return allTables, nil - } - - // Declared an empty interface - result := make(map[string]interface{}) - - // Unmarshal or Decode the JSON to the interface. - // Known bug: response comes back with a preceding '[200 OK] ' which needs trimming - bracketString, jsonBodyString := utils.GetLeadingStringInBetweenSquareBrackets(resultString) - if !strings.Contains(bracketString, "200") { - return nil, fmt.Errorf("failed to get documents by id : got '%s'", bracketString) - } - - umErr := json.Unmarshal([]byte(jsonBodyString), &result) - if umErr != nil { - fmt.Printf("Error unmarshalling response: %s\n", umErr) - return allTables, umErr - } - - fmt.Printf("Query End: %s\n", time.Now()) - - // gather data from "hits" - docsHits := result["hits"].(map[string]interface{})["hits"] - allDocHits := []map[string]interface{}{} - mapstructure.Decode(docsHits, &allDocHits) - - // grab _source for each hit - - for _, r := range allDocHits { - source := r["_source"] - byteSlice, _ := json.Marshal(source) - - // cast map[string]interface{} a table - var resultingTable indexes.Table - if err := json.Unmarshal(byteSlice, &resultingTable); err != nil { - fmt.Println("failed to unmarshal:", err) - } - - // accumulate structs - allTables = append(allTables, resultingTable) - } - - return allTables, nil -} - -func DeleteTableById(cfg *models.Config, es *es7.Client, ctxt context.Context, tableId string) (map[string]interface{}, error) { - - if cfg.Debug { - http.DefaultTransport.(*http.Transport).TLSClientConfig = &tls.Config{InsecureSkipVerify: true} - } - - var buf bytes.Buffer - query := map[string]interface{}{ - "query": map[string]interface{}{ - "match": map[string]interface{}{ - "id": tableId, - }, - }, - } - - // encode the query - if err := json.NewEncoder(&buf).Encode(query); err != nil { - log.Fatalf("Error encoding query: %s\n", err) - return nil, err - } - - if cfg.Debug { - // view the outbound elasticsearch query - myString := string(buf.Bytes()[:]) - fmt.Println(myString) - } - - // Perform the delete request. - deleteRes, deleteErr := es.DeleteByQuery( - []string{tablesIndex}, - bytes.NewReader(buf.Bytes()), - ) - if deleteErr != nil { - fmt.Printf("Error getting response: %s\n", deleteErr) - return nil, deleteErr - } - - defer deleteRes.Body.Close() - - resultString := deleteRes.String() - if cfg.Debug { - fmt.Println(resultString) - } - - // Prepare an empty interface - result := make(map[string]interface{}) - - // Unmarshal or Decode the JSON to the empty interface. - // Known bug: response comes back with a preceding '[200 OK] ' which needs trimming - bracketString, jsonBodyString := utils.GetLeadingStringInBetweenSquareBrackets(resultString) - if !strings.Contains(bracketString, "200") { - return nil, fmt.Errorf("failed to get documents by id : got '%s'", bracketString) - } - // umErr := json.Unmarshal([]byte(resultString[9:]), &result) - umErr := json.Unmarshal([]byte(jsonBodyString), &result) - if umErr != nil { - fmt.Printf("Error unmarshalling gene search response: %s\n", umErr) - return nil, umErr - } - - return result, nil -} diff --git a/src/api/repositories/elasticsearch/variants.go b/src/api/repositories/elasticsearch/variants.go index f777a6d3..1747d653 100644 --- a/src/api/repositories/elasticsearch/variants.go +++ b/src/api/repositories/elasticsearch/variants.go @@ -20,7 +20,6 @@ import ( "gohan/api/utils" "github.com/elastic/go-elasticsearch/v7" - es7 "github.com/elastic/go-elasticsearch/v7" ) const wildcardVariantsIndex = "variants-*" @@ -111,7 +110,7 @@ func GetDocumentsContainerVariantOrSampleIdInPositionRange(cfg *models.Config, e reference string, alternative string, alleles []string, size int, sortByPosition c.SortDirection, includeInfoInResultSet bool, - genotype c.GenotypeQuery, assemblyId c.AssemblyId, tableId string, + genotype c.GenotypeQuery, assemblyId c.AssemblyId, getSampleIdsOnly bool) (map[string]interface{}, error) { // begin building the request body. @@ -171,13 +170,6 @@ func GetDocumentsContainerVariantOrSampleIdInPositionRange(cfg *models.Config, e }) } - if tableId != "" { - mustMap = append(mustMap, map[string]interface{}{ - "query_string": map[string]interface{}{ - "query": "tableId:" + tableId, - }}) - } - rangeMapSlice := []map[string]interface{}{} // TODO: make upperbound and lowerbound nilable, somehow? @@ -325,7 +317,7 @@ func CountDocumentsContainerVariantOrSampleIdInPositionRange(cfg *models.Config, chromosome string, lowerBound int, upperBound int, variantId string, sampleId string, reference string, alternative string, alleles []string, - genotype c.GenotypeQuery, assemblyId c.AssemblyId, tableId string) (map[string]interface{}, error) { + genotype c.GenotypeQuery, assemblyId c.AssemblyId) (map[string]interface{}, error) { // begin building the request body. mustMap := []map[string]interface{}{{ @@ -385,14 +377,6 @@ func CountDocumentsContainerVariantOrSampleIdInPositionRange(cfg *models.Config, }, }) } - - if tableId != "" { - mustMap = append(mustMap, map[string]interface{}{ - "query_string": map[string]interface{}{ - "query": "tableId:" + tableId, - }}) - } - rangeMapSlice := []map[string]interface{}{} // TODO: make upperbound and lowerbound nilable, somehow? @@ -507,7 +491,7 @@ func CountDocumentsContainerVariantOrSampleIdInPositionRange(cfg *models.Config, return result, nil } -func GetVariantsBucketsByKeywordAndTableId(cfg *models.Config, es *elasticsearch.Client, keyword string, tableId string) (map[string]interface{}, error) { +func GetVariantsBucketsByKeyword(cfg *models.Config, es *elasticsearch.Client, keyword string) (map[string]interface{}, error) { // begin building the request body. var buf bytes.Buffer aggMap := map[string]interface{}{ @@ -525,14 +509,6 @@ func GetVariantsBucketsByKeywordAndTableId(cfg *models.Config, es *elasticsearch }, } - if tableId != "" { - aggMap["query"] = map[string]interface{}{ - "match": map[string]interface{}{ - "tableId": tableId, - }, - } - } - // encode the query if err := json.NewEncoder(&buf).Encode(aggMap); err != nil { log.Fatalf("Error encoding aggMap: %s\n", err) @@ -589,72 +565,6 @@ func GetVariantsBucketsByKeywordAndTableId(cfg *models.Config, es *elasticsearch return result, nil } -func DeleteVariantsByTableId(es *es7.Client, cfg *models.Config, tableId string) (map[string]interface{}, error) { - - // cfg := c.(*contexts.GohanContext).Config - // es := c.(*contexts.GohanContext).Es7Client - - if cfg.Debug { - http.DefaultTransport.(*http.Transport).TLSClientConfig = &tls.Config{InsecureSkipVerify: true} - } - - var buf bytes.Buffer - query := map[string]interface{}{ - "query": map[string]interface{}{ - "match": map[string]interface{}{ - "tableId": tableId, - }, - }, - } - - // encode the query - if err := json.NewEncoder(&buf).Encode(query); err != nil { - log.Fatalf("Error encoding query: %s\n", err) - return nil, err - } - - if cfg.Debug { - // view the outbound elasticsearch query - myString := string(buf.Bytes()[:]) - fmt.Println(myString) - } - - // Perform the delete request. - deleteRes, deleteErr := es.DeleteByQuery( - []string{wildcardVariantsIndex}, - bytes.NewReader(buf.Bytes()), - ) - if deleteErr != nil { - fmt.Printf("Error getting response: %s\n", deleteErr) - return nil, deleteErr - } - - defer deleteRes.Body.Close() - - resultString := deleteRes.String() - if cfg.Debug { - fmt.Println(resultString) - } - - // Prepare an empty interface - result := make(map[string]interface{}) - - // Unmarshal or Decode the JSON to the empty interface. - // Known bug: response comes back with a preceding '[200 OK] ' which needs trimming - bracketString, jsonBodyString := utils.GetLeadingStringInBetweenSquareBrackets(resultString) - if !strings.Contains(bracketString, "200") { - return nil, fmt.Errorf("failed to get documents by id : got '%s'", bracketString) - } - - umErr := json.Unmarshal([]byte(jsonBodyString), &result) - if umErr != nil { - fmt.Printf("Error unmarshalling gene search response: %s\n", umErr) - return nil, umErr - } - - return result, nil -} - // -- internal use only -- func addAllelesToShouldMap(alleles []string, genotype c.GenotypeQuery, allelesShouldMap []map[string]interface{}) ([]map[string]interface{}, int) { minimumShouldMatch := 0 From 3fe7247a8bd3b4eaa857d9426442d394b04bb563 Mon Sep 17 00:00:00 2001 From: Brennan Brouillette Date: Mon, 5 Jun 2023 15:39:15 -0400 Subject: [PATCH 05/49] chore: patch ingestion, santitation, workflows --- src/api/mvc/variants/main.go | 16 +- src/api/services/ingestion.go | 3 +- src/api/services/sanitation/main.go | 75 +---- .../tests/integration/api/api_table_test.go | 293 ------------------ src/api/workflows/vcf_gz.wdl | 5 +- 5 files changed, 11 insertions(+), 381 deletions(-) delete mode 100644 src/api/tests/integration/api/api_table_test.go diff --git a/src/api/mvc/variants/main.go b/src/api/mvc/variants/main.go index 0922f0b9..f5275f99 100644 --- a/src/api/mvc/variants/main.go +++ b/src/api/mvc/variants/main.go @@ -207,8 +207,6 @@ func VariantsIngest(c echo.Context) error { } assemblyId := a.CastToAssemblyId(c.QueryParam("assemblyId")) - tableId := c.QueryParam("tableId") - // TODO: validate table exists in elasticsearch // -- optional filter var ( @@ -410,7 +408,7 @@ func VariantsIngest(c echo.Context) error { // --- load vcf into memory and ingest the vcf file into elasticsearch beginProcessingTime := time.Now() fmt.Printf("Begin processing %s at [%s]\n", gzippedFilePath, beginProcessingTime) - ingestionService.ProcessVcf(gzippedFilePath, drsFileId, tableId, assemblyId, filterOutReferences, cfg.Api.LineProcessingConcurrencyLevel) + ingestionService.ProcessVcf(gzippedFilePath, drsFileId, assemblyId, filterOutReferences, cfg.Api.LineProcessingConcurrencyLevel) fmt.Printf("Ingest duration for file at %s : %s\n", gzippedFilePath, time.Since(beginProcessingTime)) reqStat.State = ingest.Done @@ -450,7 +448,7 @@ func GetAllVariantIngestionRequests(c echo.Context) error { func executeGetByIds(c echo.Context, ids []string, isVariantIdQuery bool, isDocumentIdQuery bool) error { cfg := c.(*contexts.GohanContext).Config - var es, chromosome, lowerBound, upperBound, reference, alternative, alleles, genotype, assemblyId, tableId = mvc.RetrieveCommonElements(c) + var es, chromosome, lowerBound, upperBound, reference, alternative, alleles, genotype, assemblyId = mvc.RetrieveCommonElements(c) // retrieve other query parameters relevent to this 'get' query --- getSampleIdsOnlyQP := c.QueryParam("getSampleIdsOnly") @@ -536,7 +534,7 @@ func executeGetByIds(c echo.Context, ids []string, isVariantIdQuery bool, isDocu _id, "", // note : "" is for sampleId reference, alternative, alleles, size, sortByPosition, - includeInfoInResultSet, genotype, assemblyId, tableId, + includeInfoInResultSet, genotype, assemblyId, getSampleIdsOnly) } else { @@ -561,7 +559,7 @@ func executeGetByIds(c echo.Context, ids []string, isVariantIdQuery bool, isDocu "", _id, // note : "" is for variantId reference, alternative, alleles, size, sortByPosition, - includeInfoInResultSet, genotype, assemblyId, tableId, + includeInfoInResultSet, genotype, assemblyId, false) } @@ -689,7 +687,7 @@ func executeGetByIds(c echo.Context, ids []string, isVariantIdQuery bool, isDocu func executeCountByIds(c echo.Context, ids []string, isVariantIdQuery bool) error { cfg := c.(*contexts.GohanContext).Config - var es, chromosome, lowerBound, upperBound, reference, alternative, alleles, genotype, assemblyId, tableId = mvc.RetrieveCommonElements(c) + var es, chromosome, lowerBound, upperBound, reference, alternative, alleles, genotype, assemblyId = mvc.RetrieveCommonElements(c) respDTO := dtos.VariantCountReponse{ Results: make([]dtos.VariantCountResult, 0), @@ -719,7 +717,7 @@ func executeCountByIds(c echo.Context, ids []string, isVariantIdQuery bool) erro docs, countError = esRepo.CountDocumentsContainerVariantOrSampleIdInPositionRange(cfg, es, chromosome, lowerBound, upperBound, _id, "", // note : "" is for sampleId - reference, alternative, alleles, genotype, assemblyId, tableId) + reference, alternative, alleles, genotype, assemblyId) } else { // implied sampleId query fmt.Printf("Executing Count-Samples for SampleId %s\n", _id) @@ -728,7 +726,7 @@ func executeCountByIds(c echo.Context, ids []string, isVariantIdQuery bool) erro docs, countError = esRepo.CountDocumentsContainerVariantOrSampleIdInPositionRange(cfg, es, chromosome, lowerBound, upperBound, "", _id, // note : "" is for variantId - reference, alternative, alleles, genotype, assemblyId, tableId) + reference, alternative, alleles, genotype, assemblyId) } if countError != nil { diff --git a/src/api/services/ingestion.go b/src/api/services/ingestion.go index 07e35551..117c0ddd 100644 --- a/src/api/services/ingestion.go +++ b/src/api/services/ingestion.go @@ -341,7 +341,7 @@ func (i *IngestionService) UploadVcfGzToDrs(cfg *models.Config, drsBridgeDirecto } func (i *IngestionService) ProcessVcf( - gzippedFilePath string, drsFileId string, tableId string, + gzippedFilePath string, drsFileId string, assemblyId constants.AssemblyId, filterOutReferences bool, lineProcessingConcurrencyLevel int) { @@ -419,7 +419,6 @@ func (i *IngestionService) ProcessVcf( tmpVariant["fileId"] = drsFileId tmpVariant["assemblyId"] = assemblyId - tmpVariant["tableId"] = tableId // skip this call if need be skipThisCall := false diff --git a/src/api/services/sanitation/main.go b/src/api/services/sanitation/main.go index 7b24a582..4b15cf6d 100644 --- a/src/api/services/sanitation/main.go +++ b/src/api/services/sanitation/main.go @@ -1,20 +1,13 @@ package sanitation import ( - "context" - "encoding/json" "fmt" "time" es7 "github.com/elastic/go-elasticsearch/v7" "github.com/go-co-op/gocron" - "github.com/mitchellh/mapstructure" "gohan/api/models" - "gohan/api/models/indexes" - esRepo "gohan/api/repositories/elasticsearch" - - variantsService "gohan/api/services/variants" ) type ( @@ -46,7 +39,7 @@ func (ss *SanitationService) Init() { // context, that would mean performing something like // - removing duplicate documents // - cleaning documents that have broken pseudo-foreign keys - // - variants -> tables + // - variants -> tables (no longer necessary) // etc... go func() { // setup cron job @@ -54,71 +47,7 @@ func (ss *SanitationService) Init() { // clean variant documents with non-existing tables s.Every(1).Days().At("04:00:00").Do(func() { // 12am EST - fmt.Printf("[%s] - Running variant documents cleanup..\n", time.Now()) - - // - get all available tables - tables, tablesError := esRepo.GetTables(ss.Config, ss.Es7Client, context.Background(), "", "variant") - if tablesError != nil { - fmt.Printf("[%s] - Error getting tables : %v..\n", time.Now(), tablesError) - return - } - - // gather data from "hits" - docsHits := tables["hits"].(map[string]interface{})["hits"] - allDocHits := []map[string]interface{}{} - mapstructure.Decode(docsHits, &allDocHits) - - // grab _source for each hit - tableIds := make([]string, 0) - for _, r := range allDocHits { - source := r["_source"] - byteSlice, _ := json.Marshal(source) - - // cast map[string]interface{} to table - var resultingTable indexes.Table - if err := json.Unmarshal(byteSlice, &resultingTable); err != nil { - fmt.Println("failed to unmarshal:", err) - continue - } - - // accumulate structs - tableIds = append(tableIds, resultingTable.Id) - } - fmt.Printf("[%s] - Table IDs found : %v..\n", time.Now(), tableIds) - - // - obtain distribution of table IDs accross all variants - // TODO: refactor not use variants-mvc package to access this (anti-pattern) - variantsOverview := variantsService.GetVariantsOverview(ss.Es7Client, ss.Config) - if variantsOverview == nil { - return - } - if variantsOverview["tableIDs"] == nil { - return - } - - variantTableIdsCountsMap := variantsOverview["tableIDs"].(map[string]interface{}) - - variantTableIds := make([]string, 0) - for _variantTableId, _ := range variantTableIdsCountsMap { - // ignore variant count set to _ - - // accumulate IDs found in list - variantTableIds = append(variantTableIds, _variantTableId) - } - fmt.Printf("[%s] - Tables IDs found across all variants : %v..\n", time.Now(), variantTableIds) - - // obtain set-difference between variant-table IDs table IDs - setDiff := setDifference(tableIds, variantTableIds) - fmt.Printf("[%s] - Variant Table ID Difference: %v..\n", time.Now(), setDiff) - - // delete variants with table IDs found in this set difference - for _, differentId := range setDiff { - // TODO: refactor - // fire and forget - go func(_differentId string) { - _, _ = esRepo.DeleteVariantsByTableId(ss.Es7Client, ss.Config, _differentId) - }(differentId) - } + // nothing for now }) // starts the scheduler in blocking mode, which blocks diff --git a/src/api/tests/integration/api/api_table_test.go b/src/api/tests/integration/api/api_table_test.go deleted file mode 100644 index 28fce735..00000000 --- a/src/api/tests/integration/api/api_table_test.go +++ /dev/null @@ -1,293 +0,0 @@ -package api - -import ( - "bytes" - "encoding/json" - "fmt" - "gohan/api/models" - "gohan/api/models/dtos" - "gohan/api/models/indexes" - common "gohan/api/tests/common" - "gohan/api/utils" - "io/ioutil" - "net/http" - "testing" - - "github.com/stretchr/testify/assert" -) - -const ( - GetVariantTablesPath string = "%s/tables?data-type=variant" - GetTableByIdPathWithPlaceholder string = "%s/tables/%s" - GetTableSummaryByIdPathWithPlaceholder string = "%s/tables/%s/summary" - DeleteTableByIdPathWithPlaceholder string = "%s/tables/%s" - PostCreateTablePath string = "%s/tables" -) - -func TestCanGetVariantTables(t *testing.T) { - cfg := common.InitConfig() - - // get all available 'variant' tables - allTableDtos := getVariantTables(t, cfg) - assert.NotNil(t, allTableDtos) -} - -func TestCanCreateTable(t *testing.T) { - cfg := common.InitConfig() - - // create table - createTablesRespJson := createVariantTable(t, cfg) - - // test get-by-id with newly created table - newTableId := createTablesRespJson.Id - getTableByIdUrl := fmt.Sprintf(GetTableByIdPathWithPlaceholder, cfg.Api.Url, newTableId) - - // TODO: refactor - // ================ - request, _ := http.NewRequest("GET", getTableByIdUrl, nil) - - client := &http.Client{} - response, responseErr := client.Do(request) - assert.Nil(t, responseErr) - - defer response.Body.Close() - - // this test (at the time of writing) will only work if authorization is disabled - shouldBe := 200 - assert.Equal(t, shouldBe, response.StatusCode, fmt.Sprintf("Error -- Api GET %s Status: %s ; Should be %d", getTableByIdUrl, response.Status, shouldBe)) - - // -- interpret array of available tables from response as a serialized json byte string - tableRespBody, tableRespBodyErr := ioutil.ReadAll(response.Body) - assert.Nil(t, tableRespBodyErr) - - // --- transform body bytes to string - tableRespBodyString := string(tableRespBody) - - // -- check for json error - var getTableByIdResp indexes.Table - getTableByIdRespUnmarshallingError := json.Unmarshal([]byte(tableRespBodyString), &getTableByIdResp) - assert.Nil(t, getTableByIdRespUnmarshallingError) - - // ================ - - // -- ensure the table ids are the same - assert.True(t, getTableByIdResp.Id == newTableId) - -} - -func TestCanGetAllTablesById(t *testing.T) { - cfg := common.InitConfig() - - allTableDtos := getVariantTables(t, cfg) - assert.NotNil(t, allTableDtos) - assert.True(t, len(allTableDtos) > 0) - - for _, table := range allTableDtos { - - tableId := table.Id - getTableByIdUrl := fmt.Sprintf(GetTableByIdPathWithPlaceholder, cfg.Api.Url, tableId) - - // TODO: refactor - // ================ - request, _ := http.NewRequest("GET", getTableByIdUrl, nil) - - client := &http.Client{} - response, responseErr := client.Do(request) - assert.Nil(t, responseErr) - - defer response.Body.Close() - - // this test (at the time of writing) will only work if authorization is disabled - shouldBe := 200 - assert.Equal(t, shouldBe, response.StatusCode, fmt.Sprintf("Error -- Api GET %s Status: %s ; Should be %d", getTableByIdUrl, response.Status, shouldBe)) - - // -- interpret array of available tables from response - tableRespBody, tableRespBodyErr := ioutil.ReadAll(response.Body) - assert.Nil(t, tableRespBodyErr) - - // --- transform body bytes to string - tableRespBodyString := string(tableRespBody) - - // -- check for json error - var tablesRespJson indexes.Table - tableJsonUnmarshallingError := json.Unmarshal([]byte(tableRespBodyString), &tablesRespJson) - assert.Nil(t, tableJsonUnmarshallingError) - - // ================ - - // -- ensure the table ids are the same - assert.True(t, tablesRespJson.Id == tableId) - } -} -func TestCannotGetTablesWithInvalidIds(t *testing.T) { - cfg := common.InitConfig() - - // test with an empty id, and a random string - // both cases should result in a 400 bad request - for _, invalidTableId := range []string{"", utils.RandomString(32)} { - getTableSummaryByIdUrl := fmt.Sprintf(GetTableSummaryByIdPathWithPlaceholder, cfg.Api.Url, invalidTableId) - - request, _ := http.NewRequest("GET", getTableSummaryByIdUrl, nil) - - client := &http.Client{} - response, responseErr := client.Do(request) - assert.Nil(t, responseErr) - - defer response.Body.Close() - - shouldBe := 400 - assert.Equal(t, shouldBe, response.StatusCode, fmt.Sprintf("Error -- Api GET %s Status: %s ; Should be %d", getTableSummaryByIdUrl, response.Status, shouldBe)) - } -} - -func TestCanGetAllTableSummariesById(t *testing.T) { - cfg := common.InitConfig() - - allTableDtos := getVariantTables(t, cfg) - assert.NotNil(t, allTableDtos) - assert.True(t, len(allTableDtos) > 0) - - for _, table := range allTableDtos { - - tableId := table.Id - getTableSummaryByIdUrl := fmt.Sprintf(GetTableSummaryByIdPathWithPlaceholder, cfg.Api.Url, tableId) - - // TODO: refactor - // ================ - request, _ := http.NewRequest("GET", getTableSummaryByIdUrl, nil) - - client := &http.Client{} - response, responseErr := client.Do(request) - assert.Nil(t, responseErr) - - defer response.Body.Close() - - // this test (at the time of writing) will only work if authorization is disabled - shouldBe := 200 - assert.Equal(t, shouldBe, response.StatusCode, fmt.Sprintf("Error -- Api GET %s Status: %s ; Should be %d", getTableSummaryByIdUrl, response.Status, shouldBe)) - - // -- interpret array of available tables from response - tableSummaryRespBody, tableSummaryRespBodyErr := ioutil.ReadAll(response.Body) - assert.Nil(t, tableSummaryRespBodyErr) - - // --- transform body bytes to string - tableSummaryRespBodyString := string(tableSummaryRespBody) - - // -- check for json error - var tableSummary dtos.TableSummaryResponseDto - tableJsonUnmarshallingError := json.Unmarshal([]byte(tableSummaryRespBodyString), &tableSummary) - assert.Nil(t, tableJsonUnmarshallingError) - - // ================ - - // -- ensure table summary is valid - assert.NotNil(t, tableSummary.Count) - assert.NotNil(t, tableSummary.DataTypeSpecific) - } -} - -func TestCanDeleteTableById(t *testing.T) { - cfg := common.InitConfig() - - // create table - createTablesRespJson := createVariantTable(t, cfg) - - // test get-by-id with newly created table - newTableId := createTablesRespJson.Id - deleteTableByIdUrl := fmt.Sprintf(DeleteTableByIdPathWithPlaceholder, cfg.Api.Url, newTableId) - - // TODO: refactor - // ================ - request, _ := http.NewRequest("DELETE", deleteTableByIdUrl, nil) - - client := &http.Client{} - response, responseErr := client.Do(request) - assert.Nil(t, responseErr) - - defer response.Body.Close() - - shouldBe := 204 - assert.Equal(t, shouldBe, response.StatusCode, fmt.Sprintf("Error -- Api DELETE %s Status: %s ; Should be %d", deleteTableByIdUrl, response.Status, shouldBe)) - - // ================ -} - -func getVariantTables(_t *testing.T, _cfg *models.Config) []indexes.Table { - url := fmt.Sprintf(GetVariantTablesPath, _cfg.Api.Url) - request, _ := http.NewRequest("GET", url, nil) - - client := &http.Client{} - response, responseErr := client.Do(request) - assert.Nil(_t, responseErr) - - defer response.Body.Close() - - // this test (at the time of writing) will only work if authorization is disabled - shouldBe := 200 - assert.Equal(_t, shouldBe, response.StatusCode, fmt.Sprintf("Error -- Api GET %s Status: %s ; Should be %d", url, response.Status, shouldBe)) - - // -- interpret array of available tables from response - overviewRespBody, overviewRespBodyErr := ioutil.ReadAll(response.Body) - assert.Nil(_t, overviewRespBodyErr) - - // --- transform body bytes to string - overviewRespBodyString := string(overviewRespBody) - - // -- check for json error - var tableDtos []indexes.Table - overviewJsonUnmarshallingError := json.Unmarshal([]byte(overviewRespBodyString), &tableDtos) - assert.Nil(_t, overviewJsonUnmarshallingError) - - return tableDtos -} - -func createVariantTable(_t *testing.T, _cfg *models.Config) dtos.CreateTableResponseDto { - // prepare request - postCreateTableUrl := fmt.Sprintf(PostCreateTablePath, _cfg.Api.Url) - data := dtos.CreateTableRequestDto{ - Name: utils.RandomString(32), // random table name - DataType: "variant", // set variant data_type - Dataset: utils.RandomString(32), // random dataset name - Metadata: map[string]interface{}{}, // TODO : expand ? - } - dataBytes, err := json.Marshal(data) - if err != nil { - panic(err) - } - dataString := string(dataBytes) - - r, _ := http.NewRequest("POST", postCreateTableUrl, bytes.NewBufferString(dataString)) - r.Header.Add("Content-Type", "application/json") - - // perform request - client := &http.Client{} - resp, err := client.Do(r) - if err != nil { - fmt.Printf("Table Creation error: %s\n", err) - } - defer resp.Body.Close() - - fmt.Printf("Table Creation status: %d\n", resp.StatusCode) - - // obtain the newly created table - // -- interpret create-table dto from response - createTableRespBody, createTableRespBodyErr := ioutil.ReadAll(resp.Body) - assert.Nil(_t, createTableRespBodyErr) - - // --- transform body bytes to string - createTableRespBodyString := string(createTableRespBody) - - // -- check for json error - var createTablesRespJson dtos.CreateTableResponseDto - createTableJsonUnmarshallingError := json.Unmarshal([]byte(createTableRespBodyString), &createTablesRespJson) - assert.Nil(_t, createTableJsonUnmarshallingError) - - // -- ensure table was successfully created - assert.Empty(_t, createTablesRespJson.Error) - - assert.NotNil(_t, createTablesRespJson.Table) - assert.NotNil(_t, createTablesRespJson.Table.Id) - assert.NotEmpty(_t, createTablesRespJson.Table.Id) - - return createTablesRespJson -} diff --git a/src/api/workflows/vcf_gz.wdl b/src/api/workflows/vcf_gz.wdl index fd820d61..0944b975 100644 --- a/src/api/workflows/vcf_gz.wdl +++ b/src/api/workflows/vcf_gz.wdl @@ -3,7 +3,6 @@ workflow vcf_gz { Array[File] vcf_gz_file_names # redundant Array[String] original_vcf_gz_file_paths String assembly_id - String table_id String filter_out_references String temp_token String temp_token_host @@ -14,7 +13,6 @@ workflow vcf_gz { input: gohan_url = gohan_url, vcf_gz_file_name = file_name, assembly_id = assembly_id, - table_id = table_id, filter_out_references = filter_out_references, temp_token = temp_token, temp_token_host = temp_token_host @@ -27,7 +25,6 @@ task vcf_gz_gohan { String gohan_url String vcf_gz_file_name String assembly_id - String table_id String filter_out_references String temp_token String temp_token_host @@ -35,7 +32,7 @@ task vcf_gz_gohan { command { echo "Using temporary-token : ${temp_token}" - QUERY="fileNames=${vcf_gz_file_name}&assemblyId=${assembly_id}&tableId=${table_id}&filterOutReferences=${filter_out_references}" + QUERY="fileNames=${vcf_gz_file_name}&assemblyId=${assembly_id}&filterOutReferences=${filter_out_references}" # TODO: refactor # append temporary-token header if present From 7796caa104c03bb6d03f20c79acad38ac6ba6b97 Mon Sep 17 00:00:00 2001 From: Brennan Brouillette Date: Mon, 5 Jun 2023 15:39:24 -0400 Subject: [PATCH 06/49] patch: readme --- README.md | 137 +----------------------------------------------------- 1 file changed, 1 insertion(+), 136 deletions(-) diff --git a/README.md b/README.md index 0ce64f21..dc60ffe3 100644 --- a/README.md +++ b/README.md @@ -77,25 +77,10 @@ # view catalogue curl -k https://gohan.local/genes/overview - - # create table - DATA='{ - "name": "Gohan Box Test Table", - "data_type": "variant", - "dataset": "00000000-0000-0000-0000-000000000000", - "metadata": {} - }' - curl -k -0 -v -X POST https://gohan.local/tables \ - -H 'Content-Type:application/json' \ - --data "$(echo $DATA)" | jq - - # - - # move vcf.gz files to `$GOHAN_API_VCF_PATH` # ingest vcf.gz - curl -k https://gohan.local/variants/ingestion/run\?fileNames=\&assemblyId=GRCh37\&filterOutReferences=true\&tableId= + curl -k https://gohan.local/variants/ingestion/run\?fileNames=\&assemblyId=GRCh37\&filterOutReferences=true # monitor progress: curl -k https://gohan.local/variants/ingestion/requests @@ -483,126 +468,6 @@ Response
-**`/tables`** - -
- - -Request ->   **GET** `/tables`
- -
- -Response ->```json -> [ -> { -> "id": `string`, -> "name": `string`, -> "data_type": `string`, -> "dataset": `string`, -> "assembly_ids": `[]string`, -> "metadata": {...}, -> "schema": {...}, -> }, -> ... -> ] -> ``` - -
-
- - -Request ->   **POST** `/tables`
->```json -> { -> "name": `string`, -> "data_type": `string`, -> "dataset": `string`, -> "metadata": {...}, -> } -> ``` - -
- -Response ->```json -> { -> "id": `string`, -> "name": `string`, -> "data_type": `string`, -> "dataset": `string`, -> "assembly_ids": `[]string`, -> "metadata": {...}, -> "schema": {...}, -> } -> ``` - - -
-
- - -Request ->   **GET** `/tables/:id`
->    path params: -> - id : **string (UUID)** `(required)` - -
- -Response ->```json -> { -> "id": `string`, -> "name": `string`, -> "data_type": `string`, -> "dataset": `string`, -> "assembly_ids": `[]string`, -> "metadata": {...}, -> "schema": {...}, -> } -> ``` - -
-
- - -Request ->   **GET** `/tables/:id/summary`
->    path params: -> - id : **string (UUID)** `(required)` - -
- -Response ->```json -> { -> "count": `int`, -> "data_type_specific": {...}, -> } -> ``` - -
-
- - -Request ->   **DELETE** `/tables/:id`
->    path params: -> - id : **string (UUID)** `(required)` - -
- -Response - -`Status Code:` **204** - -
-
- - - ## Deployments : From db59ca81214ecbc41cda7899a3cf1c038bddee8e Mon Sep 17 00:00:00 2001 From: Brennan Brouillette Date: Tue, 6 Jun 2023 17:43:06 -0400 Subject: [PATCH 07/49] chore: begin associating variants with a dataset --- src/api/main.go | 6 +++-- src/api/middleware/datasetMiddleware.go | 35 +++++++++++++++++++++++++ src/api/models/indexes/main.go | 1 + src/api/mvc/variants/main.go | 3 ++- src/api/services/ingestion.go | 3 ++- src/api/workflows/vcf_gz.wdl | 5 +++- 6 files changed, 48 insertions(+), 5 deletions(-) create mode 100644 src/api/middleware/datasetMiddleware.go diff --git a/src/api/main.go b/src/api/main.go index 9bf35377..eeadc496 100644 --- a/src/api/main.go +++ b/src/api/main.go @@ -175,13 +175,15 @@ func main() { // TODO: refactor (deduplicate) -- e.GET("/variants/ingestion/run", variantsMvc.VariantsIngest, // middleware - gam.MandateAssemblyIdAttribute) + gam.MandateAssemblyIdAttribute, + gam.MandateDatasetAttribute) e.GET("/variants/ingestion/requests", variantsMvc.GetAllVariantIngestionRequests) e.GET("/variants/ingestion/stats", variantsMvc.VariantsIngestionStats) e.GET("/private/variants/ingestion/run", variantsMvc.VariantsIngest, // middleware - gam.MandateAssemblyIdAttribute) + gam.MandateAssemblyIdAttribute, + gam.MandateDatasetAttribute) e.GET("/private/variants/ingestion/requests", variantsMvc.GetAllVariantIngestionRequests) // -- diff --git a/src/api/middleware/datasetMiddleware.go b/src/api/middleware/datasetMiddleware.go new file mode 100644 index 00000000..a86ddb59 --- /dev/null +++ b/src/api/middleware/datasetMiddleware.go @@ -0,0 +1,35 @@ +package middleware + +import ( + "fmt" + "gohan/api/models/dtos/errors" + "gohan/api/utils" + "net/http" + + "github.com/labstack/echo" +) + +/* +Echo middleware to ensure a valid `dataset` HTTP query parameter was provided +*/ +func MandateDatasetAttribute(next echo.HandlerFunc) echo.HandlerFunc { + return func(c echo.Context) error { + // check for dataset query parameter + dataset := c.QueryParam("dataset") + if len(dataset) == 0 { + // if no id was provided, or is invalid, return an error + return c.JSON(http.StatusBadRequest, errors.CreateSimpleBadRequest("missing dataset")) + } + + // verify dataset is a valid UUID + // - assume it's a valid dataset if it's a uuid, + // further verification is done later + if !utils.IsValidUUID(dataset) { + fmt.Printf("Invalid dataset %s\n", dataset) + + return c.JSON(http.StatusBadRequest, errors.CreateSimpleBadRequest(fmt.Sprintf("invalid dataset %s - please provide a valid uuid", dataset))) + } + + return next(c) + } +} diff --git a/src/api/models/indexes/main.go b/src/api/models/indexes/main.go index 8d6e2710..a53d1017 100644 --- a/src/api/models/indexes/main.go +++ b/src/api/models/indexes/main.go @@ -18,6 +18,7 @@ type Variant struct { Sample Sample `json:"sample"` FileId string `json:"fileId"` + Dataset string `json:"dataset"` AssemblyId c.AssemblyId `json:"assemblyId"` } diff --git a/src/api/mvc/variants/main.go b/src/api/mvc/variants/main.go index f5275f99..522f64b4 100644 --- a/src/api/mvc/variants/main.go +++ b/src/api/mvc/variants/main.go @@ -207,6 +207,7 @@ func VariantsIngest(c echo.Context) error { } assemblyId := a.CastToAssemblyId(c.QueryParam("assemblyId")) + dataset := c.QueryParam("dataset") // -- optional filter var ( @@ -408,7 +409,7 @@ func VariantsIngest(c echo.Context) error { // --- load vcf into memory and ingest the vcf file into elasticsearch beginProcessingTime := time.Now() fmt.Printf("Begin processing %s at [%s]\n", gzippedFilePath, beginProcessingTime) - ingestionService.ProcessVcf(gzippedFilePath, drsFileId, assemblyId, filterOutReferences, cfg.Api.LineProcessingConcurrencyLevel) + ingestionService.ProcessVcf(gzippedFilePath, drsFileId, dataset, assemblyId, filterOutReferences, cfg.Api.LineProcessingConcurrencyLevel) fmt.Printf("Ingest duration for file at %s : %s\n", gzippedFilePath, time.Since(beginProcessingTime)) reqStat.State = ingest.Done diff --git a/src/api/services/ingestion.go b/src/api/services/ingestion.go index 117c0ddd..fa524a55 100644 --- a/src/api/services/ingestion.go +++ b/src/api/services/ingestion.go @@ -341,7 +341,7 @@ func (i *IngestionService) UploadVcfGzToDrs(cfg *models.Config, drsBridgeDirecto } func (i *IngestionService) ProcessVcf( - gzippedFilePath string, drsFileId string, + gzippedFilePath string, drsFileId string, dataset string, assemblyId constants.AssemblyId, filterOutReferences bool, lineProcessingConcurrencyLevel int) { @@ -419,6 +419,7 @@ func (i *IngestionService) ProcessVcf( tmpVariant["fileId"] = drsFileId tmpVariant["assemblyId"] = assemblyId + tmpVariant["dataset"] = dataset // skip this call if need be skipThisCall := false diff --git a/src/api/workflows/vcf_gz.wdl b/src/api/workflows/vcf_gz.wdl index 0944b975..603b54c1 100644 --- a/src/api/workflows/vcf_gz.wdl +++ b/src/api/workflows/vcf_gz.wdl @@ -3,6 +3,7 @@ workflow vcf_gz { Array[File] vcf_gz_file_names # redundant Array[String] original_vcf_gz_file_paths String assembly_id + String dataset String filter_out_references String temp_token String temp_token_host @@ -13,6 +14,7 @@ workflow vcf_gz { input: gohan_url = gohan_url, vcf_gz_file_name = file_name, assembly_id = assembly_id, + dataset = dataset, filter_out_references = filter_out_references, temp_token = temp_token, temp_token_host = temp_token_host @@ -25,6 +27,7 @@ task vcf_gz_gohan { String gohan_url String vcf_gz_file_name String assembly_id + String dataset String filter_out_references String temp_token String temp_token_host @@ -32,7 +35,7 @@ task vcf_gz_gohan { command { echo "Using temporary-token : ${temp_token}" - QUERY="fileNames=${vcf_gz_file_name}&assemblyId=${assembly_id}&filterOutReferences=${filter_out_references}" + QUERY="fileNames=${vcf_gz_file_name}&assemblyId=${assembly_id}&dataset=${dataset}&filterOutReferences=${filter_out_references}" # TODO: refactor # append temporary-token header if present From 1a0e59f823aae1edebbb70237c2585034f7d1350 Mon Sep 17 00:00:00 2001 From: Brennan Brouillette Date: Tue, 6 Jun 2023 17:54:02 -0400 Subject: [PATCH 08/49] chore: add dataset to response object type --- src/api/models/dtos/main.go | 1 + src/api/mvc/variants/main.go | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/api/models/dtos/main.go b/src/api/models/dtos/main.go index 539c87df..823e47bf 100644 --- a/src/api/models/dtos/main.go +++ b/src/api/models/dtos/main.go @@ -55,6 +55,7 @@ type VariantCall struct { // TODO: GenotypeProbability, PhredScaleLikelyhood ? AssemblyId constants.AssemblyId `json:"assemblyId,omitempty"` + Dataset string `json:"dataset,omitempty"` DocumentId string `json:"documentId,omitempty"` } diff --git a/src/api/mvc/variants/main.go b/src/api/mvc/variants/main.go index 522f64b4..393aac3d 100644 --- a/src/api/mvc/variants/main.go +++ b/src/api/mvc/variants/main.go @@ -652,9 +652,9 @@ func executeGetByIds(c echo.Context, ids []string, isVariantIdQuery bool, isDocu SampleId: sampleId, GenotypeType: zygosity.ZygosityToString(variant.Sample.Variation.Genotype.Zygosity), Alleles: []string{alleles.Left, alleles.Right}, - - AssemblyId: variant.AssemblyId, - DocumentId: docId, + Dataset: variant.Dataset, + AssemblyId: variant.AssemblyId, + DocumentId: docId, }) } } From 3a47884b17dd23ab90a24bdb2a050cb63c465efb Mon Sep 17 00:00:00 2001 From: Brennan Brouillette Date: Tue, 6 Jun 2023 17:59:34 -0400 Subject: [PATCH 09/49] chore: add datasets to variants overview --- src/api/services/variants/main.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/api/services/variants/main.go b/src/api/services/variants/main.go index eac9bfe7..f45d7fbb 100644 --- a/src/api/services/variants/main.go +++ b/src/api/services/variants/main.go @@ -86,6 +86,10 @@ func GetVariantsOverview(es *elasticsearch.Client, cfg *models.Config) map[strin wg.Add(1) go callGetBucketsByKeyword("assemblyIDs", "assemblyId.keyword", &wg) + // get distribution of datasets + wg.Add(1) + go callGetBucketsByKeyword("datasets", "dataset.keyword", &wg) + wg.Wait() return resultsMap From 315170aa28e31631baa936425f533b0e8f1bd86e Mon Sep 17 00:00:00 2001 From: Brennan Brouillette Date: Wed, 7 Jun 2023 23:41:10 -0400 Subject: [PATCH 10/49] chore: dataset summary --- src/api/main.go | 3 + src/api/models/dtos/main.go | 6 ++ src/api/mvc/variants/main.go | 63 ++++++++++++- .../repositories/elasticsearch/variants.go | 92 ++++++++++++++++++- 4 files changed, 161 insertions(+), 3 deletions(-) diff --git a/src/api/main.go b/src/api/main.go index eeadc496..fc8a10e8 100644 --- a/src/api/main.go +++ b/src/api/main.go @@ -172,6 +172,9 @@ func main() { gam.MandateSampleIdsSingularAttribute, gam.ValidatePotentialGenotypeQueryParameter) + // --- Dataset + e.GET("/datasets/:dataset/summary", variantsMvc.GetDatasetSummary) + // TODO: refactor (deduplicate) -- e.GET("/variants/ingestion/run", variantsMvc.VariantsIngest, // middleware diff --git a/src/api/models/dtos/main.go b/src/api/models/dtos/main.go index 823e47bf..6d782967 100644 --- a/src/api/models/dtos/main.go +++ b/src/api/models/dtos/main.go @@ -59,6 +59,12 @@ type VariantCall struct { DocumentId string `json:"documentId,omitempty"` } +// --- Dataset +type DatasetSummaryResponseDto struct { + Count int `json:"count"` + DataTypeSpecific map[string]interface{} `json:"data_type_specific"` // TODO: type-safety? +} + // -- Genes type GenesResponseDTO struct { Status int `json:"status"` diff --git a/src/api/mvc/variants/main.go b/src/api/mvc/variants/main.go index 393aac3d..e48c51a7 100644 --- a/src/api/mvc/variants/main.go +++ b/src/api/mvc/variants/main.go @@ -19,6 +19,7 @@ import ( a "gohan/api/models/constants/assembly-id" s "gohan/api/models/constants/sort" "gohan/api/models/dtos" + "gohan/api/models/dtos/errors" "gohan/api/models/indexes" "gohan/api/models/ingest" "gohan/api/mvc" @@ -446,6 +447,64 @@ func GetAllVariantIngestionRequests(c echo.Context) error { return c.JSON(http.StatusOK, m) } +func GetDatasetSummary(c echo.Context) error { + fmt.Printf("[%s] - GetDatasetSummary hit!\n", time.Now()) + + cfg := c.(*contexts.GohanContext).Config + es := c.(*contexts.GohanContext).Es7Client + // obtain dataset from the path + dataset := c.Param("dataset") + + // dataset must be provided + if dataset == "" { + fmt.Println("Missing dataset") + return c.JSON(http.StatusBadRequest, errors.CreateSimpleBadRequest("Missing dataset - please try again")) + } + + totalVariantsCount := 0.0 + + docs, countError := esRepo.CountDocumentsContainerVariantOrSampleIdInPositionRange(cfg, es, + "*", 0, 0, + "", "", // note : both variantId and sampleId are deliberately set to "" + "", "", []string{}, "", "", dataset) + if countError != nil { + fmt.Printf("Failed to count variants in dataset %s\n", dataset) + return c.JSON(http.StatusInternalServerError, errors.CreateSimpleInternalServerError("Something went wrong.. Please try again later!")) + } + + totalVariantsCount = docs["count"].(float64) + + // obtain number of samples associated with this tableId + resultingBuckets, bucketsError := esRepo.GetVariantsBucketsByKeywordAndDataset(cfg, es, "sample.id.keyword", dataset) + if bucketsError != nil { + fmt.Println(resultingBuckets) + } + + // retrieve aggregations.items.buckets + // and count number of samples + bucketsMapped := []interface{}{} + if aggs, aggsOk := resultingBuckets["aggregations"]; aggsOk { + aggsMapped := aggs.(map[string]interface{}) + + if items, itemsOk := aggsMapped["items"]; itemsOk { + itemsMapped := items.(map[string]interface{}) + + if buckets, bucketsOk := itemsMapped["buckets"]; bucketsOk { + bucketsMapped = buckets.([]interface{}) + } + } + } + + fmt.Printf("Successfully Obtained Dataset '%s' Summary \n", dataset) + + return c.JSON(http.StatusOK, &dtos.DatasetSummaryResponseDto{ + Count: int(totalVariantsCount), + DataTypeSpecific: map[string]interface{}{ + "samples": len(bucketsMapped), + }, + }) +} + func executeGetByIds(c echo.Context, ids []string, isVariantIdQuery bool, isDocumentIdQuery bool) error { cfg := c.(*contexts.GohanContext).Config @@ -718,7 +777,7 @@ func executeCountByIds(c echo.Context, ids []string, isVariantIdQuery bool) erro docs, countError = esRepo.CountDocumentsContainerVariantOrSampleIdInPositionRange(cfg, es, chromosome, lowerBound, upperBound, _id, "", // note : "" is for sampleId - reference, alternative, alleles, genotype, assemblyId) + reference, alternative, alleles, genotype, assemblyId, "") } else { // implied sampleId query fmt.Printf("Executing Count-Samples for SampleId %s\n", _id) @@ -727,7 +786,7 @@ func executeCountByIds(c echo.Context, ids []string, isVariantIdQuery bool) erro docs, countError = esRepo.CountDocumentsContainerVariantOrSampleIdInPositionRange(cfg, es, chromosome, lowerBound, upperBound, "", _id, // note : "" is for variantId - reference, alternative, alleles, genotype, assemblyId) + reference, alternative, alleles, genotype, assemblyId, "") } if countError != nil { diff --git a/src/api/repositories/elasticsearch/variants.go b/src/api/repositories/elasticsearch/variants.go index 1747d653..d2cfa782 100644 --- a/src/api/repositories/elasticsearch/variants.go +++ b/src/api/repositories/elasticsearch/variants.go @@ -317,7 +317,7 @@ func CountDocumentsContainerVariantOrSampleIdInPositionRange(cfg *models.Config, chromosome string, lowerBound int, upperBound int, variantId string, sampleId string, reference string, alternative string, alleles []string, - genotype c.GenotypeQuery, assemblyId c.AssemblyId) (map[string]interface{}, error) { + genotype c.GenotypeQuery, assemblyId c.AssemblyId, dataset string) (map[string]interface{}, error) { // begin building the request body. mustMap := []map[string]interface{}{{ @@ -377,6 +377,14 @@ func CountDocumentsContainerVariantOrSampleIdInPositionRange(cfg *models.Config, }, }) } + + if dataset != "" { + mustMap = append(mustMap, map[string]interface{}{ + "query_string": map[string]interface{}{ + "query": "dataset:" + dataset, + }}) + } + rangeMapSlice := []map[string]interface{}{} // TODO: make upperbound and lowerbound nilable, somehow? @@ -565,6 +573,88 @@ func GetVariantsBucketsByKeyword(cfg *models.Config, es *elasticsearch.Client, k return result, nil } +func GetVariantsBucketsByKeywordAndDataset(cfg *models.Config, es *elasticsearch.Client, keyword string, dataset string) (map[string]interface{}, error) { + // begin building the request body. + var buf bytes.Buffer + aggMap := map[string]interface{}{ + "size": "0", + "aggs": map[string]interface{}{ + "items": map[string]interface{}{ + "terms": map[string]interface{}{ + "field": keyword, + "size": "10000", // increases the number of buckets returned (default is 10) + "order": map[string]string{ + "_key": "asc", + }, + }, + }, + }, + } + + if dataset != "" { + aggMap["query"] = map[string]interface{}{ + "match": map[string]interface{}{ + "dataset": dataset, + }, + } + } + + // encode the query + if err := json.NewEncoder(&buf).Encode(aggMap); err != nil { + log.Fatalf("Error encoding aggMap: %s\n", err) + return nil, err + } + + if cfg.Debug { + // view the outbound elasticsearch query + myString := string(buf.Bytes()[:]) + fmt.Println(myString) + } + + if cfg.Debug { + http.DefaultTransport.(*http.Transport).TLSClientConfig = &tls.Config{InsecureSkipVerify: true} + } + // Perform the search request. + res, searchErr := es.Search( + es.Search.WithContext(context.Background()), + es.Search.WithIndex(wildcardVariantsIndex), + es.Search.WithBody(&buf), + es.Search.WithTrackTotalHits(true), + es.Search.WithPretty(), + ) + if searchErr != nil { + fmt.Printf("Error getting response: %s\n", searchErr) + return nil, searchErr + } + + defer res.Body.Close() + + resultString := res.String() + if cfg.Debug { + fmt.Println(resultString) + } + + // Declared an empty interface + result := make(map[string]interface{}) + + // Unmarshal or Decode the JSON to the interface. + // Known bug: response comes back with a preceding '[200 OK] ' which needs trimming + bracketString, jsonBodyString := utils.GetLeadingStringInBetweenSquareBrackets(resultString) + if !strings.Contains(bracketString, "200") { + return nil, fmt.Errorf("failed to get buckets by keyword: got '%s'", bracketString) + } + // umErr := json.Unmarshal([]byte(resultString[9:]), &result) + umErr := json.Unmarshal([]byte(jsonBodyString), &result) + if umErr != nil { + fmt.Printf("Error unmarshalling response: %s\n", umErr) + return nil, umErr + } + + fmt.Printf("Query End: %s\n", time.Now()) + + return result, nil +} + // -- internal use only -- func addAllelesToShouldMap(alleles []string, genotype c.GenotypeQuery, allelesShouldMap []map[string]interface{}) ([]map[string]interface{}, int) { minimumShouldMatch := 0 From 2c2df96d66beae0646dc79bcd02c0b48af5bbb4f Mon Sep 17 00:00:00 2001 From: Brennan Brouillette Date: Thu, 8 Jun 2023 00:32:24 -0400 Subject: [PATCH 11/49] chore: optimize dataset summary --- src/api/go.mod | 2 +- src/api/go.sum | 2 + src/api/mvc/variants/main.go | 84 ++++++++++++------- .../repositories/elasticsearch/variants.go | 2 + 4 files changed, 57 insertions(+), 33 deletions(-) diff --git a/src/api/go.mod b/src/api/go.mod index 5129978c..1011458f 100644 --- a/src/api/go.mod +++ b/src/api/go.mod @@ -29,7 +29,7 @@ require ( github.com/valyala/fasttemplate v1.2.2 // indirect golang.org/x/crypto v0.4.0 // indirect golang.org/x/net v0.3.0 // indirect - golang.org/x/sync v0.0.0-20210220032951-036812b2e83c // indirect + golang.org/x/sync v0.2.0 // indirect golang.org/x/sys v0.3.0 // indirect golang.org/x/text v0.5.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect diff --git a/src/api/go.sum b/src/api/go.sum index f00ea740..9ae3f5e3 100644 --- a/src/api/go.sum +++ b/src/api/go.sum @@ -52,6 +52,8 @@ golang.org/x/net v0.3.0 h1:VWL6FNY2bEEmsGVKabSlHu5Irp34xmMRoqb/9lF9lxk= golang.org/x/net v0.3.0/go.mod h1:MBQ8lrhLObU/6UmLb4fmbmk5OcyYmqtbGd/9yIeKjEE= golang.org/x/sync v0.0.0-20210220032951-036812b2e83c h1:5KslGYwFpkhGh+Q16bwMP3cOontH8FOep7tGV86Y7SQ= golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.2.0 h1:PUR+T4wwASmuSTYdKjYHI5TD22Wy5ogLU5qZCOLxBrI= +golang.org/x/sync v0.2.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210927094055-39ccf1dd6fa6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20211103235746-7861aae1554b/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= diff --git a/src/api/mvc/variants/main.go b/src/api/mvc/variants/main.go index e48c51a7..2f82b81d 100644 --- a/src/api/mvc/variants/main.go +++ b/src/api/mvc/variants/main.go @@ -33,6 +33,8 @@ import ( "github.com/mitchellh/mapstructure" "github.com/labstack/echo" + + "golang.org/x/sync/errgroup" ) func VariantsIngestionStats(c echo.Context) error { @@ -461,48 +463,66 @@ func GetDatasetSummary(c echo.Context) error { return c.JSON(http.StatusBadRequest, errors.CreateSimpleBadRequest("Missing dataset - please try again")) } - totalVariantsCount := 0.0 + // parallelize these two es queries - docs, countError := esRepo.CountDocumentsContainerVariantOrSampleIdInPositionRange(cfg, es, - "*", 0, 0, - "", "", // note : both variantId and sampleId are deliberately set to "" - "", "", []string{}, "", "", dataset) - if countError != nil { - fmt.Printf("Failed to count variants in dataset %s\n", dataset) - return c.JSON(http.StatusInternalServerError, errors.CreateSimpleInternalServerError("Something went wrong.. Please try again later!")) - } + var ( + totalVariantsCount = 0.0 + bucketsMapped = []interface{}{} + g = new(errgroup.Group) + ) + // request #1 + g.Go(func() error { + docs, countError := esRepo.CountDocumentsContainerVariantOrSampleIdInPositionRange(cfg, es, + "*", 0, 0, + "", "", // note : both variantId and sampleId are deliberately set to "" + "", "", []string{}, "", "", dataset) + if countError != nil { + fmt.Printf("Failed to count variants in dataset %s\n", dataset) + return countError + } - totalVariantsCount = docs["count"].(float64) + totalVariantsCount = docs["count"].(float64) + return nil + }) - // obtain number of samples associated with this tableId - resultingBuckets, bucketsError := esRepo.GetVariantsBucketsByKeywordAndDataset(cfg, es, "sample.id.keyword", dataset) - if bucketsError != nil { - fmt.Println(resultingBuckets) - } + // request #2 + g.Go(func() error { + // obtain number of samples associated with this tableId + resultingBuckets, bucketsError := esRepo.GetVariantsBucketsByKeywordAndDataset(cfg, es, "sample.id.keyword", dataset) + if bucketsError != nil { + fmt.Printf("Failed to bucket dataset %s variants\n", dataset) + return bucketsError + } - // retrieve aggregations.items.buckets - // and count number of samples - bucketsMapped := []interface{}{} - if aggs, aggsOk := resultingBuckets["aggregations"]; aggsOk { - aggsMapped := aggs.(map[string]interface{}) + // retrieve aggregations.items.buckets + // and count number of samples + if aggs, aggsOk := resultingBuckets["aggregations"]; aggsOk { + aggsMapped := aggs.(map[string]interface{}) - if items, itemsOk := aggsMapped["items"]; itemsOk { - itemsMapped := items.(map[string]interface{}) + if items, itemsOk := aggsMapped["items"]; itemsOk { + itemsMapped := items.(map[string]interface{}) - if buckets, bucketsOk := itemsMapped["buckets"]; bucketsOk { - bucketsMapped = buckets.([]interface{}) + if buckets, bucketsOk := itemsMapped["buckets"]; bucketsOk { + bucketsMapped = buckets.([]interface{}) + } } } - } + return nil + }) - fmt.Printf("Successfully Obtained Dataset '%s' Summary \n", dataset) + // wait for all HTTP fetches to complete. + if err := g.Wait(); err == nil { + fmt.Printf("Successfully Obtained Dataset '%s' Summary \n", dataset) - return c.JSON(http.StatusOK, &dtos.DatasetSummaryResponseDto{ - Count: int(totalVariantsCount), - DataTypeSpecific: map[string]interface{}{ - "samples": len(bucketsMapped), - }, - }) + return c.JSON(http.StatusOK, &dtos.DatasetSummaryResponseDto{ + Count: int(totalVariantsCount), + DataTypeSpecific: map[string]interface{}{ + "samples": len(bucketsMapped), + }, + }) + } else { + return c.JSON(http.StatusInternalServerError, errors.CreateSimpleInternalServerError("Something went wrong.. Please try again later!")) + } } func executeGetByIds(c echo.Context, ids []string, isVariantIdQuery bool, isDocumentIdQuery bool) error { diff --git a/src/api/repositories/elasticsearch/variants.go b/src/api/repositories/elasticsearch/variants.go index d2cfa782..e040942b 100644 --- a/src/api/repositories/elasticsearch/variants.go +++ b/src/api/repositories/elasticsearch/variants.go @@ -611,6 +611,8 @@ func GetVariantsBucketsByKeywordAndDataset(cfg *models.Config, es *elasticsearch fmt.Println(myString) } + fmt.Printf("Query Start: %s\n", time.Now()) + if cfg.Debug { http.DefaultTransport.(*http.Transport).TLSClientConfig = &tls.Config{InsecureSkipVerify: true} } From cba7a5782f5685cd481207b928ec3605e4f0f0aa Mon Sep 17 00:00:00 2001 From: Brennan Brouillette Date: Thu, 8 Jun 2023 01:59:26 -0400 Subject: [PATCH 12/49] chore: type safe dataset qp - improved http context composition --- src/api/contexts/contexts.go | 9 +++++++++ src/api/middleware/datasetMiddleware.go | 8 +++++++- src/api/mvc/variants/main.go | 10 ++++++---- src/api/services/ingestion.go | 3 ++- 4 files changed, 24 insertions(+), 6 deletions(-) diff --git a/src/api/contexts/contexts.go b/src/api/contexts/contexts.go index bc829a76..4ddce517 100644 --- a/src/api/contexts/contexts.go +++ b/src/api/contexts/contexts.go @@ -2,10 +2,12 @@ package contexts import ( "gohan/api/models" + "gohan/api/models/constants" "gohan/api/services" variantsService "gohan/api/services/variants" es7 "github.com/elastic/go-elasticsearch/v7" + "github.com/google/uuid" "github.com/labstack/echo" ) @@ -18,5 +20,12 @@ type ( Config *models.Config IngestionService *services.IngestionService VariantService *variantsService.VariantService + QueryParameters + } + + // Convenient storage for relevant http context data + QueryParameters struct { + AssemblyId constants.AssemblyId + Dataset uuid.UUID } ) diff --git a/src/api/middleware/datasetMiddleware.go b/src/api/middleware/datasetMiddleware.go index a86ddb59..93e16c60 100644 --- a/src/api/middleware/datasetMiddleware.go +++ b/src/api/middleware/datasetMiddleware.go @@ -2,10 +2,12 @@ package middleware import ( "fmt" + "gohan/api/contexts" "gohan/api/models/dtos/errors" "gohan/api/utils" "net/http" + "github.com/google/uuid" "github.com/labstack/echo" ) @@ -30,6 +32,10 @@ func MandateDatasetAttribute(next echo.HandlerFunc) echo.HandlerFunc { return c.JSON(http.StatusBadRequest, errors.CreateSimpleBadRequest(fmt.Sprintf("invalid dataset %s - please provide a valid uuid", dataset))) } - return next(c) + // forward a type-safe value down the pipeline + gc := c.(*contexts.GohanContext) + gc.Dataset = uuid.MustParse(dataset) + + return next(gc) } } diff --git a/src/api/mvc/variants/main.go b/src/api/mvc/variants/main.go index 2f82b81d..4e645eb8 100644 --- a/src/api/mvc/variants/main.go +++ b/src/api/mvc/variants/main.go @@ -105,14 +105,14 @@ func VariantsCountBySampleId(c echo.Context) error { func VariantsIngest(c echo.Context) error { fmt.Printf("[%s] - VariantsIngest hit!\n", time.Now()) - cfg := c.(*contexts.GohanContext).Config + gc := c.(*contexts.GohanContext) + + cfg := gc.Config vcfPath := cfg.Api.VcfPath drsUrl := cfg.Drs.Url drsUsername := cfg.Drs.Username drsPassword := cfg.Drs.Password - ingestionService := c.(*contexts.GohanContext).IngestionService - // retrieve query parameters (comman separated) var fileNames []string // get vcf files @@ -209,8 +209,9 @@ func VariantsIngest(c echo.Context) error { // ----- } + // -- from query params assemblyId := a.CastToAssemblyId(c.QueryParam("assemblyId")) - dataset := c.QueryParam("dataset") + dataset := gc.Dataset // -- optional filter var ( @@ -231,6 +232,7 @@ func VariantsIngest(c echo.Context) error { // ingest vcf // ingserviceMux := sync.RWMutex{} + ingestionService := gc.IngestionService responseDtos := []ingest.IngestResponseDTO{} for _, fileName := range fileNames { diff --git a/src/api/services/ingestion.go b/src/api/services/ingestion.go index fa524a55..c74fb28b 100644 --- a/src/api/services/ingestion.go +++ b/src/api/services/ingestion.go @@ -33,6 +33,7 @@ import ( "github.com/Jeffail/gabs" "github.com/elastic/go-elasticsearch/v7" "github.com/elastic/go-elasticsearch/v7/esutil" + "github.com/google/uuid" "github.com/mitchellh/mapstructure" ) @@ -341,7 +342,7 @@ func (i *IngestionService) UploadVcfGzToDrs(cfg *models.Config, drsBridgeDirecto } func (i *IngestionService) ProcessVcf( - gzippedFilePath string, drsFileId string, dataset string, + gzippedFilePath string, drsFileId string, dataset uuid.UUID, assemblyId constants.AssemblyId, filterOutReferences bool, lineProcessingConcurrencyLevel int) { From df72857a7e7064a3f74000dab9799d60112d32fe Mon Sep 17 00:00:00 2001 From: Brennan Brouillette Date: Tue, 13 Jun 2023 16:16:54 -0400 Subject: [PATCH 13/49] patch: fix auto-merge errors --- src/api/contexts/contexts.go | 8 +------- src/api/mvc/main.go | 2 ++ src/api/mvc/variants/main.go | 5 +---- 3 files changed, 4 insertions(+), 11 deletions(-) diff --git a/src/api/contexts/contexts.go b/src/api/contexts/contexts.go index 3933b428..b1c36f38 100644 --- a/src/api/contexts/contexts.go +++ b/src/api/contexts/contexts.go @@ -21,13 +21,6 @@ type ( Config *models.Config IngestionService *services.IngestionService VariantService *variantsService.VariantService - QueryParameters - } - - // Convenient storage for relevant http context data - QueryParameters struct { - AssemblyId constants.AssemblyId - Dataset uuid.UUID } // Convenient storage for relevant http context data @@ -37,6 +30,7 @@ type ( Chromosome string Genotype constants.GenotypeQuery SampleIds []string + Dataset uuid.UUID PositionBounds } diff --git a/src/api/mvc/main.go b/src/api/mvc/main.go index 7694a572..635cc5f4 100644 --- a/src/api/mvc/main.go +++ b/src/api/mvc/main.go @@ -3,6 +3,8 @@ package mvc import ( "gohan/api/contexts" "gohan/api/models/constants" + a "gohan/api/models/constants/assembly-id" + gq "gohan/api/models/constants/genotype-query" "strings" "github.com/elastic/go-elasticsearch/v7" diff --git a/src/api/mvc/variants/main.go b/src/api/mvc/variants/main.go index 0f308082..53cd9cba 100644 --- a/src/api/mvc/variants/main.go +++ b/src/api/mvc/variants/main.go @@ -109,6 +109,7 @@ func VariantsIngest(c echo.Context) error { // query parameters assemblyId := gc.AssemblyId + dataset := gc.Dataset // retrieve query parameters (comman separated) var fileNames []string @@ -206,10 +207,6 @@ func VariantsIngest(c echo.Context) error { // ----- } - // -- from query params - assemblyId := gc.AssemblyId - dataset := gc.Dataset - // -- optional filter var ( filterOutReferences bool = false // default From 32930bcfa630da7dd059aa0ea62c039d5d5ae1dc Mon Sep 17 00:00:00 2001 From: Brennan Brouillette Date: Tue, 13 Jun 2023 16:23:30 -0400 Subject: [PATCH 14/49] patch: readme --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index dc60ffe3..91fdecbf 100644 --- a/README.md +++ b/README.md @@ -80,7 +80,7 @@ # move vcf.gz files to `$GOHAN_API_VCF_PATH` # ingest vcf.gz - curl -k https://gohan.local/variants/ingestion/run\?fileNames=\&assemblyId=GRCh37\&filterOutReferences=true + curl -k https://gohan.local/variants/ingestion/run\?fileNames=\&assemblyId=GRCh37\&filterOutReferences=true\&dataset=00000000-0000-0000-0000-000000000000 # monitor progress: curl -k https://gohan.local/variants/ingestion/requests From 277347777e7ba588c67143d4fb438fdf469b39e7 Mon Sep 17 00:00:00 2001 From: Brennan Brouillette Date: Fri, 7 Jul 2023 02:25:06 -0400 Subject: [PATCH 15/49] patch: tableId dataset typos --- src/api/mvc/variants/main.go | 2 +- src/api/tests/build/api/variants_test.go | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/api/mvc/variants/main.go b/src/api/mvc/variants/main.go index 53cd9cba..b9f8c4a4 100644 --- a/src/api/mvc/variants/main.go +++ b/src/api/mvc/variants/main.go @@ -484,7 +484,7 @@ func GetDatasetSummary(c echo.Context) error { // request #2 g.Go(func() error { - // obtain number of samples associated with this tableId + // obtain number of samples associated with this dataset resultingBuckets, bucketsError := esRepo.GetVariantsBucketsByKeywordAndDataset(cfg, es, "sample.id.keyword", dataset) if bucketsError != nil { fmt.Printf("Failed to bucket dataset %s variants\n", dataset) diff --git a/src/api/tests/build/api/variants_test.go b/src/api/tests/build/api/variants_test.go index 9357085b..d72413f3 100644 --- a/src/api/tests/build/api/variants_test.go +++ b/src/api/tests/build/api/variants_test.go @@ -38,7 +38,7 @@ const ( func TestDemoVcfIngestion(t *testing.T) { cfg := common.InitConfig() - tableId := uuid.NewString() + dataset := uuid.NewString() t.Run("Ingest Demo VCF", func(t *testing.T) { // verify ingestion endpoint @@ -90,7 +90,7 @@ func TestDemoVcfIngestion(t *testing.T) { assemblyId := "GRCh38" containerizedVcfFilePath := "/data/" + filepath.Base(newGzFile) - queryString := fmt.Sprintf("assemblyId=%s&fileNames=%s&tableId=%s", assemblyId, containerizedVcfFilePath, tableId) + queryString := fmt.Sprintf("assemblyId=%s&fileNames=%s&dataset=%s", assemblyId, containerizedVcfFilePath, dataset) ingestUrl := fmt.Sprintf("%s/variants/ingestion/run?%s", cfg.Api.Url, queryString) initialIngestionDtos := utils.GetRequestReturnStuff[[]ingest.IngestResponseDTO](ingestUrl) From f4ed1ddcf89b19ce727a7ad33737018000a0c5eb Mon Sep 17 00:00:00 2001 From: Brennan Brouillette Date: Fri, 7 Jul 2023 03:10:11 -0400 Subject: [PATCH 16/49] patch|test: dataset uuid ingestion patch and test --- src/api/mvc/variants/main.go | 17 ++++++----------- src/api/services/ingestion.go | 2 +- src/api/tests/build/api/variants_test.go | 16 ++++++++++++++++ 3 files changed, 23 insertions(+), 12 deletions(-) diff --git a/src/api/mvc/variants/main.go b/src/api/mvc/variants/main.go index b9f8c4a4..e9779ded 100644 --- a/src/api/mvc/variants/main.go +++ b/src/api/mvc/variants/main.go @@ -449,16 +449,11 @@ func GetAllVariantIngestionRequests(c echo.Context) error { func GetDatasetSummary(c echo.Context) error { fmt.Printf("[%s] - GetDatasetSummary hit!\n", time.Now()) - cfg := c.(*contexts.GohanContext).Config - es := c.(*contexts.GohanContext).Es7Client - // obtain dataset from the path - dataset := c.Param("dataset") + gc := c.(*contexts.GohanContext) + cfg := gc.Config + es := gc.Es7Client - // dataset must be provided - if dataset == "" { - fmt.Println("Missing dataset") - return c.JSON(http.StatusBadRequest, errors.CreateSimpleBadRequest("Missing dataset - please try again")) - } + dataset := gc.Dataset // parallelize these two es queries @@ -472,7 +467,7 @@ func GetDatasetSummary(c echo.Context) error { docs, countError := esRepo.CountDocumentsContainerVariantOrSampleIdInPositionRange(cfg, es, "*", 0, 0, "", "", // note : both variantId and sampleId are deliberately set to "" - "", "", []string{}, "", "", dataset) + "", "", []string{}, "", "", dataset.String()) if countError != nil { fmt.Printf("Failed to count variants in dataset %s\n", dataset) return countError @@ -485,7 +480,7 @@ func GetDatasetSummary(c echo.Context) error { // request #2 g.Go(func() error { // obtain number of samples associated with this dataset - resultingBuckets, bucketsError := esRepo.GetVariantsBucketsByKeywordAndDataset(cfg, es, "sample.id.keyword", dataset) + resultingBuckets, bucketsError := esRepo.GetVariantsBucketsByKeywordAndDataset(cfg, es, "sample.id.keyword", dataset.String()) if bucketsError != nil { fmt.Printf("Failed to bucket dataset %s variants\n", dataset) return bucketsError diff --git a/src/api/services/ingestion.go b/src/api/services/ingestion.go index c74fb28b..40e434ec 100644 --- a/src/api/services/ingestion.go +++ b/src/api/services/ingestion.go @@ -420,7 +420,7 @@ func (i *IngestionService) ProcessVcf( tmpVariant["fileId"] = drsFileId tmpVariant["assemblyId"] = assemblyId - tmpVariant["dataset"] = dataset + tmpVariant["dataset"] = dataset.String() // skip this call if need be skipThisCall := false diff --git a/src/api/tests/build/api/variants_test.go b/src/api/tests/build/api/variants_test.go index d72413f3..83f35b8d 100644 --- a/src/api/tests/build/api/variants_test.go +++ b/src/api/tests/build/api/variants_test.go @@ -164,6 +164,22 @@ func TestDemoVcfIngestion(t *testing.T) { // check variants overview overviewJson := common.GetVariantsOverview(t, cfg) assert.NotNil(t, overviewJson) + + // check datasets + assert.NotNil(t, overviewJson["datasets"]) + assert.NotNil(t, overviewJson["datasets"].(map[string]interface{})) + + datasets := overviewJson["datasets"].(map[string]interface{}) + assert.NotZero(t, len(datasets)) + for k, v := range datasets { + key := k + value := v.(float64) + assert.NotNil(t, key) + assert.NotNil(t, value) + assert.NotEmpty(t, key) + assert.NotEmpty(t, value) + assert.Greater(t, value, 0.0) + } }) t.Run("Test Simple Chromosome Queries", func(t *testing.T) { From 82939ffdb0cf0b4c4bf971d2d87bb2c46efe15e0 Mon Sep 17 00:00:00 2001 From: Brennan Brouillette Date: Fri, 7 Jul 2023 03:24:58 -0400 Subject: [PATCH 17/49] test: variant overview content check --- src/api/tests/build/api/variants_test.go | 30 +++++++++++++----------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/src/api/tests/build/api/variants_test.go b/src/api/tests/build/api/variants_test.go index 83f35b8d..59c2ccf0 100644 --- a/src/api/tests/build/api/variants_test.go +++ b/src/api/tests/build/api/variants_test.go @@ -165,20 +165,22 @@ func TestDemoVcfIngestion(t *testing.T) { overviewJson := common.GetVariantsOverview(t, cfg) assert.NotNil(t, overviewJson) - // check datasets - assert.NotNil(t, overviewJson["datasets"]) - assert.NotNil(t, overviewJson["datasets"].(map[string]interface{})) - - datasets := overviewJson["datasets"].(map[string]interface{}) - assert.NotZero(t, len(datasets)) - for k, v := range datasets { - key := k - value := v.(float64) - assert.NotNil(t, key) - assert.NotNil(t, value) - assert.NotEmpty(t, key) - assert.NotEmpty(t, value) - assert.Greater(t, value, 0.0) + // verify variant overview content + for oK, oV := range overviewJson { + assert.NotNil(t, oV) + + assert.NotNil(t, overviewJson[oK]) + assert.NotNil(t, overviewJson[oK].(map[string]interface{})) + + for k, v := range oV.(map[string]interface{}) { + key := k + assert.NotNil(t, v) + value := v.(float64) + assert.NotNil(t, key) + assert.NotEmpty(t, key) + assert.NotEmpty(t, value) + assert.NotZero(t, value) + } } }) From 234008220d184d7f7c21c2e762da5eb3e6ff15d9 Mon Sep 17 00:00:00 2001 From: Brennan Brouillette Date: Mon, 10 Jul 2023 17:16:10 -0400 Subject: [PATCH 18/49] chore: dataset query test coverage --- src/api/tests/build/api/variants_test.go | 55 ++++++++++++++---------- src/api/tests/common/common.go | 25 ++++++----- 2 files changed, 47 insertions(+), 33 deletions(-) diff --git a/src/api/tests/build/api/variants_test.go b/src/api/tests/build/api/variants_test.go index 59c2ccf0..17c004c4 100644 --- a/src/api/tests/build/api/variants_test.go +++ b/src/api/tests/build/api/variants_test.go @@ -38,7 +38,7 @@ const ( func TestDemoVcfIngestion(t *testing.T) { cfg := common.InitConfig() - dataset := uuid.NewString() + dataset := uuid.New() t.Run("Ingest Demo VCF", func(t *testing.T) { // verify ingestion endpoint @@ -90,7 +90,7 @@ func TestDemoVcfIngestion(t *testing.T) { assemblyId := "GRCh38" containerizedVcfFilePath := "/data/" + filepath.Base(newGzFile) - queryString := fmt.Sprintf("assemblyId=%s&fileNames=%s&dataset=%s", assemblyId, containerizedVcfFilePath, dataset) + queryString := fmt.Sprintf("assemblyId=%s&fileNames=%s&dataset=%s", assemblyId, containerizedVcfFilePath, dataset.String()) ingestUrl := fmt.Sprintf("%s/variants/ingestion/run?%s", cfg.Api.Url, queryString) initialIngestionDtos := utils.GetRequestReturnStuff[[]ingest.IngestResponseDTO](ingestUrl) @@ -182,21 +182,30 @@ func TestDemoVcfIngestion(t *testing.T) { assert.NotZero(t, value) } } + fmt.Println(overviewJson) }) t.Run("Test Simple Chromosome Queries", func(t *testing.T) { // simple chromosome-1 query - chromQueryResponse := common.BuildQueryAndMakeGetVariantsCall("1", "*", true, "asc", "", "GRCh38", "", "", "", false, t, cfg) + chromQueryResponse := common.BuildQueryAndMakeGetVariantsCall("1", "*", dataset, true, "asc", "", "GRCh38", "", "", "", false, t, cfg) + assert.True(t, len(chromQueryResponse.Results) > 0) assert.True(t, len(chromQueryResponse.Results[0].Calls) > 0) }) + t.Run("Test Query by Dataset", func(t *testing.T) { + // simple query by dataset using the id generated above and ingested with + byDatsetQueryResponse := common.BuildQueryAndMakeGetVariantsCall("", "*", dataset, true, "asc", "", "GRCh38", "", "", "", false, t, cfg) + assert.True(t, len(byDatsetQueryResponse.Results) > 0) + assert.True(t, len(byDatsetQueryResponse.Results[0].Calls) > 0) + }) + t.Run("Test Simple Allele Queries", func(t *testing.T) { // TODO: not hardcoded tests // simple allele queries - common.GetAndVerifyVariantsResults(cfg, t, "CAG") - common.GetAndVerifyVariantsResults(cfg, t, "CAAAA") - common.GetAndVerifyVariantsResults(cfg, t, "T") - common.GetAndVerifyVariantsResults(cfg, t, "C") + common.GetAndVerifyVariantsResults(cfg, t, dataset, "CAG") + common.GetAndVerifyVariantsResults(cfg, t, dataset, "CAAAA") + common.GetAndVerifyVariantsResults(cfg, t, dataset, "T") + common.GetAndVerifyVariantsResults(cfg, t, dataset, "C") // random number between 1 and 5 // allelleLen := rand.Intn(5) + 1 @@ -206,7 +215,7 @@ func TestDemoVcfIngestion(t *testing.T) { }) t.Run("Test Variant Info Present", func(t *testing.T) { - allDtoResponses := common.GetAllDtosOfVariousCombinationsOfChromosomesAndSampleIds(t, true, s.Undefined, gq.UNCALLED, "", "") + allDtoResponses := common.GetAllDtosOfVariousCombinationsOfChromosomesAndSampleIds(t, dataset, true, s.Undefined, gq.UNCALLED, "", "") // assert that all of the responses include valid sets of info // - * accumulate all infos into a single list using the set of @@ -242,7 +251,7 @@ func TestDemoVcfIngestion(t *testing.T) { }) t.Run("Test No Variant Info Present", func(t *testing.T) { - allDtoResponses := common.GetAllDtosOfVariousCombinationsOfChromosomesAndSampleIds(t, false, s.Undefined, gq.UNCALLED, "", "") + allDtoResponses := common.GetAllDtosOfVariousCombinationsOfChromosomesAndSampleIds(t, dataset, false, s.Undefined, gq.UNCALLED, "", "") // assert that all responses from all combinations have no results for _, dtoResponse := range allDtoResponses { @@ -257,7 +266,7 @@ func TestDemoVcfIngestion(t *testing.T) { t.Run("Test Get Variants in Ascending Order", func(t *testing.T) { // retrieve responses in ascending order - allDtoResponses := common.GetAllDtosOfVariousCombinationsOfChromosomesAndSampleIds(t, false, s.Ascending, gq.UNCALLED, "", "") + allDtoResponses := common.GetAllDtosOfVariousCombinationsOfChromosomesAndSampleIds(t, dataset, false, s.Ascending, gq.UNCALLED, "", "") // assert the dto response slice is plentiful assert.NotNil(t, allDtoResponses) @@ -286,7 +295,7 @@ func TestDemoVcfIngestion(t *testing.T) { t.Run("Test Get Variants in Descending Order", func(t *testing.T) { // retrieve responses in descending order - allDtoResponses := common.GetAllDtosOfVariousCombinationsOfChromosomesAndSampleIds(t, false, s.Descending, gq.UNCALLED, "", "") + allDtoResponses := common.GetAllDtosOfVariousCombinationsOfChromosomesAndSampleIds(t, dataset, false, s.Descending, gq.UNCALLED, "", "") // assert the dto response slice is plentiful assert.NotNil(t, allDtoResponses) @@ -358,7 +367,7 @@ func TestDemoVcfIngestion(t *testing.T) { validateHomozygousAlternateSample(__t, call) } - common.ExecuteReferenceOrAlternativeQueryTestsOfVariousPatterns(t, gq.HOMOZYGOUS_ALTERNATE, ratt.Reference, specificValidation) + common.ExecuteReferenceOrAlternativeQueryTestsOfVariousPatterns(t, dataset, gq.HOMOZYGOUS_ALTERNATE, ratt.Reference, specificValidation) // Homozygous Reference Variants With Various References specificValidation = func(__t *testing.T, call *dtos.VariantCall, referenceAllelePattern string, alternativeAllelePattern string) { @@ -370,7 +379,7 @@ func TestDemoVcfIngestion(t *testing.T) { validateHomozygousReferenceSample(__t, call) } - common.ExecuteReferenceOrAlternativeQueryTestsOfVariousPatterns(t, gq.HOMOZYGOUS_REFERENCE, ratt.Reference, specificValidation) + common.ExecuteReferenceOrAlternativeQueryTestsOfVariousPatterns(t, dataset, gq.HOMOZYGOUS_REFERENCE, ratt.Reference, specificValidation) //Heterozygous Variants With Various References specificValidation = func(__t *testing.T, call *dtos.VariantCall, referenceAllelePattern string, alternativeAllelePattern string) { @@ -382,7 +391,7 @@ func TestDemoVcfIngestion(t *testing.T) { validateHeterozygousSample(__t, call) } - common.ExecuteReferenceOrAlternativeQueryTestsOfVariousPatterns(t, gq.HETEROZYGOUS, ratt.Reference, specificValidation) + common.ExecuteReferenceOrAlternativeQueryTestsOfVariousPatterns(t, dataset, gq.HETEROZYGOUS, ratt.Reference, specificValidation) // Homozygous Alternate Variants With Various Alternatives specificValidation = func(__t *testing.T, call *dtos.VariantCall, referenceAllelePattern string, alternativeAllelePattern string) { @@ -394,7 +403,7 @@ func TestDemoVcfIngestion(t *testing.T) { validateHomozygousAlternateSample(__t, call) } - common.ExecuteReferenceOrAlternativeQueryTestsOfVariousPatterns(t, gq.HOMOZYGOUS_ALTERNATE, ratt.Alternative, specificValidation) + common.ExecuteReferenceOrAlternativeQueryTestsOfVariousPatterns(t, dataset, gq.HOMOZYGOUS_ALTERNATE, ratt.Alternative, specificValidation) // Homozygous Reference Variants With Various Alternatives specificValidation = func(__t *testing.T, call *dtos.VariantCall, referenceAllelePattern string, alternativeAllelePattern string) { @@ -406,7 +415,7 @@ func TestDemoVcfIngestion(t *testing.T) { validateHomozygousReferenceSample(__t, call) } - common.ExecuteReferenceOrAlternativeQueryTestsOfVariousPatterns(t, gq.HOMOZYGOUS_REFERENCE, ratt.Alternative, specificValidation) + common.ExecuteReferenceOrAlternativeQueryTestsOfVariousPatterns(t, dataset, gq.HOMOZYGOUS_REFERENCE, ratt.Alternative, specificValidation) // Heterozygous Variants With Various Alternatives specificValidation = func(__t *testing.T, call *dtos.VariantCall, referenceAllelePattern string, alternativeAllelePattern string) { @@ -418,13 +427,13 @@ func TestDemoVcfIngestion(t *testing.T) { validateHeterozygousSample(__t, call) } - common.ExecuteReferenceOrAlternativeQueryTestsOfVariousPatterns(t, gq.HETEROZYGOUS, ratt.Alternative, specificValidation) + common.ExecuteReferenceOrAlternativeQueryTestsOfVariousPatterns(t, dataset, gq.HETEROZYGOUS, ratt.Alternative, specificValidation) }) t.Run("Test Can Get Variants With Wildcard Alternatives", func(t *testing.T) { allele := "ATTN" // example allele - TODO: render more sophisticated randomization // TODO: improve variant call testing from being 1 call to many random ones - dtos := common.BuildQueryAndMakeGetVariantsCall("14", "*", true, "asc", "HETEROZYGOUS", "GRCh37", "", allele, "", false, t, cfg) + dtos := common.BuildQueryAndMakeGetVariantsCall("14", "*", dataset, true, "asc", "HETEROZYGOUS", "GRCh37", "", allele, "", false, t, cfg) for _, dto := range dtos.Results { for _, call := range dto.Calls { // ensure, for each call, that at least @@ -459,7 +468,7 @@ func TestDemoVcfIngestion(t *testing.T) { t.Run("Test Can Get Variants With Wildcard References", func(t *testing.T) { allele := "ATTN" // example allele - TODO: render more sophisticated randomization // TODO: improve variant call testing from being 1 call to many random ones - dtos := common.BuildQueryAndMakeGetVariantsCall("14", "*", true, "asc", "HETEROZYGOUS", "GRCh37", allele, "", "", false, t, cfg) + dtos := common.BuildQueryAndMakeGetVariantsCall("14", "*", dataset, true, "asc", "HETEROZYGOUS", "GRCh37", allele, "", "", false, t, cfg) for _, dto := range dtos.Results { for _, call := range dto.Calls { // ensure, for each call, that at least @@ -494,7 +503,7 @@ func TestDemoVcfIngestion(t *testing.T) { // iterate over all 'allele's queried for qAlleles := []string{"N", "NN", "NNN", "NNNN", "NNNNN"} // wildcard alleles of different lengths for _, qAllele := range qAlleles { - dtos := common.BuildQueryAndMakeGetVariantsCall("", "*", true, "asc", "", "GRCh38", "", "", qAllele, false, t, cfg) + dtos := common.BuildQueryAndMakeGetVariantsCall("", "*", dataset, true, "asc", "", "GRCh38", "", "", qAllele, false, t, cfg) for _, dto := range dtos.Results { fmt.Printf("Got %d calls from allele query %s \n", len(dto.Calls), qAllele) if len(dto.Calls) == 0 { @@ -532,7 +541,7 @@ func TestDemoVcfIngestion(t *testing.T) { // iterate over all 'allele pairs' for _, qAllelePair := range qAllelePairs { - dtos := common.BuildQueryAndMakeGetVariantsCall("", "*", true, "asc", "", "GRCh38", "", "", strings.Join(qAllelePair, ","), false, t, cfg) + dtos := common.BuildQueryAndMakeGetVariantsCall("", "*", dataset, true, "asc", "", "GRCh38", "", "", strings.Join(qAllelePair, ","), false, t, cfg) for _, dto := range dtos.Results { if len(dto.Calls) == 0 { continue @@ -566,7 +575,7 @@ func TestDemoVcfIngestion(t *testing.T) { } // skip valid calls limitedAlleles := strings.Join(qAlleles[:i], ",") - invalidReqResObj := common.BuildQueryAndMakeGetVariantsCall("", "*", true, "asc", "", "GRCh38", "", "", limitedAlleles, true, t, cfg) + invalidReqResObj := common.BuildQueryAndMakeGetVariantsCall("", "*", dataset, true, "asc", "", "GRCh38", "", "", limitedAlleles, true, t, cfg) // make sure only an error was returned assert.True(t, invalidReqResObj.Status == 400) @@ -649,7 +658,7 @@ func getAllDtosOfVariousCombinationsOfChromosomesAndSampleIds(_t *testing.T, inc assemblyId := a.CastToAssemblyId(_combination[2]) // make the call - dto := common.BuildQueryAndMakeGetVariantsCall(chrom, sampleId, includeInfo, sortByPosition, genotype, assemblyId, referenceAllelePattern, alternativeAllelePattern, "", false, _t, cfg) + dto := common.BuildQueryAndMakeGetVariantsCall(chrom, sampleId, uuid.Nil, includeInfo, sortByPosition, genotype, assemblyId, referenceAllelePattern, alternativeAllelePattern, "", false, _t, cfg) assert.Equal(_t, 1, len(dto.Results)) diff --git a/src/api/tests/common/common.go b/src/api/tests/common/common.go index fb5d99d1..0e14c118 100644 --- a/src/api/tests/common/common.go +++ b/src/api/tests/common/common.go @@ -26,6 +26,7 @@ import ( "testing" . "github.com/ahmetb/go-linq" + "github.com/google/uuid" "github.com/stretchr/testify/assert" yaml "gopkg.in/yaml.v2" ) @@ -160,8 +161,8 @@ func CreateAndGetNewFile(filePath string) (*os.File, error) { return newFile, newFileErr } -func GetAndVerifyVariantsResults(_cfg *models.Config, _t *testing.T, qAllele string) { - responseDtos := BuildQueryAndMakeGetVariantsCall("", "*", true, "asc", "", "GRCh38", "", "", qAllele, false, _t, _cfg) +func GetAndVerifyVariantsResults(_cfg *models.Config, _t *testing.T, dataset uuid.UUID, qAllele string) { + responseDtos := BuildQueryAndMakeGetVariantsCall("", "*", dataset, true, "asc", "", "GRCh38", "", "", qAllele, false, _t, _cfg) assert.NotNil(_t, responseDtos.Results) assert.True(_t, len(responseDtos.Results) > 0) @@ -209,7 +210,7 @@ func GetAndVerifyVariantsResults(_cfg *models.Config, _t *testing.T, qAllele str } func BuildQueryAndMakeGetVariantsCall( - chromosome string, sampleId string, includeInfo bool, + chromosome string, sampleId string, dataset uuid.UUID, includeInfo bool, sortByPosition c.SortDirection, genotype c.GenotypeQuery, assemblyId c.AssemblyId, referenceAllelePattern string, alternativeAllelePattern string, commaDeliminatedAlleles string, ignoreStatusCode bool, _t *testing.T, _cfg *models.Config) dtos.VariantGetReponse { @@ -220,6 +221,10 @@ func BuildQueryAndMakeGetVariantsCall( queryString = fmt.Sprintf("%s%s", queryString, fmt.Sprintf("&chromosome=%s", chromosome)) } + if dataset != uuid.Nil && dataset.String() != "" { + queryString = fmt.Sprintf("%s%s", queryString, fmt.Sprintf("&dataset=%s", dataset.String())) + } + if genotype != gq.UNCALLED { queryString = fmt.Sprintf("%s%s", queryString, fmt.Sprintf("&genotype=%s", string(genotype))) } @@ -238,7 +243,7 @@ func BuildQueryAndMakeGetVariantsCall( return makeGetVariantsCall(url, ignoreStatusCode, _t) } -func GetAllDtosOfVariousCombinationsOfChromosomesAndSampleIds(_t *testing.T, includeInfo bool, sortByPosition c.SortDirection, genotype c.GenotypeQuery, referenceAllelePattern string, alternativeAllelePattern string) []dtos.VariantGetReponse { +func GetAllDtosOfVariousCombinationsOfChromosomesAndSampleIds(_t *testing.T, dataset uuid.UUID, includeInfo bool, sortByPosition c.SortDirection, genotype c.GenotypeQuery, referenceAllelePattern string, alternativeAllelePattern string) []dtos.VariantGetReponse { cfg := InitConfig() // retrieve the overview @@ -280,7 +285,7 @@ func GetAllDtosOfVariousCombinationsOfChromosomesAndSampleIds(_t *testing.T, inc assemblyId := a.CastToAssemblyId(_combination[2]) // make the call - dto := BuildQueryAndMakeGetVariantsCall(chrom, sampleId, includeInfo, sortByPosition, genotype, assemblyId, referenceAllelePattern, alternativeAllelePattern, "", false, _t, cfg) + dto := BuildQueryAndMakeGetVariantsCall(chrom, sampleId, dataset, includeInfo, sortByPosition, genotype, assemblyId, referenceAllelePattern, alternativeAllelePattern, "", false, _t, cfg) assert.Equal(_t, 1, len(dto.Results)) @@ -358,7 +363,7 @@ func GetOverviewResultCombinations(chromosomeStruct interface{}, sampleIdsStruct } func ExecuteReferenceOrAlternativeQueryTestsOfVariousPatterns(_t *testing.T, - genotypeQuery c.GenotypeQuery, refAltTestType testConsts.ReferenceAlternativeTestType, + dataset uuid.UUID, genotypeQuery c.GenotypeQuery, refAltTestType testConsts.ReferenceAlternativeTestType, specificValidation func(__t *testing.T, call *dtos.VariantCall, referenceAllelePattern string, alternativeAllelePattern string)) { // TODO: use some kind of Allele Enum @@ -371,9 +376,9 @@ func ExecuteReferenceOrAlternativeQueryTestsOfVariousPatterns(_t *testing.T, switch refAltTestType { case ratt.Reference: - runAndValidateReferenceOrAlternativeQueryResults(_t, genotypeQuery, _pat, "", specificValidation) + runAndValidateReferenceOrAlternativeQueryResults(_t, dataset, genotypeQuery, _pat, "", specificValidation) case ratt.Alternative: - runAndValidateReferenceOrAlternativeQueryResults(_t, genotypeQuery, "", _pat, specificValidation) + runAndValidateReferenceOrAlternativeQueryResults(_t, dataset, genotypeQuery, "", _pat, specificValidation) default: println("Skipping Test -- no Ref/Alt Test Type provided") } @@ -384,11 +389,11 @@ func ExecuteReferenceOrAlternativeQueryTestsOfVariousPatterns(_t *testing.T, } func runAndValidateReferenceOrAlternativeQueryResults(_t *testing.T, - genotypeQuery c.GenotypeQuery, + dataset uuid.UUID, genotypeQuery c.GenotypeQuery, referenceAllelePattern string, alternativeAllelePattern string, specificValidation func(__t *testing.T, call *dtos.VariantCall, referenceAllelePattern string, alternativeAllelePattern string)) { - allDtoResponses := GetAllDtosOfVariousCombinationsOfChromosomesAndSampleIds(_t, true, s.Undefined, genotypeQuery, referenceAllelePattern, alternativeAllelePattern) + allDtoResponses := GetAllDtosOfVariousCombinationsOfChromosomesAndSampleIds(_t, dataset, true, s.Undefined, genotypeQuery, referenceAllelePattern, alternativeAllelePattern) // assert that all of the responses include sample sets with the appropriate zygosity // - * accumulate all variants into a single list using the set of SelectManyT's and the SelectT From 1048d3fc93b5fa08352ed14c88fe9ceef917a775 Mon Sep 17 00:00:00 2001 From: Brennan Brouillette Date: Mon, 10 Jul 2023 17:49:29 -0400 Subject: [PATCH 19/49] chore: improved datset query coverage - discovered bug (unknown dataset id returns results) --- src/api/tests/build/api/variants_test.go | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/api/tests/build/api/variants_test.go b/src/api/tests/build/api/variants_test.go index 17c004c4..54db9340 100644 --- a/src/api/tests/build/api/variants_test.go +++ b/src/api/tests/build/api/variants_test.go @@ -197,6 +197,17 @@ func TestDemoVcfIngestion(t *testing.T) { byDatsetQueryResponse := common.BuildQueryAndMakeGetVariantsCall("", "*", dataset, true, "asc", "", "GRCh38", "", "", "", false, t, cfg) assert.True(t, len(byDatsetQueryResponse.Results) > 0) assert.True(t, len(byDatsetQueryResponse.Results[0].Calls) > 0) + // verify dataset ids + From(byDatsetQueryResponse.Results).SelectManyT(func(data dtos.VariantGetResult) Query { // * + return From(data.Calls) + }).ForEachT(func(variant dtos.VariantCall) { + assert.Equal(t, dataset.String(), variant.Dataset) + }) + + // test unknown random dataset id + shouldBeEmptyResponse := common.BuildQueryAndMakeGetVariantsCall("", "*", uuid.New(), true, "", "", "GRCh38", "", "", "", false, t, cfg) + assert.True(t, len(shouldBeEmptyResponse.Results) > 0) + assert.True(t, len(shouldBeEmptyResponse.Results[0].Calls) == 0) }) t.Run("Test Simple Allele Queries", func(t *testing.T) { From 241d2c6c9eca33508d51c14b152276e29e7ac557 Mon Sep 17 00:00:00 2001 From: Brennan Brouillette Date: Mon, 10 Jul 2023 18:36:26 -0400 Subject: [PATCH 20/49] patch|chore: dataset http middleware, repo query - improved tests --- src/api/main.go | 4 +++ src/api/middleware/datasetMiddleware.go | 27 ++++++++++++++++ src/api/mvc/main.go | 11 +++++-- src/api/mvc/variants/main.go | 26 +++++++++------- .../repositories/elasticsearch/variants.go | 31 +++++++++++++------ src/api/tests/build/api/variants_test.go | 6 ++++ 6 files changed, 81 insertions(+), 24 deletions(-) diff --git a/src/api/main.go b/src/api/main.go index a2629965..be8b9a3f 100644 --- a/src/api/main.go +++ b/src/api/main.go @@ -144,6 +144,7 @@ func main() { e.GET("/variants/get/by/variantId", variantsMvc.VariantsGetByVariantId, // middleware gam.ValidateOptionalChromosomeAttribute, + gam.OptionalDatasetAttribute, gam.MandateCalibratedBounds, gam.MandateCalibratedAlleles, gam.MandateAssemblyIdAttribute, @@ -151,6 +152,7 @@ func main() { e.GET("/variants/get/by/sampleId", variantsMvc.VariantsGetBySampleId, // middleware gam.ValidateOptionalChromosomeAttribute, + gam.OptionalDatasetAttribute, gam.MandateCalibratedBounds, gam.MandateCalibratedAlleles, gam.MandateAssemblyIdAttribute, @@ -161,6 +163,7 @@ func main() { e.GET("/variants/count/by/variantId", variantsMvc.VariantsCountByVariantId, // middleware gam.ValidateOptionalChromosomeAttribute, + gam.OptionalDatasetAttribute, gam.MandateCalibratedBounds, gam.MandateCalibratedAlleles, gam.MandateAssemblyIdAttribute, @@ -168,6 +171,7 @@ func main() { e.GET("/variants/count/by/sampleId", variantsMvc.VariantsCountBySampleId, // middleware gam.ValidateOptionalChromosomeAttribute, + gam.OptionalDatasetAttribute, gam.MandateCalibratedBounds, gam.MandateCalibratedAlleles, gam.MandateAssemblyIdAttribute, diff --git a/src/api/middleware/datasetMiddleware.go b/src/api/middleware/datasetMiddleware.go index 93e16c60..85420145 100644 --- a/src/api/middleware/datasetMiddleware.go +++ b/src/api/middleware/datasetMiddleware.go @@ -39,3 +39,30 @@ func MandateDatasetAttribute(next echo.HandlerFunc) echo.HandlerFunc { return next(gc) } } + +/* +Echo middleware to ensure a `dataset` HTTP query parameter is valid if provided +*/ +func OptionalDatasetAttribute(next echo.HandlerFunc) echo.HandlerFunc { + return func(c echo.Context) error { + gc := c.(*contexts.GohanContext) + + // check for dataset query parameter + dataset := c.QueryParam("dataset") + if len(dataset) > 0 { + // verify dataset is a valid UUID + // - assume it's a valid dataset if it's a uuid, + // further verification is done later + if !utils.IsValidUUID(dataset) { + fmt.Printf("Invalid dataset %s\n", dataset) + + return c.JSON(http.StatusBadRequest, errors.CreateSimpleBadRequest(fmt.Sprintf("invalid dataset %s - please provide a valid uuid", dataset))) + } + + // forward a type-safe value down the pipeline + gc.Dataset = uuid.MustParse(dataset) + } + + return next(gc) + } +} diff --git a/src/api/mvc/main.go b/src/api/mvc/main.go index 635cc5f4..bce71a86 100644 --- a/src/api/mvc/main.go +++ b/src/api/mvc/main.go @@ -8,10 +8,11 @@ import ( "strings" "github.com/elastic/go-elasticsearch/v7" + "github.com/google/uuid" "github.com/labstack/echo" ) -func RetrieveCommonElements(c echo.Context) (*elasticsearch.Client, string, int, int, string, string, []string, constants.GenotypeQuery, constants.AssemblyId) { +func RetrieveCommonElements(c echo.Context) (*elasticsearch.Client, string, int, int, string, string, []string, constants.GenotypeQuery, constants.AssemblyId, string) { gc := c.(*contexts.GohanContext) es := gc.Es7Client @@ -20,6 +21,12 @@ func RetrieveCommonElements(c echo.Context) (*elasticsearch.Client, string, int, lowerBound := gc.LowerBound upperBound := gc.UpperBound + // optional + datasetString := "" + if gc.Dataset != uuid.Nil { + datasetString = gc.Dataset.String() + } + reference := c.QueryParam("reference") alternative := c.QueryParam("alternative") @@ -47,5 +54,5 @@ func RetrieveCommonElements(c echo.Context) (*elasticsearch.Client, string, int, assemblyId = a.CastToAssemblyId(assemblyIdQP) } - return es, chromosome, lowerBound, upperBound, reference, alternative, alleles, genotype, assemblyId + return es, chromosome, lowerBound, upperBound, reference, alternative, alleles, genotype, assemblyId, datasetString } diff --git a/src/api/mvc/variants/main.go b/src/api/mvc/variants/main.go index e9779ded..946edf69 100644 --- a/src/api/mvc/variants/main.go +++ b/src/api/mvc/variants/main.go @@ -466,8 +466,8 @@ func GetDatasetSummary(c echo.Context) error { g.Go(func() error { docs, countError := esRepo.CountDocumentsContainerVariantOrSampleIdInPositionRange(cfg, es, "*", 0, 0, - "", "", // note : both variantId and sampleId are deliberately set to "" - "", "", []string{}, "", "", dataset.String()) + "", "", dataset.String(), // note : both variantId and sampleId are deliberately set to "" + "", "", []string{}, "", "") if countError != nil { fmt.Printf("Failed to count variants in dataset %s\n", dataset) return countError @@ -518,9 +518,10 @@ func GetDatasetSummary(c echo.Context) error { } func executeGetByIds(c echo.Context, ids []string, isVariantIdQuery bool, isDocumentIdQuery bool) error { - cfg := c.(*contexts.GohanContext).Config + gc := c.(*contexts.GohanContext) + cfg := gc.Config - var es, chromosome, lowerBound, upperBound, reference, alternative, alleles, genotype, assemblyId = mvc.RetrieveCommonElements(c) + var es, chromosome, lowerBound, upperBound, reference, alternative, alleles, genotype, assemblyId, datasetString = mvc.RetrieveCommonElements(c) // retrieve other query parameters relevent to this 'get' query --- getSampleIdsOnlyQP := c.QueryParam("getSampleIdsOnly") @@ -603,7 +604,7 @@ func executeGetByIds(c echo.Context, ids []string, isVariantIdQuery bool, isDocu docs, searchErr = esRepo.GetDocumentsContainerVariantOrSampleIdInPositionRange(cfg, es, chromosome, lowerBound, upperBound, - _id, "", // note : "" is for sampleId + _id, "", datasetString, // note : "" is for sampleId reference, alternative, alleles, size, sortByPosition, includeInfoInResultSet, genotype, assemblyId, @@ -628,7 +629,7 @@ func executeGetByIds(c echo.Context, ids []string, isVariantIdQuery bool, isDocu docs, searchErr = esRepo.GetDocumentsContainerVariantOrSampleIdInPositionRange(cfg, es, chromosome, lowerBound, upperBound, - "", _id, // note : "" is for variantId + "", _id, datasetString, // note : "" is for variantId reference, alternative, alleles, size, sortByPosition, includeInfoInResultSet, genotype, assemblyId, @@ -757,9 +758,10 @@ func executeGetByIds(c echo.Context, ids []string, isVariantIdQuery bool, isDocu } func executeCountByIds(c echo.Context, ids []string, isVariantIdQuery bool) error { - cfg := c.(*contexts.GohanContext).Config + gc := c.(*contexts.GohanContext) + cfg := gc.Config - var es, chromosome, lowerBound, upperBound, reference, alternative, alleles, genotype, assemblyId = mvc.RetrieveCommonElements(c) + var es, chromosome, lowerBound, upperBound, reference, alternative, alleles, genotype, assemblyId, datasetString = mvc.RetrieveCommonElements(c) respDTO := dtos.VariantCountReponse{ Results: make([]dtos.VariantCountResult, 0), @@ -788,8 +790,8 @@ func executeCountByIds(c echo.Context, ids []string, isVariantIdQuery bool) erro docs, countError = esRepo.CountDocumentsContainerVariantOrSampleIdInPositionRange(cfg, es, chromosome, lowerBound, upperBound, - _id, "", // note : "" is for sampleId - reference, alternative, alleles, genotype, assemblyId, "") + _id, "", datasetString, // note : "" is for sampleId + reference, alternative, alleles, genotype, assemblyId) } else { // implied sampleId query fmt.Printf("Executing Count-Samples for SampleId %s\n", _id) @@ -797,8 +799,8 @@ func executeCountByIds(c echo.Context, ids []string, isVariantIdQuery bool) erro docs, countError = esRepo.CountDocumentsContainerVariantOrSampleIdInPositionRange(cfg, es, chromosome, lowerBound, upperBound, - "", _id, // note : "" is for variantId - reference, alternative, alleles, genotype, assemblyId, "") + "", _id, datasetString, // note : "" is for variantId + reference, alternative, alleles, genotype, assemblyId) } if countError != nil { diff --git a/src/api/repositories/elasticsearch/variants.go b/src/api/repositories/elasticsearch/variants.go index 0877ba60..e5dc733b 100644 --- a/src/api/repositories/elasticsearch/variants.go +++ b/src/api/repositories/elasticsearch/variants.go @@ -106,7 +106,7 @@ func GetDocumentsByDocumentId(cfg *models.Config, es *elasticsearch.Client, id s func GetDocumentsContainerVariantOrSampleIdInPositionRange(cfg *models.Config, es *elasticsearch.Client, chromosome string, lowerBound int, upperBound int, - variantId string, sampleId string, + variantId string, sampleId string, datasetString string, reference string, alternative string, alleles []string, size int, sortByPosition c.SortDirection, includeInfoInResultSet bool, @@ -144,6 +144,15 @@ func GetDocumentsContainerVariantOrSampleIdInPositionRange(cfg *models.Config, e }) } + if datasetString != "" { + mustMap = append(mustMap, map[string]interface{}{ + "query_string": map[string]interface{}{ + "fields": []string{"dataset.keyword"}, + "query": datasetString, + }, + }) + } + if alternative != "" { mustMap = append(mustMap, map[string]interface{}{ "query_string": map[string]interface{}{ @@ -315,9 +324,9 @@ func GetDocumentsContainerVariantOrSampleIdInPositionRange(cfg *models.Config, e func CountDocumentsContainerVariantOrSampleIdInPositionRange(cfg *models.Config, es *elasticsearch.Client, chromosome string, lowerBound int, upperBound int, - variantId string, sampleId string, + variantId string, sampleId string, datasetString string, reference string, alternative string, alleles []string, - genotype c.GenotypeQuery, assemblyId c.AssemblyId, dataset string) (map[string]interface{}, error) { + genotype c.GenotypeQuery, assemblyId c.AssemblyId) (map[string]interface{}, error) { // begin building the request body. mustMap := []map[string]interface{}{{ @@ -352,6 +361,15 @@ func CountDocumentsContainerVariantOrSampleIdInPositionRange(cfg *models.Config, }) } + if datasetString != "" { + mustMap = append(mustMap, map[string]interface{}{ + "query_string": map[string]interface{}{ + "fields": []string{"dataset.keyword"}, + "query": datasetString, + }, + }) + } + if alternative != "" { mustMap = append(mustMap, map[string]interface{}{ "query_string": map[string]interface{}{ @@ -378,13 +396,6 @@ func CountDocumentsContainerVariantOrSampleIdInPositionRange(cfg *models.Config, }) } - if dataset != "" { - mustMap = append(mustMap, map[string]interface{}{ - "query_string": map[string]interface{}{ - "query": "dataset:" + dataset, - }}) - } - rangeMapSlice := []map[string]interface{}{} // TODO: make upperbound and lowerbound nilable, somehow? diff --git a/src/api/tests/build/api/variants_test.go b/src/api/tests/build/api/variants_test.go index 54db9340..6d10a3e2 100644 --- a/src/api/tests/build/api/variants_test.go +++ b/src/api/tests/build/api/variants_test.go @@ -208,6 +208,12 @@ func TestDemoVcfIngestion(t *testing.T) { shouldBeEmptyResponse := common.BuildQueryAndMakeGetVariantsCall("", "*", uuid.New(), true, "", "", "GRCh38", "", "", "", false, t, cfg) assert.True(t, len(shouldBeEmptyResponse.Results) > 0) assert.True(t, len(shouldBeEmptyResponse.Results[0].Calls) == 0) + + // test without dataset id + // - should have content + plentifulResponse := common.BuildQueryAndMakeGetVariantsCall("", "*", uuid.Nil, true, "", "", "GRCh38", "", "", "", false, t, cfg) + assert.True(t, len(plentifulResponse.Results) > 0) + assert.True(t, len(plentifulResponse.Results[0].Calls) > 0) }) t.Run("Test Simple Allele Queries", func(t *testing.T) { From 5682d716be6919bfe6c042e5360bea95b036a524 Mon Sep 17 00:00:00 2001 From: Victor Rocheleau Date: Mon, 14 Aug 2023 18:44:38 +0000 Subject: [PATCH 21/49] nodemon hot reload --- .github/workflows/api.build.yml | 2 +- src/api/dev.Dockerfile | 15 +++++++++++++++ src/api/nodemon.json | 19 +++++++++++++++++++ src/api/run.dev.bash | 7 +++++++ 4 files changed, 42 insertions(+), 1 deletion(-) create mode 100644 src/api/dev.Dockerfile create mode 100644 src/api/nodemon.json create mode 100644 src/api/run.dev.bash diff --git a/.github/workflows/api.build.yml b/.github/workflows/api.build.yml index d9025228..87ef8983 100644 --- a/.github/workflows/api.build.yml +++ b/.github/workflows/api.build.yml @@ -46,5 +46,5 @@ jobs: registry-username: ${{ github.actor }} registry-password: ${{ secrets.GITHUB_TOKEN }} image-name: ghcr.io/bento-platform/gohan-api - development-dockerfile: Dockerfile + development-dockerfile: dev.Dockerfile dockerfile: Dockerfile diff --git a/src/api/dev.Dockerfile b/src/api/dev.Dockerfile new file mode 100644 index 00000000..b17c1132 --- /dev/null +++ b/src/api/dev.Dockerfile @@ -0,0 +1,15 @@ +ARG BASE_IMAGE + +RUN apt-get update -y && \ + apt-get upgrade -y && \ + apt-get install -y tabix && \ + rm -rf /var/lib/apt/lists/* + +RUN npm install -g nodemon + +WORKDIR /gohan_api + +COPY run.dev.bash . +COPY nodemon.json . + +CMD ["bash", "./run.dev.bash"] diff --git a/src/api/nodemon.json b/src/api/nodemon.json new file mode 100644 index 00000000..422c87db --- /dev/null +++ b/src/api/nodemon.json @@ -0,0 +1,19 @@ +{ + "events": { + "crash": "PID=$(lsof -t -i :$INTERNAL_PORT | tr '\n' ' '); if [ -n $PID ]; then echo \"killing PID(s): $PID\"; kill -KILL $PID 2> /dev/null; fi", + "restart": "PID=$(lsof -t -i :$INTERNAL_PORT | tr '\n' ' '); if [ -n $PID ]; then echo \"killing PID(s): $PID\"; kill -KILL $PID 2> /dev/null; fi" + }, + "execMap": { + "go": "go run" + }, + "ext": "go", + "ignore": [ + "src/", + "node_modules/", + "build/", + ".github", + ".git" + ], + "delay": 500 + } + \ No newline at end of file diff --git a/src/api/run.dev.bash b/src/api/run.dev.bash new file mode 100644 index 00000000..235c7689 --- /dev/null +++ b/src/api/run.dev.bash @@ -0,0 +1,7 @@ +#!/usr/bin/env bash + +nodemon main.go & + +echo "==================== NODEMON GO WATCHING ====================" + +wait From dc3493f8d9da7bdc02f5f67193a83eac5736af67 Mon Sep 17 00:00:00 2001 From: Victor Rocheleau Date: Mon, 14 Aug 2023 18:48:28 +0000 Subject: [PATCH 22/49] add missing image dev.Dockerfile --- src/api/dev.Dockerfile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/api/dev.Dockerfile b/src/api/dev.Dockerfile index b17c1132..bffc7572 100644 --- a/src/api/dev.Dockerfile +++ b/src/api/dev.Dockerfile @@ -1,5 +1,7 @@ ARG BASE_IMAGE +FROM $BASE_IMAGE + RUN apt-get update -y && \ apt-get upgrade -y && \ apt-get install -y tabix && \ From 4a932531cf13e0740fc0dffe806590a357ff7666 Mon Sep 17 00:00:00 2001 From: Victor Rocheleau Date: Mon, 14 Aug 2023 18:52:08 +0000 Subject: [PATCH 23/49] node image --- src/api/dev.Dockerfile | 4 +--- src/api/nodemon.json | 1 - 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/src/api/dev.Dockerfile b/src/api/dev.Dockerfile index bffc7572..a5501011 100644 --- a/src/api/dev.Dockerfile +++ b/src/api/dev.Dockerfile @@ -1,6 +1,4 @@ -ARG BASE_IMAGE - -FROM $BASE_IMAGE +FROM ghcr.io/bento-platform/bento_base_image:node-debian-2023.03.22 RUN apt-get update -y && \ apt-get upgrade -y && \ diff --git a/src/api/nodemon.json b/src/api/nodemon.json index 422c87db..8c8245ca 100644 --- a/src/api/nodemon.json +++ b/src/api/nodemon.json @@ -8,7 +8,6 @@ }, "ext": "go", "ignore": [ - "src/", "node_modules/", "build/", ".github", From db1e78e2d5e2c74aeaa1e2768c15a327e7539d8c Mon Sep 17 00:00:00 2001 From: Victor Rocheleau Date: Mon, 14 Aug 2023 19:22:32 +0000 Subject: [PATCH 24/49] go reload with air --- .air.toml | 41 +++++++++++++++++++++++++++++++++++++++++ src/api/dev.Dockerfile | 39 +++++++++++++++++++++++++++++++++------ src/api/nodemon.json | 18 ------------------ src/api/run.dev.bash | 7 ------- 4 files changed, 74 insertions(+), 31 deletions(-) create mode 100644 .air.toml delete mode 100644 src/api/nodemon.json delete mode 100644 src/api/run.dev.bash diff --git a/.air.toml b/.air.toml new file mode 100644 index 00000000..de4c9c0a --- /dev/null +++ b/.air.toml @@ -0,0 +1,41 @@ +# Working directory +# . or absolute path, please note that the directories following must be under root. +root = "." +tmp_dir = "tmp" + +[build] +# Just plain old shell command. You could use `make` as well. +cmd = "go build -o ./tmp/main ." +# Binary file yields from `cmd`. +bin = "tmp/main" +# Customize binary. +full_bin = "APP_ENV=dev APP_USER=air ./tmp/main" +# Watch these filename extensions. +include_ext = ["go", "tpl", "tmpl", "html"] +# Ignore these filename extensions or directories. +exclude_dir = ["assets", "tmp", "vendor", "frontend/node_modules"] +# Watch these directories if you specified. +include_dir = [./src] +# Exclude files. +exclude_file = [] +# It's not necessary to trigger build each time file changes if it's too frequent. +delay = 1000 # ms +# Stop to run old binary when build errors occur. +stop_on_error = true +# This log file places in your tmp_dir. +log = "air_errors.log" + +[log] +# Show log time +time = false + +[color] +# Customize each part's color. If no color found, use the raw app log. +main = "magenta" +watcher = "cyan" +build = "yellow" +runner = "green" + +[misc] +# Delete tmp directory on exit +clean_on_exit = true diff --git a/src/api/dev.Dockerfile b/src/api/dev.Dockerfile index a5501011..d02fa189 100644 --- a/src/api/dev.Dockerfile +++ b/src/api/dev.Dockerfile @@ -1,15 +1,42 @@ -FROM ghcr.io/bento-platform/bento_base_image:node-debian-2023.03.22 +ARG BUILDER_BASE_IMAGE +ARG BASE_IMAGE +# Stage 1 - builder +FROM $BUILDER_BASE_IMAGE as builder + +# Maintainer +LABEL maintainer="Brennan Brouillette " + +WORKDIR /build + +COPY . . + +# Build gohan api +RUN go mod vendor && \ + go build -ldflags="-s -w" -o gohan_api + +# Stage two - executioner +FROM $BASE_IMAGE + +# Debian updates +# - tabix for indexing VCFs +# - other base dependencies provided by the base image RUN apt-get update -y && \ apt-get upgrade -y && \ apt-get install -y tabix && \ rm -rf /var/lib/apt/lists/* -RUN npm install -g nodemon +# Install air for hot-reload +RUN go get -u github.com/cosmtrek/air + +WORKDIR /app -WORKDIR /gohan_api +# Copy pre-built executable from builder stage +COPY --from=builder /build/gohan_api . -COPY run.dev.bash . -COPY nodemon.json . +# Copy static workflow files +COPY workflows/*.wdl /app/workflows/ -CMD ["bash", "./run.dev.bash"] +# Use base image entrypoint to set up user & gosu exec the command below +# Run +CMD [ "air" ] diff --git a/src/api/nodemon.json b/src/api/nodemon.json deleted file mode 100644 index 8c8245ca..00000000 --- a/src/api/nodemon.json +++ /dev/null @@ -1,18 +0,0 @@ -{ - "events": { - "crash": "PID=$(lsof -t -i :$INTERNAL_PORT | tr '\n' ' '); if [ -n $PID ]; then echo \"killing PID(s): $PID\"; kill -KILL $PID 2> /dev/null; fi", - "restart": "PID=$(lsof -t -i :$INTERNAL_PORT | tr '\n' ' '); if [ -n $PID ]; then echo \"killing PID(s): $PID\"; kill -KILL $PID 2> /dev/null; fi" - }, - "execMap": { - "go": "go run" - }, - "ext": "go", - "ignore": [ - "node_modules/", - "build/", - ".github", - ".git" - ], - "delay": 500 - } - \ No newline at end of file diff --git a/src/api/run.dev.bash b/src/api/run.dev.bash deleted file mode 100644 index 235c7689..00000000 --- a/src/api/run.dev.bash +++ /dev/null @@ -1,7 +0,0 @@ -#!/usr/bin/env bash - -nodemon main.go & - -echo "==================== NODEMON GO WATCHING ====================" - -wait From 0a7d02770bc138c5e3fb8b59f2ecb23b15174f56 Mon Sep 17 00:00:00 2001 From: Victor Rocheleau Date: Mon, 14 Aug 2023 19:46:48 +0000 Subject: [PATCH 25/49] dev dockerfile use builder base --- src/api/dev.Dockerfile | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/api/dev.Dockerfile b/src/api/dev.Dockerfile index d02fa189..7a3d4f26 100644 --- a/src/api/dev.Dockerfile +++ b/src/api/dev.Dockerfile @@ -1,5 +1,4 @@ ARG BUILDER_BASE_IMAGE -ARG BASE_IMAGE # Stage 1 - builder FROM $BUILDER_BASE_IMAGE as builder @@ -15,9 +14,6 @@ COPY . . RUN go mod vendor && \ go build -ldflags="-s -w" -o gohan_api -# Stage two - executioner -FROM $BASE_IMAGE - # Debian updates # - tabix for indexing VCFs # - other base dependencies provided by the base image @@ -32,7 +28,7 @@ RUN go get -u github.com/cosmtrek/air WORKDIR /app # Copy pre-built executable from builder stage -COPY --from=builder /build/gohan_api . +COPY /build/gohan_api . # Copy static workflow files COPY workflows/*.wdl /app/workflows/ From a27783020230b799d5d8546815ee6357c0977ca2 Mon Sep 17 00:00:00 2001 From: Victor Rocheleau Date: Mon, 14 Aug 2023 19:55:35 +0000 Subject: [PATCH 26/49] dev dockerfil fix --- src/api/dev.Dockerfile | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/src/api/dev.Dockerfile b/src/api/dev.Dockerfile index 7a3d4f26..9596a6e1 100644 --- a/src/api/dev.Dockerfile +++ b/src/api/dev.Dockerfile @@ -6,13 +6,12 @@ FROM $BUILDER_BASE_IMAGE as builder # Maintainer LABEL maintainer="Brennan Brouillette " -WORKDIR /build +WORKDIR /app COPY . . # Build gohan api -RUN go mod vendor && \ - go build -ldflags="-s -w" -o gohan_api +RUN go mod vendor # Debian updates # - tabix for indexing VCFs @@ -25,11 +24,6 @@ RUN apt-get update -y && \ # Install air for hot-reload RUN go get -u github.com/cosmtrek/air -WORKDIR /app - -# Copy pre-built executable from builder stage -COPY /build/gohan_api . - # Copy static workflow files COPY workflows/*.wdl /app/workflows/ From 566046833abf0d906a934d5a6ac5ff3521e0ad82 Mon Sep 17 00:00:00 2001 From: v-rocheleau Date: Mon, 14 Aug 2023 18:00:31 -0400 Subject: [PATCH 27/49] air config --- .air.toml | 2 +- src/api/dev.Dockerfile | 7 ++----- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/.air.toml b/.air.toml index de4c9c0a..42cdf0a7 100644 --- a/.air.toml +++ b/.air.toml @@ -15,7 +15,7 @@ include_ext = ["go", "tpl", "tmpl", "html"] # Ignore these filename extensions or directories. exclude_dir = ["assets", "tmp", "vendor", "frontend/node_modules"] # Watch these directories if you specified. -include_dir = [./src] +include_dir = [] # Exclude files. exclude_file = [] # It's not necessary to trigger build each time file changes if it's too frequent. diff --git a/src/api/dev.Dockerfile b/src/api/dev.Dockerfile index 9596a6e1..da13aafa 100644 --- a/src/api/dev.Dockerfile +++ b/src/api/dev.Dockerfile @@ -11,7 +11,7 @@ WORKDIR /app COPY . . # Build gohan api -RUN go mod vendor +RUN go mod vendor && go install github.com/cosmtrek/air@latest # Debian updates # - tabix for indexing VCFs @@ -21,12 +21,9 @@ RUN apt-get update -y && \ apt-get install -y tabix && \ rm -rf /var/lib/apt/lists/* -# Install air for hot-reload -RUN go get -u github.com/cosmtrek/air - # Copy static workflow files COPY workflows/*.wdl /app/workflows/ # Use base image entrypoint to set up user & gosu exec the command below # Run -CMD [ "air" ] +CMD [ "air", "-c", ".air.toml" ] From 75461853a869398649cbb0c08be32025f2509e2a Mon Sep 17 00:00:00 2001 From: v-rocheleau Date: Mon, 14 Aug 2023 18:09:55 -0400 Subject: [PATCH 28/49] test air install --- src/api/dev.Dockerfile | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/api/dev.Dockerfile b/src/api/dev.Dockerfile index da13aafa..e804de55 100644 --- a/src/api/dev.Dockerfile +++ b/src/api/dev.Dockerfile @@ -8,10 +8,10 @@ LABEL maintainer="Brennan Brouillette Date: Tue, 15 Aug 2023 10:28:55 -0400 Subject: [PATCH 29/49] working air conf --- .air.toml | 41 ----------------------------------------- .gitignore | 1 + src/api/dev.Dockerfile | 17 +++++++++-------- 3 files changed, 10 insertions(+), 49 deletions(-) delete mode 100644 .air.toml diff --git a/.air.toml b/.air.toml deleted file mode 100644 index 42cdf0a7..00000000 --- a/.air.toml +++ /dev/null @@ -1,41 +0,0 @@ -# Working directory -# . or absolute path, please note that the directories following must be under root. -root = "." -tmp_dir = "tmp" - -[build] -# Just plain old shell command. You could use `make` as well. -cmd = "go build -o ./tmp/main ." -# Binary file yields from `cmd`. -bin = "tmp/main" -# Customize binary. -full_bin = "APP_ENV=dev APP_USER=air ./tmp/main" -# Watch these filename extensions. -include_ext = ["go", "tpl", "tmpl", "html"] -# Ignore these filename extensions or directories. -exclude_dir = ["assets", "tmp", "vendor", "frontend/node_modules"] -# Watch these directories if you specified. -include_dir = [] -# Exclude files. -exclude_file = [] -# It's not necessary to trigger build each time file changes if it's too frequent. -delay = 1000 # ms -# Stop to run old binary when build errors occur. -stop_on_error = true -# This log file places in your tmp_dir. -log = "air_errors.log" - -[log] -# Show log time -time = false - -[color] -# Customize each part's color. If no color found, use the raw app log. -main = "magenta" -watcher = "cyan" -build = "yellow" -runner = "green" - -[misc] -# Delete tmp directory on exit -clean_on_exit = true diff --git a/.gitignore b/.gitignore index 279451d5..06f9d3cf 100644 --- a/.gitignore +++ b/.gitignore @@ -12,6 +12,7 @@ data/ data-x/ # vcfs +vcfs/* */vcfs/*.txt */vcfs/*.vcf */vcfs/*.vcf.gz diff --git a/src/api/dev.Dockerfile b/src/api/dev.Dockerfile index e804de55..6ef078c4 100644 --- a/src/api/dev.Dockerfile +++ b/src/api/dev.Dockerfile @@ -8,11 +8,6 @@ LABEL maintainer="Brennan Brouillette Date: Tue, 15 Aug 2023 16:29:07 -0400 Subject: [PATCH 30/49] fix missing workflows from path --- src/api/dev.Dockerfile | 4 +++- src/api/workflows/main.go | 2 +- src/api/workflows/vcf_gz.wdl | 6 ++++-- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/api/dev.Dockerfile b/src/api/dev.Dockerfile index 6ef078c4..5880a1ee 100644 --- a/src/api/dev.Dockerfile +++ b/src/api/dev.Dockerfile @@ -25,6 +25,8 @@ RUN go mod download && go mod vendor COPY workflows/*.wdl /app/workflows/ # Repository mounted to the container -WORKDIR /app/src/api +# WORKDIR /app/repo/src/api +WORKDIR /gohan-api/src/api + CMD [ "air" ] diff --git a/src/api/workflows/main.go b/src/api/workflows/main.go index c2387cdb..d04ec42a 100644 --- a/src/api/workflows/main.go +++ b/src/api/workflows/main.go @@ -14,7 +14,7 @@ var WORKFLOW_VARIANT_SCHEMA WorkflowSchema = map[string]interface{}{ "description": "This ingestion workflow will validate and ingest a BGZip-Compressed-VCF into Elasticsearch.", "data_type": "variant", "file": "vcf_gz.wdl", - "purpose": "ingestion", + "action": "ingestion", "inputs": []map[string]interface{}{ { "id": "vcf_gz_file_names", diff --git a/src/api/workflows/vcf_gz.wdl b/src/api/workflows/vcf_gz.wdl index 603b54c1..83625fe7 100644 --- a/src/api/workflows/vcf_gz.wdl +++ b/src/api/workflows/vcf_gz.wdl @@ -3,7 +3,9 @@ workflow vcf_gz { Array[File] vcf_gz_file_names # redundant Array[String] original_vcf_gz_file_paths String assembly_id - String dataset + String project_id + String dataset_id + String service_url String filter_out_references String temp_token String temp_token_host @@ -14,7 +16,7 @@ workflow vcf_gz { input: gohan_url = gohan_url, vcf_gz_file_name = file_name, assembly_id = assembly_id, - dataset = dataset, + dataset = dataset_id, filter_out_references = filter_out_references, temp_token = temp_token, temp_token_host = temp_token_host From 6cffa72be3d174d2c0e53fae0b755e7d731fe9a3 Mon Sep 17 00:00:00 2001 From: v-rocheleau Date: Wed, 16 Aug 2023 12:50:35 -0400 Subject: [PATCH 31/49] dev container uses workflow mount --- src/api/dev.Dockerfile | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/api/dev.Dockerfile b/src/api/dev.Dockerfile index 5880a1ee..601835db 100644 --- a/src/api/dev.Dockerfile +++ b/src/api/dev.Dockerfile @@ -21,12 +21,7 @@ RUN go install github.com/cosmtrek/air@latest COPY go.mod go.sum ./ RUN go mod download && go mod vendor -# Copy static workflow files -COPY workflows/*.wdl /app/workflows/ - # Repository mounted to the container -# WORKDIR /app/repo/src/api WORKDIR /gohan-api/src/api - CMD [ "air" ] From 1130879673dd7d32fc3e6d8bd283c98f0eeb0864 Mon Sep 17 00:00:00 2001 From: v-rocheleau Date: Wed, 16 Aug 2023 20:39:04 +0000 Subject: [PATCH 32/49] vcf_gz.wdl calls gohan using access token --- src/api/workflows/vcf_gz.wdl | 31 +++++++++++-------------------- 1 file changed, 11 insertions(+), 20 deletions(-) diff --git a/src/api/workflows/vcf_gz.wdl b/src/api/workflows/vcf_gz.wdl index 83625fe7..19d30cb7 100644 --- a/src/api/workflows/vcf_gz.wdl +++ b/src/api/workflows/vcf_gz.wdl @@ -1,26 +1,20 @@ workflow vcf_gz { - String gohan_url - Array[File] vcf_gz_file_names # redundant - Array[String] original_vcf_gz_file_paths + String service_url + Array[File] vcf_gz_file_names String assembly_id String project_id String dataset_id - String service_url String filter_out_references - String temp_token - String temp_token_host + String secret__access_token - # scatter(file_name in vcf_gz_file_names) { - scatter(file_name in original_vcf_gz_file_paths) { + scatter(file_name in vcf_gz_file_names) { call vcf_gz_gohan { - input: gohan_url = gohan_url, + input: gohan_url = service_url, vcf_gz_file_name = file_name, assembly_id = assembly_id, dataset = dataset_id, filter_out_references = filter_out_references, - temp_token = temp_token, - temp_token_host = temp_token_host - + access_token = secret__access_token, } } } @@ -31,21 +25,18 @@ task vcf_gz_gohan { String assembly_id String dataset String filter_out_references - String temp_token - String temp_token_host + String access_token command { - echo "Using temporary-token : ${temp_token}" - QUERY="fileNames=${vcf_gz_file_name}&assemblyId=${assembly_id}&dataset=${dataset}&filterOutReferences=${filter_out_references}" # TODO: refactor # append temporary-token header if present - if [ "${temp_token}" == "" ] + if [ "${access_token}" == "" ] then RUN_RESPONSE=$(curl -vvv "${gohan_url}/private/variants/ingestion/run?$QUERY" -k | sed 's/"/\"/g') else - RUN_RESPONSE=$(curl -vvv -H "Host: ${temp_token_host}" -H "X-TT: ${temp_token}" "${gohan_url}/private/variants/ingestion/run?$QUERY" -k | sed 's/"/\"/g') + RUN_RESPONSE=$(curl -vvv -H "Authorization: ${access_token}" "${gohan_url}/private/variants/ingestion/run?$QUERY" -k | sed 's/"/\"/g') fi echo $RUN_RESPONSE @@ -67,11 +58,11 @@ task vcf_gz_gohan { # TODO: refactor # fetch run requests # append temporary-token header if present - if [ "${temp_token}" == "" ] + if [ "${access_token}" == "" ] then REQUESTS=$(curl -vvv "${gohan_url}/private/variants/ingestion/requests" -k) else - REQUESTS=$(curl -vvv -H "Host: ${temp_token_host}" -H "X-TT: ${temp_token}" "${gohan_url}/private/variants/ingestion/requests" -k) + REQUESTS=$(curl -vvv -H "Authorization: ${access_token}" "${gohan_url}/private/variants/ingestion/requests" -k) fi echo $REQUESTS From b10044d4b2a5bf9c0e178b9966c5b9f48561f4cc Mon Sep 17 00:00:00 2001 From: v-rocheleau Date: Wed, 16 Aug 2023 17:58:53 -0400 Subject: [PATCH 33/49] wes client token in workflow --- src/api/workflows/vcf_gz.wdl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/api/workflows/vcf_gz.wdl b/src/api/workflows/vcf_gz.wdl index 19d30cb7..034df696 100644 --- a/src/api/workflows/vcf_gz.wdl +++ b/src/api/workflows/vcf_gz.wdl @@ -29,6 +29,7 @@ task vcf_gz_gohan { command { QUERY="fileNames=${vcf_gz_file_name}&assemblyId=${assembly_id}&dataset=${dataset}&filterOutReferences=${filter_out_references}" + AUTH_HEADER="Authorization: Bearer ${access_token}" # TODO: refactor # append temporary-token header if present @@ -36,7 +37,7 @@ task vcf_gz_gohan { then RUN_RESPONSE=$(curl -vvv "${gohan_url}/private/variants/ingestion/run?$QUERY" -k | sed 's/"/\"/g') else - RUN_RESPONSE=$(curl -vvv -H "Authorization: ${access_token}" "${gohan_url}/private/variants/ingestion/run?$QUERY" -k | sed 's/"/\"/g') + RUN_RESPONSE=$(curl -vvv -H $AUTH_HEADER "${gohan_url}/private/variants/ingestion/run?$QUERY" -k | sed 's/"/\"/g') fi echo $RUN_RESPONSE @@ -62,7 +63,7 @@ task vcf_gz_gohan { then REQUESTS=$(curl -vvv "${gohan_url}/private/variants/ingestion/requests" -k) else - REQUESTS=$(curl -vvv -H "Authorization: ${access_token}" "${gohan_url}/private/variants/ingestion/requests" -k) + REQUESTS=$(curl -vvv -H $AUTH_HEADER "${gohan_url}/private/variants/ingestion/requests" -k) fi echo $REQUESTS From 5afd1c6666e464d2b8e177d9b65f1666024665fe Mon Sep 17 00:00:00 2001 From: Victor Rocheleau Date: Thu, 17 Aug 2023 17:10:03 +0000 Subject: [PATCH 34/49] fix authz header in curl --- src/api/workflows/vcf_gz.wdl | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/api/workflows/vcf_gz.wdl b/src/api/workflows/vcf_gz.wdl index 034df696..38d17046 100644 --- a/src/api/workflows/vcf_gz.wdl +++ b/src/api/workflows/vcf_gz.wdl @@ -28,6 +28,8 @@ task vcf_gz_gohan { String access_token command { + echo "Using temporary-token : ${access_token}" + QUERY="fileNames=${vcf_gz_file_name}&assemblyId=${assembly_id}&dataset=${dataset}&filterOutReferences=${filter_out_references}" AUTH_HEADER="Authorization: Bearer ${access_token}" @@ -37,7 +39,7 @@ task vcf_gz_gohan { then RUN_RESPONSE=$(curl -vvv "${gohan_url}/private/variants/ingestion/run?$QUERY" -k | sed 's/"/\"/g') else - RUN_RESPONSE=$(curl -vvv -H $AUTH_HEADER "${gohan_url}/private/variants/ingestion/run?$QUERY" -k | sed 's/"/\"/g') + RUN_RESPONSE=$(curl -vvv -H "$AUTH_HEADER" "${gohan_url}/private/variants/ingestion/run?$QUERY" -k | sed 's/"/\"/g') fi echo $RUN_RESPONSE @@ -63,7 +65,7 @@ task vcf_gz_gohan { then REQUESTS=$(curl -vvv "${gohan_url}/private/variants/ingestion/requests" -k) else - REQUESTS=$(curl -vvv -H $AUTH_HEADER "${gohan_url}/private/variants/ingestion/requests" -k) + REQUESTS=$(curl -vvv -H "$AUTH_HEADER" "${gohan_url}/private/variants/ingestion/requests" -k) fi echo $REQUESTS From d7d7de7262dd9a9e58e138be1e27788516b3cb9f Mon Sep 17 00:00:00 2001 From: Victor Rocheleau Date: Thu, 17 Aug 2023 20:34:08 +0000 Subject: [PATCH 35/49] vscode go debug config and doc --- .gitignore | 3 ++- .vscode/launch.json | 15 +++++++++++++++ README.md | 8 ++++++++ 3 files changed, 25 insertions(+), 1 deletion(-) create mode 100644 .vscode/launch.json diff --git a/.gitignore b/.gitignore index 06f9d3cf..54413354 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ -.vscode +.vscode/* +!.vscode/launch.json .DS_store .idea diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 00000000..d2b742bc --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,15 @@ +{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. + // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "name": "Attach to PID (Bento)", + "type": "go", + "request": "attach", + "mode": "local", + "processId": 0, + } + ] +} diff --git a/README.md b/README.md index 91fdecbf..b608a2e9 100644 --- a/README.md +++ b/README.md @@ -495,3 +495,11 @@ Once `elasticsearch`, `drs`, the `api`, and the `gateway` are up, run make test-api-dev ``` +## Dev Container debug + +Interactive debug in VSCode is only possible When using the development image of gohan-api. + +Using the "Attach to PID(Bento)" debug config, select the PID associated with the following path: +``` +/gohan-api/src/api/tmp/main +``` From 0213c1a73230fc0d0f7e59e2905e0f985d5009d7 Mon Sep 17 00:00:00 2001 From: Victor Rocheleau Date: Thu, 17 Aug 2023 21:05:21 +0000 Subject: [PATCH 36/49] removed private url path prefix for drs ingest calls --- src/api/services/ingestion.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/api/services/ingestion.go b/src/api/services/ingestion.go index 40e434ec..b70e23f1 100644 --- a/src/api/services/ingestion.go +++ b/src/api/services/ingestion.go @@ -257,7 +257,7 @@ func (i *IngestionService) UploadVcfGzToDrs(cfg *models.Config, drsBridgeDirecto ) for { // prepare upload request to drs - r, _ := http.NewRequest("POST", drsUrl+"/private/ingest", bytes.NewBufferString(data)) + r, _ := http.NewRequest("POST", drsUrl+"/ingest", bytes.NewBufferString(data)) r.SetBasicAuth(drsUsername, drsPassword) r.Header.Add("Content-Type", "application/json") From 217cf8f737bfae2dd36bba843c4d2b187e23ae3e Mon Sep 17 00:00:00 2001 From: Victor Rocheleau Date: Mon, 21 Aug 2023 17:06:03 -0400 Subject: [PATCH 37/49] wes client auth --- src/api/mvc/variants/main.go | 13 +++++++++---- src/api/services/ingestion.go | 17 ++++++++++++----- src/api/workflows/vcf_gz.wdl | 4 +++- 3 files changed, 24 insertions(+), 10 deletions(-) diff --git a/src/api/mvc/variants/main.go b/src/api/mvc/variants/main.go index 946edf69..d1595476 100644 --- a/src/api/mvc/variants/main.go +++ b/src/api/mvc/variants/main.go @@ -104,8 +104,6 @@ func VariantsIngest(c echo.Context) error { cfg := gc.Config vcfPath := cfg.Api.VcfPath drsUrl := cfg.Drs.Url - drsUsername := cfg.Drs.Username - drsPassword := cfg.Drs.Password // query parameters assemblyId := gc.AssemblyId @@ -149,6 +147,13 @@ func VariantsIngest(c echo.Context) error { } // + // Authz related + authHeader := c.Request().Header.Get("Authorization") + datasetId := c.QueryParam("dataset") + projectId := c.QueryParam("project") + + c.Logger().Debug(authHeader, datasetId) + dirName := c.QueryParam("directory") if dirName != "" { if strings.HasPrefix(dirName, cfg.Drs.BridgeDirectory) { @@ -353,7 +358,7 @@ func VariantsIngest(c echo.Context) error { // --- push compressed to DRS fmt.Printf("Uploading %s to DRS !\n", gzippedFileName) - drsFileId := ingestionService.UploadVcfGzToDrs(cfg, cfg.Drs.BridgeDirectory, gzippedFileName, drsUrl, drsUsername, drsPassword) + drsFileId := ingestionService.UploadVcfGzToDrs(cfg, cfg.Drs.BridgeDirectory, gzippedFileName, drsUrl, projectId, datasetId, authHeader) if drsFileId == "" { msg := "Something went wrong: DRS File Id is empty for " + gzippedFileName fmt.Println(msg) @@ -367,7 +372,7 @@ func VariantsIngest(c echo.Context) error { // -- push tabix to DRS fmt.Printf("Uploading %s to DRS !\n", tabixFileNameWithRelativePath) - drsTabixFileId := ingestionService.UploadVcfGzToDrs(cfg, cfg.Drs.BridgeDirectory, tabixFileNameWithRelativePath, drsUrl, drsUsername, drsPassword) + drsTabixFileId := ingestionService.UploadVcfGzToDrs(cfg, cfg.Drs.BridgeDirectory, tabixFileNameWithRelativePath, drsUrl, projectId, datasetId, authHeader) if drsTabixFileId == "" { msg := "Something went wrong: DRS Tabix File Id is empty for " + tabixFileNameWithRelativePath fmt.Println(msg) diff --git a/src/api/services/ingestion.go b/src/api/services/ingestion.go index b70e23f1..056a66ef 100644 --- a/src/api/services/ingestion.go +++ b/src/api/services/ingestion.go @@ -19,6 +19,7 @@ import ( "io/ioutil" "log" "net/http" + "net/url" "os" "os/exec" "path" @@ -239,13 +240,13 @@ func (i *IngestionService) GenerateTabix(gzippedFilePath string) (string, string return dir, file, nil } -func (i *IngestionService) UploadVcfGzToDrs(cfg *models.Config, drsBridgeDirectory string, gzippedFileName string, drsUrl, drsUsername, drsPassword string) string { +func (i *IngestionService) UploadVcfGzToDrs(cfg *models.Config, drsBridgeDirectory string, gzippedFileName string, drsUrl string, project_id, dataset_id string, authHeader string) string { if cfg.Debug { http.DefaultTransport.(*http.Transport).TLSClientConfig = &tls.Config{InsecureSkipVerify: true} } - data := fmt.Sprintf("{\"path\": \"%s/%s\"}", drsBridgeDirectory, gzippedFileName) + path := fmt.Sprintf("%s/%s", drsBridgeDirectory, gzippedFileName) var ( drsId string @@ -257,10 +258,16 @@ func (i *IngestionService) UploadVcfGzToDrs(cfg *models.Config, drsBridgeDirecto ) for { // prepare upload request to drs - r, _ := http.NewRequest("POST", drsUrl+"/ingest", bytes.NewBufferString(data)) + form := url.Values{} + form.Add("path", path) + form.Add("dataset_id", dataset_id) + form.Add("project_id", project_id) + form.Add("data_type", "variant") - r.SetBasicAuth(drsUsername, drsPassword) - r.Header.Add("Content-Type", "application/json") + r, _ := http.NewRequest("POST", drsUrl+"/ingest", strings.NewReader(form.Encode())) + + r.Header.Add("Authorization", authHeader) + r.Header.Add("Content-Type", "application/x-www-form-urlencoded") client := &http.Client{} diff --git a/src/api/workflows/vcf_gz.wdl b/src/api/workflows/vcf_gz.wdl index 38d17046..66cd30cd 100644 --- a/src/api/workflows/vcf_gz.wdl +++ b/src/api/workflows/vcf_gz.wdl @@ -12,6 +12,7 @@ workflow vcf_gz { input: gohan_url = service_url, vcf_gz_file_name = file_name, assembly_id = assembly_id, + project = project_id, dataset = dataset_id, filter_out_references = filter_out_references, access_token = secret__access_token, @@ -23,6 +24,7 @@ task vcf_gz_gohan { String gohan_url String vcf_gz_file_name String assembly_id + String project String dataset String filter_out_references String access_token @@ -30,7 +32,7 @@ task vcf_gz_gohan { command { echo "Using temporary-token : ${access_token}" - QUERY="fileNames=${vcf_gz_file_name}&assemblyId=${assembly_id}&dataset=${dataset}&filterOutReferences=${filter_out_references}" + QUERY="fileNames=${vcf_gz_file_name}&assemblyId=${assembly_id}&dataset=${dataset}&project=${project}&filterOutReferences=${filter_out_references}" AUTH_HEADER="Authorization: Bearer ${access_token}" # TODO: refactor From 8cfdc2a3c36703a6cdcd0a0ad5c81034bba7d40c Mon Sep 17 00:00:00 2001 From: v-rocheleau Date: Tue, 22 Aug 2023 14:20:35 -0400 Subject: [PATCH 38/49] gohan url from config --- src/api/workflows/main.go | 7 +++++++ src/api/workflows/vcf_gz.wdl | 4 ++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/src/api/workflows/main.go b/src/api/workflows/main.go index d04ec42a..24903d8b 100644 --- a/src/api/workflows/main.go +++ b/src/api/workflows/main.go @@ -36,6 +36,13 @@ var WORKFLOW_VARIANT_SCHEMA WorkflowSchema = map[string]interface{}{ "values": []string{"true", "false"}, // simulate boolean type "default": "false", }, + { + "id": "gohan_url", + "type": "string", + "required": true, + "value": "FROM_CONFIG", + "hidden": true, + }, }, "outputs": []map[string]interface{}{ { diff --git a/src/api/workflows/vcf_gz.wdl b/src/api/workflows/vcf_gz.wdl index 66cd30cd..343f3de3 100644 --- a/src/api/workflows/vcf_gz.wdl +++ b/src/api/workflows/vcf_gz.wdl @@ -1,5 +1,5 @@ workflow vcf_gz { - String service_url + String gohan_url Array[File] vcf_gz_file_names String assembly_id String project_id @@ -9,7 +9,7 @@ workflow vcf_gz { scatter(file_name in vcf_gz_file_names) { call vcf_gz_gohan { - input: gohan_url = service_url, + input: gohan_url = gohan_url, vcf_gz_file_name = file_name, assembly_id = assembly_id, project = project_id, From 7075ca3922fcc9259639e12a244d89d6e7382830 Mon Sep 17 00:00:00 2001 From: Victor Rocheleau Date: Wed, 23 Aug 2023 16:26:25 +0000 Subject: [PATCH 39/49] uniform dataset responses --- src/api/main.go | 1 + src/api/models/dtos/main.go | 6 +++++- src/api/mvc/variants/main.go | 38 ++++++++++++++++++++++++++++++------ 3 files changed, 38 insertions(+), 7 deletions(-) diff --git a/src/api/main.go b/src/api/main.go index be8b9a3f..8f55502b 100644 --- a/src/api/main.go +++ b/src/api/main.go @@ -180,6 +180,7 @@ func main() { // --- Dataset e.GET("/datasets/:dataset/summary", variantsMvc.GetDatasetSummary) + e.GET("/datasets/:dataset/data-types", variantsMvc.GetDatasetDataTypes) // TODO: refactor (deduplicate) -- e.GET("/variants/ingestion/run", variantsMvc.VariantsIngest, diff --git a/src/api/models/dtos/main.go b/src/api/models/dtos/main.go index 6d782967..6f3ee11b 100644 --- a/src/api/models/dtos/main.go +++ b/src/api/models/dtos/main.go @@ -60,11 +60,15 @@ type VariantCall struct { } // --- Dataset -type DatasetSummaryResponseDto struct { +type DataTypeSummaryResponseDto struct { Count int `json:"count"` DataTypeSpecific map[string]interface{} `json:"data_type_specific"` // TODO: type-safety? } +type DatasetDataTypeSummaryResponseDto struct { + Variant DataTypeSummaryResponseDto `json:"variant"` +} + // -- Genes type GenesResponseDTO struct { Status int `json:"status"` diff --git a/src/api/mvc/variants/main.go b/src/api/mvc/variants/main.go index d1595476..9f750863 100644 --- a/src/api/mvc/variants/main.go +++ b/src/api/mvc/variants/main.go @@ -21,6 +21,7 @@ import ( "gohan/api/models/dtos/errors" "gohan/api/models/indexes" "gohan/api/models/ingest" + "gohan/api/models/schemas" "gohan/api/mvc" esRepo "gohan/api/repositories/elasticsearch" variantService "gohan/api/services/variants" @@ -510,18 +511,43 @@ func GetDatasetSummary(c echo.Context) error { // wait for all HTTP fetches to complete. if err := g.Wait(); err == nil { fmt.Printf("Successfully Obtained Dataset '%s' Summary \n", dataset) - - return c.JSON(http.StatusOK, &dtos.DatasetSummaryResponseDto{ - Count: int(totalVariantsCount), - DataTypeSpecific: map[string]interface{}{ - "samples": len(bucketsMapped), + payload := &dtos.DatasetDataTypeSummaryResponseDto{ + Variant: dtos.DataTypeSummaryResponseDto{ + Count: int(totalVariantsCount), + DataTypeSpecific: map[string]interface{}{ + "samples": len(bucketsMapped), + }, }, - }) + } + return c.JSON(http.StatusOK, payload) } else { return c.JSON(http.StatusInternalServerError, errors.CreateSimpleInternalServerError("Something went wrong.. Please try again later!")) } } +type DataTypeSummary struct { + Id string `json:"id"` + Label string `json:"label"` + Queryable bool `json:"queryable"` + Schema map[string]interface{} `json:"schema"` + Count int `json:"count"` +} + +type DataTypeResponseDto = []DataTypeSummary + +func GetDatasetDataTypes(c echo.Context) error { + count := 0 + return c.JSON(http.StatusOK, &DataTypeResponseDto{ + DataTypeSummary{ + Id: "variant", + Label: "Variants", + Queryable: true, + Schema: schemas.VARIANT_SCHEMA, + Count: count, + }, + }) +} + func executeGetByIds(c echo.Context, ids []string, isVariantIdQuery bool, isDocumentIdQuery bool) error { gc := c.(*contexts.GohanContext) cfg := gc.Config From 485d66a3cc7c2aa0cf77b4ac7e28213d5b8ffc30 Mon Sep 17 00:00:00 2001 From: Victor Rocheleau Date: Wed, 23 Aug 2023 21:18:40 +0000 Subject: [PATCH 40/49] fix dataset routes --- src/api/main.go | 8 ++++++-- src/api/middleware/datasetMiddleware.go | 16 ++++++++++++++++ src/api/mvc/variants/main.go | 2 +- 3 files changed, 23 insertions(+), 3 deletions(-) diff --git a/src/api/main.go b/src/api/main.go index 8f55502b..696ca13e 100644 --- a/src/api/main.go +++ b/src/api/main.go @@ -179,8 +179,12 @@ func main() { gam.ValidatePotentialGenotypeQueryParameter) // --- Dataset - e.GET("/datasets/:dataset/summary", variantsMvc.GetDatasetSummary) - e.GET("/datasets/:dataset/data-types", variantsMvc.GetDatasetDataTypes) + e.GET("/datasets/:dataset/summary", variantsMvc.GetDatasetSummary, + // middleware + gam.MandateDatasetPathParam) + e.GET("/datasets/:dataset/data-types", variantsMvc.GetDatasetDataTypes, + // middleware + gam.MandateDatasetPathParam) // TODO: refactor (deduplicate) -- e.GET("/variants/ingestion/run", variantsMvc.VariantsIngest, diff --git a/src/api/middleware/datasetMiddleware.go b/src/api/middleware/datasetMiddleware.go index 85420145..60252624 100644 --- a/src/api/middleware/datasetMiddleware.go +++ b/src/api/middleware/datasetMiddleware.go @@ -40,6 +40,22 @@ func MandateDatasetAttribute(next echo.HandlerFunc) echo.HandlerFunc { } } +func MandateDatasetPathParam(next echo.HandlerFunc) echo.HandlerFunc { + return func(c echo.Context) error { + dataset := c.Param("dataset") + if !utils.IsValidUUID(dataset) { + fmt.Printf("Invalid dataset %s\n", dataset) + + return c.JSON(http.StatusBadRequest, errors.CreateSimpleBadRequest(fmt.Sprintf("invalid dataset %s - please provide a valid uuid", dataset))) + } + + gc := c.(*contexts.GohanContext) + gc.Dataset = uuid.MustParse(dataset) + + return next(gc) + } +} + /* Echo middleware to ensure a `dataset` HTTP query parameter is valid if provided */ diff --git a/src/api/mvc/variants/main.go b/src/api/mvc/variants/main.go index 9f750863..677ce994 100644 --- a/src/api/mvc/variants/main.go +++ b/src/api/mvc/variants/main.go @@ -453,13 +453,13 @@ func GetAllVariantIngestionRequests(c echo.Context) error { } func GetDatasetSummary(c echo.Context) error { - fmt.Printf("[%s] - GetDatasetSummary hit!\n", time.Now()) gc := c.(*contexts.GohanContext) cfg := gc.Config es := gc.Es7Client dataset := gc.Dataset + fmt.Printf("[%s] - GetDatasetSummary hit: [%s]!\n", time.Now(), dataset.String()) // parallelize these two es queries From eff15b64c09fbe7a8df636b67eb251d6a4d5931e Mon Sep 17 00:00:00 2001 From: Victor Rocheleau Date: Thu, 24 Aug 2023 16:18:10 +0000 Subject: [PATCH 41/49] variants count by dataset endpoint --- src/api/mvc/variants/main.go | 35 ++++++++++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/src/api/mvc/variants/main.go b/src/api/mvc/variants/main.go index 677ce994..f49b9ec7 100644 --- a/src/api/mvc/variants/main.go +++ b/src/api/mvc/variants/main.go @@ -452,6 +452,39 @@ func GetAllVariantIngestionRequests(c echo.Context) error { return c.JSON(http.StatusOK, m) } +func GetDatasetVariantsCount(c echo.Context) int { + gc := c.(*contexts.GohanContext) + cfg := gc.Config + es := gc.Es7Client + + dataset := gc.Dataset + + var ( + totalVariantsCount = 0.0 + g = new(errgroup.Group) + ) + // request #1 + g.Go(func() error { + docs, countError := esRepo.CountDocumentsContainerVariantOrSampleIdInPositionRange(cfg, es, + "*", 0, 0, + "", "", dataset.String(), // note : both variantId and sampleId are deliberately set to "" + "", "", []string{}, "", "") + if countError != nil { + fmt.Printf("Failed to count variants in dataset %s\n", dataset) + return countError + } + + totalVariantsCount = docs["count"].(float64) + return nil + }) + + // wait for all HTTP fetches to complete. + if err := g.Wait(); err == nil { + fmt.Printf("Successfully Obtained Dataset '%s' variants count: '%f' \n", dataset, totalVariantsCount) + } + return int(totalVariantsCount) +} + func GetDatasetSummary(c echo.Context) error { gc := c.(*contexts.GohanContext) @@ -536,7 +569,7 @@ type DataTypeSummary struct { type DataTypeResponseDto = []DataTypeSummary func GetDatasetDataTypes(c echo.Context) error { - count := 0 + count := GetDatasetVariantsCount(c) return c.JSON(http.StatusOK, &DataTypeResponseDto{ DataTypeSummary{ Id: "variant", From 93b5dad8dab3e0df3959b8ea90c921667f28d8c1 Mon Sep 17 00:00:00 2001 From: v-rocheleau Date: Fri, 25 Aug 2023 14:46:30 -0400 Subject: [PATCH 42/49] metadata_schema in data type response --- src/api/models/schemas/schemas.go | 4 ++++ src/api/mvc/data-types/main.go | 9 +++++---- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/src/api/models/schemas/schemas.go b/src/api/models/schemas/schemas.go index 7e520ea6..6077c209 100644 --- a/src/api/models/schemas/schemas.go +++ b/src/api/models/schemas/schemas.go @@ -7,6 +7,10 @@ import ( type Schema map[string]interface{} +var OBJECT_SCHEMA Schema = Schema{ + "type": "object", +} + var VARIANT_METADATA_SCHEMA Schema = map[string]interface{}{ "$id": "variant:metadata", // TODO: Real ID "$schema": "http://json-schema.org/draft-07/schema#", diff --git a/src/api/mvc/data-types/main.go b/src/api/mvc/data-types/main.go index 299ee94f..d95d3e07 100644 --- a/src/api/mvc/data-types/main.go +++ b/src/api/mvc/data-types/main.go @@ -12,10 +12,11 @@ import ( ) var variantDataTypeJson = map[string]interface{}{ - "id": "variant", - "label": "Variants", - "queryable": true, - "schema": schemas.VARIANT_SCHEMA, + "id": "variant", + "label": "Variants", + "queryable": true, + "schema": schemas.VARIANT_SCHEMA, + "metadata_schema": schemas.OBJECT_SCHEMA, } func GetDataTypes(c echo.Context) error { From b324c00bd21459616ec597abaaba0655c65bc1a3 Mon Sep 17 00:00:00 2001 From: Victor Rocheleau Date: Mon, 28 Aug 2023 16:47:20 +0000 Subject: [PATCH 43/49] delete variants by dataset id endpoint --- src/api/contexts/contexts.go | 1 + src/api/main.go | 3 + src/api/middleware/datasetMiddleware.go | 15 +++++ src/api/mvc/variants/main.go | 35 ++++++++++++ .../repositories/elasticsearch/variants.go | 56 +++++++++++++++++++ 5 files changed, 110 insertions(+) diff --git a/src/api/contexts/contexts.go b/src/api/contexts/contexts.go index b1c36f38..6cde63fd 100644 --- a/src/api/contexts/contexts.go +++ b/src/api/contexts/contexts.go @@ -31,6 +31,7 @@ type ( Genotype constants.GenotypeQuery SampleIds []string Dataset uuid.UUID + DataType string PositionBounds } diff --git a/src/api/main.go b/src/api/main.go index 696ca13e..87dae683 100644 --- a/src/api/main.go +++ b/src/api/main.go @@ -185,6 +185,9 @@ func main() { e.GET("/datasets/:dataset/data-types", variantsMvc.GetDatasetDataTypes, // middleware gam.MandateDatasetPathParam) + e.DELETE("/datasets/:dataset/data-types/:dataType", variantsMvc.ClearDataset, + gam.MandateDatasetPathParam, + gam.MandateDataTypePathParam) // TODO: refactor (deduplicate) -- e.GET("/variants/ingestion/run", variantsMvc.VariantsIngest, diff --git a/src/api/middleware/datasetMiddleware.go b/src/api/middleware/datasetMiddleware.go index 60252624..0a51bb7f 100644 --- a/src/api/middleware/datasetMiddleware.go +++ b/src/api/middleware/datasetMiddleware.go @@ -56,6 +56,21 @@ func MandateDatasetPathParam(next echo.HandlerFunc) echo.HandlerFunc { } } +func MandateDataTypePathParam(next echo.HandlerFunc) echo.HandlerFunc { + return func(c echo.Context) error { + dataType := c.Param("dataType") + if dataType != "variant" { + fmt.Printf("Invalid data-type provided: %s\n", dataType) + return c.JSON(http.StatusBadRequest, errors.CreateSimpleBadRequest( + fmt.Sprintf("invalid data-type %s - please provide a valid data-type (e.g. \"variant\")", dataType), + )) + } + gc := c.(*contexts.GohanContext) + gc.DataType = dataType + return next(gc) + } +} + /* Echo middleware to ensure a `dataset` HTTP query parameter is valid if provided */ diff --git a/src/api/mvc/variants/main.go b/src/api/mvc/variants/main.go index f49b9ec7..0b061783 100644 --- a/src/api/mvc/variants/main.go +++ b/src/api/mvc/variants/main.go @@ -558,6 +558,41 @@ func GetDatasetSummary(c echo.Context) error { } } +func ClearDataset(c echo.Context) error { + gc := c.(*contexts.GohanContext) + cfg := gc.Config + es := gc.Es7Client + + dataset := gc.Dataset + dataType := gc.DataType + fmt.Printf("[%s] - ClearDataset hit: [%s] - [%s]!\n", time.Now(), dataset.String(), dataType) + + var ( + deletionCount = 0.0 + g = new(errgroup.Group) + ) + // request #1 + g.Go(func() error { + deleteResponse, delErr := esRepo.DeleteVariantsByDatasetId(cfg, es, dataset.String()) + + if delErr != nil { + fmt.Printf("Failed to delete dataset %s variants\n", dataset) + return delErr + } + + deletionCount = deleteResponse["deleted"].(float64) + + return nil + }) + + if err := g.Wait(); err == nil { + fmt.Printf("Deleted %f variants from dataset %s\n", deletionCount, dataset) + return c.NoContent(http.StatusNoContent) + } else { + return c.JSON(http.StatusInternalServerError, errors.CreateSimpleInternalServerError("Something went wrong.. Please try again later!")) + } +} + type DataTypeSummary struct { Id string `json:"id"` Label string `json:"label"` diff --git a/src/api/repositories/elasticsearch/variants.go b/src/api/repositories/elasticsearch/variants.go index e5dc733b..32b1f963 100644 --- a/src/api/repositories/elasticsearch/variants.go +++ b/src/api/repositories/elasticsearch/variants.go @@ -668,6 +668,62 @@ func GetVariantsBucketsByKeywordAndDataset(cfg *models.Config, es *elasticsearch return result, nil } +func DeleteVariantsByDatasetId(cfg *models.Config, es *elasticsearch.Client, dataset string) (map[string]interface{}, error) { + + var buf bytes.Buffer + query := map[string]interface{}{ + "query": map[string]interface{}{ + "match": map[string]interface{}{ + "dataset": dataset, + }, + }, + } + + if err := json.NewEncoder(&buf).Encode(query); err != nil { + log.Fatalf("Error encoding query: %s\n", query) + } + + if cfg.Debug { + // view the outbound elasticsearch query + myString := string(buf.Bytes()[:]) + fmt.Println(myString) + } + + // Perform the delete request. + deleteRes, deleteErr := es.DeleteByQuery( + []string{wildcardVariantsIndex}, + bytes.NewReader(buf.Bytes()), + ) + if deleteErr != nil { + fmt.Printf("Error getting response: %s\n", deleteErr) + return nil, deleteErr + } + + defer deleteRes.Body.Close() + + resultString := deleteRes.String() + if cfg.Debug { + fmt.Println(resultString) + } + + // Prepare an empty interface + result := make(map[string]interface{}) + + // Unmarshal or Decode the JSON to the empty interface. + // Known bug: response comes back with a preceding '[200 OK] ' which needs trimming + bracketString, jsonBodyString := utils.GetLeadingStringInBetweenSquareBrackets(resultString) + if !strings.Contains(bracketString, "200") { + return nil, fmt.Errorf("failed to get documents by id : got '%s'", bracketString) + } + umErr := json.Unmarshal([]byte(jsonBodyString), &result) + if umErr != nil { + fmt.Printf("Error unmarshalling variant deletion response: %s\n", umErr) + return nil, umErr + } + + return result, nil +} + // -- internal use only -- func addAllelesToShouldMap(alleles []string, genotype c.GenotypeQuery, allelesShouldMap []map[string]interface{}) ([]map[string]interface{}, int) { minimumShouldMatch := 0 From afda847f930c83a5e5cd6641d34bc1912fabc69b Mon Sep 17 00:00:00 2001 From: Victor Rocheleau Date: Mon, 28 Aug 2023 19:51:04 +0000 Subject: [PATCH 44/49] chore: add dev-container metadata to dev image --- src/api/dev.Dockerfile | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/api/dev.Dockerfile b/src/api/dev.Dockerfile index 601835db..e286c785 100644 --- a/src/api/dev.Dockerfile +++ b/src/api/dev.Dockerfile @@ -3,6 +3,16 @@ ARG BUILDER_BASE_IMAGE # Stage 1 - builder FROM $BUILDER_BASE_IMAGE as builder +LABEL org.opencontainers.image.description="Local development image for Bento Gohan." +LABEL devcontainer.metadata='[{ \ + "customizations": { \ + "vscode": { \ + "extensions": ["golang.go", "eamodio.gitlens"], \ + "settings": {"workspaceFolder": "/gohan-api"} \ + } \ + } \ +}]' + # Maintainer LABEL maintainer="Brennan Brouillette " From f55926ea5ec8ae0c2bdfc17c945cf22a1c4e5e20 Mon Sep 17 00:00:00 2001 From: v-rocheleau Date: Tue, 29 Aug 2023 15:56:05 -0400 Subject: [PATCH 45/49] chore: update drs version, authz disabled --- docker-compose.yaml | 1 + etc/example.env | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/docker-compose.yaml b/docker-compose.yaml index 7dedd6aa..aaa5657b 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -151,6 +151,7 @@ services: - DATABASE=/drs/bento_drs/data/db/ # slightly confused naming, folder for database to go in - DATA=/drs/bento_drs/data/obj/ # DRS file objects, vs. the database - INTERNAL_PORT=${GOHAN_DRS_INTERNAL_PORT} + - AUTHZ_ENABLED=False volumes: - ${GOHAN_DRS_DATA_DIR}:/drs/bento_drs/data - ${GOHAN_API_DRS_BRIDGE_HOST_DIR}:${GOHAN_DRS_API_DRS_BRIDGE_DIR_CONTAINERIZED} diff --git a/etc/example.env b/etc/example.env index 6b4d4e1f..902c3fc4 100644 --- a/etc/example.env +++ b/etc/example.env @@ -114,7 +114,7 @@ GOHAN_KB_ES_PORT=9200 # DRS GOHAN_DRS_IMAGE=ghcr.io/bento-platform/bento_drs -GOHAN_DRS_VERSION=0.9.0 +GOHAN_DRS_VERSION=0.12.3 GOHAN_DRS_CONTAINER_NAME=gohan-drs GOHAN_DRS_INTERNAL_PORT=5000 GOHAN_DRS_EXTERNAL_PORT=6000 @@ -164,4 +164,4 @@ GOHAN_PUBLIC_URL=${GOHAN_PUBLIC_PROTO}://${GOHAN_PUBLIC_HOSTNAME}:${GOHAN_PUBLIC GOHAN_ES_PUBLIC_URL=${GOHAN_PUBLIC_PROTO}://${GOHAN_PUBLIC_ES_SUBDOMAIN}${GOHAN_PUBLIC_HOSTNAME}:${GOHAN_PUBLIC_PORT} GOHAN_DRS_PUBLIC_URL=${GOHAN_PUBLIC_PROTO}://${GOHAN_PUBLIC_DRS_SUBDOMAIN}${GOHAN_PUBLIC_HOSTNAME} -GOHAN_KB_PUBLIC_URL=${GOHAN_PUBLIC_PROTO}://${GOHAN_PUBLIC_KB_SUBDOMAIN}${GOHAN_PUBLIC_HOSTNAME}:${GOHAN_PUBLIC_PORT} \ No newline at end of file +GOHAN_KB_PUBLIC_URL=${GOHAN_PUBLIC_PROTO}://${GOHAN_PUBLIC_KB_SUBDOMAIN}${GOHAN_PUBLIC_HOSTNAME}:${GOHAN_PUBLIC_PORT} From 299e20e377d94a96d189236fb179b3bccc1750ac Mon Sep 17 00:00:00 2001 From: Victor Rocheleau Date: Wed, 30 Aug 2023 10:37:49 -0400 Subject: [PATCH 46/49] chore: set bento_user and git in dev image --- src/api/dev.Dockerfile | 5 ++++- src/api/entrypoint.bash | 17 +++++++++++++++++ src/api/run.dev.bash | 7 +++++++ 3 files changed, 28 insertions(+), 1 deletion(-) create mode 100644 src/api/entrypoint.bash create mode 100644 src/api/run.dev.bash diff --git a/src/api/dev.Dockerfile b/src/api/dev.Dockerfile index e286c785..59eebb14 100644 --- a/src/api/dev.Dockerfile +++ b/src/api/dev.Dockerfile @@ -33,5 +33,8 @@ RUN go mod download && go mod vendor # Repository mounted to the container WORKDIR /gohan-api/src/api +COPY entrypoint.bash . +COPY run.dev.bash . -CMD [ "air" ] +ENTRYPOINT [ "bash", "./entrypoint.bash" ] +CMD [ "bash", "./run.dev.bash" ] diff --git a/src/api/entrypoint.bash b/src/api/entrypoint.bash new file mode 100644 index 00000000..b11103b3 --- /dev/null +++ b/src/api/entrypoint.bash @@ -0,0 +1,17 @@ +#!/bin/bash + +cd /gohan-api || exit + +# Create bento_user and home +source /create_service_user.bash + +# Create dev build directory +mkdir -p src/api/tmp + +# Set permissions / groups +chown -R bento_user:bento_user ./ +chown -R bento_user:bento_user /app +chmod -R o-rwx src/api/tmp + +# Drop into bento_user from root and execute the CMD specified for the image +exec gosu bento_user "$@" diff --git a/src/api/run.dev.bash b/src/api/run.dev.bash new file mode 100644 index 00000000..4a390876 --- /dev/null +++ b/src/api/run.dev.bash @@ -0,0 +1,7 @@ +#!/bin/bash + +# Set .gitconfig for development +/set_gitconfig.bash + +# Start gohan-api with hot reload using Air +air From 0a9836ae65d62d74cffe3eb797142ba7892c2b18 Mon Sep 17 00:00:00 2001 From: Victor Rocheleau Date: Wed, 30 Aug 2023 12:14:53 -0400 Subject: [PATCH 47/49] chore: bump base image version --- etc/example.env | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/etc/example.env b/etc/example.env index 902c3fc4..1ecb98ed 100644 --- a/etc/example.env +++ b/etc/example.env @@ -39,7 +39,7 @@ GOHAN_API_IMAGE=gohan-api GOHAN_API_VERSION=latest GOHAN_API_BUILDER_BASE_IMAGE=golang:1.20-bullseye -GOHAN_API_BASE_IMAGE=ghcr.io/bento-platform/bento_base_image:plain-debian-2023.03.06 +GOHAN_API_BASE_IMAGE=ghcr.io/bento-platform/bento_base_image:plain-debian-2023.08.16.2000 GOHAN_API_CONTAINER_NAME=gohan-api GOHAN_API_SERVICE_HOST=0.0.0.0 From 5c868092a61c5e374995e9356bca774981ef0c75 Mon Sep 17 00:00:00 2001 From: Victor Rocheleau Date: Wed, 30 Aug 2023 15:42:35 -0400 Subject: [PATCH 48/49] chore: use bento golang base image --- .github/workflows/api.build.yml | 2 -- .github/workflows/api.test.yml | 3 +-- docker-compose.yaml | 1 - etc/example.env | 3 +-- src/api/Dockerfile | 15 ++------------- src/api/dev.Dockerfile | 7 +++---- 6 files changed, 7 insertions(+), 24 deletions(-) diff --git a/.github/workflows/api.build.yml b/.github/workflows/api.build.yml index 87ef8983..496f8d21 100644 --- a/.github/workflows/api.build.yml +++ b/.github/workflows/api.build.yml @@ -29,7 +29,6 @@ jobs: run: | cp ./etc/example.env .env source .env - echo "GOHAN_API_BUILDER_BASE_IMAGE=$GOHAN_API_BUILDER_BASE_IMAGE" >> $GITHUB_ENV echo "GOHAN_API_BASE_IMAGE=$GOHAN_API_BASE_IMAGE" >> $GITHUB_ENV - name: Load environment variables from .env file @@ -40,7 +39,6 @@ jobs: with: context: "{{defaultContext}}:src/api" build-args: | - BUILDER_BASE_IMAGE=${{ env.GOHAN_API_BUILDER_BASE_IMAGE }} BASE_IMAGE=${{ env.GOHAN_API_BASE_IMAGE }} registry: ghcr.io registry-username: ${{ github.actor }} diff --git a/.github/workflows/api.test.yml b/.github/workflows/api.test.yml index f958bb79..5f39ae43 100644 --- a/.github/workflows/api.test.yml +++ b/.github/workflows/api.test.yml @@ -34,7 +34,6 @@ jobs: # with: # context: "{{defaultContext}}:src/api" # build-args: | - # BUILDER_BASE_IMAGE=${{ env.GOHAN_API_BUILDER_BASE_IMAGE }} # BASE_IMAGE=${{ env.GOHAN_API_BASE_IMAGE }} # registry: ghcr.io # registry-username: ${{ github.actor }} @@ -45,4 +44,4 @@ jobs: - name: API Test run: | sudo apt-get install -y tabix - make test-api \ No newline at end of file + make test-api diff --git a/docker-compose.yaml b/docker-compose.yaml index aaa5657b..641056ec 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -32,7 +32,6 @@ services: build: context: $PWD/src/api args: - BUILDER_BASE_IMAGE: ${GOHAN_API_BUILDER_BASE_IMAGE} BASE_IMAGE: ${GOHAN_API_BASE_IMAGE} # also passed in as an ENV from within Dockerfile : GOHAN_API_INTERNAL_PORT: ${GOHAN_API_INTERNAL_PORT} diff --git a/etc/example.env b/etc/example.env index 1ecb98ed..80aac62c 100644 --- a/etc/example.env +++ b/etc/example.env @@ -38,8 +38,7 @@ GOHAN_GATEWAY_CPUS=2 GOHAN_API_IMAGE=gohan-api GOHAN_API_VERSION=latest -GOHAN_API_BUILDER_BASE_IMAGE=golang:1.20-bullseye -GOHAN_API_BASE_IMAGE=ghcr.io/bento-platform/bento_base_image:plain-debian-2023.08.16.2000 +GOHAN_API_BASE_IMAGE=ghcr.io/bento-platform/bento_base_image:golang-debian-2023.08.30 GOHAN_API_CONTAINER_NAME=gohan-api GOHAN_API_SERVICE_HOST=0.0.0.0 diff --git a/src/api/Dockerfile b/src/api/Dockerfile index 5384b40f..57a6f933 100644 --- a/src/api/Dockerfile +++ b/src/api/Dockerfile @@ -1,13 +1,11 @@ -ARG BUILDER_BASE_IMAGE ARG BASE_IMAGE -# Stage 1 - builder -FROM $BUILDER_BASE_IMAGE as builder +FROM $BASE_IMAGE # Maintainer LABEL maintainer="Brennan Brouillette " -WORKDIR /build +WORKDIR /app COPY . . @@ -15,10 +13,6 @@ COPY . . RUN go mod vendor && \ go build -ldflags="-s -w" -o gohan_api - -# Stage two - executioner -FROM $BASE_IMAGE - # Debian updates # - tabix for indexing VCFs # - other base dependencies provided by the base image @@ -27,11 +21,6 @@ RUN apt-get update -y && \ apt-get install -y tabix && \ rm -rf /var/lib/apt/lists/* -WORKDIR /app - -# Copy pre-built executable from builder stage -COPY --from=builder /build/gohan_api . - # Copy static workflow files COPY workflows/*.wdl /app/workflows/ diff --git a/src/api/dev.Dockerfile b/src/api/dev.Dockerfile index 59eebb14..394ab49f 100644 --- a/src/api/dev.Dockerfile +++ b/src/api/dev.Dockerfile @@ -1,7 +1,6 @@ -ARG BUILDER_BASE_IMAGE +ARG BASE_IMAGE -# Stage 1 - builder -FROM $BUILDER_BASE_IMAGE as builder +FROM $BASE_IMAGE LABEL org.opencontainers.image.description="Local development image for Bento Gohan." LABEL devcontainer.metadata='[{ \ @@ -37,4 +36,4 @@ COPY entrypoint.bash . COPY run.dev.bash . ENTRYPOINT [ "bash", "./entrypoint.bash" ] -CMD [ "bash", "./run.dev.bash" ] +CMD [ "air" ] From cca5188fb9322a8a867d4539fe041bc373dc15ae Mon Sep 17 00:00:00 2001 From: Victor Rocheleau Date: Wed, 30 Aug 2023 16:12:05 -0400 Subject: [PATCH 49/49] minimize prod image size with 2 stages --- .github/workflows/api.build.yml | 8 ++++++-- docker-compose.yaml | 4 +++- etc/example.env | 4 +++- src/api/Dockerfile | 15 ++++++++++++--- src/api/dev.Dockerfile | 4 ++-- 5 files changed, 26 insertions(+), 9 deletions(-) diff --git a/.github/workflows/api.build.yml b/.github/workflows/api.build.yml index 496f8d21..5da512d1 100644 --- a/.github/workflows/api.build.yml +++ b/.github/workflows/api.build.yml @@ -29,7 +29,9 @@ jobs: run: | cp ./etc/example.env .env source .env - echo "GOHAN_API_BASE_IMAGE=$GOHAN_API_BASE_IMAGE" >> $GITHUB_ENV + echo "GOHAN_API_BUILDER_BASE_IMAGE=$GOHAN_API_BUILDER_BASE_IMAGE" >> $GITHUB_ENV + echo "GOHAN_API_DEV_BASE_IMAGE=$GOHAN_API_DEV_BASE_IMAGE" >> $GITHUB_ENV + echo "GOHAN_API_PROD_BASE_IMAGE=$GOHAN_API_PROD_BASE_IMAGE" >> $GITHUB_ENV - name: Load environment variables from .env file uses: xom9ikk/dotenv@v2 @@ -39,7 +41,9 @@ jobs: with: context: "{{defaultContext}}:src/api" build-args: | - BASE_IMAGE=${{ env.GOHAN_API_BASE_IMAGE }} + BUILDER_BASE_IMAGE=${{ env.GOHAN_API_BUILDER_BASE_IMAGE }} + BASE_DEV_IMAGE=${{ env.GOHAN_API_DEV_BASE_IMAGE }} + BASE_PROD_IMAGE=${{ env.GOHAN_API_PROD_BASE_IMAGE }} registry: ghcr.io registry-username: ${{ github.actor }} registry-password: ${{ secrets.GITHUB_TOKEN }} diff --git a/docker-compose.yaml b/docker-compose.yaml index 641056ec..ab1e4dd1 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -32,7 +32,9 @@ services: build: context: $PWD/src/api args: - BASE_IMAGE: ${GOHAN_API_BASE_IMAGE} + BUILDER_BASE_IMAGE: ${GOHAN_API_BUILDER_BASE_IMAGE} + BASE_PROD_IMAGE: ${GOHAN_API_PROD_BASE_IMAGE} + BASE_DEV_IMAGE: ${GOHAN_API_PROD_BASE_IMAGE} # also passed in as an ENV from within Dockerfile : GOHAN_API_INTERNAL_PORT: ${GOHAN_API_INTERNAL_PORT} networks: diff --git a/etc/example.env b/etc/example.env index 80aac62c..5dab7001 100644 --- a/etc/example.env +++ b/etc/example.env @@ -38,7 +38,9 @@ GOHAN_GATEWAY_CPUS=2 GOHAN_API_IMAGE=gohan-api GOHAN_API_VERSION=latest -GOHAN_API_BASE_IMAGE=ghcr.io/bento-platform/bento_base_image:golang-debian-2023.08.30 +GOHAN_API_BUILDER_BASE_IMAGE=golang:1.20-bullseye +GOHAN_API_DEV_BASE_IMAGE=ghcr.io/bento-platform/bento_base_image:golang-debian-2023.08.30 +GOHAN_API_PROD_BASE_IMAGE=ghcr.io/bento-platform/bento_base_image:plain-debian-2023.08.30 GOHAN_API_CONTAINER_NAME=gohan-api GOHAN_API_SERVICE_HOST=0.0.0.0 diff --git a/src/api/Dockerfile b/src/api/Dockerfile index 57a6f933..e93e61f4 100644 --- a/src/api/Dockerfile +++ b/src/api/Dockerfile @@ -1,11 +1,13 @@ -ARG BASE_IMAGE +ARG BUILDER_BASE_IMAGE +ARG BASE_PROD_IMAGE -FROM $BASE_IMAGE +# Stage 1 - builder +FROM $BUILDER_BASE_IMAGE as builder # Maintainer LABEL maintainer="Brennan Brouillette " -WORKDIR /app +WORKDIR /build COPY . . @@ -13,6 +15,8 @@ COPY . . RUN go mod vendor && \ go build -ldflags="-s -w" -o gohan_api +FROM $BASE_PROD_IMAGE + # Debian updates # - tabix for indexing VCFs # - other base dependencies provided by the base image @@ -21,6 +25,11 @@ RUN apt-get update -y && \ apt-get install -y tabix && \ rm -rf /var/lib/apt/lists/* +WORKDIR /app + +# Copy pre-built executable from builder stage +COPY --from=builder /build/gohan_api . + # Copy static workflow files COPY workflows/*.wdl /app/workflows/ diff --git a/src/api/dev.Dockerfile b/src/api/dev.Dockerfile index 394ab49f..2b810113 100644 --- a/src/api/dev.Dockerfile +++ b/src/api/dev.Dockerfile @@ -1,6 +1,6 @@ -ARG BASE_IMAGE +ARG BASE_DEV_IMAGE -FROM $BASE_IMAGE +FROM $BASE_DEV_IMAGE LABEL org.opencontainers.image.description="Local development image for Bento Gohan." LABEL devcontainer.metadata='[{ \