diff --git a/dbs/buckets.go b/dbs/buckets.go index 1c4530e..40304ee 100644 --- a/dbs/buckets.go +++ b/dbs/buckets.go @@ -11,6 +11,8 @@ import ( "fmt" "io" "log" + + lexicon "github.com/CHESSComputing/golib/lexicon" ) // Buckets represents Buckets DBS DB table @@ -120,11 +122,11 @@ func (r *Buckets) Validate() error { if err := RecordValidator.Struct(*r); err != nil { return DecodeValidatorError(r, err) } - if matched := unixTimePattern.MatchString(fmt.Sprintf("%d", r.CREATE_AT)); !matched { + if matched := lexicon.UnixTimePattern.MatchString(fmt.Sprintf("%d", r.CREATE_AT)); !matched { msg := "invalid pattern for creation date" return Error(InvalidParamErr, PatternErrorCode, msg, "dbs.buckets.Validate") } - if matched := unixTimePattern.MatchString(fmt.Sprintf("%d", r.MODIFY_AT)); !matched { + if matched := lexicon.UnixTimePattern.MatchString(fmt.Sprintf("%d", r.MODIFY_AT)); !matched { msg := "invalid pattern for last modification date" return Error(InvalidParamErr, PatternErrorCode, msg, "dbs.buckets.Validate") } diff --git a/dbs/datasets.go b/dbs/datasets.go index aed389a..cfadbc4 100644 --- a/dbs/datasets.go +++ b/dbs/datasets.go @@ -11,6 +11,7 @@ import ( "io" "log" + lexicon "github.com/CHESSComputing/golib/lexicon" "github.com/CHESSComputing/golib/utils" ) @@ -85,6 +86,7 @@ func (a *API) GetDataset() error { func (a *API) InsertDataset() error { // the API provides Reader which will be used by Decode function to load the HTTP payload // and cast it to Datasets data structure + log.Println("### InsertDataset", a) // read given input data, err := io.ReadAll(a.Reader) @@ -114,6 +116,11 @@ func (a *API) InsertDataset() error { CREATE_BY: a.CreateBy, MODIFY_BY: a.CreateBy, } + record.SetDefaults() + err = record.Validate() + if err != nil { + return Error(err, ValidateErrorCode, "validation error", "dbs.datasets.InsertDataset") + } err = insertParts(&rec, &record) if err != nil { return Error(err, CommitErrorCode, "", "dbs.insertRecord") @@ -330,10 +337,11 @@ func (r *Datasets) Insert(tx *sql.Tx) error { // //gocyclo:ignore func (r *Datasets) Validate() error { - if err := CheckPattern("did", r.DID); err != nil { + log.Printf("### Validate %+v did=%v", r, lexicon.CheckPattern("did", r.DID)) + if err := lexicon.CheckPattern("did", r.DID); err != nil { return Error(err, PatternErrorCode, "", "dbs.datasets.Validate") } - if matched := unixTimePattern.MatchString(fmt.Sprintf("%d", r.CREATE_AT)); !matched { + if matched := lexicon.UnixTimePattern.MatchString(fmt.Sprintf("%d", r.CREATE_AT)); !matched { msg := "invalid pattern for creation date" return Error(InvalidParamErr, PatternErrorCode, msg, "dbs.datasets.Validate") } diff --git a/dbs/dbs.go b/dbs/dbs.go index c5412ed..d0fcc3b 100644 --- a/dbs/dbs.go +++ b/dbs/dbs.go @@ -20,6 +20,7 @@ import ( "strings" "time" + lexicon "github.com/CHESSComputing/golib/lexicon" validator "github.com/go-playground/validator/v10" ) @@ -606,9 +607,9 @@ func OperatorValue(arg string) (string, string) { func ParseRuns(runs []string) ([]string, error) { var out []string for _, v := range runs { - if matched := intPattern.MatchString(v); matched { + if matched := lexicon.IntPattern.MatchString(v); matched { out = append(out, v) - } else if matched := runRangePattern.MatchString(v); matched { + } else if matched := lexicon.RunRangePattern.MatchString(v); matched { arr := strings.Split(v, "-") if len(arr) != 2 { msg := fmt.Sprintf("fail to parse run-range '%s'", v) diff --git a/dbs/files.go b/dbs/files.go index 3207337..53d5e52 100644 --- a/dbs/files.go +++ b/dbs/files.go @@ -12,6 +12,8 @@ import ( "io" "log" "strings" + + lexicon "github.com/CHESSComputing/golib/lexicon" ) // Files represents Files DBS DB table @@ -137,14 +139,14 @@ func (r *Files) Validate() error { if err := RecordValidator.Struct(*r); err != nil { return DecodeValidatorError(r, err) } - if err := CheckPattern("file", r.FILE); err != nil { + if err := lexicon.CheckPattern("file", r.FILE); err != nil { return Error(err, PatternErrorCode, "", "dbs.files.Validate") } - if matched := unixTimePattern.MatchString(fmt.Sprintf("%d", r.CREATE_AT)); !matched { + if matched := lexicon.UnixTimePattern.MatchString(fmt.Sprintf("%d", r.CREATE_AT)); !matched { msg := "invalid pattern for creation date" return Error(InvalidParamErr, PatternErrorCode, msg, "dbs.files.Validate") } - if matched := unixTimePattern.MatchString(fmt.Sprintf("%d", r.MODIFY_AT)); !matched { + if matched := lexicon.UnixTimePattern.MatchString(fmt.Sprintf("%d", r.MODIFY_AT)); !matched { msg := "invalid pattern for last modification date" return Error(InvalidParamErr, PatternErrorCode, msg, "dbs.files.Validate") } diff --git a/dbs/parents.go b/dbs/parents.go index 52c4a56..d8332d0 100644 --- a/dbs/parents.go +++ b/dbs/parents.go @@ -11,6 +11,8 @@ import ( "fmt" "io" "log" + + lexicon "github.com/CHESSComputing/golib/lexicon" ) // Parents represents Parents DBS DB table @@ -118,11 +120,11 @@ func (r *Parents) Validate() error { if err := RecordValidator.Struct(*r); err != nil { return DecodeValidatorError(r, err) } - if matched := unixTimePattern.MatchString(fmt.Sprintf("%d", r.CREATE_AT)); !matched { + if matched := lexicon.UnixTimePattern.MatchString(fmt.Sprintf("%d", r.CREATE_AT)); !matched { msg := "invalid pattern for creation date" return Error(InvalidParamErr, PatternErrorCode, msg, "dbs.parents.Validate") } - if matched := unixTimePattern.MatchString(fmt.Sprintf("%d", r.MODIFY_AT)); !matched { + if matched := lexicon.UnixTimePattern.MatchString(fmt.Sprintf("%d", r.MODIFY_AT)); !matched { msg := "invalid pattern for last modification date" return Error(InvalidParamErr, PatternErrorCode, msg, "dbs.parents.Validate") } diff --git a/dbs/processing.go b/dbs/processing.go index ae9a1b4..46590d2 100644 --- a/dbs/processing.go +++ b/dbs/processing.go @@ -11,6 +11,8 @@ import ( "fmt" "io" "log" + + lexicon "github.com/CHESSComputing/golib/lexicon" ) // Processing represents Processing DBS DB table @@ -118,11 +120,11 @@ func (r *Processing) Validate() error { if err := RecordValidator.Struct(*r); err != nil { return DecodeValidatorError(r, err) } - if matched := unixTimePattern.MatchString(fmt.Sprintf("%d", r.CREATE_AT)); !matched { + if matched := lexicon.UnixTimePattern.MatchString(fmt.Sprintf("%d", r.CREATE_AT)); !matched { msg := "invalid pattern for creation date" return Error(InvalidParamErr, PatternErrorCode, msg, "dbs.processing.Validate") } - if matched := unixTimePattern.MatchString(fmt.Sprintf("%d", r.MODIFY_AT)); !matched { + if matched := lexicon.UnixTimePattern.MatchString(fmt.Sprintf("%d", r.MODIFY_AT)); !matched { msg := "invalid pattern for last modification date" return Error(InvalidParamErr, PatternErrorCode, msg, "dbs.processing.Validate") } diff --git a/dbs/sites.go b/dbs/sites.go index b68674b..a3c11e7 100644 --- a/dbs/sites.go +++ b/dbs/sites.go @@ -11,6 +11,8 @@ import ( "fmt" "io" "log" + + lexicon "github.com/CHESSComputing/golib/lexicon" ) // Sites represents Sites DBS DB table @@ -118,11 +120,11 @@ func (r *Sites) Validate() error { if err := RecordValidator.Struct(*r); err != nil { return DecodeValidatorError(r, err) } - if matched := unixTimePattern.MatchString(fmt.Sprintf("%d", r.CREATE_AT)); !matched { + if matched := lexicon.UnixTimePattern.MatchString(fmt.Sprintf("%d", r.CREATE_AT)); !matched { msg := "invalid pattern for creation date" return Error(InvalidParamErr, PatternErrorCode, msg, "dbs.sites.Validate") } - if matched := unixTimePattern.MatchString(fmt.Sprintf("%d", r.MODIFY_AT)); !matched { + if matched := lexicon.UnixTimePattern.MatchString(fmt.Sprintf("%d", r.MODIFY_AT)); !matched { msg := "invalid pattern for last modification date" return Error(InvalidParamErr, PatternErrorCode, msg, "dbs.sites.Validate") } diff --git a/dbs/validator.go b/dbs/validator.go deleted file mode 100644 index a83c71c..0000000 --- a/dbs/validator.go +++ /dev/null @@ -1,401 +0,0 @@ -package dbs - -// DBS validator module -// Copyright (c) 2023 - Valentin Kuznetsov -// -import ( - "encoding/json" - "fmt" - "io/ioutil" - "log" - "net/http" - "regexp" - "strings" - - "github.com/CHESSComputing/golib/utils" -) - -// DBS string parameters -var strParameters = []string{ - "dataset", - "block_name", - "parent_dataset", - "release_version", - "pset_hash", - "app_name", - "output_module_label", - "global_tag", - "processing_version", - "acquisition_era_name", - "physics_group_name", - "file", - "primary_ds_name", - "primary_ds_type", - "processed_ds_name", - "data_tier_name", - "dataset_access_type", - "create_by", - "user", - "modify_by", -} - -// DBS integer parameters -var intParameters = []string{ - "cdate", - "ldate", - "min_cdate", - "max_cdate", - "min_ldate", - "max_ldate", - "dataset_id", - "prep_id", -} - -// DBS mix type parameters -var mixParameters = []string{"run_num"} - -// Lexicon represents single lexicon pattern structure -type Lexicon struct { - Name string `json:"name"` - Patterns []string `json:"patterns"` - Length int `json:"length"` -} - -func (r *Lexicon) String() string { - data, err := json.MarshalIndent(r, "", " ") - if err == nil { - return string(data) - } - return fmt.Sprintf("Lexicon: name=%s patters=%v length=%d", r.Name, r.Patterns, r.Length) -} - -// LexiconPattern represents single lexicon compiled pattern structure -type LexiconPattern struct { - Lexicon Lexicon - Patterns []*regexp.Regexp -} - -// LexiconPatterns represents CMS Lexicon patterns -var LexiconPatterns map[string]LexiconPattern - -// LoadPatterns loads CMS Lexion patterns from given file -// the format of the file is a list of the following dicts: -// [ {"name": , "patterns": [list of patterns], "length": int},...] -func LoadPatterns(fname string) (map[string]LexiconPattern, error) { - data, err := ioutil.ReadFile(fname) - if err != nil { - log.Printf("Unable to read, file '%s', error: %v\n", fname, err) - return nil, Error(err, ReaderErrorCode, "", "dbs.validator.LoadPatterns") - } - var records []Lexicon - err = json.Unmarshal(data, &records) - if err != nil { - log.Printf("Unable to parse, file '%s', error: %v\n", fname, err) - return nil, Error(err, UnmarshalErrorCode, "", "dbs.validator.LoadPatterns") - } - // fetch and compile all patterns - pmap := make(map[string]LexiconPattern) - for _, rec := range records { - var patterns []*regexp.Regexp - for _, pat := range rec.Patterns { - patterns = append(patterns, regexp.MustCompile(pat)) - } - lex := LexiconPattern{Lexicon: rec, Patterns: patterns} - key := rec.Name - pmap[key] = lex - if Verbose > 1 { - log.Printf("regexp pattern\n%s", rec.String()) - } - } - return pmap, nil -} - -// aux patterns -var unixTimePattern = regexp.MustCompile(`^[1-9][0-9]{9}$`) -var intPattern = regexp.MustCompile(`^\d+$`) -var runRangePattern = regexp.MustCompile(`^\d+-\d+$`) - -// ObjectPattern represents interface to check different objects -type ObjectPattern interface { - Check(k string, v interface{}) error -} - -// StrPattern represents string object pattern -type StrPattern struct { - Patterns []*regexp.Regexp - Len int -} - -// Check implements ObjectPattern interface for StrPattern objects -func (o StrPattern) Check(key string, val interface{}) error { - if Verbose > 0 { - log.Printf("StrPatern check key=%s val=%v", key, val) - log.Printf("patterns %v max length %v", o.Patterns, o.Len) - } - var v string - switch vvv := val.(type) { - case string: - v = vvv - default: - msg := fmt.Sprintf( - "invalid type of input parameter '%s' for value '%+v' type '%T'", - key, val, val) - return Error(InvalidParamErr, PatternErrorCode, msg, "dbs.validator.Check") - } - if len(o.Patterns) == 0 { - // nothing to match in patterns - if Verbose > 0 { - log.Println("nothing to match since we do not have patterns") - } - return nil - } - if o.Len > 0 && len(v) > o.Len { - if Verbose > 0 { - log.Println("lexicon str pattern", o) - } - // check for list of LFNs - if key == "file" { - for _, lfn := range lfnList(v) { - if len(lfn) > o.Len { - msg := fmt.Sprintf("length of LFN %s exceed %d characters", lfn, o.Len) - return Error(InvalidParamErr, PatternErrorCode, msg, "dbs.validator.Check") - } - } - } else { - msg := fmt.Sprintf("length of %s exceed %d characters", v, o.Len) - return Error(InvalidParamErr, PatternErrorCode, msg, "dbs.validator.Check") - } - } - if key == "file" { - for _, vvv := range lfnList(v) { - msg := fmt.Sprintf("unable to match '%s' value '%s' from LFN list", key, vvv) - var pass bool - for _, pat := range o.Patterns { - if matched := pat.MatchString(vvv); matched { - // if at least one pattern matched we'll return - pass = true - break - } - } - if !pass { - return Error(InvalidParamErr, PatternErrorCode, msg, "dbs.validator.Check") - } - } - return nil - } - msg := fmt.Sprintf("unable to match '%s' value '%s'", key, val) - for _, pat := range o.Patterns { - if matched := pat.MatchString(v); matched { - // if at least one pattern matched we'll return - return nil - } - } - return Error(InvalidParamErr, PatternErrorCode, msg, "dbs.validator.Check") -} - -// helper function to convert input value into list of list -// we need it to properly match LFN list -func lfnList(v string) []string { - fileList := strings.Replace(v, "[", "", -1) - fileList = strings.Replace(fileList, "]", "", -1) - fileList = strings.Replace(fileList, "'", "", -1) - fileList = strings.Replace(fileList, "\"", "", -1) - var lfns []string - for _, val := range strings.Split(fileList, ",") { - lfns = append(lfns, strings.Trim(val, " ")) - } - return lfns -} - -// helper function to validate string parameters -// -//gocyclo:ignore -func strType(key string, val interface{}) error { - var v string - switch vvv := val.(type) { - case string: - v = vvv - default: - msg := fmt.Sprintf( - "invalid type of input parameter '%s' for value '%+v' type '%T'", - key, val, val) - return Error(InvalidParamErr, PatternErrorCode, msg, "dbs.validator.strType") - } - mapKeys := make(map[string]string) - mapKeys["dataset"] = "dataset" - mapKeys["block_name"] = "block_name" - mapKeys["file"] = "file" - mapKeys["create_by"] = "user" - mapKeys["modify_by"] = "user" - mapKeys["primary_ds_name"] = "primary_dataset" - mapKeys["processed_ds_name"] = "processed_dataset" - mapKeys["processing_version"] = "processing_version" - mapKeys["app_name"] = "application" - mapKeys["data_tier_name"] = "data_tier_name" - mapKeys["dataset"] = "dataset" - mapKeys["release_version"] = "cmssw_version" - var allowedWildCardKeys = []string{ - "primary_ds_name", - "processed_ds_name", - "processing_version", - "app_name", - "data_tier_name", - "release_version", - } - - var patterns []*regexp.Regexp - var length int - - for k, lkey := range mapKeys { - if key == k { - if utils.InList(k, allowedWildCardKeys) { - if v == "" && val == "*" { // when someone passed wildcard - return nil - } - } - if p, ok := LexiconPatterns[lkey]; ok { - patterns = p.Patterns - length = p.Lexicon.Length - } - } - if key == "file" { - if strings.Contains(v, "[") { - if strings.Contains(v, "'") { // Python bad json, e.g. ['bla'] - v = strings.Replace(v, "'", "\"", -1) - } - var records []string - err := json.Unmarshal([]byte(v), &records) - if err != nil { - return Error(err, UnmarshalErrorCode, "", "dbs.validator.strType") - } - for _, r := range records { - err := StrPattern{Patterns: patterns, Len: length}.Check(key, r) - if err != nil { - return Error(err, PatternErrorCode, "", "dbs.validator.strType") - } - } - } - } - if key == "block_name" { - if strings.Contains(v, "[") { - if strings.Contains(v, "'") { // Python bad json, e.g. ['bla'] - v = strings.Replace(v, "'", "\"", -1) - } - // split input into pieces - input := strings.Replace(v, "[", "", -1) - input = strings.Replace(input, "]", "", -1) - for _, vvv := range strings.Split(input, ",") { - err := checkBlockHash(strings.Trim(vvv, " ")) - if err != nil { - return err - } - } - } else { - err := checkBlockHash(v) - if err != nil { - return err - } - } - } - } - return StrPattern{Patterns: patterns, Len: length}.Check(key, val) -} - -// helper function to check block hash -func checkBlockHash(blk string) error { - arr := strings.Split(blk, "#") - if len(arr) != 2 { - msg := fmt.Sprintf("wrong parts in block name %s", blk) - return Error(ValidationErr, PatternErrorCode, msg, "dbs.validator.checkBlockHash") - } - if len(arr[1]) > 36 { - msg := fmt.Sprintf("wrong length of block hash %s", blk) - return Error(ValidationErr, PatternErrorCode, msg, "dbs.validator.checkBlockHash") - } - return nil -} - -// helper function to validate int parameters -func intType(k string, v interface{}) error { - // to be implemented - return nil -} - -// helper function to validate mix parameters -func mixType(k string, v interface{}) error { - // to be implemented - return nil -} - -// Validate provides validation of all input parameters of HTTP request -func Validate(r *http.Request) error { - if r.Method == "GET" { - for k, vvv := range r.URL.Query() { - // vvv here is []string{} type since all HTTP parameters are treated - // as list of strings - for _, v := range vvv { - if utils.InList(k, strParameters) { - if err := strType(k, v); err != nil { - return Error(err, ValidateErrorCode, "not str type", "dbs.Validate") - } - } - if utils.InList(k, intParameters) { - if err := intType(k, v); err != nil { - return Error(err, ValidateErrorCode, "not int type", "dbs.Validate") - } - } - if utils.InList(k, mixParameters) { - if err := mixType(k, v); err != nil { - return Error(err, ValidateErrorCode, "not mix type", "dbs.Validate") - } - } - } - if Verbose > 0 { - log.Printf("query parameter key=%s values=%+v\n", k, vvv) - } - } - } - return nil -} - -// CheckPattern is a generic functino to check given key value within Lexicon map -func CheckPattern(key, value string) error { - if p, ok := LexiconPatterns[key]; ok { - for _, pat := range p.Patterns { - if matched := pat.MatchString(value); matched { - if Verbose > 1 { - log.Printf("CheckPattern key=%s value='%s' found match %s", key, value, pat) - } - return nil - } - if Verbose > 1 { - log.Printf("CheckPattern key=%s value='%s' does not match %s", key, value, pat) - } - } - msg := fmt.Sprintf("invalid pattern for key=%s", key) - return Error(InvalidParamErr, PatternErrorCode, msg, "dbs.CheckPattern") - } - return nil -} - -// ValidatePostPayload function to validate POST request -func ValidatePostPayload(rec map[string]any) error { - for key, val := range rec { - errMsg := fmt.Sprintf("unable to match '%s' value '%+v'", key, val) - if key == "data_tier_name" { - if vvv, ok := val.(string); ok { - if err := CheckPattern("data_tier_name", vvv); err != nil { - return Error(err, PatternErrorCode, "wrong data_tier_name pattern", "dbs.ValidaatePostPayload") - } - } - } else if key == "create_at" || key == "modify_at" { - v, err := CastInt(val) - if err != nil { - return Error(err, PatternErrorCode, errMsg, "dbs.ValidaatePostPayload") - } else if matched := unixTimePattern.MatchString(fmt.Sprintf("%d", v)); !matched { - return Error(InvalidParamErr, PatternErrorCode, errMsg, "dbs.ValidaatePostPayload") - } - } - } - return nil -} diff --git a/main_test.go b/main_test.go index aef2a8c..a645e12 100644 --- a/main_test.go +++ b/main_test.go @@ -12,6 +12,7 @@ import ( "github.com/CHESSComputing/DataBookkeeping/dbs" srvConfig "github.com/CHESSComputing/golib/config" + "github.com/CHESSComputing/golib/lexicon" server "github.com/CHESSComputing/golib/server" "github.com/gin-gonic/gin" validator "github.com/go-playground/validator/v10" @@ -29,11 +30,11 @@ func initDB(dryRun bool, dburi string) *sql.DB { log.Fatal("unable to get current working dir") } // load Lexicon patterns - lexPatterns, err := dbs.LoadPatterns(srvConfig.Config.DataBookkeeping.LexiconFile) + lexPatterns, err := lexicon.LoadPatterns(srvConfig.Config.DataBookkeeping.LexiconFile) if err != nil { log.Fatal(err) } - dbs.LexiconPatterns = lexPatterns + lexicon.LexiconPatterns = lexPatterns dbs.StaticDir = "static" dbtype := "sqlite3" diff --git a/server.go b/server.go index b5e9410..c2a6320 100644 --- a/server.go +++ b/server.go @@ -31,6 +31,7 @@ import ( "github.com/CHESSComputing/DataBookkeeping/dbs" srvConfig "github.com/CHESSComputing/golib/config" + "github.com/CHESSComputing/golib/lexicon" server "github.com/CHESSComputing/golib/server" "github.com/gin-gonic/gin" validator "github.com/go-playground/validator/v10" @@ -123,11 +124,11 @@ func Server() { defer dbs.DB.Close() // load Lexicon patterns - lexPatterns, err := dbs.LoadPatterns(srvConfig.Config.DataBookkeeping.LexiconFile) + lexPatterns, err := lexicon.LoadPatterns(srvConfig.Config.DataBookkeeping.LexiconFile) if err != nil { log.Fatal(err) } - dbs.LexiconPatterns = lexPatterns + lexicon.LexiconPatterns = lexPatterns // setup web router and start the service r := setupRouter()