From 6b9dc56f2dff358d52eb2b06c6ee168698102803 Mon Sep 17 00:00:00 2001 From: Joakim Bygdell Date: Tue, 17 Oct 2023 17:55:16 +0200 Subject: [PATCH 01/34] Copy `backup` from sda-pipeline as `sync` --- sda/cmd/sync/sync.go | 546 ++++++++++++++++++++++++++++++++++++++ sda/cmd/sync/sync.md | 172 ++++++++++++ sda/cmd/sync/sync_test.go | 34 +++ 3 files changed, 752 insertions(+) create mode 100644 sda/cmd/sync/sync.go create mode 100644 sda/cmd/sync/sync.md create mode 100644 sda/cmd/sync/sync_test.go diff --git a/sda/cmd/sync/sync.go b/sda/cmd/sync/sync.go new file mode 100644 index 000000000..6b0614f3b --- /dev/null +++ b/sda/cmd/sync/sync.go @@ -0,0 +1,546 @@ +// The backup command accepts messages with accessionIDs for +// ingested files and copies them to the second storage. +package main + +import ( + "encoding/hex" + "encoding/json" + "io" + "strings" + + "sda-pipeline/internal/broker" + "sda-pipeline/internal/config" + "sda-pipeline/internal/database" + "sda-pipeline/internal/storage" + + "github.com/neicnordic/crypt4gh/model/headers" + log "github.com/sirupsen/logrus" + "golang.org/x/crypto/chacha20poly1305" +) + +// Backup struct that holds the json message data +type backup struct { + Type string `json:"type,omitempty"` + User string `json:"user"` + Filepath string `json:"filepath"` + AccessionID string `json:"accession_id"` + DecryptedChecksums []checksums `json:"decrypted_checksums"` +} + +// Checksums is struct for the checksum type and value +type checksums struct { + Type string `json:"type"` + Value string `json:"value"` +} + +func main() { + forever := make(chan bool) + conf, err := config.NewConfig("backup") + if err != nil { + log.Fatal(err) + } + mq, err := broker.NewMQ(conf.Broker) + if err != nil { + log.Fatal(err) + } + db, err := database.NewDB(conf.Database) + if err != nil { + log.Fatal(err) + } + backupStorage, err := storage.NewBackend(conf.Backup) + if err != nil { + log.Fatal(err) + } + archive, err := storage.NewBackend(conf.Archive) + if err != nil { + log.Fatal(err) + } + + // we don't need crypt4gh keys if copyheader disabled + var key *[32]byte + var publicKey *[32]byte + if config.CopyHeader() { + key, err = config.GetC4GHKey() + if err != nil { + log.Fatal(err) + } + + publicKey, err = config.GetC4GHPublicKey() + if err != nil { + log.Fatal(err) + } + } + + defer mq.Channel.Close() + defer mq.Connection.Close() + defer db.Close() + + go func() { + connError := mq.ConnectionWatcher() + log.Error(connError) + forever <- false + }() + + go func() { + connError := mq.ChannelWatcher() + log.Error(connError) + forever <- false + }() + + log.Info("Starting backup service") + var message backup + jsonSchema := "ingestion-completion" + + if conf.Broker.Queue == "accessionIDs" { + jsonSchema = "ingestion-accession" + } + + go func() { + messages, err := mq.GetMessages(conf.Broker.Queue) + if err != nil { + log.Fatal(err) + } + for delivered := range messages { + log.Debugf("Received a message (corr-id: %s, message: %s)", + delivered.CorrelationId, + delivered.Body) + + err := mq.ValidateJSON(&delivered, + jsonSchema, + delivered.Body, + &message) + + if err != nil { + log.Errorf("Validation of incoming message failed "+ + "(corr-id: %s, error: %v)", + delivered.CorrelationId, + err) + + continue + } + + // we unmarshal the message in the validation step so this is safe to do + _ = json.Unmarshal(delivered.Body, &message) + + log.Infof("Received work (corr-id: %s, "+ + "filepath: %s, "+ + "user: %s, "+ + "accessionid: %s, "+ + "decryptedChecksums: %v)", + 
delivered.CorrelationId, + message.Filepath, + message.User, + message.AccessionID, + message.DecryptedChecksums) + + // Extract the sha256 from the message and use it for the database + var checksumSha256 string + for _, checksum := range message.DecryptedChecksums { + if checksum.Type == "sha256" { + checksumSha256 = checksum.Value + } + } + + var filePath string + var fileSize int + if filePath, fileSize, err = db.GetArchived(message.User, message.Filepath, checksumSha256); err != nil { + log.Errorf("GetArchived failed "+ + "(corr-id: %s, "+ + "filepath: %s, "+ + "user: %s, "+ + "accessionid: %s, "+ + "decryptedChecksums: %v, error: %v)", + delivered.CorrelationId, + message.Filepath, + message.User, + message.AccessionID, + message.DecryptedChecksums, + err) + + // nack the message but requeue until we fixed the SQL retry. + if e := delivered.Nack(false, true); e != nil { + log.Errorf("Failed to NAck because of GetArchived failed "+ + "(corr-id: %s, "+ + "filepath: %s, "+ + "user: %s, "+ + "accessionid: %s, "+ + "decryptedChecksums: %v, error: %v)", + delivered.CorrelationId, + message.Filepath, + message.User, + message.AccessionID, + message.DecryptedChecksums, + e) + } + + continue + } + + log.Debug("Backup initiated") + + // Get size on disk, will also give some time for the file to + // appear if it has not already + + diskFileSize, err := archive.GetFileSize(filePath) + + if err != nil { + log.Errorf("Failed to get size info for archived file %s "+ + "(corr-id: %s, "+ + "filepath: %s, "+ + "user: %s, "+ + "accessionid: %s, "+ + "decryptedChecksums: %v, error: %v)", + filePath, + delivered.CorrelationId, + message.Filepath, + message.User, + message.AccessionID, + message.DecryptedChecksums, + err) + + if e := delivered.Nack(false, true); e != nil { + log.Errorf("Failed to NAck because of GetFileSize failed "+ + "(corr-id: %s, "+ + "filepath: %s, "+ + "user: %s, "+ + "accessionid: %s, "+ + "decryptedChecksums: %v, error: %v)", + delivered.CorrelationId, + message.Filepath, + message.User, + message.AccessionID, + message.DecryptedChecksums, + e) + } + + continue + } + + if diskFileSize != int64(fileSize) { + log.Errorf("File size in archive does not match database for archive file %s "+ + "- archive size is %d, database has %d "+ + "(corr-id: %s, "+ + "filepath: %s, "+ + "user: %s, "+ + "accessionid: %s, "+ + "decryptedChecksums: %v, error: %v)", + filePath, + diskFileSize, + fileSize, + delivered.CorrelationId, + message.Filepath, + message.User, + message.AccessionID, + message.DecryptedChecksums, + err) + + if e := delivered.Nack(false, true); e != nil { + log.Errorf("Failed to NAck because of file size differences failed "+ + "(corr-id: %s, "+ + "filepath: %s, "+ + "user: %s, "+ + "accessionid: %s, "+ + "decryptedChecksums: %v, error: %v)", + delivered.CorrelationId, + message.Filepath, + message.User, + message.AccessionID, + message.DecryptedChecksums, + e) + } + + continue + } + + file, err := archive.NewFileReader(filePath) + if err != nil { + log.Errorf("Failed to open archived file %s "+ + "(corr-id: %s, "+ + "filepath: %s, "+ + "user: %s, "+ + "accessionid: %s, "+ + "decryptedChecksums: %v, error: %v)", + filePath, + delivered.CorrelationId, + message.Filepath, + message.User, + message.AccessionID, + message.DecryptedChecksums, + err) + + //FIXME: should it retry? 
+ if e := delivered.Nack(false, true); e != nil { + log.Errorf("Failed to NAck because of NewFileReader failed "+ + "(corr-id: %s, "+ + "filepath: %s, "+ + "user: %s, "+ + "accessionid: %s, "+ + "decryptedChecksums: %v, error: %v)", + delivered.CorrelationId, + message.Filepath, + message.User, + message.AccessionID, + message.DecryptedChecksums, + e) + } + + continue + } + + // If the copy header is enabled, use the actual filepath to make backup + // This will be used in the BigPicture backup, enabling for ingestion of the file + if config.CopyHeader() { + filePath = message.Filepath + } + + dest, err := backupStorage.NewFileWriter(filePath) + if err != nil { + log.Errorf("Failed to open backup file %s for writing "+ + "(corr-id: %s, "+ + "filepath: %s, "+ + "user: %s, "+ + "accessionid: %s, "+ + "decryptedChecksums: %v, error: %v)", + filePath, + delivered.CorrelationId, + message.Filepath, + message.User, + message.AccessionID, + message.DecryptedChecksums, + err) + + //FIXME: should it retry? + if e := delivered.Nack(false, true); e != nil { + log.Errorf("Failed to NAck because of NewFileWriter failed "+ + "(corr-id: %s, "+ + "filepath: %s, "+ + "user: %s, "+ + "accessionid: %s, "+ + "decryptedChecksums: %v, error: %v)", + delivered.CorrelationId, + message.Filepath, + message.User, + message.AccessionID, + message.DecryptedChecksums, + e) + } + + continue + } + + // Check if the header is needed + //nolint:nestif + if config.CopyHeader() { + // Get the header from db + header, err := db.GetHeaderForStableID(message.AccessionID) + if err != nil { + log.Errorf("GetHeaderForStableID failed "+ + "(corr-id: %s, "+ + "filepath: %s, "+ + "user: %s, "+ + "accessionid: %s, "+ + "decryptedChecksums: %v, error: %v)", + delivered.CorrelationId, + message.Filepath, + message.User, + message.AccessionID, + message.DecryptedChecksums, + err) + } + + // Decrypt header + log.Debug("Decrypt header") + DecHeader, err := FormatHexHeader(header) + if err != nil { + log.Errorf("Failed to decode the header %s "+ + "(corr-id: %s, "+ + "filepath: %s, "+ + "user: %s, "+ + "accessionid: %s, "+ + "decryptedChecksums: %v, error: %v)", + filePath, + delivered.CorrelationId, + message.Filepath, + message.User, + message.AccessionID, + message.DecryptedChecksums, + err) + + if e := delivered.Nack(false, true); e != nil { + log.Errorf("Failed to NAck because of decode header failed "+ + "(corr-id: %s, "+ + "filepath: %s, "+ + "user: %s, "+ + "accessionid: %s, "+ + "decryptedChecksums: %v, error: %v)", + delivered.CorrelationId, + message.Filepath, + message.User, + message.AccessionID, + message.DecryptedChecksums, + e) + } + } + + // Reencrypt header + log.Debug("Reencrypt header") + pubkeyList := [][chacha20poly1305.KeySize]byte{} + pubkeyList = append(pubkeyList, *publicKey) + newHeader, err := headers.ReEncryptHeader(DecHeader, *key, pubkeyList) + if err != nil { + log.Errorf("Failed to reencrypt the header %s "+ + "(corr-id: %s, "+ + "filepath: %s, "+ + "user: %s, "+ + "accessionid: %s, "+ + "decryptedChecksums: %v, error: %v)", + filePath, + delivered.CorrelationId, + message.Filepath, + message.User, + message.AccessionID, + message.DecryptedChecksums, + err) + + if e := delivered.Nack(false, true); e != nil { + log.Errorf("Failed to NAck because of reencrypt header failed "+ + "(corr-id: %s, "+ + "filepath: %s, "+ + "user: %s, "+ + "accessionid: %s, "+ + "decryptedChecksums: %v, error: %v)", + delivered.CorrelationId, + message.Filepath, + message.User, + message.AccessionID, + message.DecryptedChecksums, + 
e) + } + } + + // write header to destination file + _, err = dest.Write(newHeader) + if err != nil { + log.Errorf("Failed to write the header to destination %s "+ + "(corr-id: %s, "+ + "filepath: %s, "+ + "user: %s, "+ + "accessionid: %s, "+ + "decryptedChecksums: %v, error: %v)", + filePath, + delivered.CorrelationId, + message.Filepath, + message.User, + message.AccessionID, + message.DecryptedChecksums, + err) + } + } + + // Copy the file and check is sizes match + copiedSize, err := io.Copy(dest, file) + if err != nil || copiedSize != int64(fileSize) { + log.Errorf("Failed to copy file "+ + "(corr-id: %s, "+ + "filepath: %s, "+ + "user: %s, "+ + "accessionid: %s, "+ + "decryptedChecksums: %v, error: %v)", + delivered.CorrelationId, + message.Filepath, + message.User, + message.AccessionID, + message.DecryptedChecksums, + err) + + //FIXME: should it retry? + if e := delivered.Nack(false, true); e != nil { + log.Errorf("Failed to NAck because of Copy failed "+ + "(corr-id: %s, "+ + "filepath: %s, "+ + "user: %s, "+ + "accessionid: %s, "+ + "decryptedChecksums: %v, error: %v)", + delivered.CorrelationId, + message.Filepath, + message.User, + message.AccessionID, + message.DecryptedChecksums, + e) + } + + continue + } + + file.Close() + dest.Close() + + log.Infof("Backuped file %s (%d bytes) from archive to backup "+ + "(corr-id: %s, "+ + "filepath: %s, "+ + "user: %s, "+ + "accessionid: %s, "+ + "decryptedChecksums: %v)", + filePath, + fileSize, + delivered.CorrelationId, + message.Filepath, + message.User, + message.AccessionID, + message.DecryptedChecksums) + + if err := mq.SendMessage(delivered.CorrelationId, conf.Broker.Exchange, conf.Broker.RoutingKey, conf.Broker.Durable, delivered.Body); err != nil { + // TODO fix resend mechanism + log.Errorf("Failed to send message for completed "+ + "(corr-id: %s, "+ + "filepath: %s, "+ + "user: %s, "+ + "accessionid: %s, "+ + "decryptedChecksums: %v, error: %v)", + delivered.CorrelationId, + message.Filepath, + message.User, + message.AccessionID, + message.DecryptedChecksums, + err) + + // Restart loop, do not ack + continue + } + + if err := delivered.Ack(false); err != nil { + + log.Errorf("Failed to ack message after work completed "+ + "(corr-id: %s, "+ + "filepath: %s, "+ + "user: %s, "+ + "accessionid: %s, "+ + "decryptedChecksums: %v, error: %v)", + delivered.CorrelationId, + message.Filepath, + message.User, + message.AccessionID, + message.DecryptedChecksums, + err) + + } + } + }() + + <-forever +} + +// FormatHexHeader decodes a hex formatted file header, and returns the data as a binary +func FormatHexHeader(hexData string) ([]byte, error) { + + // Trim whitespace that might otherwise confuse the hex parse + headerHexStr := strings.TrimSpace(hexData) + + // Decode the hex + binaryHeader, err := hex.DecodeString(headerHexStr) + if err != nil { + return nil, err + } + + return binaryHeader, nil +} diff --git a/sda/cmd/sync/sync.md b/sda/cmd/sync/sync.md new file mode 100644 index 000000000..4b24235c8 --- /dev/null +++ b/sda/cmd/sync/sync.md @@ -0,0 +1,172 @@ +# sda-pipeline: backup + +Moves data to backup storage and optionally merges it with the encryption header. + +## Configuration + +There are a number of options that can be set for the backup service. +These settings can be set by mounting a yaml-file at `/config.yaml` with settings. + +ex. 
+```yaml +log: + level: "debug" + format: "json" +``` +They may also be set using environment variables like: +```bash +export LOG_LEVEL="debug" +export LOG_FORMAT="json" +``` + +### Backup specific settings + + - `BACKUP_COPYHEADER`: if `true`, the backup service will reencrypt and add headers to the backup files. + +#### Keyfile settings + +These settings control which crypt4gh keyfile is loaded. +These settings are only needed is `copyheader` is `true`. + + - `C4GH_FILEPATH`: path to the crypt4gh keyfile + - `C4GH_PASSPHRASE`: pass phrase to unlock the keyfile + - `C4GH_BACKUPPUBKEY`: path to the crypt4gh public key to use for reencrypting file headers. + +### RabbitMQ broker settings + +These settings control how backup connects to the RabbitMQ message broker. + + - `BROKER_HOST`: hostname of the rabbitmq server + + - `BROKER_PORT`: rabbitmq broker port (commonly `5671` with TLS and `5672` without) + + - `BROKER_QUEUE`: message queue to read messages from (commonly `backup`) + + - `BROKER_ROUTINGKEY`: message queue to write success messages to (commonly `completed`) + + - `BROKER_USER`: username to connect to rabbitmq + + - `BROKER_PASSWORD`: password to connect to rabbitmq + + - `BROKER_PREFETCHCOUNT`: Number of messages to pull from the message server at the time (default to 2) + +### PostgreSQL Database settings: + + - `DB_HOST`: hostname for the postgresql database + + - `DB_PORT`: database port (commonly 5432) + + - `DB_USER`: username for the database + + - `DB_PASSWORD`: password for the database + + - `DB_DATABASE`: database name + + - `DB_SSLMODE`: The TLS encryption policy to use for database connections. + Valid options are: + - `disable` + - `allow` + - `prefer` + - `require` + - `verify-ca` + - `verify-full` + + More information is available + [in the postgresql documentation](https://www.postgresql.org/docs/current/libpq-ssl.html#LIBPQ-SSL-PROTECTION) + + Note that if `DB_SSLMODE` is set to anything but `disable`, then `DB_CACERT` needs to be set, + and if set to `verify-full`, then `DB_CLIENTCERT`, and `DB_CLIENTKEY` must also be set + + - `DB_CLIENTKEY`: key-file for the database client certificate + + - `DB_CLIENTCERT`: database client certificate file + + - `DB_CACERT`: Certificate Authority (CA) certificate for the database to use + +### Storage settings + +Storage backend is defined by the `ARCHIVE_TYPE`, and `BACKUP_TYPE` variables. +Valid values for these options are `S3` or `POSIX` +(Defaults to `POSIX` on unknown values). + +The value of these variables define what other variables are read. +The same variables are available for all storage types, differing by prefix (`ARCHIVE_`, or `BACKUP_`) + +if `*_TYPE` is `S3` then the following variables are available: + - `*_URL`: URL to the S3 system + - `*_ACCESSKEY`: The S3 access and secret key are used to authenticate to S3, + [more info at AWS](https://docs.aws.amazon.com/general/latest/gr/aws-sec-cred-types.html#access-keys-and-secret-access-keys) + - `*_SECRETKEY`: The S3 access and secret key are used to authenticate to S3, + [more info at AWS](https://docs.aws.amazon.com/general/latest/gr/aws-sec-cred-types.html#access-keys-and-secret-access-keys) + - `*_BUCKET`: The S3 bucket to use as the storage root + - `*_PORT`: S3 connection port (default: `443`) + - `*_REGION`: S3 region (default: `us-east-1`) + - `*_CHUNKSIZE`: S3 chunk size for multipart uploads. 
+# CA certificate is only needed if the S3 server has a certificate signed by a private entity + - `*_CACERT`: Certificate Authority (CA) certificate for the storage system + +and if `*_TYPE` is `POSIX`: + - `*_LOCATION`: POSIX path to use as storage root + +### Logging settings: + + - `LOG_FORMAT` can be set to “json” to get logs in json format. + All other values result in text logging + + - `LOG_LEVEL` can be set to one of the following, in increasing order of severity: + - `trace` + - `debug` + - `info` + - `warn` (or `warning`) + - `error` + - `fatal` + - `panic` + +## Service Description +The backup service copies files from the archive storage to backup storage. If a public key is supplied and the copyHeader option is enabled the header will be re-encrypted and attached to the file before writing it to backup storage. + +When running, backup reads messages from the configured RabbitMQ queue (default "backup"). +For each message, these steps are taken (if not otherwise noted, errors halts progress, the message is Nack'ed, and the service moves on to the next message): + +1. The message is validated as valid JSON that matches either the "ingestion-completion" or "ingestion-accession" schema (based on configuration). +If the message can’t be validated it is discarded with an error message in the logs. + +1. The file path and file size is fetched from the database. + 1. In case the service is configured to copy headers, the path is replaced by the one of the incoming message and it is the original location where the file was uploaded in the inbox. + +1. The file size on disk is requested from the storage system. + +1. The database file size is compared against the disk file size. + +1. A file reader is created for the archive storage file, and a file writer is created for the backup storage file. + +1. If the service is configured to copy headers: + + 1. The header is read from the database. + On error, the error is written to the logs, but the message continues processing. + + 1. The header is decrypted. + If this causes an error, the error is written to the logs, the message is Nack'ed, but message processing continues. + + 1. The header is reencrypted. + If this causes an error, the error is written to the logs, the message is Nack'ed, but message processing continues. + + 1. The header is written to the backup file writer. + On error, the error is written to the logs, but the message continues processing. + +1. The file data is copied from the archive file reader to the backup file writer. + +1. A completed message is sent to RabbitMQ, if this fails a message is written to the logs, and the message is neither nack'ed nor ack'ed. + +1. The message is Ack'ed. + +## Communication + + - Backup reads messages from one rabbitmq queue (default `backup`) + + - Backup writes messages to one rabbitmq queue (default `completed`) + + - Backup optionally reads encryption headers from the database and can not be started without a database connection. + This is done using the `GetArchived`, and `GetHeaderForStableID` functions. + + - Backup reads data from archive storage and writes data to backup storage. 
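Steps 6 and 7 of the service description above are the heart of the copy: the header that ingest stored for the file is re-encrypted for the backup recipient's crypt4gh key and written in front of the unchanged encrypted payload streamed from archive storage. A minimal sketch of that step, assuming the private key and the recipient public key are already loaded and the header has been read from the database as a hex string; the crypt4gh call and the hex decoding mirror the code above, while the function name and its parameters are illustrative:

```go
package backup

import (
	"encoding/hex"
	"io"
	"strings"

	"github.com/neicnordic/crypt4gh/model/headers"
	"golang.org/x/crypto/chacha20poly1305"
)

// reEncryptAndCopy rewraps a hex-encoded crypt4gh header for a new recipient
// and writes the new header followed by the encrypted payload to the backup
// destination, producing a complete crypt4gh file.
func reEncryptAndCopy(hexHeader string, privateKey, recipient [chacha20poly1305.KeySize]byte, archived io.Reader, dest io.Writer) error {
	// The header is stored hex encoded in the database, so decode it first.
	oldHeader, err := hex.DecodeString(strings.TrimSpace(hexHeader))
	if err != nil {
		return err
	}

	// Re-encrypt the header so the holder of the backup private key can open the file.
	newHeader, err := headers.ReEncryptHeader(oldHeader, privateKey, [][chacha20poly1305.KeySize]byte{recipient})
	if err != nil {
		return err
	}

	// Header first, then the unchanged encrypted payload from archive storage.
	if _, err := dest.Write(newHeader); err != nil {
		return err
	}
	_, err = io.Copy(dest, archived)

	return err
}
```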
diff --git a/sda/cmd/sync/sync_test.go b/sda/cmd/sync/sync_test.go new file mode 100644 index 000000000..2fa0bf9ee --- /dev/null +++ b/sda/cmd/sync/sync_test.go @@ -0,0 +1,34 @@ +package main + +import ( + "testing" + + "github.com/spf13/viper" + "github.com/stretchr/testify/suite" +) + +type TestSuite struct { + suite.Suite +} + +func TestBackupTestSuite(t *testing.T) { + suite.Run(t, new(TestSuite)) +} + +func (suite *TestSuite) SetupTest() { + viper.Set("log.level", "debug") + viper.Set("archive.location", "../../dev_utils") + viper.Set("backup.location", "../../dev_utils") + + viper.Set("broker.host", "test") + viper.Set("broker.port", 123) + viper.Set("broker.user", "test") + viper.Set("broker.password", "test") + viper.Set("broker.queue", "test") + viper.Set("broker.routingkey", "test") + viper.Set("db.host", "test") + viper.Set("db.port", 123) + viper.Set("db.user", "test") + viper.Set("db.password", "test") + viper.Set("db.database", "test") +} From 227f890449b9c3ff5e96ecd3fe5e69e32fe8fec7 Mon Sep 17 00:00:00 2001 From: Joakim Bygdell Date: Wed, 18 Oct 2023 12:23:00 +0200 Subject: [PATCH 02/34] [sync] make it work with the merged code base --- sda/cmd/sync/sync.go | 106 ++++++++------------- sda/internal/config/config.go | 67 ++++++++++++- sda/internal/config/config_test.go | 2 +- sda/internal/database/db_functions.go | 15 +++ sda/internal/database/db_functions_test.go | 20 ++++ 5 files changed, 139 insertions(+), 71 deletions(-) diff --git a/sda/cmd/sync/sync.go b/sda/cmd/sync/sync.go index 6b0614f3b..df0b32065 100644 --- a/sda/cmd/sync/sync.go +++ b/sda/cmd/sync/sync.go @@ -5,37 +5,23 @@ package main import ( "encoding/hex" "encoding/json" + "fmt" "io" "strings" - "sda-pipeline/internal/broker" - "sda-pipeline/internal/config" - "sda-pipeline/internal/database" - "sda-pipeline/internal/storage" - "github.com/neicnordic/crypt4gh/model/headers" + "github.com/neicnordic/sensitive-data-archive/internal/broker" + "github.com/neicnordic/sensitive-data-archive/internal/config" + "github.com/neicnordic/sensitive-data-archive/internal/database" + "github.com/neicnordic/sensitive-data-archive/internal/schema" + "github.com/neicnordic/sensitive-data-archive/internal/storage" log "github.com/sirupsen/logrus" "golang.org/x/crypto/chacha20poly1305" ) -// Backup struct that holds the json message data -type backup struct { - Type string `json:"type,omitempty"` - User string `json:"user"` - Filepath string `json:"filepath"` - AccessionID string `json:"accession_id"` - DecryptedChecksums []checksums `json:"decrypted_checksums"` -} - -// Checksums is struct for the checksum type and value -type checksums struct { - Type string `json:"type"` - Value string `json:"value"` -} - func main() { forever := make(chan bool) - conf, err := config.NewConfig("backup") + conf, err := config.NewConfig("sync") if err != nil { log.Fatal(err) } @@ -43,11 +29,11 @@ func main() { if err != nil { log.Fatal(err) } - db, err := database.NewDB(conf.Database) + db, err := database.NewSDAdb(conf.Database) if err != nil { log.Fatal(err) } - backupStorage, err := storage.NewBackend(conf.Backup) + syncDestination, err := storage.NewBackend(conf.Sync) if err != nil { log.Fatal(err) } @@ -87,13 +73,8 @@ func main() { forever <- false }() - log.Info("Starting backup service") - var message backup - jsonSchema := "ingestion-completion" - - if conf.Broker.Queue == "accessionIDs" { - jsonSchema = "ingestion-accession" - } + log.Info("Starting sync service") + var message schema.IngestionCompletion go func() { messages, err := 
mq.GetMessages(conf.Broker.Queue) @@ -105,10 +86,7 @@ func main() { delivered.CorrelationId, delivered.Body) - err := mq.ValidateJSON(&delivered, - jsonSchema, - delivered.Body, - &message) + err := schema.ValidateJSON(fmt.Sprintf("%s/ingestion-completion.json", conf.Broker.SchemasPath), delivered.Body) if err != nil { log.Errorf("Validation of incoming message failed "+ @@ -128,22 +106,14 @@ func main() { "accessionid: %s, "+ "decryptedChecksums: %v)", delivered.CorrelationId, - message.Filepath, + message.FilePath, message.User, message.AccessionID, message.DecryptedChecksums) - // Extract the sha256 from the message and use it for the database - var checksumSha256 string - for _, checksum := range message.DecryptedChecksums { - if checksum.Type == "sha256" { - checksumSha256 = checksum.Value - } - } - var filePath string var fileSize int - if filePath, fileSize, err = db.GetArchived(message.User, message.Filepath, checksumSha256); err != nil { + if filePath, fileSize, err = db.GetArchived(delivered.CorrelationId); err != nil { log.Errorf("GetArchived failed "+ "(corr-id: %s, "+ "filepath: %s, "+ @@ -151,7 +121,7 @@ func main() { "accessionid: %s, "+ "decryptedChecksums: %v, error: %v)", delivered.CorrelationId, - message.Filepath, + message.FilePath, message.User, message.AccessionID, message.DecryptedChecksums, @@ -166,7 +136,7 @@ func main() { "accessionid: %s, "+ "decryptedChecksums: %v, error: %v)", delivered.CorrelationId, - message.Filepath, + message.FilePath, message.User, message.AccessionID, message.DecryptedChecksums, @@ -192,7 +162,7 @@ func main() { "decryptedChecksums: %v, error: %v)", filePath, delivered.CorrelationId, - message.Filepath, + message.FilePath, message.User, message.AccessionID, message.DecryptedChecksums, @@ -206,7 +176,7 @@ func main() { "accessionid: %s, "+ "decryptedChecksums: %v, error: %v)", delivered.CorrelationId, - message.Filepath, + message.FilePath, message.User, message.AccessionID, message.DecryptedChecksums, @@ -228,7 +198,7 @@ func main() { diskFileSize, fileSize, delivered.CorrelationId, - message.Filepath, + message.FilePath, message.User, message.AccessionID, message.DecryptedChecksums, @@ -242,7 +212,7 @@ func main() { "accessionid: %s, "+ "decryptedChecksums: %v, error: %v)", delivered.CorrelationId, - message.Filepath, + message.FilePath, message.User, message.AccessionID, message.DecryptedChecksums, @@ -262,7 +232,7 @@ func main() { "decryptedChecksums: %v, error: %v)", filePath, delivered.CorrelationId, - message.Filepath, + message.FilePath, message.User, message.AccessionID, message.DecryptedChecksums, @@ -277,7 +247,7 @@ func main() { "accessionid: %s, "+ "decryptedChecksums: %v, error: %v)", delivered.CorrelationId, - message.Filepath, + message.FilePath, message.User, message.AccessionID, message.DecryptedChecksums, @@ -290,10 +260,10 @@ func main() { // If the copy header is enabled, use the actual filepath to make backup // This will be used in the BigPicture backup, enabling for ingestion of the file if config.CopyHeader() { - filePath = message.Filepath + filePath = message.FilePath } - dest, err := backupStorage.NewFileWriter(filePath) + dest, err := syncDestination.NewFileWriter(filePath) if err != nil { log.Errorf("Failed to open backup file %s for writing "+ "(corr-id: %s, "+ @@ -303,7 +273,7 @@ func main() { "decryptedChecksums: %v, error: %v)", filePath, delivered.CorrelationId, - message.Filepath, + message.FilePath, message.User, message.AccessionID, message.DecryptedChecksums, @@ -318,7 +288,7 @@ func main() { 
"accessionid: %s, "+ "decryptedChecksums: %v, error: %v)", delivered.CorrelationId, - message.Filepath, + message.FilePath, message.User, message.AccessionID, message.DecryptedChecksums, @@ -341,7 +311,7 @@ func main() { "accessionid: %s, "+ "decryptedChecksums: %v, error: %v)", delivered.CorrelationId, - message.Filepath, + message.FilePath, message.User, message.AccessionID, message.DecryptedChecksums, @@ -360,7 +330,7 @@ func main() { "decryptedChecksums: %v, error: %v)", filePath, delivered.CorrelationId, - message.Filepath, + message.FilePath, message.User, message.AccessionID, message.DecryptedChecksums, @@ -374,7 +344,7 @@ func main() { "accessionid: %s, "+ "decryptedChecksums: %v, error: %v)", delivered.CorrelationId, - message.Filepath, + message.FilePath, message.User, message.AccessionID, message.DecryptedChecksums, @@ -396,7 +366,7 @@ func main() { "decryptedChecksums: %v, error: %v)", filePath, delivered.CorrelationId, - message.Filepath, + message.FilePath, message.User, message.AccessionID, message.DecryptedChecksums, @@ -410,7 +380,7 @@ func main() { "accessionid: %s, "+ "decryptedChecksums: %v, error: %v)", delivered.CorrelationId, - message.Filepath, + message.FilePath, message.User, message.AccessionID, message.DecryptedChecksums, @@ -429,7 +399,7 @@ func main() { "decryptedChecksums: %v, error: %v)", filePath, delivered.CorrelationId, - message.Filepath, + message.FilePath, message.User, message.AccessionID, message.DecryptedChecksums, @@ -447,7 +417,7 @@ func main() { "accessionid: %s, "+ "decryptedChecksums: %v, error: %v)", delivered.CorrelationId, - message.Filepath, + message.FilePath, message.User, message.AccessionID, message.DecryptedChecksums, @@ -462,7 +432,7 @@ func main() { "accessionid: %s, "+ "decryptedChecksums: %v, error: %v)", delivered.CorrelationId, - message.Filepath, + message.FilePath, message.User, message.AccessionID, message.DecryptedChecksums, @@ -484,12 +454,12 @@ func main() { filePath, fileSize, delivered.CorrelationId, - message.Filepath, + message.FilePath, message.User, message.AccessionID, message.DecryptedChecksums) - if err := mq.SendMessage(delivered.CorrelationId, conf.Broker.Exchange, conf.Broker.RoutingKey, conf.Broker.Durable, delivered.Body); err != nil { + if err := mq.SendMessage(delivered.CorrelationId, conf.Broker.Exchange, conf.Broker.RoutingKey, delivered.Body); err != nil { // TODO fix resend mechanism log.Errorf("Failed to send message for completed "+ "(corr-id: %s, "+ @@ -498,7 +468,7 @@ func main() { "accessionid: %s, "+ "decryptedChecksums: %v, error: %v)", delivered.CorrelationId, - message.Filepath, + message.FilePath, message.User, message.AccessionID, message.DecryptedChecksums, @@ -517,7 +487,7 @@ func main() { "accessionid: %s, "+ "decryptedChecksums: %v, error: %v)", delivered.CorrelationId, - message.Filepath, + message.FilePath, message.User, message.AccessionID, message.DecryptedChecksums, diff --git a/sda/internal/config/config.go b/sda/internal/config/config.go index 96f3b50cd..af8941c47 100644 --- a/sda/internal/config/config.go +++ b/sda/internal/config/config.go @@ -46,6 +46,7 @@ type Config struct { API APIConf Notify SMTPConf Orchestrator OrchestratorConf + Sync storage.Conf } type APIConf struct { @@ -257,6 +258,40 @@ func NewConfig(app string) (*Config, error) { "inbox.bucket", } viper.Set("inbox.type", S3) + case "sync": + requiredConfVars = []string{ + "broker.host", + "broker.port", + "broker.user", + "broker.password", + "broker.queue", + "broker.routingkey", + "db.host", + "db.port", + 
"db.user", + "db.password", + "db.database", + } + + switch viper.GetString("archive.type") { + case S3: + requiredConfVars = append(requiredConfVars, []string{"archive.url", "archive.accesskey", "archive.secretkey", "archive.bucket"}...) + case POSIX: + requiredConfVars = append(requiredConfVars, []string{"archive.location"}...) + default: + return nil, fmt.Errorf("archive.type not set") + } + + switch viper.GetString("sync.destination.type") { + case S3: + requiredConfVars = append(requiredConfVars, []string{"sync.destination.url", "sync.destination.accesskey", "sync.destination.secretkey", "sync.destination.bucket"}...) + case POSIX: + requiredConfVars = append(requiredConfVars, []string{"sync.destination.location"}...) + case SFTP: + requiredConfVars = append(requiredConfVars, []string{"sync.destination.sftp.host", "sync.destination.sftp.port", "sync.destination.sftp.userName", "sync.destination.sftp.pemKeyPath", "sync.destination.sftp.pemKeyPass"}...) + default: + return nil, fmt.Errorf("sync.destination.type not set") + } case "verify": requiredConfVars = []string{ "broker.host", @@ -410,6 +445,19 @@ func NewConfig(app string) (*Config, error) { if err != nil { return nil, err } + case "sync": + if err := c.configBroker(); err != nil { + return nil, err + } + + if err := c.configDatabase(); err != nil { + return nil, err + } + + c.configArchive() + c.configSyncDestination() + c.configSchemas() + case "verify": c.configArchive() @@ -722,6 +770,21 @@ func (c *Config) configSMTP() { c.Notify.FromAddr = viper.GetString("smtp.from") } +// configSync provides configuration for the sync destination storage +func (c *Config) configSyncDestination() { + switch viper.GetString("sync.destination.type") { + case S3: + c.Sync.Type = S3 + c.Sync.S3 = configS3Storage("sync.destination") + case SFTP: + c.Sync.Type = SFTP + c.Sync.SFTP = configSFTP("sync.destination") + case POSIX: + c.Sync.Type = POSIX + c.Sync.Posix.Location = viper.GetString("sync.destination.location") + } +} + // GetC4GHKey reads and decrypts and returns the c4gh key func GetC4GHKey() (*[32]byte, error) { keyPath := viper.GetString("c4gh.filepath") @@ -872,8 +935,8 @@ func TLSConfigProxy(c *Config) (*tls.Config, error) { // CopyHeader reads the config and returns if the header will be copied func CopyHeader() bool { - if viper.IsSet("backup.copyHeader") { - return viper.GetBool("backup.copyHeader") + if viper.IsSet("sync.copyHeader") { + return viper.GetBool("sync.copyHeader") } return false diff --git a/sda/internal/config/config_test.go b/sda/internal/config/config_test.go index 08c841f59..50c3423f0 100644 --- a/sda/internal/config/config_test.go +++ b/sda/internal/config/config_test.go @@ -268,7 +268,7 @@ func (suite *ConfigTestSuite) TestNotifyConfiguration() { } func (suite *ConfigTestSuite) TestCopyHeader() { - viper.Set("backup.copyHeader", "true") + viper.Set("sync.copyHeader", "true") cHeader := CopyHeader() assert.Equal(suite.T(), cHeader, true, "The CopyHeader does not work") } diff --git a/sda/internal/database/db_functions.go b/sda/internal/database/db_functions.go index 5bb6ac8f5..913f004f8 100644 --- a/sda/internal/database/db_functions.go +++ b/sda/internal/database/db_functions.go @@ -509,3 +509,18 @@ func (dbs *SDAdb) getFileInfo(id string) (FileInfo, error) { return info, nil } + +// GetHeaderForStableID retrieves the file header by using stable id +func (dbs *SDAdb) GetHeaderForStableID(stableID string) (string, error) { + dbs.checkAndReconnectIfNeeded() + + db := dbs.DB + const query = "SELECT header from 
sda.files WHERE stable_id = $1" + + var header string + if err := db.QueryRow(query, stableID).Scan(&header); err != nil { + return "", err + } + + return header, nil +} \ No newline at end of file diff --git a/sda/internal/database/db_functions_test.go b/sda/internal/database/db_functions_test.go index 8f6ac6995..8f0e04de1 100644 --- a/sda/internal/database/db_functions_test.go +++ b/sda/internal/database/db_functions_test.go @@ -364,3 +364,23 @@ func (suite *DatabaseTests) TestUpdateDatasetEvent() { err = db.UpdateDatasetEvent(dID, "deprecated", "{\"type\": \"deprecate\"}") assert.NoError(suite.T(), err, "got (%v) when creating new connection", err) } + +func (suite *DatabaseTests) TestGetHeaderForStableID() { + db, err := NewSDAdb(suite.dbConf) + assert.NoError(suite.T(), err, "got %v when creating new connection", err) + + // register a file in the database + fileID, err := db.RegisterFile("/testuser/TestGetHeaderForStableID.c4gh", "testuser") + assert.NoError(suite.T(), err, "failed to register file in database") + + err = db.StoreHeader([]byte("HEADER"), fileID) + assert.NoError(suite.T(), err, "failed to store file header") + + stableID := "TEST:010-1234-4567" + err = db.SetAccessionID(stableID, fileID) + assert.NoError(suite.T(), err, "got (%v) when setting stable ID: %s, %s", err, stableID, fileID) + + header, err := db.GetHeaderForStableID("TEST:010-1234-4567") + assert.NoError(suite.T(), err, "failed to get header for stable ID: %v", err) + assert.Equal(suite.T(), header, "484541444552", "did not get expected header") +} \ No newline at end of file From 3549dff414cdb07cd3661051a671c97fb7bacf25 Mon Sep 17 00:00:00 2001 From: Joakim Bygdell Date: Thu, 19 Oct 2023 12:33:03 +0200 Subject: [PATCH 03/34] [sync ] remove the `CopyHeader`config option This is a requirement for this service to work properly --- sda/cmd/sync/sync.go | 186 ++++++++++++++--------------- sda/internal/config/config.go | 12 +- sda/internal/config/config_test.go | 5 - 3 files changed, 90 insertions(+), 113 deletions(-) diff --git a/sda/cmd/sync/sync.go b/sda/cmd/sync/sync.go index df0b32065..e8ac9e3d9 100644 --- a/sda/cmd/sync/sync.go +++ b/sda/cmd/sync/sync.go @@ -42,19 +42,16 @@ func main() { log.Fatal(err) } - // we don't need crypt4gh keys if copyheader disabled var key *[32]byte var publicKey *[32]byte - if config.CopyHeader() { - key, err = config.GetC4GHKey() - if err != nil { - log.Fatal(err) - } + key, err = config.GetC4GHKey() + if err != nil { + log.Fatal(err) + } - publicKey, err = config.GetC4GHPublicKey() - if err != nil { - log.Fatal(err) - } + publicKey, err = config.GetC4GHPublicKey() + if err != nil { + log.Fatal(err) } defer mq.Channel.Close() @@ -257,13 +254,7 @@ func main() { continue } - // If the copy header is enabled, use the actual filepath to make backup - // This will be used in the BigPicture backup, enabling for ingestion of the file - if config.CopyHeader() { - filePath = message.FilePath - } - - dest, err := syncDestination.NewFileWriter(filePath) + dest, err := syncDestination.NewFileWriter(message.FilePath) if err != nil { log.Errorf("Failed to open backup file %s for writing "+ "(corr-id: %s, "+ @@ -298,115 +289,112 @@ func main() { continue } - // Check if the header is needed - //nolint:nestif - if config.CopyHeader() { - // Get the header from db - header, err := db.GetHeaderForStableID(message.AccessionID) - if err != nil { - log.Errorf("GetHeaderForStableID failed "+ - "(corr-id: %s, "+ - "filepath: %s, "+ - "user: %s, "+ - "accessionid: %s, "+ - "decryptedChecksums: %v, 
error: %v)", - delivered.CorrelationId, - message.FilePath, - message.User, - message.AccessionID, - message.DecryptedChecksums, - err) - } + // Get the header from db + header, err := db.GetHeaderForStableID(message.AccessionID) + if err != nil { + log.Errorf("GetHeaderForStableID failed "+ + "(corr-id: %s, "+ + "filepath: %s, "+ + "user: %s, "+ + "accessionid: %s, "+ + "decryptedChecksums: %v, error: %v)", + delivered.CorrelationId, + message.FilePath, + message.User, + message.AccessionID, + message.DecryptedChecksums, + err) + } - // Decrypt header - log.Debug("Decrypt header") - DecHeader, err := FormatHexHeader(header) - if err != nil { - log.Errorf("Failed to decode the header %s "+ - "(corr-id: %s, "+ - "filepath: %s, "+ - "user: %s, "+ - "accessionid: %s, "+ - "decryptedChecksums: %v, error: %v)", - filePath, - delivered.CorrelationId, - message.FilePath, - message.User, - message.AccessionID, - message.DecryptedChecksums, - err) - - if e := delivered.Nack(false, true); e != nil { - log.Errorf("Failed to NAck because of decode header failed "+ - "(corr-id: %s, "+ - "filepath: %s, "+ - "user: %s, "+ - "accessionid: %s, "+ - "decryptedChecksums: %v, error: %v)", - delivered.CorrelationId, - message.FilePath, - message.User, - message.AccessionID, - message.DecryptedChecksums, - e) - } - } + // Decrypt header + log.Debug("Decrypt header") + DecHeader, err := FormatHexHeader(header) + if err != nil { + log.Errorf("Failed to decode the header %s "+ + "(corr-id: %s, "+ + "filepath: %s, "+ + "user: %s, "+ + "accessionid: %s, "+ + "decryptedChecksums: %v, error: %v)", + filePath, + delivered.CorrelationId, + message.FilePath, + message.User, + message.AccessionID, + message.DecryptedChecksums, + err) - // Reencrypt header - log.Debug("Reencrypt header") - pubkeyList := [][chacha20poly1305.KeySize]byte{} - pubkeyList = append(pubkeyList, *publicKey) - newHeader, err := headers.ReEncryptHeader(DecHeader, *key, pubkeyList) - if err != nil { - log.Errorf("Failed to reencrypt the header %s "+ + if e := delivered.Nack(false, true); e != nil { + log.Errorf("Failed to NAck because of decode header failed "+ "(corr-id: %s, "+ "filepath: %s, "+ "user: %s, "+ "accessionid: %s, "+ "decryptedChecksums: %v, error: %v)", - filePath, delivered.CorrelationId, message.FilePath, message.User, message.AccessionID, message.DecryptedChecksums, - err) - - if e := delivered.Nack(false, true); e != nil { - log.Errorf("Failed to NAck because of reencrypt header failed "+ - "(corr-id: %s, "+ - "filepath: %s, "+ - "user: %s, "+ - "accessionid: %s, "+ - "decryptedChecksums: %v, error: %v)", - delivered.CorrelationId, - message.FilePath, - message.User, - message.AccessionID, - message.DecryptedChecksums, - e) - } + e) } + } + + // Reencrypt header + log.Debug("Reencrypt header") + pubkeyList := [][chacha20poly1305.KeySize]byte{} + pubkeyList = append(pubkeyList, *publicKey) + newHeader, err := headers.ReEncryptHeader(DecHeader, *key, pubkeyList) + if err != nil { + log.Errorf("Failed to reencrypt the header %s "+ + "(corr-id: %s, "+ + "filepath: %s, "+ + "user: %s, "+ + "accessionid: %s, "+ + "decryptedChecksums: %v, error: %v)", + filePath, + delivered.CorrelationId, + message.FilePath, + message.User, + message.AccessionID, + message.DecryptedChecksums, + err) - // write header to destination file - _, err = dest.Write(newHeader) - if err != nil { - log.Errorf("Failed to write the header to destination %s "+ + if e := delivered.Nack(false, true); e != nil { + log.Errorf("Failed to NAck because of reencrypt header 
failed "+ "(corr-id: %s, "+ "filepath: %s, "+ "user: %s, "+ "accessionid: %s, "+ "decryptedChecksums: %v, error: %v)", - filePath, delivered.CorrelationId, message.FilePath, message.User, message.AccessionID, message.DecryptedChecksums, - err) + e) } } + // write header to destination file + _, err = dest.Write(newHeader) + if err != nil { + log.Errorf("Failed to write the header to destination %s "+ + "(corr-id: %s, "+ + "filepath: %s, "+ + "user: %s, "+ + "accessionid: %s, "+ + "decryptedChecksums: %v, error: %v)", + filePath, + delivered.CorrelationId, + message.FilePath, + message.User, + message.AccessionID, + message.DecryptedChecksums, + err) + } + + // Copy the file and check is sizes match copiedSize, err := io.Copy(dest, file) if err != nil || copiedSize != int64(fileSize) { diff --git a/sda/internal/config/config.go b/sda/internal/config/config.go index af8941c47..18fbf4564 100644 --- a/sda/internal/config/config.go +++ b/sda/internal/config/config.go @@ -266,6 +266,9 @@ func NewConfig(app string) (*Config, error) { "broker.password", "broker.queue", "broker.routingkey", + "c4gh.backupPubKey", + "c4gh.filepath", + "c4gh.passphrase", "db.host", "db.port", "db.user", @@ -932,12 +935,3 @@ func TLSConfigProxy(c *Config) (*tls.Config, error) { return cfg, nil } - -// CopyHeader reads the config and returns if the header will be copied -func CopyHeader() bool { - if viper.IsSet("sync.copyHeader") { - return viper.GetBool("sync.copyHeader") - } - - return false -} diff --git a/sda/internal/config/config_test.go b/sda/internal/config/config_test.go index 50c3423f0..d8e05bf49 100644 --- a/sda/internal/config/config_test.go +++ b/sda/internal/config/config_test.go @@ -267,8 +267,3 @@ func (suite *ConfigTestSuite) TestNotifyConfiguration() { assert.NotNil(suite.T(), config) } -func (suite *ConfigTestSuite) TestCopyHeader() { - viper.Set("sync.copyHeader", "true") - cHeader := CopyHeader() - assert.Equal(suite.T(), cHeader, true, "The CopyHeader does not work") -} From 10229e3efb1712e117f5dde2ae455846b3e3b409 Mon Sep 17 00:00:00 2001 From: Joakim Bygdell Date: Thu, 19 Oct 2023 13:15:46 +0200 Subject: [PATCH 04/34] [config] Rename `backupPubKey` to `syncPubKeyPath` --- sda/internal/config/config.go | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/sda/internal/config/config.go b/sda/internal/config/config.go index 18fbf4564..44981af52 100644 --- a/sda/internal/config/config.go +++ b/sda/internal/config/config.go @@ -266,9 +266,9 @@ func NewConfig(app string) (*Config, error) { "broker.password", "broker.queue", "broker.routingkey", - "c4gh.backupPubKey", "c4gh.filepath", "c4gh.passphrase", + "c4gh.syncPubKeyPath", "db.host", "db.port", "db.user", @@ -811,8 +811,7 @@ func GetC4GHKey() (*[32]byte, error) { // GetC4GHPublicKey reads the c4gh public key func GetC4GHPublicKey() (*[32]byte, error) { - keyPath := viper.GetString("c4gh.backupPubKey") - + keyPath := viper.GetString("c4gh.syncPubKeyPath") // Make sure the key path and passphrase is valid keyFile, err := os.Open(keyPath) if err != nil { From 37c599cf762c017c09cd1ebb771b965d5feeee7e Mon Sep 17 00:00:00 2001 From: Joakim Bygdell Date: Thu, 19 Oct 2023 13:25:33 +0200 Subject: [PATCH 05/34] [test][config] validate sync config --- sda/internal/config/config_test.go | 38 ++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/sda/internal/config/config_test.go b/sda/internal/config/config_test.go index d8e05bf49..380bd4bfc 100644 --- a/sda/internal/config/config_test.go +++ 
b/sda/internal/config/config_test.go @@ -267,3 +267,41 @@ func (suite *ConfigTestSuite) TestNotifyConfiguration() { assert.NotNil(suite.T(), config) } +func (suite *ConfigTestSuite) TestSyncConfig() { + suite.SetupTest() + // At this point we should fail because we lack configuration + config, err := NewConfig("backup") + assert.Error(suite.T(), err) + assert.Nil(suite.T(), config) + + viper.Set("archive.type", "posix") + viper.Set("archive.location", "test") + viper.Set("sync.destination.type", "posix") + viper.Set("sync.destination.location", "test") + viper.Set("c4gh.filepath", "/keys/key") + viper.Set("c4gh.passphrase", "pass") + viper.Set("c4gh.syncPubKeyPath", "/keys/recipient") + config, err = NewConfig("sync") + assert.NotNil(suite.T(), config) + assert.NoError(suite.T(), err) + assert.NotNil(suite.T(), config.Broker) + assert.Equal(suite.T(), "testhost", config.Broker.Host) + assert.Equal(suite.T(), 123, config.Broker.Port) + assert.Equal(suite.T(), "testuser", config.Broker.User) + assert.Equal(suite.T(), "testpassword", config.Broker.Password) + assert.Equal(suite.T(), "testqueue", config.Broker.Queue) + assert.Equal(suite.T(), "routingtest", config.Broker.RoutingKey) + assert.Equal(suite.T(), "testexchange", config.Broker.Exchange) + assert.NotNil(suite.T(), config.Database) + assert.Equal(suite.T(), "test", config.Database.Host) + assert.Equal(suite.T(), 123, config.Database.Port) + assert.Equal(suite.T(), "test", config.Database.User) + assert.Equal(suite.T(), "test", config.Database.Password) + assert.Equal(suite.T(), "test", config.Database.Database) + assert.NotNil(suite.T(), config.Archive) + assert.NotNil(suite.T(), config.Archive.Posix) + assert.Equal(suite.T(), "test", config.Archive.Posix.Location) + assert.NotNil(suite.T(), config.Sync) + assert.NotNil(suite.T(), config.Sync.Posix) + assert.Equal(suite.T(), "test", config.Sync.Posix.Location) +} \ No newline at end of file From 34dee634c2f7090bc8319e4fab02b17cca1b0179 Mon Sep 17 00:00:00 2001 From: Joakim Bygdell Date: Thu, 19 Oct 2023 13:26:48 +0200 Subject: [PATCH 06/34] [test][config] validte `GetC4GHpublicKey` --- sda/internal/config/config_test.go | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/sda/internal/config/config_test.go b/sda/internal/config/config_test.go index 380bd4bfc..57adfef93 100644 --- a/sda/internal/config/config_test.go +++ b/sda/internal/config/config_test.go @@ -1,6 +1,7 @@ package config import ( + "encoding/base64" "errors" "fmt" "os" @@ -304,4 +305,22 @@ func (suite *ConfigTestSuite) TestSyncConfig() { assert.NotNil(suite.T(), config.Sync) assert.NotNil(suite.T(), config.Sync.Posix) assert.Equal(suite.T(), "test", config.Sync.Posix.Location) -} \ No newline at end of file +} +func (suite *ConfigTestSuite) TestGetC4GHPublicKey() { + pubKey := "-----BEGIN CRYPT4GH PUBLIC KEY-----\nuQO46R56f/Jx0YJjBAkZa2J6n72r6HW/JPMS4tfepBs=\n-----END CRYPT4GH PUBLIC KEY-----" + pubKeyPath, _ := os.MkdirTemp("", "pubkey") + err := os.WriteFile(pubKeyPath+"/c4gh.pub", []byte(pubKey), 0600) + assert.NoError(suite.T(), err) + + var kb [32]byte + k, _ := base64.StdEncoding.DecodeString("uQO46R56f/Jx0YJjBAkZa2J6n72r6HW/JPMS4tfepBs=") + copy(kb[:], k) + + viper.Set("c4gh.syncPubKeyPath", pubKeyPath+"/c4gh.pub") + pkBytes, err := GetC4GHPublicKey() + assert.NoError(suite.T(), err) + assert.NotNil(suite.T(), pkBytes) + assert.Equal(suite.T(), pkBytes, &kb, "GetC4GHPublicKey didn't return correct pubKey") + + defer os.RemoveAll(pubKeyPath) +} From 
1ec61ca2105bf545c05497715f16b0410a61c9cc Mon Sep 17 00:00:00 2001 From: Joakim Bygdell Date: Thu, 19 Oct 2023 13:39:09 +0200 Subject: [PATCH 07/34] [test][config] validte `GetC4GHkey` --- sda/internal/config/config_test.go | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/sda/internal/config/config_test.go b/sda/internal/config/config_test.go index 57adfef93..d6f9d1a99 100644 --- a/sda/internal/config/config_test.go +++ b/sda/internal/config/config_test.go @@ -324,3 +324,22 @@ func (suite *ConfigTestSuite) TestGetC4GHPublicKey() { defer os.RemoveAll(pubKeyPath) } +func (suite *ConfigTestSuite) TestGetC4GHKey() { + key := "-----BEGIN CRYPT4GH ENCRYPTED PRIVATE KEY-----\nYzRnaC12MQAGc2NyeXB0ABQAAAAAEna8op+BzhTVrqtO5Rx7OgARY2hhY2hhMjBfcG9seTEzMDUAPMx2Gbtxdva0M2B0tb205DJT9RzZmvy/9ZQGDx9zjlObj11JCqg57z60F0KhJW+j/fzWL57leTEcIffRTA==\n-----END CRYPT4GH ENCRYPTED PRIVATE KEY-----" + keyPath, _ := os.MkdirTemp("", "key") + err := os.WriteFile(keyPath+"/c4gh.key", []byte(key), 0600) + assert.NoError(suite.T(), err) + + viper.Set("c4gh.filepath", keyPath+"/c4gh.key") + pkBytes, err := GetC4GHKey() + assert.EqualError(suite.T(), err, "chacha20poly1305: message authentication failed") + assert.Nil(suite.T(), pkBytes) + + viper.Set("c4gh.filepath", keyPath+"/c4gh.key") + viper.Set("c4gh.passphrase", "test") + pkBytes, err = GetC4GHKey() + assert.NoError(suite.T(), err) + assert.NotNil(suite.T(), pkBytes) + + defer os.RemoveAll(keyPath) +} From 0bbf617e35ddc8b7d9df3c1970b220c6bea8a744 Mon Sep 17 00:00:00 2001 From: Joakim Bygdell Date: Fri, 20 Oct 2023 13:49:11 +0200 Subject: [PATCH 08/34] [internal][database] make `GetHeaderForStableID` return byte array Now both `GetHeader` and `GetHeaderForStableID` return the same data type --- sda/cmd/sync/sync.go | 53 +--------------------- sda/internal/database/db_functions.go | 16 ++++--- sda/internal/database/db_functions_test.go | 4 +- 3 files changed, 12 insertions(+), 61 deletions(-) diff --git a/sda/cmd/sync/sync.go b/sda/cmd/sync/sync.go index e8ac9e3d9..d42c3de06 100644 --- a/sda/cmd/sync/sync.go +++ b/sda/cmd/sync/sync.go @@ -3,11 +3,9 @@ package main import ( - "encoding/hex" "encoding/json" "fmt" "io" - "strings" "github.com/neicnordic/crypt4gh/model/headers" "github.com/neicnordic/sensitive-data-archive/internal/broker" @@ -306,45 +304,11 @@ func main() { err) } - // Decrypt header - log.Debug("Decrypt header") - DecHeader, err := FormatHexHeader(header) - if err != nil { - log.Errorf("Failed to decode the header %s "+ - "(corr-id: %s, "+ - "filepath: %s, "+ - "user: %s, "+ - "accessionid: %s, "+ - "decryptedChecksums: %v, error: %v)", - filePath, - delivered.CorrelationId, - message.FilePath, - message.User, - message.AccessionID, - message.DecryptedChecksums, - err) - - if e := delivered.Nack(false, true); e != nil { - log.Errorf("Failed to NAck because of decode header failed "+ - "(corr-id: %s, "+ - "filepath: %s, "+ - "user: %s, "+ - "accessionid: %s, "+ - "decryptedChecksums: %v, error: %v)", - delivered.CorrelationId, - message.FilePath, - message.User, - message.AccessionID, - message.DecryptedChecksums, - e) - } - } - // Reencrypt header log.Debug("Reencrypt header") pubkeyList := [][chacha20poly1305.KeySize]byte{} pubkeyList = append(pubkeyList, *publicKey) - newHeader, err := headers.ReEncryptHeader(DecHeader, *key, pubkeyList) + newHeader, err := headers.ReEncryptHeader(header, *key, pubkeyList) if err != nil { log.Errorf("Failed to reencrypt the header %s "+ "(corr-id: %s, "+ @@ -487,18 +451,3 @@ func main() { 
<-forever } - -// FormatHexHeader decodes a hex formatted file header, and returns the data as a binary -func FormatHexHeader(hexData string) ([]byte, error) { - - // Trim whitespace that might otherwise confuse the hex parse - headerHexStr := strings.TrimSpace(hexData) - - // Decode the hex - binaryHeader, err := hex.DecodeString(headerHexStr) - if err != nil { - return nil, err - } - - return binaryHeader, nil -} diff --git a/sda/internal/database/db_functions.go b/sda/internal/database/db_functions.go index 913f004f8..b53e0694d 100644 --- a/sda/internal/database/db_functions.go +++ b/sda/internal/database/db_functions.go @@ -511,16 +511,18 @@ func (dbs *SDAdb) getFileInfo(id string) (FileInfo, error) { } // GetHeaderForStableID retrieves the file header by using stable id -func (dbs *SDAdb) GetHeaderForStableID(stableID string) (string, error) { +func (dbs *SDAdb) GetHeaderForStableID(stableID string) ([]byte, error) { dbs.checkAndReconnectIfNeeded() - - db := dbs.DB const query = "SELECT header from sda.files WHERE stable_id = $1" + var hexString string + if err := dbs.DB.QueryRow(query, stableID).Scan(&hexString); err != nil { + return nil, err + } - var header string - if err := db.QueryRow(query, stableID).Scan(&header); err != nil { - return "", err + header, err := hex.DecodeString(hexString) + if err != nil { + return nil, err } return header, nil -} \ No newline at end of file +} diff --git a/sda/internal/database/db_functions_test.go b/sda/internal/database/db_functions_test.go index 8f0e04de1..462f4a171 100644 --- a/sda/internal/database/db_functions_test.go +++ b/sda/internal/database/db_functions_test.go @@ -382,5 +382,5 @@ func (suite *DatabaseTests) TestGetHeaderForStableID() { header, err := db.GetHeaderForStableID("TEST:010-1234-4567") assert.NoError(suite.T(), err, "failed to get header for stable ID: %v", err) - assert.Equal(suite.T(), header, "484541444552", "did not get expected header") -} \ No newline at end of file + assert.Equal(suite.T(), header, []byte("HEADER"), "did not get expected header") +} From 8a761c8c54eb49de851d7a7c8a48c51c9d4f9e05 Mon Sep 17 00:00:00 2001 From: Joakim Bygdell Date: Fri, 20 Oct 2023 13:59:06 +0200 Subject: [PATCH 09/34] [sync] cleanup error messages --- sda/cmd/sync/sync.go | 343 +++++++------------------------------------ 1 file changed, 51 insertions(+), 292 deletions(-) diff --git a/sda/cmd/sync/sync.go b/sda/cmd/sync/sync.go index d42c3de06..d2b11a823 100644 --- a/sda/cmd/sync/sync.go +++ b/sda/cmd/sync/sync.go @@ -82,136 +82,54 @@ func main() { delivered.Body) err := schema.ValidateJSON(fmt.Sprintf("%s/ingestion-completion.json", conf.Broker.SchemasPath), delivered.Body) - if err != nil { - log.Errorf("Validation of incoming message failed "+ - "(corr-id: %s, error: %v)", - delivered.CorrelationId, - err) + log.Errorf("validation of incoming message (ingestion-completion) failed, reason: (%s)", err.Error()) + // Send the message to an error queue so it can be analyzed. 
+ infoErrorMessage := broker.InfoError{ + Error: "Message validation failed", + Reason: err.Error(), + OriginalMessage: message, + } + + body, _ := json.Marshal(infoErrorMessage) + if err := mq.SendMessage(delivered.CorrelationId, conf.Broker.Exchange, "error", body); err != nil { + log.Errorf("failed to publish message, reason: (%s)", err.Error()) + } + if err := delivered.Ack(false); err != nil { + log.Errorf("failed to Ack message, reason: (%s)", err.Error()) + } continue } // we unmarshal the message in the validation step so this is safe to do _ = json.Unmarshal(delivered.Body, &message) - - log.Infof("Received work (corr-id: %s, "+ - "filepath: %s, "+ - "user: %s, "+ - "accessionid: %s, "+ - "decryptedChecksums: %v)", - delivered.CorrelationId, - message.FilePath, - message.User, - message.AccessionID, - message.DecryptedChecksums) - - var filePath string - var fileSize int - if filePath, fileSize, err = db.GetArchived(delivered.CorrelationId); err != nil { - log.Errorf("GetArchived failed "+ - "(corr-id: %s, "+ - "filepath: %s, "+ - "user: %s, "+ - "accessionid: %s, "+ - "decryptedChecksums: %v, error: %v)", - delivered.CorrelationId, - message.FilePath, - message.User, - message.AccessionID, - message.DecryptedChecksums, - err) - - // nack the message but requeue until we fixed the SQL retry. - if e := delivered.Nack(false, true); e != nil { - log.Errorf("Failed to NAck because of GetArchived failed "+ - "(corr-id: %s, "+ - "filepath: %s, "+ - "user: %s, "+ - "accessionid: %s, "+ - "decryptedChecksums: %v, error: %v)", - delivered.CorrelationId, - message.FilePath, - message.User, - message.AccessionID, - message.DecryptedChecksums, - e) + filePath, fileSize, err := db.GetArchived(delivered.CorrelationId) + if err != nil { + log.Errorf("GetArchived failed, reason: %s", err.Error()) + if err := delivered.Nack(false, false); err != nil { + log.Errorf("failed to nack following GetArchived error message") } continue } - log.Debug("Backup initiated") - - // Get size on disk, will also give some time for the file to - // appear if it has not already - diskFileSize, err := archive.GetFileSize(filePath) - if err != nil { - log.Errorf("Failed to get size info for archived file %s "+ - "(corr-id: %s, "+ - "filepath: %s, "+ - "user: %s, "+ - "accessionid: %s, "+ - "decryptedChecksums: %v, error: %v)", - filePath, - delivered.CorrelationId, - message.FilePath, - message.User, - message.AccessionID, - message.DecryptedChecksums, - err) - - if e := delivered.Nack(false, true); e != nil { - log.Errorf("Failed to NAck because of GetFileSize failed "+ - "(corr-id: %s, "+ - "filepath: %s, "+ - "user: %s, "+ - "accessionid: %s, "+ - "decryptedChecksums: %v, error: %v)", - delivered.CorrelationId, - message.FilePath, - message.User, - message.AccessionID, - message.DecryptedChecksums, - e) + log.Errorf("failed to get size info for archived file %s, reason: (%s)", filePath, err.Error()) + if err := delivered.Nack(false, false); err != nil { + log.Errorf("failed to nack following GetFileSize error message") } continue } if diskFileSize != int64(fileSize) { - log.Errorf("File size in archive does not match database for archive file %s "+ - "- archive size is %d, database has %d "+ - "(corr-id: %s, "+ - "filepath: %s, "+ - "user: %s, "+ - "accessionid: %s, "+ - "decryptedChecksums: %v, error: %v)", - filePath, - diskFileSize, - fileSize, - delivered.CorrelationId, - message.FilePath, - message.User, - message.AccessionID, - message.DecryptedChecksums, - err) - - if e := delivered.Nack(false, true); e != 
nil { - log.Errorf("Failed to NAck because of file size differences failed "+ - "(corr-id: %s, "+ - "filepath: %s, "+ - "user: %s, "+ - "accessionid: %s, "+ - "decryptedChecksums: %v, error: %v)", - delivered.CorrelationId, - message.FilePath, - message.User, - message.AccessionID, - message.DecryptedChecksums, - e) + log.Errorf("File size in archive does not match database for archive file %s - archive size is %d, database has %d ", + filePath, diskFileSize, fileSize, + ) + if err := delivered.Nack(false, false); err != nil { + log.Errorf("failed to nack following GetFileSize error message") } continue @@ -219,34 +137,9 @@ func main() { file, err := archive.NewFileReader(filePath) if err != nil { - log.Errorf("Failed to open archived file %s "+ - "(corr-id: %s, "+ - "filepath: %s, "+ - "user: %s, "+ - "accessionid: %s, "+ - "decryptedChecksums: %v, error: %v)", - filePath, - delivered.CorrelationId, - message.FilePath, - message.User, - message.AccessionID, - message.DecryptedChecksums, - err) - - //FIXME: should it retry? - if e := delivered.Nack(false, true); e != nil { - log.Errorf("Failed to NAck because of NewFileReader failed "+ - "(corr-id: %s, "+ - "filepath: %s, "+ - "user: %s, "+ - "accessionid: %s, "+ - "decryptedChecksums: %v, error: %v)", - delivered.CorrelationId, - message.FilePath, - message.User, - message.AccessionID, - message.DecryptedChecksums, - e) + log.Errorf("failed to open archived file %s, reason: (%s)", filePath, err.Error()) + if err := delivered.Nack(false, false); err != nil { + log.Errorf("failed to nack following open archived file error message") } continue @@ -254,141 +147,47 @@ func main() { dest, err := syncDestination.NewFileWriter(message.FilePath) if err != nil { - log.Errorf("Failed to open backup file %s for writing "+ - "(corr-id: %s, "+ - "filepath: %s, "+ - "user: %s, "+ - "accessionid: %s, "+ - "decryptedChecksums: %v, error: %v)", - filePath, - delivered.CorrelationId, - message.FilePath, - message.User, - message.AccessionID, - message.DecryptedChecksums, - err) - - //FIXME: should it retry? 
- if e := delivered.Nack(false, true); e != nil { - log.Errorf("Failed to NAck because of NewFileWriter failed "+ - "(corr-id: %s, "+ - "filepath: %s, "+ - "user: %s, "+ - "accessionid: %s, "+ - "decryptedChecksums: %v, error: %v)", - delivered.CorrelationId, - message.FilePath, - message.User, - message.AccessionID, - message.DecryptedChecksums, - e) + log.Errorf("failed to open destination file %s for writing, reason: (%s)", filePath, err.Error()) + if err := delivered.Nack(false, false); err != nil { + log.Errorf("failed to nack following open destination file error message") } continue } - // Get the header from db header, err := db.GetHeaderForStableID(message.AccessionID) if err != nil { - log.Errorf("GetHeaderForStableID failed "+ - "(corr-id: %s, "+ - "filepath: %s, "+ - "user: %s, "+ - "accessionid: %s, "+ - "decryptedChecksums: %v, error: %v)", - delivered.CorrelationId, - message.FilePath, - message.User, - message.AccessionID, - message.DecryptedChecksums, - err) + log.Errorf("GetHeaderForStableID %s failed, reason: (%s)", message.AccessionID, err.Error()) } - // Reencrypt header log.Debug("Reencrypt header") pubkeyList := [][chacha20poly1305.KeySize]byte{} pubkeyList = append(pubkeyList, *publicKey) newHeader, err := headers.ReEncryptHeader(header, *key, pubkeyList) if err != nil { - log.Errorf("Failed to reencrypt the header %s "+ - "(corr-id: %s, "+ - "filepath: %s, "+ - "user: %s, "+ - "accessionid: %s, "+ - "decryptedChecksums: %v, error: %v)", - filePath, - delivered.CorrelationId, - message.FilePath, - message.User, - message.AccessionID, - message.DecryptedChecksums, - err) - - if e := delivered.Nack(false, true); e != nil { - log.Errorf("Failed to NAck because of reencrypt header failed "+ - "(corr-id: %s, "+ - "filepath: %s, "+ - "user: %s, "+ - "accessionid: %s, "+ - "decryptedChecksums: %v, error: %v)", - delivered.CorrelationId, - message.FilePath, - message.User, - message.AccessionID, - message.DecryptedChecksums, - e) + log.Errorf("failed to reencrypt the header, reason(%s)", err.Error()) + if err := delivered.Nack(false, false); err != nil { + log.Errorf("failed to nack following reencrypt header error message") } } - // write header to destination file _, err = dest.Write(newHeader) if err != nil { - log.Errorf("Failed to write the header to destination %s "+ - "(corr-id: %s, "+ - "filepath: %s, "+ - "user: %s, "+ - "accessionid: %s, "+ - "decryptedChecksums: %v, error: %v)", - filePath, - delivered.CorrelationId, - message.FilePath, - message.User, - message.AccessionID, - message.DecryptedChecksums, - err) + log.Errorf("failed to write the header to destination %s, reason(%s)", message.FilePath, err.Error()) } - // Copy the file and check is sizes match copiedSize, err := io.Copy(dest, file) if err != nil || copiedSize != int64(fileSize) { - log.Errorf("Failed to copy file "+ - "(corr-id: %s, "+ - "filepath: %s, "+ - "user: %s, "+ - "accessionid: %s, "+ - "decryptedChecksums: %v, error: %v)", - delivered.CorrelationId, - message.FilePath, - message.User, - message.AccessionID, - message.DecryptedChecksums, - err) - - //FIXME: should it retry? 
- if e := delivered.Nack(false, true); e != nil { - log.Errorf("Failed to NAck because of Copy failed "+ - "(corr-id: %s, "+ - "filepath: %s, "+ - "user: %s, "+ - "accessionid: %s, "+ - "decryptedChecksums: %v, error: %v)", - delivered.CorrelationId, - message.FilePath, - message.User, - message.AccessionID, - message.DecryptedChecksums, - e) + switch { + case err != nil: + log.Errorf("failed to copy the file, reason (%s)", err.Error()) + case copiedSize != int64(fileSize): + log.Errorf("copied size does not match file size") + } + + if err := delivered.Nack(false, false); err != nil { + log.Errorf("failed to nack following reencrypt header error message") } continue @@ -397,54 +196,14 @@ func main() { file.Close() dest.Close() - log.Infof("Backuped file %s (%d bytes) from archive to backup "+ - "(corr-id: %s, "+ - "filepath: %s, "+ - "user: %s, "+ - "accessionid: %s, "+ - "decryptedChecksums: %v)", - filePath, - fileSize, - delivered.CorrelationId, - message.FilePath, - message.User, - message.AccessionID, - message.DecryptedChecksums) - if err := mq.SendMessage(delivered.CorrelationId, conf.Broker.Exchange, conf.Broker.RoutingKey, delivered.Body); err != nil { - // TODO fix resend mechanism - log.Errorf("Failed to send message for completed "+ - "(corr-id: %s, "+ - "filepath: %s, "+ - "user: %s, "+ - "accessionid: %s, "+ - "decryptedChecksums: %v, error: %v)", - delivered.CorrelationId, - message.FilePath, - message.User, - message.AccessionID, - message.DecryptedChecksums, - err) - - // Restart loop, do not ack + log.Errorf("failed to publish message, reason: (%s)", err.Error()) + continue } if err := delivered.Ack(false); err != nil { - - log.Errorf("Failed to ack message after work completed "+ - "(corr-id: %s, "+ - "filepath: %s, "+ - "user: %s, "+ - "accessionid: %s, "+ - "decryptedChecksums: %v, error: %v)", - delivered.CorrelationId, - message.FilePath, - message.User, - message.AccessionID, - message.DecryptedChecksums, - err) - + log.Errorf("failed to Ack message, reason: (%s)", err.Error()) } } }() From 07e6e41c1fd2d9b2db5d27c7735bfc5f9ccab278 Mon Sep 17 00:00:00 2001 From: Joakim Bygdell Date: Fri, 20 Oct 2023 14:04:23 +0200 Subject: [PATCH 10/34] [sync] No need to send an message after completing the work --- sda/cmd/sync/sync.go | 6 ------ sda/internal/config/config.go | 1 - sda/internal/config/config_test.go | 2 -- 3 files changed, 9 deletions(-) diff --git a/sda/cmd/sync/sync.go b/sda/cmd/sync/sync.go index d2b11a823..ae6b3973d 100644 --- a/sda/cmd/sync/sync.go +++ b/sda/cmd/sync/sync.go @@ -196,12 +196,6 @@ func main() { file.Close() dest.Close() - if err := mq.SendMessage(delivered.CorrelationId, conf.Broker.Exchange, conf.Broker.RoutingKey, delivered.Body); err != nil { - log.Errorf("failed to publish message, reason: (%s)", err.Error()) - - continue - } - if err := delivered.Ack(false); err != nil { log.Errorf("failed to Ack message, reason: (%s)", err.Error()) } diff --git a/sda/internal/config/config.go b/sda/internal/config/config.go index 44981af52..9b50a5a75 100644 --- a/sda/internal/config/config.go +++ b/sda/internal/config/config.go @@ -265,7 +265,6 @@ func NewConfig(app string) (*Config, error) { "broker.user", "broker.password", "broker.queue", - "broker.routingkey", "c4gh.filepath", "c4gh.passphrase", "c4gh.syncPubKeyPath", diff --git a/sda/internal/config/config_test.go b/sda/internal/config/config_test.go index d6f9d1a99..466ea7a22 100644 --- a/sda/internal/config/config_test.go +++ b/sda/internal/config/config_test.go @@ -291,8 +291,6 @@ func (suite 
*ConfigTestSuite) TestSyncConfig() { assert.Equal(suite.T(), "testuser", config.Broker.User) assert.Equal(suite.T(), "testpassword", config.Broker.Password) assert.Equal(suite.T(), "testqueue", config.Broker.Queue) - assert.Equal(suite.T(), "routingtest", config.Broker.RoutingKey) - assert.Equal(suite.T(), "testexchange", config.Broker.Exchange) assert.NotNil(suite.T(), config.Database) assert.Equal(suite.T(), "test", config.Database.Host) assert.Equal(suite.T(), 123, config.Database.Port) From 605daddf3922ad3202de156c2a081e6cf585e54e Mon Sep 17 00:00:00 2001 From: Joakim Bygdell Date: Fri, 20 Oct 2023 10:37:26 +0200 Subject: [PATCH 11/34] [Integration test] Add case for sync --- .../scripts/make_sda_credentials.sh | 6 +++++ .github/integration/sda-s3-integration.yml | 26 +++++++++++++++++++ .github/integration/sda/config.yaml | 11 +++++++- .github/integration/tests/sda/35_sync_test.sh | 19 ++++++++++++++ 4 files changed, 61 insertions(+), 1 deletion(-) create mode 100644 .github/integration/tests/sda/35_sync_test.sh diff --git a/.github/integration/scripts/make_sda_credentials.sh b/.github/integration/scripts/make_sda_credentials.sh index 5f0bd3f0a..fb1908a83 100644 --- a/.github/integration/scripts/make_sda_credentials.sh +++ b/.github/integration/scripts/make_sda_credentials.sh @@ -17,6 +17,7 @@ pip install aiohttp Authlib joserfc requests > /dev/null for n in download finalize inbox ingest mapper sync verify; do echo "creating credentials for: $n" psql -U postgres -h postgres -d sda -c "ALTER ROLE $n LOGIN PASSWORD '$n';" + psql -U postgres -h postgres -d sda -c "GRANT base TO $n;" ## password and permissions for MQ body_data=$(jq -n -c --arg password "$n" --arg tags none '$ARGS.named') @@ -60,6 +61,11 @@ if [ ! -f "/shared/c4gh.sec.pem" ]; then curl -s -L https://github.com/neicnordic/crypt4gh/releases/download/v1.7.4/crypt4gh_linux_x86_64.tar.gz | tar -xz -C /shared/ && chmod +x /shared/crypt4gh /shared/crypt4gh generate -n /shared/c4gh -p c4ghpass fi +if [ ! -f "/shared/sync.sec.pem" ]; then + echo "creating crypth4gh key" + curl -s -L https://github.com/neicnordic/crypt4gh/releases/download/v1.7.4/crypt4gh_linux_x86_64.tar.gz | tar -xz -C /shared/ && chmod +x /shared/crypt4gh + /shared/crypt4gh generate -n /shared/sync -p syncPass +fi if [ ! 
-f "/shared/keys/ssh" ]; then ssh-keygen -o -a 256 -t ed25519 -f /shared/keys/ssh -N "" diff --git a/.github/integration/sda-s3-integration.yml b/.github/integration/sda-s3-integration.yml index 070e70859..83c4494c9 100644 --- a/.github/integration/sda-s3-integration.yml +++ b/.github/integration/sda-s3-integration.yml @@ -208,6 +208,30 @@ services: - ./sda/config.yaml:/config.yaml - shared:/shared + sync: + image: ghcr.io/neicnordic/sensitive-data-archive:PR${PR_NUMBER} + command: [ sda-sync ] + container_name: sync + depends_on: + credentials: + condition: service_completed_successfully + minio: + condition: service_healthy + postgres: + condition: service_healthy + rabbitmq: + condition: service_healthy + environment: + - BROKER_PASSWORD=sync + - BROKER_USER=sync + - BROKER_QUEUE=completed_stream + - DB_PASSWORD=sync + - DB_USER=sync + restart: always + volumes: + - ./sda/config.yaml:/config.yaml + - shared:/shared + oidc: container_name: oidc command: @@ -250,6 +274,8 @@ services: condition: service_started s3inbox: condition: service_started + sync: + condition: service_started verify: condition: service_started environment: diff --git a/.github/integration/sda/config.yaml b/.github/integration/sda/config.yaml index 15e949f6e..6690b1fbe 100644 --- a/.github/integration/sda/config.yaml +++ b/.github/integration/sda/config.yaml @@ -50,6 +50,7 @@ db: c4gh: filePath: /shared/c4gh.sec.pem passphrase: "c4ghpass" + syncPubKeyPath: /shared/sync.pub.pem server: cert: "" @@ -57,4 +58,12 @@ server: jwtpubkeypath: "/shared/keys/pub/" jwtpubkeyurl: "http://oidc:8080/jwk" - +sync.destination: + type: "s3" + url: "http://s3" + port: 9000 + readypath: "/minio/health/ready" + accessKey: "access" + secretKey: "secretKey" + bucket: "sync" + region: "us-east-1" diff --git a/.github/integration/tests/sda/35_sync_test.sh b/.github/integration/tests/sda/35_sync_test.sh new file mode 100644 index 000000000..5c0f37422 --- /dev/null +++ b/.github/integration/tests/sda/35_sync_test.sh @@ -0,0 +1,19 @@ +#!/bin/bash +set -e + +cd shared || true + +# check bucket for synced files +for file in NA12878.bam.c4gh NA12878_20k_b37.bam.c4gh; do + RETRY_TIMES=0 + until [ "$(s3cmd -c direct ls s3://sync/test_dummy.org/"$file")" != "" ]; do + RETRY_TIMES=$((RETRY_TIMES + 1)) + if [ "$RETRY_TIMES" -eq 30 ]; then + echo "::error::Time out while waiting for files to be synced" + exit 1 + fi + sleep 2 + done +done + +echo "files synced successfully" \ No newline at end of file From ac6cb63ec2bf898ae4ec041bfc03527297bf9f58 Mon Sep 17 00:00:00 2001 From: Joakim Bygdell Date: Fri, 3 Nov 2023 08:07:13 +0100 Subject: [PATCH 12/34] [sync] cleanup Readme.md --- sda/cmd/sync/sync.md | 204 ++++++++++++++++++------------------------- 1 file changed, 83 insertions(+), 121 deletions(-) diff --git a/sda/cmd/sync/sync.md b/sda/cmd/sync/sync.md index 4b24235c8..8e6c9323a 100644 --- a/sda/cmd/sync/sync.md +++ b/sda/cmd/sync/sync.md @@ -1,172 +1,134 @@ -# sda-pipeline: backup +# Sync -Moves data to backup storage and optionally merges it with the encryption header. +Copies files from the archive to the sync destination, including the header so that the files can be ingested at the remote site. ## Configuration -There are a number of options that can be set for the backup service. +There are a number of options that can be set for the sync service. These settings can be set by mounting a yaml-file at `/config.yaml` with settings. ex. 
+ ```yaml log: level: "debug" format: "json" ``` + They may also be set using environment variables like: + ```bash export LOG_LEVEL="debug" export LOG_FORMAT="json" ``` -### Backup specific settings - - - `BACKUP_COPYHEADER`: if `true`, the backup service will reencrypt and add headers to the backup files. - -#### Keyfile settings +### Keyfile settings These settings control which crypt4gh keyfile is loaded. -These settings are only needed is `copyheader` is `true`. - - `C4GH_FILEPATH`: path to the crypt4gh keyfile - - `C4GH_PASSPHRASE`: pass phrase to unlock the keyfile - - `C4GH_BACKUPPUBKEY`: path to the crypt4gh public key to use for reencrypting file headers. +- `C4GH_FILEPATH`: path to the crypt4gh keyfile +- `C4GH_PASSPHRASE`: pass phrase to unlock the keyfile +- `C4GH_SYNCPUBKEYPATH`: path to the crypt4gh public key to use for reencrypting file headers. ### RabbitMQ broker settings -These settings control how backup connects to the RabbitMQ message broker. - - - `BROKER_HOST`: hostname of the rabbitmq server - - - `BROKER_PORT`: rabbitmq broker port (commonly `5671` with TLS and `5672` without) - - - `BROKER_QUEUE`: message queue to read messages from (commonly `backup`) - - - `BROKER_ROUTINGKEY`: message queue to write success messages to (commonly `completed`) - - - `BROKER_USER`: username to connect to rabbitmq - - - `BROKER_PASSWORD`: password to connect to rabbitmq - - - `BROKER_PREFETCHCOUNT`: Number of messages to pull from the message server at the time (default to 2) - -### PostgreSQL Database settings: - - - `DB_HOST`: hostname for the postgresql database - - - `DB_PORT`: database port (commonly 5432) +These settings control how sync connects to the RabbitMQ message broker. - - `DB_USER`: username for the database +- `BROKER_HOST`: hostname of the rabbitmq server +- `BROKER_PORT`: rabbitmq broker port (commonly `5671` with TLS and `5672` without) +- `BROKER_QUEUE`: message queueor stream to read messages from (commonly `completed_stream`) +- `BROKER_USER`: username to connect to rabbitmq +- `BROKER_PASSWORD`: password to connect to rabbitmq +- `BROKER_PREFETCHCOUNT`: Number of messages to pull from the message server at the time (default to 2) - - `DB_PASSWORD`: password for the database +### PostgreSQL Database settings - - `DB_DATABASE`: database name +- `DB_HOST`: hostname for the postgresql database +- `DB_PORT`: database port (commonly 5432) +- `DB_USER`: username for the database +- `DB_PASSWORD`: password for the database +- `DB_DATABASE`: database name +- `DB_SSLMODE`: The TLS encryption policy to use for database connections. Valid options are: + - `disable` + - `allow` + - `prefer` + - `require` + - `verify-ca` + - `verify-full` - - `DB_SSLMODE`: The TLS encryption policy to use for database connections. 
- Valid options are: - - `disable` - - `allow` - - `prefer` - - `require` - - `verify-ca` - - `verify-full` + More information is available [in the postgresql documentation](https://www.postgresql.org/docs/current/libpq-ssl.html#LIBPQ-SSL-PROTECTION) - More information is available - [in the postgresql documentation](https://www.postgresql.org/docs/current/libpq-ssl.html#LIBPQ-SSL-PROTECTION) +Note that if `DB_SSLMODE` is set to anything but `disable`, then `DB_CACERT` needs to be set, and if set to `verify-full`, then `DB_CLIENTCERT`, and `DB_CLIENTKEY` must also be set - Note that if `DB_SSLMODE` is set to anything but `disable`, then `DB_CACERT` needs to be set, - and if set to `verify-full`, then `DB_CLIENTCERT`, and `DB_CLIENTKEY` must also be set - - - `DB_CLIENTKEY`: key-file for the database client certificate - - - `DB_CLIENTCERT`: database client certificate file - - - `DB_CACERT`: Certificate Authority (CA) certificate for the database to use +- `DB_CLIENTKEY`: key-file for the database client certificate +- `DB_CLIENTCERT`: database client certificate file +- `DB_CACERT`: Certificate Authority (CA) certificate for the database to use ### Storage settings -Storage backend is defined by the `ARCHIVE_TYPE`, and `BACKUP_TYPE` variables. -Valid values for these options are `S3` or `POSIX` -(Defaults to `POSIX` on unknown values). +Storage backend is defined by the `ARCHIVE_TYPE`, and `SYNC_DESTINATION_TYPE` variables. +Valid values for these options are `S3` or `POSIX` for `ARCHIVE_TYPE` and `POSIX`, `S3` or `SFTP` for `SYNC_DESTINATION_TYPE`. The value of these variables define what other variables are read. -The same variables are available for all storage types, differing by prefix (`ARCHIVE_`, or `BACKUP_`) +The same variables are available for all storage types, differing by prefix (`ARCHIVE_`, or `SYNC_DESTINATION_`) if `*_TYPE` is `S3` then the following variables are available: - - `*_URL`: URL to the S3 system - - `*_ACCESSKEY`: The S3 access and secret key are used to authenticate to S3, - [more info at AWS](https://docs.aws.amazon.com/general/latest/gr/aws-sec-cred-types.html#access-keys-and-secret-access-keys) - - `*_SECRETKEY`: The S3 access and secret key are used to authenticate to S3, - [more info at AWS](https://docs.aws.amazon.com/general/latest/gr/aws-sec-cred-types.html#access-keys-and-secret-access-keys) - - `*_BUCKET`: The S3 bucket to use as the storage root - - `*_PORT`: S3 connection port (default: `443`) - - `*_REGION`: S3 region (default: `us-east-1`) - - `*_CHUNKSIZE`: S3 chunk size for multipart uploads. -# CA certificate is only needed if the S3 server has a certificate signed by a private entity - - `*_CACERT`: Certificate Authority (CA) certificate for the storage system - -and if `*_TYPE` is `POSIX`: - - `*_LOCATION`: POSIX path to use as storage root - -### Logging settings: - - - `LOG_FORMAT` can be set to “json” to get logs in json format. - All other values result in text logging - - - `LOG_LEVEL` can be set to one of the following, in increasing order of severity: - - `trace` - - `debug` - - `info` - - `warn` (or `warning`) - - `error` - - `fatal` - - `panic` - -## Service Description -The backup service copies files from the archive storage to backup storage. If a public key is supplied and the copyHeader option is enabled the header will be re-encrypted and attached to the file before writing it to backup storage. - -When running, backup reads messages from the configured RabbitMQ queue (default "backup"). 
-For each message, these steps are taken (if not otherwise noted, errors halts progress, the message is Nack'ed, and the service moves on to the next message): -1. The message is validated as valid JSON that matches either the "ingestion-completion" or "ingestion-accession" schema (based on configuration). -If the message can’t be validated it is discarded with an error message in the logs. +- `*_URL`: URL to the S3 system +- `*_ACCESSKEY`: The S3 access and secret key are used to authenticate to S3, [more info at AWS](https://docs.aws.amazon.com/general/latest/gr/aws-sec-cred-types.html#access-keys-and-secret-access-keys) +- `*_SECRETKEY`: The S3 access and secret key are used to authenticate to S3, [more info at AWS](https://docs.aws.amazon.com/general/latest/gr/aws-sec-cred-types.html#access-keys-and-secret-access-keys) +- `*_BUCKET`: The S3 bucket to use as the storage root +- `*_PORT`: S3 connection port (default: `443`) +- `*_REGION`: S3 region (default: `us-east-1`) +- `*_CHUNKSIZE`: S3 chunk size for multipart uploads. +- `*_CACERT`: Certificate Authority (CA) certificate for the storage system, CA certificate is only needed if the S3 server has a certificate signed by a private entity -1. The file path and file size is fetched from the database. - 1. In case the service is configured to copy headers, the path is replaced by the one of the incoming message and it is the original location where the file was uploaded in the inbox. +if `*_TYPE` is `POSIX`: -1. The file size on disk is requested from the storage system. +- `*_LOCATION`: POSIX path to use as storage root -1. The database file size is compared against the disk file size. +and if `*_TYPE` is `SFTP`: -1. A file reader is created for the archive storage file, and a file writer is created for the backup storage file. +- `*_HOST`: URL to the SFTP server +- `*_PORT`: Port of the SFTP server to connect to +- `*_USERNAME`: Username connectin to the SFTP server +- `*_HOSTKEY`: The SFTP server's public key +- `*_PEMKEYPATH`: Path to the ssh private key used to connect to the SFTP server +- `*_PEMKEYPASS`: Passphrase for the ssh private key -1. If the service is configured to copy headers: +### Logging settings - 1. The header is read from the database. - On error, the error is written to the logs, but the message continues processing. +- `LOG_FORMAT` can be set to “json” to get logs in json format. All other values result in text logging +- `LOG_LEVEL` can be set to one of the following, in increasing order of severity: + - `trace` + - `debug` + - `info` + - `warn` (or `warning`) + - `error` + - `fatal` + - `panic` - 1. The header is decrypted. - If this causes an error, the error is written to the logs, the message is Nack'ed, but message processing continues. - - 1. The header is reencrypted. - If this causes an error, the error is written to the logs, the message is Nack'ed, but message processing continues. - - 1. The header is written to the backup file writer. - On error, the error is written to the logs, but the message continues processing. +## Service Description -1. The file data is copied from the archive file reader to the backup file writer. +The sync service copies files from the archive storage to sync storage. -1. A completed message is sent to RabbitMQ, if this fails a message is written to the logs, and the message is neither nack'ed nor ack'ed. +When running, sync reads messages from the "completed" RabbitMQ queue. 
+For each message, these steps are taken (if not otherwise noted, errors halts progress, the message is Nack'ed, and the service moves on to the next message): -1. The message is Ack'ed. +1. The message is validated as valid JSON that matches the "ingestion-completion" schema. If the message can’t be validated it is sent to the error queue for later analysis. +2. The archive file path and file size is fetched from the database. +3. The file size on disk is requested from the storage system. +4. The archive file size from the database is compared against the disk file size. +5. A file reader is created for the archive storage file, and a file writer is created for the sync storage file. + 1. The header is read from the database. + 2. The header is decrypted. + 3. The header is reencrypted with the destinations public key. + 4. The header is written to the sync file writer. +6. The file data is copied from the archive file reader to the sync file writer. +7. The message is Ack'ed. ## Communication - - Backup reads messages from one rabbitmq queue (default `backup`) - - - Backup writes messages to one rabbitmq queue (default `completed`) - - - Backup optionally reads encryption headers from the database and can not be started without a database connection. - This is done using the `GetArchived`, and `GetHeaderForStableID` functions. - - - Backup reads data from archive storage and writes data to backup storage. +- Sync reads messages from one rabbitmq stream (`completed_stream`) +- Sync reads file information and headers from the database and can not be started without a database connection. This is done using the `GetArchived`, and `GetHeaderForStableID` functions. +- Sync reads data from archive storage and writes data to sync destination storage. From 68ad2679ae5ea68b9b50efb5ed4ce54baffab7d8 Mon Sep 17 00:00:00 2001 From: Joakim Bygdell Date: Tue, 17 Oct 2023 17:45:50 +0200 Subject: [PATCH 13/34] Copy sync-api related files from old sda-pipline repo --- sda/cmd/syncapi/syncapi.go | 441 ++++++++++++++++++++++ sda/cmd/syncapi/syncapi.md | 18 + sda/cmd/syncapi/syncapi_test.go | 362 ++++++++++++++++++ sda/schemas/bigpicture/file-sync.json | 119 ++++++ sda/schemas/bigpicture/metadata-sync.json | 32 ++ 5 files changed, 972 insertions(+) create mode 100644 sda/cmd/syncapi/syncapi.go create mode 100644 sda/cmd/syncapi/syncapi.md create mode 100644 sda/cmd/syncapi/syncapi_test.go create mode 100644 sda/schemas/bigpicture/file-sync.json create mode 100644 sda/schemas/bigpicture/metadata-sync.json diff --git a/sda/cmd/syncapi/syncapi.go b/sda/cmd/syncapi/syncapi.go new file mode 100644 index 000000000..ee16df229 --- /dev/null +++ b/sda/cmd/syncapi/syncapi.go @@ -0,0 +1,441 @@ +package main + +import ( + "bytes" + "context" + "crypto/sha256" + "crypto/subtle" + "crypto/tls" + "encoding/json" + "fmt" + "io" + "net/http" + "net/url" + "os" + "os/signal" + "syscall" + "time" + + "sda-pipeline/internal/broker" + "sda-pipeline/internal/common" + "sda-pipeline/internal/config" + "sda-pipeline/internal/database" + + "github.com/gorilla/mux" + + log "github.com/sirupsen/logrus" +) + +var Conf *config.Config +var err error + +type syncDataset struct { + DatasetID string `json:"dataset_id"` + DatasetFiles []datasetFiles `json:"dataset_files"` + User string `json:"user"` +} + +type datasetFiles struct { + FilePath string `json:"filepath"` + FileID string `json:"file_id"` + ShaSum string `json:"sha256"` +} + +func main() { + Conf, err = config.NewConfig("sync") + if err != nil { + log.Fatal(err) + } + 
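+	// Set up the broker and database connections first; both the queue consumer
+	// and the HTTP endpoints below depend on them.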
Conf.API.MQ, err = broker.NewMQ(Conf.Broker) + if err != nil { + log.Fatal(err) + } + Conf.API.DB, err = database.NewDB(Conf.Database) + if err != nil { + log.Fatal(err) + } + + sigc := make(chan os.Signal, 5) + signal.Notify(sigc, syscall.SIGHUP, syscall.SIGINT, syscall.SIGTERM, syscall.SIGQUIT) + go func() { + <-sigc + shutdown() + os.Exit(0) + }() + + go func() { + forever := make(chan bool) + messages, err := Conf.API.MQ.GetMessages(Conf.Broker.Queue) + if err != nil { + log.Fatal(err) + } + for m := range messages { + log.Debugf("Received a message (corr-id: %s, message: %s)", m.CorrelationId, m.Body) + res, err := common.ValidateJSON(Conf.Broker.SchemasPath+"dataset-mapping.json", m.Body) + if err != nil { + if err := m.Nack(false, false); err != nil { + log.Errorf("Failed to nack message, reason: %v", err) + } + + continue + } + if !res.Valid() { + errorString := "" + for _, validErr := range res.Errors() { + errorString += validErr.String() + "\n\n" + } + if err := m.Nack(false, false); err != nil { + log.Errorf("Failed to nack message, reason: %v", err) + } + + continue + } + log.Infoln("buildSyncDatasetJSON") + blob, err := buildSyncDatasetJSON(m.Body) + if err != nil { + log.Errorf("failed to build SyncDatasetJSON, Reason: %v", err) + } + if err := sendPOST(blob); err != nil { + log.Errorf("failed to send POST, Reason: %v", err) + } + if err := m.Ack(false); err != nil { + log.Errorf("Failed to ack message: reason %v", err) + } + + } + <-forever + }() + + srv := setup(Conf) + + if Conf.API.ServerCert != "" && Conf.API.ServerKey != "" { + log.Infof("Web server is ready to receive connections at https://%s:%d", Conf.API.Host, Conf.API.Port) + if err := srv.ListenAndServeTLS(Conf.API.ServerCert, Conf.API.ServerKey); err != nil { + shutdown() + log.Fatalln(err) + } + } else { + log.Infof("Web server is ready to receive connections at http://%s:%d", Conf.API.Host, Conf.API.Port) + if err := srv.ListenAndServe(); err != nil { + shutdown() + log.Fatalln(err) + } + } +} + +func setup(config *config.Config) *http.Server { + r := mux.NewRouter().SkipClean(true) + + r.HandleFunc("/ready", readinessResponse).Methods("GET") + r.HandleFunc("/dataset", basicAuth(http.HandlerFunc(dataset))).Methods("POST") + r.HandleFunc("/metadata", basicAuth(http.HandlerFunc(metadata))).Methods("POST") + + cfg := &tls.Config{ + MinVersion: tls.VersionTLS12, + CurvePreferences: []tls.CurveID{tls.CurveP521, tls.CurveP384, tls.CurveP256}, + PreferServerCipherSuites: true, + CipherSuites: []uint16{ + tls.TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384, + }, + } + + srv := &http.Server{ + Addr: config.API.Host + ":" + fmt.Sprint(config.API.Port), + Handler: r, + TLSConfig: cfg, + TLSNextProto: make(map[string]func(*http.Server, *tls.Conn, http.Handler)), + ReadTimeout: 5 * time.Second, + WriteTimeout: 5 * time.Second, + IdleTimeout: 30 * time.Second, + ReadHeaderTimeout: 3 * time.Second, + } + + return srv +} + +func shutdown() { + defer Conf.API.MQ.Channel.Close() + defer Conf.API.MQ.Connection.Close() + defer Conf.API.DB.Close() +} + +func readinessResponse(w http.ResponseWriter, r *http.Request) { + statusCocde := http.StatusOK + + if Conf.API.MQ.Connection.IsClosed() { + statusCocde = http.StatusServiceUnavailable + newConn, err := broker.NewMQ(Conf.Broker) + if err != nil { + log.Errorf("failed to reconnect to MQ, reason: %v", err) + } else { + Conf.API.MQ = newConn + } + } + + if Conf.API.MQ.Channel.IsClosed() { + statusCocde = http.StatusServiceUnavailable + Conf.API.MQ.Connection.Close() + newConn, err := 
broker.NewMQ(Conf.Broker) + if err != nil { + log.Errorf("failed to reconnect to MQ, reason: %v", err) + } else { + Conf.API.MQ = newConn + } + } + + if DBRes := checkDB(Conf.API.DB, 5*time.Millisecond); DBRes != nil { + log.Debugf("DB connection error :%v", DBRes) + Conf.API.DB.Reconnect() + statusCocde = http.StatusServiceUnavailable + } + + w.WriteHeader(statusCocde) +} + +func checkDB(database *database.SQLdb, timeout time.Duration) error { + ctx, cancel := context.WithTimeout(context.Background(), timeout) + defer cancel() + if database.DB == nil { + return fmt.Errorf("database is nil") + } + + return database.DB.PingContext(ctx) +} + +func dataset(w http.ResponseWriter, r *http.Request) { + b, err := io.ReadAll(r.Body) + if err != nil { + respondWithError(w, http.StatusBadRequest, "failed to read request body") + + return + } + defer r.Body.Close() + + // the filepath looks funkt for now, it will sort itself out when we switch to sda-common + res, err := common.ValidateJSON(Conf.Broker.SchemasPath+"../bigpicture/file-sync.json", b) + if err != nil { + respondWithError(w, http.StatusBadRequest, "eror on JSON validation: "+err.Error()) + + return + } + if !res.Valid() { + errorString := "" + for _, validErr := range res.Errors() { + errorString += validErr.String() + "\n\n" + } + respondWithError(w, http.StatusBadRequest, "JSON validation failed, reason: "+errorString) + + return + } + + if err := parseDatasetMessage(b); err != nil { + if err.Error() == "Dataset exists" { + w.WriteHeader(http.StatusAlreadyReported) + } else { + w.WriteHeader(http.StatusInternalServerError) + } + } + + w.WriteHeader(http.StatusOK) +} + +// parsemessage parses the JSON blob and sends the relevant messages +func parseDatasetMessage(msg []byte) error { + blob := syncDataset{} + _ = json.Unmarshal(msg, &blob) + + ds, err := Conf.API.DB.CheckIfDatasetExists(blob.DatasetID) + if err != nil { + return fmt.Errorf("Failed to check dataset existance: Reason %v", err) + } + if ds { + return fmt.Errorf("Dataset exists") + } + + var accessionIDs []string + for _, files := range blob.DatasetFiles { + ingest := common.Ingest{ + Type: "ingest", + User: blob.User, + FilePath: files.FilePath, + } + ingestMsg, err := json.Marshal(ingest) + if err != nil { + return fmt.Errorf("Failed to marshal json messge: Reason %v", err) + } + err = Conf.API.MQ.SendMessage(fmt.Sprintf("%v", time.Now().Unix()), Conf.Broker.Exchange, "ingest", true, ingestMsg) + if err != nil { + return fmt.Errorf("Failed to send ingest messge: Reason %v", err) + } + + accessionIDs = append(accessionIDs, files.FileID) + finalize := common.Finalize{ + Type: "accession", + User: blob.User, + Filepath: files.FilePath, + AccessionID: files.FileID, + DecryptedChecksums: []common.Checksums{{Type: "sha256", Value: files.ShaSum}}, + } + finalizeMsg, err := json.Marshal(finalize) + if err != nil { + return fmt.Errorf("Failed to marshal json messge: Reason %v", err) + } + err = Conf.API.MQ.SendMessage(fmt.Sprintf("%v", time.Now().Unix()), Conf.Broker.Exchange, "accessionIDs", true, finalizeMsg) + if err != nil { + return fmt.Errorf("Failed to send mapping messge: Reason %v", err) + } + } + + mappings := common.Mappings{ + Type: "mapping", + DatasetID: blob.DatasetID, + AccessionIDs: accessionIDs, + } + mappingMsg, err := json.Marshal(mappings) + if err != nil { + return fmt.Errorf("Failed to marshal json messge: Reason %v", err) + } + + err = Conf.API.MQ.SendMessage(fmt.Sprintf("%v", time.Now().Unix()), Conf.Broker.Exchange, "mappings", true, mappingMsg) + if err 
!= nil { + return fmt.Errorf("Failed to send mapping messge: Reason %v", err) + } + + return nil +} + +func respondWithError(w http.ResponseWriter, code int, message string) { + respondWithJSON(w, code, map[string]string{"error": message}) +} + +func respondWithJSON(w http.ResponseWriter, code int, payload interface{}) { + log.Infoln(payload) + response, _ := json.Marshal(payload) + + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(code) + _, err = w.Write(response) + if err != nil { + log.Errorf("failed to write HTTP response, reason: %v", err) + } +} + +func metadata(w http.ResponseWriter, r *http.Request) { + b, err := io.ReadAll(r.Body) + if err != nil { + respondWithError(w, http.StatusBadRequest, "failed to read request body") + + return + } + defer r.Body.Close() + // the filepath looks funkt for now, it will sort itself out when we switch to sda-common + res, err := common.ValidateJSON(Conf.Broker.SchemasPath+"bigpicture/metadata-sync.json", b) + if err != nil { + respondWithError(w, http.StatusBadRequest, "eror on JSON validation: "+err.Error()) + + return + } + if !res.Valid() { + errorString := "" + for _, validErr := range res.Errors() { + errorString += validErr.String() + "\n\n" + } + respondWithError(w, http.StatusBadRequest, "JSON validation failed, reason: "+errorString) + + return + } + + w.WriteHeader(http.StatusOK) +} + +func buildSyncDatasetJSON(b []byte) ([]byte, error) { + var msg common.Mappings + _ = json.Unmarshal(b, &msg) + + var dataset = syncDataset{ + DatasetID: msg.DatasetID, + } + + for _, ID := range msg.AccessionIDs { + if DBRes := checkDB(Conf.API.DB, 20*time.Millisecond); DBRes != nil { + log.Infof("DB connection error :%v", DBRes) + Conf.API.DB.Reconnect() + } + data, err := Conf.API.DB.GetSyncData(ID) + if err != nil { + return nil, err + } + datasetFile := datasetFiles{ + FilePath: data.FilePath, + FileID: ID, + ShaSum: data.Checksum, + } + dataset.DatasetFiles = append(dataset.DatasetFiles, datasetFile) + dataset.User = data.User + } + + json, err := json.Marshal(dataset) + if err != nil { + return nil, err + } + + return json, nil +} + +func sendPOST(payload []byte) error { + client := &http.Client{} + URL, err := createHostURL(Conf.Sync.Host, Conf.Sync.Port) + if err != nil { + return err + } + + req, err := http.NewRequest("POST", URL, bytes.NewBuffer(payload)) + if err != nil { + return err + } + req.SetBasicAuth(Conf.Sync.User, Conf.Sync.Password) + resp, err := client.Do(req) + if err != nil || resp.StatusCode != http.StatusOK { + return err + } + defer resp.Body.Close() + + return nil +} + +func createHostURL(host string, port int) (string, error) { + url, err := url.ParseRequestURI(host) + if err != nil { + return "", err + } + if url.Port() == "" && port != 0 { + url.Host += fmt.Sprintf(":%d", port) + } + url.Path = "/dataset" + + return url.String(), nil +} + +func basicAuth(auth http.HandlerFunc) http.HandlerFunc { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + username, password, ok := r.BasicAuth() + if ok { + usernameHash := sha256.Sum256([]byte(username)) + passwordHash := sha256.Sum256([]byte(password)) + expectedUsernameHash := sha256.Sum256([]byte(Conf.API.User)) + expectedPasswordHash := sha256.Sum256([]byte(Conf.API.Password)) + + usernameMatch := (subtle.ConstantTimeCompare(usernameHash[:], expectedUsernameHash[:]) == 1) + passwordMatch := (subtle.ConstantTimeCompare(passwordHash[:], expectedPasswordHash[:]) == 1) + + if usernameMatch && passwordMatch { + auth.ServeHTTP(w, r) + + 
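+				// Both hashes matched, the wrapped handler has already served the request.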
return + } + } + + w.Header().Set("WWW-Authenticate", `Basic realm="restricted", charset="UTF-8"`) + http.Error(w, "Unauthorized", http.StatusUnauthorized) + }) +} diff --git a/sda/cmd/syncapi/syncapi.md b/sda/cmd/syncapi/syncapi.md new file mode 100644 index 000000000..915f5dbe0 --- /dev/null +++ b/sda/cmd/syncapi/syncapi.md @@ -0,0 +1,18 @@ +# sync-api + +The sync service is used in the [Bigpicture](https://bigpicture.eu/) project. + +## Service Description + +The sync service facilitates replication of data and metadata between the nodes in the consortium. + +When enabled the service will perform the following tasks: + +1. Read messages from the configured queue (sent by the mapper service upon succesful completion of a dataset maping). + 1. Generate a JSON blob with the required file and dataset information required to start and complete ingestion of a dataset on the recieving node. + 2. Send the JSON blob as POST request to the recieving partner. +2. Upon recieving a POST request with JSON data to the `/dataset` route. + 1. Parse the JSON blob and check if dataset is already registered, exit if true. + 2. Build and send messages to start ingestion of files. + 3. Build and send messages to assign stableIDs to files. + 4. Build and send messages to map files to a dataset. diff --git a/sda/cmd/syncapi/syncapi_test.go b/sda/cmd/syncapi/syncapi_test.go new file mode 100644 index 000000000..dbb531ec5 --- /dev/null +++ b/sda/cmd/syncapi/syncapi_test.go @@ -0,0 +1,362 @@ +package main + +import ( + "bytes" + "fmt" + "net/http" + "net/http/httptest" + "testing" + "time" + + "sda-pipeline/internal/broker" + "sda-pipeline/internal/config" + "sda-pipeline/internal/database" + + "github.com/DATA-DOG/go-sqlmock" + "github.com/gorilla/mux" + "github.com/spf13/viper" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/suite" +) + +type TestSuite struct { + suite.Suite +} + +func TestApiTestSuite(t *testing.T) { + suite.Run(t, new(TestSuite)) +} + +func TestSetup(t *testing.T) { + viper.Set("log.level", "debug") + viper.Set("log.format", "json") + + viper.Set("broker.host", "test") + viper.Set("broker.port", 123) + viper.Set("broker.user", "test") + viper.Set("broker.password", "test") + viper.Set("broker.queue", "test") + viper.Set("broker.routingkey", "test") + + viper.Set("db.host", "test") + viper.Set("db.port", 123) + viper.Set("db.user", "test") + viper.Set("db.password", "test") + viper.Set("db.database", "test") + + viper.Set("schema.type", "isolated") + + conf := config.Config{} + conf.API.Host = "localhost" + conf.API.Port = 8080 + server := setup(&conf) + + assert.Equal(t, "localhost:8080", server.Addr) +} + +func (suite *TestSuite) SetupTest() { + viper.Set("log.level", "debug") +} + +func TestShutdown(t *testing.T) { + Conf = &config.Config{} + Conf.Broker = broker.MQConf{ + Host: "localhost", + Port: 5672, + User: "test", + Password: "test", + RoutingKey: "test", + Exchange: "sda", + Ssl: false, + Vhost: "/test", + } + Conf.API.MQ, err = broker.NewMQ(Conf.Broker) + if err != nil { + t.Skip("skip TestShutdown since broker not present") + } + assert.NoError(t, err) + + Conf.Database = database.DBConf{ + Host: "localhost", + Port: 5432, + User: "lega_in", + Password: "lega_in", + Database: "lega", + SslMode: "disable", + } + Conf.API.DB, err = database.NewDB(Conf.Database) + if err != nil { + t.Skip("skip TestShutdown since broker not present") + } + assert.NoError(t, err) + + // make sure all conections are alive + assert.Equal(t, false, Conf.API.MQ.Channel.IsClosed()) + 
assert.Equal(t, false, Conf.API.MQ.Connection.IsClosed()) + assert.Equal(t, nil, Conf.API.DB.DB.Ping()) + + shutdown() + assert.Equal(t, true, Conf.API.MQ.Channel.IsClosed()) + assert.Equal(t, true, Conf.API.MQ.Connection.IsClosed()) + assert.Equal(t, "sql: database is closed", Conf.API.DB.DB.Ping().Error()) +} + +func TestReadinessResponse(t *testing.T) { + r := mux.NewRouter() + r.HandleFunc("/ready", readinessResponse) + ts := httptest.NewServer(r) + defer ts.Close() + + Conf = &config.Config{} + Conf.Broker = broker.MQConf{ + Host: "localhost", + Port: 5672, + User: "test", + Password: "test", + RoutingKey: "test", + Exchange: "sda", + Ssl: false, + Vhost: "/test", + } + Conf.API.MQ, err = broker.NewMQ(Conf.Broker) + if err != nil { + t.Skip("skip TestShutdown since broker not present") + } + assert.NoError(t, err) + + Conf.Database = database.DBConf{ + Host: "localhost", + Port: 5432, + User: "lega_in", + Password: "lega_in", + Database: "lega", + SslMode: "disable", + } + Conf.API.DB, err = database.NewDB(Conf.Database) + assert.NoError(t, err) + + res, err := http.Get(ts.URL + "/ready") + assert.NoError(t, err) + assert.Equal(t, http.StatusOK, res.StatusCode) + defer res.Body.Close() + + // close the connection to force a reconneciton + Conf.API.MQ.Connection.Close() + res, err = http.Get(ts.URL + "/ready") + assert.NoError(t, err) + assert.Equal(t, http.StatusServiceUnavailable, res.StatusCode) + defer res.Body.Close() + + // reconnect should be fast so now this should pass + res, err = http.Get(ts.URL + "/ready") + assert.NoError(t, err) + assert.Equal(t, http.StatusOK, res.StatusCode) + defer res.Body.Close() + + // close the channel to force a reconneciton + Conf.API.MQ.Channel.Close() + res, err = http.Get(ts.URL + "/ready") + assert.NoError(t, err) + assert.Equal(t, http.StatusServiceUnavailable, res.StatusCode) + defer res.Body.Close() + + // reconnect should be fast so now this should pass + res, err = http.Get(ts.URL + "/ready") + assert.NoError(t, err) + assert.Equal(t, http.StatusOK, res.StatusCode) + defer res.Body.Close() + + // close DB connection to force a reconnection + Conf.API.DB.Close() + res, err = http.Get(ts.URL + "/ready") + assert.NoError(t, err) + assert.Equal(t, http.StatusServiceUnavailable, res.StatusCode) + defer res.Body.Close() + + // reconnect should be fast so now this should pass + res, err = http.Get(ts.URL + "/ready") + assert.NoError(t, err) + assert.Equal(t, http.StatusOK, res.StatusCode) + defer res.Body.Close() +} + +func TestDatabasePingCheck(t *testing.T) { + database := database.SQLdb{} + assert.Error(t, checkDB(&database, 1*time.Second), "nil DB should fail") + + database.DB, _, err = sqlmock.New() + assert.NoError(t, err) + assert.NoError(t, checkDB(&database, 1*time.Second), "ping should succeed") +} + +func TestDatasetRoute(t *testing.T) { + Conf = &config.Config{} + Conf.Broker = broker.MQConf{ + Host: "localhost", + Port: 5672, + User: "test", + Password: "test", + RoutingKey: "test", + Exchange: "sda", + Ssl: false, + Vhost: "/test", + SchemasPath: "file://../../schemas/isolated/", + } + Conf.API.MQ, err = broker.NewMQ(Conf.Broker) + if err != nil { + t.Skip("skip TestShutdown since broker not present") + } + Conf.Database = database.DBConf{ + Host: "localhost", + Port: 5432, + User: "postgres", + Password: "postgres", + Database: "lega", + SslMode: "disable", + } + Conf.API.DB, err = database.NewDB(Conf.Database) + if err != nil { + t.Skip("skip TestShutdown since broker not present") + } + + r := mux.NewRouter() + 
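+	// Register the dataset handler on a test router so the POST requests below
+	// exercise the real request path.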
r.HandleFunc("/dataset", dataset) + ts := httptest.NewServer(r) + defer ts.Close() + + goodJSON := []byte(`{"user":"test.user@example.com", "dataset_id": "cd532362-e06e-4460-8490-b9ce64b8d9e6", "dataset_files": [{"filepath": "inbox/user/file1.c4gh","file_id": "5fe7b660-afea-4c3a-88a9-3daabf055ebb", "sha256": "82E4e60e7beb3db2e06A00a079788F7d71f75b61a4b75f28c4c942703dabb6d6"}, {"filepath": "inbox/user/file2.c4gh","file_id": "ed6af454-d910-49e3-8cda-488a6f246e76", "sha256": "c967d96e56dec0f0cfee8f661846238b7f15771796ee1c345cae73cd812acc2b"}]}`) + good, err := http.Post(ts.URL+"/dataset", "application/json", bytes.NewBuffer(goodJSON)) + assert.NoError(t, err) + assert.Equal(t, http.StatusOK, good.StatusCode) + defer good.Body.Close() + + badJSON := []byte(`{"dataset_id": "cd532362-e06e-4460-8490-b9ce64b8d9e7", "dataset_files": []}`) + bad, err := http.Post(ts.URL+"/dataset", "application/json", bytes.NewBuffer(badJSON)) + assert.NoError(t, err) + assert.Equal(t, http.StatusBadRequest, bad.StatusCode) + defer bad.Body.Close() +} + +func TestMetadataRoute(t *testing.T) { + Conf = &config.Config{} + Conf.Broker.SchemasPath = "file://../../schemas/" + + r := mux.NewRouter() + r.HandleFunc("/metadata", metadata) + ts := httptest.NewServer(r) + defer ts.Close() + + goodJSON := []byte(`{"dataset_id": "cd532362-e06e-4460-8490-b9ce64b8d9e7", "metadata": {"dummy":"data"}}`) + good, err := http.Post(ts.URL+"/metadata", "application/json", bytes.NewBuffer(goodJSON)) + assert.NoError(t, err) + assert.Equal(t, http.StatusOK, good.StatusCode) + defer good.Body.Close() + + badJSON := []byte(`{"dataset_id": "phail", "metadata": {}}`) + bad, err := http.Post(ts.URL+"/metadata", "application/json", bytes.NewBuffer(badJSON)) + assert.NoError(t, err) + assert.Equal(t, http.StatusBadRequest, bad.StatusCode) + defer bad.Body.Close() +} + +func TestBuildJSON(t *testing.T) { + Conf = &config.Config{} + Conf.Database = database.DBConf{ + Host: "localhost", + Port: 5432, + User: "postgres", + Password: "postgres", + Database: "lega", + SslMode: "disable", + } + Conf.API.DB, err = database.NewDB(Conf.Database) + if err != nil { + t.Skip("skip TestShutdown since broker not present") + } + + db := Conf.API.DB.DB + + var fileID int64 + const insert = "INSERT INTO local_ega.main(submission_file_path, submission_user, decrypted_file_checksum, status, submission_file_extension) VALUES($1, $2, $3, 'READY', 'c4gh') RETURNING id;" + const accession = "UPDATE local_ega.files SET stable_id = $1 WHERE inbox_path = $2;" + const mapping = "INSERT INTO local_ega_ebi.filedataset(file_id, dataset_stable_id) VALUES ($1, 'cd532362-e06e-4460-8490-b9ce64b8d9e7');" + + err := db.QueryRow(insert, "dummy.user/test/file1.c4gh", "dummy.user", "c967d96e56dec0f0cfee8f661846238b7f15771796ee1c345cae73cd812acc2b").Scan(&fileID) + assert.NoError(t, err) + err = db.QueryRow(accession, "ed6af454-d910-49e3-8cda-488a6f246e76", "dummy.user/test/file1.c4gh").Err() + assert.NoError(t, err) + err = db.QueryRow(mapping, fileID).Err() + assert.NoError(t, err) + + err = db.QueryRow(insert, "dummy.user/test/file2.c4gh", "dummy.user", "82E4e60e7beb3db2e06A00a079788F7d71f75b61a4b75f28c4c942703dabb6d6").Scan(&fileID) + assert.NoError(t, err) + err = db.QueryRow(accession, "5fe7b660-afea-4c3a-88a9-3daabf055ebb", "dummy.user/test/file2.c4gh").Err() + assert.NoError(t, err) + err = db.QueryRow(mapping, fileID).Err() + assert.NoError(t, err) + + m := []byte(`{"type":"mapping", "dataset_id": "cd532362-e06e-4460-8490-b9ce64b8d9e7", "accession_ids": 
["5fe7b660-afea-4c3a-88a9-3daabf055ebb", "ed6af454-d910-49e3-8cda-488a6f246e76"]}`) + _, err = buildSyncDatasetJSON(m) + assert.NoError(t, err) +} + +func TestSendPOST(t *testing.T) { + r := http.NewServeMux() + r.HandleFunc("/dataset", func(w http.ResponseWriter, r *http.Request) { + _, err = w.Write([]byte(fmt.Sprint(http.StatusOK))) + assert.NoError(t, err) + }) + ts := httptest.NewServer(r) + defer ts.Close() + + Conf = &config.Config{} + Conf.Sync = config.SyncConf{ + Host: ts.URL, + User: "test", + Password: "test", + } + syncJSON := []byte(`{"user":"test.user@example.com", "dataset_id": "cd532362-e06e-4460-8490-b9ce64b8d9e7", "dataset_files": [{"filepath": "inbox/user/file1.c4gh","file_id": "5fe7b660-afea-4c3a-88a9-3daabf055ebb", "sha256": "82E4e60e7beb3db2e06A00a079788F7d71f75b61a4b75f28c4c942703dabb6d6"}, {"filepath": "inbox/user/file2.c4gh","file_id": "ed6af454-d910-49e3-8cda-488a6f246e76", "sha256": "c967d96e56dec0f0cfee8f661846238b7f15771796ee1c345cae73cd812acc2b"}]}`) + err := sendPOST(syncJSON) + assert.NoError(t, err) +} + +func TestCreateHostURL(t *testing.T) { + Conf = &config.Config{} + Conf.Sync = config.SyncConf{ + Host: "http://localhost", + Port: 443, + } + + s, err := createHostURL(Conf.Sync.Host, Conf.Sync.Port) + assert.NoError(t, err) + assert.Equal(t, "http://localhost:443/dataset", s) +} + +func TestBasicAuth(t *testing.T) { + Conf = &config.Config{} + Conf.Broker.SchemasPath = "file://../../schemas/" + Conf.API = config.APIConf{ + User: "dummy", + Password: "test", + } + + r := mux.NewRouter() + r.HandleFunc("/metadata", basicAuth(metadata)) + ts := httptest.NewServer(r) + defer ts.Close() + + goodJSON := []byte(`{"dataset_id": "cd532362-e06e-4460-8490-b9ce64b8d9e7", "metadata": {"dummy":"data"}}`) + req, err := http.NewRequest("POST", ts.URL+"/metadata", bytes.NewBuffer(goodJSON)) + assert.NoError(t, err) + req.SetBasicAuth(Conf.API.User, Conf.API.Password) + good, err := ts.Client().Do(req) + assert.NoError(t, err) + assert.Equal(t, http.StatusOK, good.StatusCode) + defer good.Body.Close() + + req.SetBasicAuth(Conf.API.User, "wrongpass") + bad, err := ts.Client().Do(req) + assert.NoError(t, err) + assert.Equal(t, http.StatusUnauthorized, bad.StatusCode) + defer bad.Body.Close() +} diff --git a/sda/schemas/bigpicture/file-sync.json b/sda/schemas/bigpicture/file-sync.json new file mode 100644 index 000000000..1127acab2 --- /dev/null +++ b/sda/schemas/bigpicture/file-sync.json @@ -0,0 +1,119 @@ +{ + "title": "JSON schema for file syncing message interface.", + "$id": "https://github.com/neicnordic/sensitive-data-archive/tree/master/sda/schemas/bigpicture/file-sync.json", + "$schema": "http://json-schema.org/draft-07/schema", + "type": "object", + "required": [ + "dataset_id", + "dataset_files", + "user" + ], + "additionalProperties": false, + "definitions": { + "dataset_files": { + "$id": "#/definitions/dataset_files", + "type": "object", + "minProperties": 3, + "maxProperties": 3, + "title": "File information schema", + "description": "Informations about a file", + "examples": [ + { + "filepath": "path/to/file", + "file_id": "16f3edd1-3c40-4284-9f82-1055361e655b", + "sha256": "82e4e60e7beb3db2e06a00a079788f7d71f75b61a4b75f28c4c942703dabb6d6" + } + ], + "required": [ + "filepath", + "file_id", + "sha256" + ], + "additionalProperties": false, + "properties": { + "filepath": { + "$id": "#/definitions/dataset_files/properties/filepath", + "type": "string", + "title": "The inbox filepath", + "description": "The inbox filepath", + "minLength": 5 + }, + "file_id": { + 
"$id": "#/definitions/dataset_files/properties/file_id", + "type": "string", + "title": "The checksum value in hex format", + "description": "The checksum value in (case-insensitive) hex format", + "minLength": 11, + "pattern": "^\\S+$", + "examples": [ + "16f3edd1-3c40-4284-9f82-1055361e655b" + ] + }, + "sha256": { + "$id": "#/definitions/checksum-sha256/properties/sha256", + "type": "string", + "title": "The decrypred checksum value in hex format", + "description": "The checksum value in (case-insensitive) hex format", + "pattern": "^[a-fA-F0-9]{64}$", + "examples": [ + "82E4e60e7beb3db2e06A00a079788F7d71f75b61a4b75f28c4c942703dabb6d6" + ] + } + } + } + }, + "properties": { + "dataset_id": { + "$id": "#/properties/dataset_id", + "type": "string", + "title": "The Accession identifier for the dataset", + "description": "The Accession identifier for the dataset", + "minLength": 11, + "pattern": "^\\S+$", + "examples": [ + "anyidentifier" + ] + }, + "dataset_files": { + "$id": "#/properties/dataset_files", + "type": "array", + "title": "The files in that dataset", + "description": "The files in that dataset", + "minItems": 1, + "examples": [ + [ + { + "filepath": "path/to/file1.c4gh", + "file_id": "16f3edd1-3c40-4284-9f82-1055361e655b" + }, + { + "filepath": "path/to/file2.c4gh", + "file_id": "ba824437-ffc0-4431-b6a0-73968c1bb1ed" + } + ] + ], + "additionalItems": false, + "items": { + "$ref": "#/definitions/dataset_files", + "properties": { + "filepath": { + "$ref": "#/definitions/dataset_files/properties/filepath" + }, + "file_id": { + "$ref": "#/definitions/dataset_files/properties/file_id" + } + } + } + }, + "user": { + "$id": "#/properties/user", + "type": "string", + "title": "The username", + "description": "The username", + "minLength": 5, + "examples": [ + "user.name@example.com" + ] + } + } +} diff --git a/sda/schemas/bigpicture/metadata-sync.json b/sda/schemas/bigpicture/metadata-sync.json new file mode 100644 index 000000000..049cc345a --- /dev/null +++ b/sda/schemas/bigpicture/metadata-sync.json @@ -0,0 +1,32 @@ +{ + "title": "JSON schema for file syncing message interface.", + "$id": "https://github.com/neicnordic/sensitive-data-archive/tree/master/sda/schemas/bigpicture/metadata-sync.json", + "$schema": "http://json-schema.org/draft-07/schema", + "type": "object", + "required": [ + "dataset_id", + "metadata" + ], + "additionalProperties": false, + "properties": { + "dataset_id": { + "$id": "#/properties/dataset_id", + "type": "string", + "title": "The Accession identifier for the dataset", + "description": "The Accession identifier for the dataset", + "minLength": 11, + "pattern": "^\\S+$", + "examples": [ + "anyidentifier" + ] + }, + "metadata": { + "$id": "#/properties/metadata", + "type": "object", + "title": "Metadata for the dataset", + "description": "Metadata for the dataset", + "minProperties": 1, + "pattern": "^\\S+$" + } + } +} From e4af27272c3891004daf1e0217780ed1bd3cf4bf Mon Sep 17 00:00:00 2001 From: Joakim Bygdell Date: Mon, 23 Oct 2023 15:20:40 +0200 Subject: [PATCH 14/34] [sync-api] make it work with the merged code base --- sda/cmd/syncapi/syncapi.go | 105 ++++++++------------- sda/cmd/syncapi/syncapi_test.go | 47 +++++---- sda/internal/config/config.go | 57 +++++++++++ sda/internal/config/config_test.go | 22 +++++ sda/internal/database/database.go | 6 ++ sda/internal/database/db_functions.go | 67 +++++++++++++ sda/internal/database/db_functions_test.go | 61 ++++++++++++ sda/internal/schema/schema.go | 25 +++++ sda/internal/schema/schema_test.go | 50 
++++++++++ 9 files changed, 351 insertions(+), 89 deletions(-) diff --git a/sda/cmd/syncapi/syncapi.go b/sda/cmd/syncapi/syncapi.go index ee16df229..2dd6f7bf5 100644 --- a/sda/cmd/syncapi/syncapi.go +++ b/sda/cmd/syncapi/syncapi.go @@ -16,12 +16,11 @@ import ( "syscall" "time" - "sda-pipeline/internal/broker" - "sda-pipeline/internal/common" - "sda-pipeline/internal/config" - "sda-pipeline/internal/database" - "github.com/gorilla/mux" + "github.com/neicnordic/sensitive-data-archive/internal/broker" + "github.com/neicnordic/sensitive-data-archive/internal/config" + "github.com/neicnordic/sensitive-data-archive/internal/database" + "github.com/neicnordic/sensitive-data-archive/internal/schema" log "github.com/sirupsen/logrus" ) @@ -42,7 +41,7 @@ type datasetFiles struct { } func main() { - Conf, err = config.NewConfig("sync") + Conf, err = config.NewConfig("sync-api") if err != nil { log.Fatal(err) } @@ -50,7 +49,7 @@ func main() { if err != nil { log.Fatal(err) } - Conf.API.DB, err = database.NewDB(Conf.Database) + Conf.API.DB, err = database.NewSDAdb(Conf.Database) if err != nil { log.Fatal(err) } @@ -71,25 +70,27 @@ func main() { } for m := range messages { log.Debugf("Received a message (corr-id: %s, message: %s)", m.CorrelationId, m.Body) - res, err := common.ValidateJSON(Conf.Broker.SchemasPath+"dataset-mapping.json", m.Body) + err := schema.ValidateJSON(fmt.Sprintf("%s/dataset-mapping.json", Conf.Broker.SchemasPath), m.Body) if err != nil { - if err := m.Nack(false, false); err != nil { - log.Errorf("Failed to nack message, reason: %v", err) + log.Errorf("validation of incoming message (dataset-mapping) failed, reason: (%s)", err.Error()) + // Send the message to an error queue so it can be analyzed. + infoErrorMessage := broker.InfoError{ + Error: "Message validation failed", + Reason: err.Error(), + OriginalMessage: m, } - continue - } - if !res.Valid() { - errorString := "" - for _, validErr := range res.Errors() { - errorString += validErr.String() + "\n\n" + body, _ := json.Marshal(infoErrorMessage) + if err := Conf.API.MQ.SendMessage(m.CorrelationId, Conf.Broker.Exchange, "error", body); err != nil { + log.Errorf("failed to publish message, reason: (%s)", err.Error()) } - if err := m.Nack(false, false); err != nil { - log.Errorf("Failed to nack message, reason: %v", err) + if err := m.Ack(false); err != nil { + log.Errorf("failed to Ack message, reason: (%s)", err.Error()) } continue } + log.Infoln("buildSyncDatasetJSON") blob, err := buildSyncDatasetJSON(m.Body) if err != nil { @@ -185,14 +186,14 @@ func readinessResponse(w http.ResponseWriter, r *http.Request) { if DBRes := checkDB(Conf.API.DB, 5*time.Millisecond); DBRes != nil { log.Debugf("DB connection error :%v", DBRes) - Conf.API.DB.Reconnect() + Conf.API.DB.Connect() statusCocde = http.StatusServiceUnavailable } w.WriteHeader(statusCocde) } -func checkDB(database *database.SQLdb, timeout time.Duration) error { +func checkDB(database *database.SDAdb, timeout time.Duration) error { ctx, cancel := context.WithTimeout(context.Background(), timeout) defer cancel() if database.DB == nil { @@ -211,19 +212,8 @@ func dataset(w http.ResponseWriter, r *http.Request) { } defer r.Body.Close() - // the filepath looks funkt for now, it will sort itself out when we switch to sda-common - res, err := common.ValidateJSON(Conf.Broker.SchemasPath+"../bigpicture/file-sync.json", b) - if err != nil { - respondWithError(w, http.StatusBadRequest, "eror on JSON validation: "+err.Error()) - - return - } - if !res.Valid() { - errorString := "" - 
for _, validErr := range res.Errors() { - errorString += validErr.String() + "\n\n" - } - respondWithError(w, http.StatusBadRequest, "JSON validation failed, reason: "+errorString) + if err := schema.ValidateJSON(fmt.Sprintf("%s/../bigpicture/file-sync.json", Conf.Broker.SchemasPath), b); err != nil { + respondWithError(w, http.StatusBadRequest, fmt.Sprintf("eror on JSON validation: %s", err.Error())) return } @@ -254,7 +244,7 @@ func parseDatasetMessage(msg []byte) error { var accessionIDs []string for _, files := range blob.DatasetFiles { - ingest := common.Ingest{ + ingest := schema.IngestionTrigger{ Type: "ingest", User: blob.User, FilePath: files.FilePath, @@ -263,30 +253,30 @@ func parseDatasetMessage(msg []byte) error { if err != nil { return fmt.Errorf("Failed to marshal json messge: Reason %v", err) } - err = Conf.API.MQ.SendMessage(fmt.Sprintf("%v", time.Now().Unix()), Conf.Broker.Exchange, "ingest", true, ingestMsg) - if err != nil { + + if err := Conf.API.MQ.SendMessage(fmt.Sprintf("%v", time.Now().Unix()), Conf.Broker.Exchange, "ingest", ingestMsg); err != nil { return fmt.Errorf("Failed to send ingest messge: Reason %v", err) } accessionIDs = append(accessionIDs, files.FileID) - finalize := common.Finalize{ + finalize := schema.IngestionAccession{ Type: "accession", User: blob.User, - Filepath: files.FilePath, + FilePath: files.FilePath, AccessionID: files.FileID, - DecryptedChecksums: []common.Checksums{{Type: "sha256", Value: files.ShaSum}}, + DecryptedChecksums: []schema.Checksums{{Type: "sha256", Value: files.ShaSum}}, } finalizeMsg, err := json.Marshal(finalize) if err != nil { return fmt.Errorf("Failed to marshal json messge: Reason %v", err) } - err = Conf.API.MQ.SendMessage(fmt.Sprintf("%v", time.Now().Unix()), Conf.Broker.Exchange, "accessionIDs", true, finalizeMsg) - if err != nil { + + if err := Conf.API.MQ.SendMessage(fmt.Sprintf("%v", time.Now().Unix()), Conf.Broker.Exchange, "accession", finalizeMsg); err != nil { return fmt.Errorf("Failed to send mapping messge: Reason %v", err) } } - mappings := common.Mappings{ + mappings := schema.DatasetMapping{ Type: "mapping", DatasetID: blob.DatasetID, AccessionIDs: accessionIDs, @@ -296,8 +286,7 @@ func parseDatasetMessage(msg []byte) error { return fmt.Errorf("Failed to marshal json messge: Reason %v", err) } - err = Conf.API.MQ.SendMessage(fmt.Sprintf("%v", time.Now().Unix()), Conf.Broker.Exchange, "mappings", true, mappingMsg) - if err != nil { + if err := Conf.API.MQ.SendMessage(fmt.Sprintf("%v", time.Now().Unix()), Conf.Broker.Exchange, "mappings", mappingMsg); err != nil { return fmt.Errorf("Failed to send mapping messge: Reason %v", err) } @@ -328,19 +317,9 @@ func metadata(w http.ResponseWriter, r *http.Request) { return } defer r.Body.Close() - // the filepath looks funkt for now, it will sort itself out when we switch to sda-common - res, err := common.ValidateJSON(Conf.Broker.SchemasPath+"bigpicture/metadata-sync.json", b) - if err != nil { - respondWithError(w, http.StatusBadRequest, "eror on JSON validation: "+err.Error()) - return - } - if !res.Valid() { - errorString := "" - for _, validErr := range res.Errors() { - errorString += validErr.String() + "\n\n" - } - respondWithError(w, http.StatusBadRequest, "JSON validation failed, reason: "+errorString) + if err := schema.ValidateJSON(fmt.Sprintf("%s/bigpicture/metadata-sync.json", Conf.Broker.SchemasPath), b); err != nil { + respondWithError(w, http.StatusBadRequest, err.Error()) return } @@ -349,7 +328,7 @@ func metadata(w http.ResponseWriter, r 
*http.Request) { } func buildSyncDatasetJSON(b []byte) ([]byte, error) { - var msg common.Mappings + var msg schema.DatasetMapping _ = json.Unmarshal(b, &msg) var dataset = syncDataset{ @@ -357,10 +336,6 @@ func buildSyncDatasetJSON(b []byte) ([]byte, error) { } for _, ID := range msg.AccessionIDs { - if DBRes := checkDB(Conf.API.DB, 20*time.Millisecond); DBRes != nil { - log.Infof("DB connection error :%v", DBRes) - Conf.API.DB.Reconnect() - } data, err := Conf.API.DB.GetSyncData(ID) if err != nil { return nil, err @@ -384,7 +359,7 @@ func buildSyncDatasetJSON(b []byte) ([]byte, error) { func sendPOST(payload []byte) error { client := &http.Client{} - URL, err := createHostURL(Conf.Sync.Host, Conf.Sync.Port) + URL, err := createHostURL(Conf.SyncAPI.RemoteHost, Conf.SyncAPI.RemotePort) if err != nil { return err } @@ -393,7 +368,7 @@ func sendPOST(payload []byte) error { if err != nil { return err } - req.SetBasicAuth(Conf.Sync.User, Conf.Sync.Password) + req.SetBasicAuth(Conf.SyncAPI.RemoteUser, Conf.SyncAPI.RemotePassword) resp, err := client.Do(req) if err != nil || resp.StatusCode != http.StatusOK { return err @@ -422,8 +397,8 @@ func basicAuth(auth http.HandlerFunc) http.HandlerFunc { if ok { usernameHash := sha256.Sum256([]byte(username)) passwordHash := sha256.Sum256([]byte(password)) - expectedUsernameHash := sha256.Sum256([]byte(Conf.API.User)) - expectedPasswordHash := sha256.Sum256([]byte(Conf.API.Password)) + expectedUsernameHash := sha256.Sum256([]byte(Conf.SyncAPI.APIUser)) + expectedPasswordHash := sha256.Sum256([]byte(Conf.SyncAPI.APIPassword)) usernameMatch := (subtle.ConstantTimeCompare(usernameHash[:], expectedUsernameHash[:]) == 1) passwordMatch := (subtle.ConstantTimeCompare(passwordHash[:], expectedPasswordHash[:]) == 1) diff --git a/sda/cmd/syncapi/syncapi_test.go b/sda/cmd/syncapi/syncapi_test.go index dbb531ec5..38024bde7 100644 --- a/sda/cmd/syncapi/syncapi_test.go +++ b/sda/cmd/syncapi/syncapi_test.go @@ -8,12 +8,11 @@ import ( "testing" "time" - "sda-pipeline/internal/broker" - "sda-pipeline/internal/config" - "sda-pipeline/internal/database" - "github.com/DATA-DOG/go-sqlmock" "github.com/gorilla/mux" + "github.com/neicnordic/sensitive-data-archive/internal/broker" + "github.com/neicnordic/sensitive-data-archive/internal/config" + "github.com/neicnordic/sensitive-data-archive/internal/database" "github.com/spf13/viper" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/suite" @@ -84,7 +83,7 @@ func TestShutdown(t *testing.T) { Database: "lega", SslMode: "disable", } - Conf.API.DB, err = database.NewDB(Conf.Database) + Conf.API.DB, err = database.NewSDAdb(Conf.Database) if err != nil { t.Skip("skip TestShutdown since broker not present") } @@ -132,7 +131,7 @@ func TestReadinessResponse(t *testing.T) { Database: "lega", SslMode: "disable", } - Conf.API.DB, err = database.NewDB(Conf.Database) + Conf.API.DB, err = database.NewSDAdb(Conf.Database) assert.NoError(t, err) res, err := http.Get(ts.URL + "/ready") @@ -181,7 +180,7 @@ func TestReadinessResponse(t *testing.T) { } func TestDatabasePingCheck(t *testing.T) { - database := database.SQLdb{} + database := database.SDAdb{} assert.Error(t, checkDB(&database, 1*time.Second), "nil DB should fail") database.DB, _, err = sqlmock.New() @@ -214,7 +213,7 @@ func TestDatasetRoute(t *testing.T) { Database: "lega", SslMode: "disable", } - Conf.API.DB, err = database.NewDB(Conf.Database) + Conf.API.DB, err = database.NewSDAdb(Conf.Database) if err != nil { t.Skip("skip TestShutdown since broker not present") } 
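
(Editorial aside, not part of the patch: the `basicAuth` wrapper in syncapi.go above hashes both the supplied and the expected credentials with SHA-256 and only then compares them with `subtle.ConstantTimeCompare`, so the check runs in time independent of how many characters happen to match. A minimal, self-contained sketch of that pattern follows; the `checkCreds` helper and the hard-coded credentials are illustrative assumptions only.)

package main

import (
	"crypto/sha256"
	"crypto/subtle"
	"fmt"
)

// checkCreds compares supplied credentials against the expected ones in
// constant time. Hashing first yields fixed-length inputs, so neither the
// length nor the content of the expected values leaks through timing.
func checkCreds(user, pass, wantUser, wantPass string) bool {
	userHash := sha256.Sum256([]byte(user))
	passHash := sha256.Sum256([]byte(pass))
	wantUserHash := sha256.Sum256([]byte(wantUser))
	wantPassHash := sha256.Sum256([]byte(wantPass))

	userOK := subtle.ConstantTimeCompare(userHash[:], wantUserHash[:]) == 1
	passOK := subtle.ConstantTimeCompare(passHash[:], wantPassHash[:]) == 1

	return userOK && passOK
}

func main() {
	fmt.Println(checkCreds("dummy", "test", "dummy", "test"))      // true
	fmt.Println(checkCreds("dummy", "wrongpass", "dummy", "test")) // false
}

(Both comparisons are computed unconditionally before the results are combined, mirroring the handler above, so a failed username does not short-circuit the password check.)
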
@@ -239,7 +238,7 @@ func TestDatasetRoute(t *testing.T) { func TestMetadataRoute(t *testing.T) { Conf = &config.Config{} - Conf.Broker.SchemasPath = "file://../../schemas/" + Conf.Broker.SchemasPath = "../../schemas" r := mux.NewRouter() r.HandleFunc("/metadata", metadata) @@ -269,7 +268,7 @@ func TestBuildJSON(t *testing.T) { Database: "lega", SslMode: "disable", } - Conf.API.DB, err = database.NewDB(Conf.Database) + Conf.API.DB, err = database.NewSDAdb(Conf.Database) if err != nil { t.Skip("skip TestShutdown since broker not present") } @@ -310,10 +309,10 @@ func TestSendPOST(t *testing.T) { defer ts.Close() Conf = &config.Config{} - Conf.Sync = config.SyncConf{ - Host: ts.URL, - User: "test", - Password: "test", + Conf.SyncAPI = config.SyncAPIConf{ + RemoteHost: ts.URL, + RemoteUser: "test", + RemotePassword: "test", } syncJSON := []byte(`{"user":"test.user@example.com", "dataset_id": "cd532362-e06e-4460-8490-b9ce64b8d9e7", "dataset_files": [{"filepath": "inbox/user/file1.c4gh","file_id": "5fe7b660-afea-4c3a-88a9-3daabf055ebb", "sha256": "82E4e60e7beb3db2e06A00a079788F7d71f75b61a4b75f28c4c942703dabb6d6"}, {"filepath": "inbox/user/file2.c4gh","file_id": "ed6af454-d910-49e3-8cda-488a6f246e76", "sha256": "c967d96e56dec0f0cfee8f661846238b7f15771796ee1c345cae73cd812acc2b"}]}`) err := sendPOST(syncJSON) @@ -322,22 +321,22 @@ func TestSendPOST(t *testing.T) { func TestCreateHostURL(t *testing.T) { Conf = &config.Config{} - Conf.Sync = config.SyncConf{ - Host: "http://localhost", - Port: 443, + Conf.SyncAPI = config.SyncAPIConf{ + RemoteHost: "http://localhost", + RemotePort: 443, } - s, err := createHostURL(Conf.Sync.Host, Conf.Sync.Port) + s, err := createHostURL(Conf.SyncAPI.RemoteHost, Conf.SyncAPI.RemotePort) assert.NoError(t, err) assert.Equal(t, "http://localhost:443/dataset", s) } func TestBasicAuth(t *testing.T) { Conf = &config.Config{} - Conf.Broker.SchemasPath = "file://../../schemas/" - Conf.API = config.APIConf{ - User: "dummy", - Password: "test", + Conf.Broker.SchemasPath = "../../schemas" + Conf.SyncAPI = config.SyncAPIConf{ + APIUser: "dummy", + APIPassword: "test", } r := mux.NewRouter() @@ -348,13 +347,13 @@ func TestBasicAuth(t *testing.T) { goodJSON := []byte(`{"dataset_id": "cd532362-e06e-4460-8490-b9ce64b8d9e7", "metadata": {"dummy":"data"}}`) req, err := http.NewRequest("POST", ts.URL+"/metadata", bytes.NewBuffer(goodJSON)) assert.NoError(t, err) - req.SetBasicAuth(Conf.API.User, Conf.API.Password) + req.SetBasicAuth(Conf.SyncAPI.APIUser, Conf.SyncAPI.APIPassword) good, err := ts.Client().Do(req) assert.NoError(t, err) assert.Equal(t, http.StatusOK, good.StatusCode) defer good.Body.Close() - req.SetBasicAuth(Conf.API.User, "wrongpass") + req.SetBasicAuth(Conf.SyncAPI.APIUser, "wrongpass") bad, err := ts.Client().Do(req) assert.NoError(t, err) assert.Equal(t, http.StatusUnauthorized, bad.StatusCode) diff --git a/sda/internal/config/config.go b/sda/internal/config/config.go index 9b50a5a75..71e528971 100644 --- a/sda/internal/config/config.go +++ b/sda/internal/config/config.go @@ -47,6 +47,16 @@ type Config struct { Notify SMTPConf Orchestrator OrchestratorConf Sync storage.Conf + SyncAPI SyncAPIConf +} + +type SyncAPIConf struct { + APIPassword string + APIUser string + RemoteHost string + RemotePassword string + RemotePort int + RemoteUser string } type APIConf struct { @@ -294,6 +304,17 @@ func NewConfig(app string) (*Config, error) { default: return nil, fmt.Errorf("sync.destination.type not set") } + case "sync-api": + requiredConfVars = []string{ + 
"broker.exchange", + "broker.host", + "broker.port", + "broker.user", + "broker.password", + "broker.queue", + "sync.api.user", + "sync.api.password", + } case "verify": requiredConfVars = []string{ "broker.host", @@ -460,6 +481,28 @@ func NewConfig(app string) (*Config, error) { c.configSyncDestination() c.configSchemas() + case "sync-api": + if viper.IsSet("db.host") { + if err := c.configDatabase(); err != nil { + return nil, err + } + } + + if err := c.configBroker(); err != nil { + return nil, err + } + + if err := c.configAPI(); err != nil { + return nil, err + } + + if viper.IsSet("sync.api.remote.host") { + c.configSyncAPI() + } + + c.configSchemas() + + return c, nil case "verify": c.configArchive() @@ -787,6 +830,20 @@ func (c *Config) configSyncDestination() { } } +// configSync provides configuration for the outgoing sync settings +func (c *Config) configSyncAPI() { + c.SyncAPI = SyncAPIConf{} + c.SyncAPI.APIPassword = viper.GetString("sync.api.password") + c.SyncAPI.APIUser = viper.GetString("sync.api.user") + + c.SyncAPI.RemoteHost = viper.GetString("sync.api.remote.host") + if viper.IsSet("sync.api.remote.port") { + c.SyncAPI.RemotePort = viper.GetInt("sync.api.remote.port") + } + c.SyncAPI.RemotePassword = viper.GetString("sync.api.remote.pass") + c.SyncAPI.RemoteUser = viper.GetString("sync.api.remote.user") +} + // GetC4GHKey reads and decrypts and returns the c4gh key func GetC4GHKey() (*[32]byte, error) { keyPath := viper.GetString("c4gh.filepath") diff --git a/sda/internal/config/config_test.go b/sda/internal/config/config_test.go index 466ea7a22..fd8bb5fed 100644 --- a/sda/internal/config/config_test.go +++ b/sda/internal/config/config_test.go @@ -341,3 +341,25 @@ func (suite *ConfigTestSuite) TestGetC4GHKey() { defer os.RemoveAll(keyPath) } + +func (suite *ConfigTestSuite) TestConfigSyncAPI() { + suite.SetupTest() + noConfig, err := NewConfig("sync-api") + assert.Error(suite.T(), err) + assert.Nil(suite.T(), noConfig) + + viper.Set("sync.api.user", "user") + viper.Set("sync.api.password", "password") + viper.Set("sync.api.remote.host", "remote-host") + viper.Set("sync.api.remote.port", 1234) + viper.Set("sync.api.remote.user", "remote-user") + viper.Set("sync.api.remote.pass", "remote-pass") + config, err := NewConfig("sync-api") + assert.NoError(suite.T(), err) + assert.Equal(suite.T(), "remote-host", config.SyncAPI.RemoteHost) + assert.Equal(suite.T(), 1234, config.SyncAPI.RemotePort) + assert.Equal(suite.T(), "remote-user", config.SyncAPI.RemoteUser) + assert.Equal(suite.T(), "remote-pass", config.SyncAPI.RemotePassword) + assert.Equal(suite.T(), "user", config.SyncAPI.APIUser) + assert.Equal(suite.T(), "password", config.SyncAPI.APIPassword) +} diff --git a/sda/internal/database/database.go b/sda/internal/database/database.go index be16c19d4..478f1c349 100644 --- a/sda/internal/database/database.go +++ b/sda/internal/database/database.go @@ -42,6 +42,12 @@ type FileInfo struct { DecryptedSize int64 } +type SyncData struct { + User string + FilePath string + Checksum string +} + // SchemaName is the name of the remote database schema to query var SchemaName = "sda" diff --git a/sda/internal/database/db_functions.go b/sda/internal/database/db_functions.go index b53e0694d..f74216809 100644 --- a/sda/internal/database/db_functions.go +++ b/sda/internal/database/db_functions.go @@ -526,3 +526,70 @@ func (dbs *SDAdb) GetHeaderForStableID(stableID string) ([]byte, error) { return header, nil } + +// GetSyncData retrieves the file information needed to sync a dataset 
+func (dbs *SDAdb) GetSyncData(accessionID string) (SyncData, error) { + var ( + s SyncData + err error + ) + + for count := 1; count <= RetryTimes; count++ { + s, err = dbs.getSyncData(accessionID) + if err == nil { + break + } + time.Sleep(time.Duration(math.Pow(3, float64(count))) * time.Second) + } + + return s, err +} + +// getSyncData is the actual function performing work for GetSyncData +func (dbs *SDAdb) getSyncData(accessionID string) (SyncData, error) { + dbs.checkAndReconnectIfNeeded() + + const query = "SELECT submission_user, submission_file_path from sda.files WHERE stable_id = $1;" + var data SyncData + if err := dbs.DB.QueryRow(query, accessionID).Scan(&data.User, &data.FilePath); err != nil { + return SyncData{}, err + } + + const checksum = "SELECT checksum from sda.checksums WHERE source = 'UNENCRYPTED' and file_id = (SELECT id FROM sda.files WHERE stable_id = $1);" + if err := dbs.DB.QueryRow(checksum, accessionID).Scan(&data.Checksum); err != nil { + return SyncData{}, err + } + + return data, nil +} + +// CheckIfDatasetExists checks if a dataset already is registered +func (dbs *SDAdb) CheckIfDatasetExists(datasetID string) (bool, error) { + var ( + ds bool + err error + ) + + for count := 1; count <= RetryTimes; count++ { + ds, err = dbs.checkIfDatasetExists(datasetID) + if err == nil { + break + } + time.Sleep(time.Duration(math.Pow(3, float64(count))) * time.Second) + } + + return ds, err +} + +// getSyncData is the actual function performing work for GetSyncData +func (dbs *SDAdb) checkIfDatasetExists(datasetID string) (bool, error) { + dbs.checkAndReconnectIfNeeded() + + const query = "SELECT EXISTS(SELECT id from sda.datasets WHERE stable_id = $1);" + var yesNo bool + if err := dbs.DB.QueryRow(query, datasetID).Scan(&yesNo); err != nil { + return yesNo, err + } + + return yesNo, nil +} diff --git a/sda/internal/database/db_functions_test.go b/sda/internal/database/db_functions_test.go index 462f4a171..54656d28e 100644 --- a/sda/internal/database/db_functions_test.go +++ b/sda/internal/database/db_functions_test.go @@ -384,3 +384,64 @@ func (suite *DatabaseTests) TestGetHeaderForStableID() { assert.NoError(suite.T(), err, "failed to get header for stable ID: %v", err) assert.Equal(suite.T(), header, []byte("HEADER"), "did not get expected header") } + +func (suite *DatabaseTests) TestGetSyncData() { + db, err := NewSDAdb(suite.dbConf) + assert.NoError(suite.T(), err, "got %v when creating new connection", err) + + // register a file in the database + fileID, err := db.RegisterFile("/testuser/TestGetGetSyncData.c4gh", "testuser") + assert.NoError(suite.T(), err, "failed to register file in database") + + checksum := sha256.New() + fileInfo := FileInfo{sha256.New(), 1234, "/tmp/TestGetGetSyncData.c4gh", checksum, 999} + corrID := uuid.New().String() + err = db.SetArchived(fileInfo, fileID, corrID) + assert.NoError(suite.T(), err, "failed to mark file as Archived") + + err = db.markCompleted(fileInfo, fileID, corrID) + assert.NoError(suite.T(), err, "failed to mark file as Verified") + + stableID := "TEST:000-1111-2222" + err = db.SetAccessionID(stableID, fileID) + assert.NoError(suite.T(), err, "got (%v) when setting stable ID: %s, %s", err, stableID, fileID) + + fileData, err := db.getSyncData("TEST:000-1111-2222") + assert.NoError(suite.T(), err, "failed to get sync data for file") + assert.Equal(suite.T(), "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", fileData.Checksum, "did not get expected file checksum") + assert.Equal(suite.T(), 
"/testuser/TestGetGetSyncData.c4gh", fileData.FilePath, "did not get expected file path") + assert.Equal(suite.T(), "testuser", fileData.User, "did not get expected user") +} + +func (suite *DatabaseTests) TestCheckIfDatasetExists() { + db, err := NewSDAdb(suite.dbConf) + assert.NoError(suite.T(), err, "got %v when creating new connection", err) + + accessions := []string{} + for i := 0; i <= 3; i++ { + fileID, err := db.RegisterFile(fmt.Sprintf("/testuser/TestCheckIfDatasetExists-%d.c4gh", i), "testuser") + assert.NoError(suite.T(), err, "failed to register file in database") + + err = db.SetAccessionID(fmt.Sprintf("accession-%d", i), fileID) + assert.NoError(suite.T(), err, "got (%v) when getting file archive information", err) + + accessions = append(accessions, fmt.Sprintf("accession-%d", i)) + } + + diSet := map[string][]string{ + "dataset": accessions[0:3], + } + + for di, acs := range diSet { + err := db.MapFilesToDataset(di, acs) + assert.NoError(suite.T(), err, "failed to map file to dataset") + } + + ok, err := db.checkIfDatasetExists("dataset") + assert.NoError(suite.T(), err, "check if dataset exists failed") + assert.Equal(suite.T(), ok, true) + + ok, err = db.checkIfDatasetExists("missing dataset") + assert.NoError(suite.T(), err, "check if dataset exists failed") + assert.Equal(suite.T(), ok, false) +} diff --git a/sda/internal/schema/schema.go b/sda/internal/schema/schema.go index 9c2c68eec..2f6c4c022 100644 --- a/sda/internal/schema/schema.go +++ b/sda/internal/schema/schema.go @@ -62,6 +62,10 @@ func getStructName(path string) interface{} { return new(IngestionUserError) case "ingestion-verification": return new(IngestionVerification) + case "file-sync": + return new(SyncDataset) + case "metadata-sync": + return new(SyncMetadata) default: return "" } @@ -155,3 +159,24 @@ type IngestionVerification struct { EncryptedChecksums []Checksums `json:"encrypted_checksums"` ReVerify bool `json:"re_verify"` } + +type SyncDataset struct { + DatasetID string `json:"dataset_id"` + DatasetFiles []DatasetFiles `json:"dataset_files"` + User string `json:"user"` +} + +type DatasetFiles struct { + FilePath string `json:"filepath"` + FileID string `json:"file_id"` + ShaSum string `json:"sha256"` +} + +type SyncMetadata struct { + DatasetID string `json:"dataset_id"` + Metadata interface{} `json:"metadata"` +} + +type Metadata struct { + Metadata interface{} +} diff --git a/sda/internal/schema/schema_test.go b/sda/internal/schema/schema_test.go index d3600f956..5ea00d7f4 100644 --- a/sda/internal/schema/schema_test.go +++ b/sda/internal/schema/schema_test.go @@ -416,3 +416,53 @@ func TestValidateJSONIsolatedIngestionCompletion(t *testing.T) { msg, _ = json.Marshal(badMsg) assert.Error(t, ValidateJSON(fmt.Sprintf("%s/isolated/ingestion-completion.json", schemaPath), msg)) } + +func TestValidateJSONBigpictureFileSync(t *testing.T) { + okMsg := SyncDataset{ + DatasetID: "cd532362-e06e-4460-8490-b9ce64b8d9e7", + DatasetFiles: []DatasetFiles{ + { + FilePath: "inbox/user/file1.c4gh", + FileID: "5fe7b660-afea-4c3a-88a9-3daabf055ebb", + ShaSum: "82E4e60e7beb3db2e06A00a079788F7d71f75b61a4b75f28c4c942703dabb6d6", + }, + { + FilePath: "inbox/user/file2.c4gh", + FileID: "ed6af454-d910-49e3-8cda-488a6f246e76", + ShaSum: "c967d96e56dec0f0cfee8f661846238b7f15771796ee1c345cae73cd812acc2b", + }, + }, + User: "test.user@example.com", + } + + msg, _ := json.Marshal(okMsg) + assert.Nil(t, ValidateJSON(fmt.Sprintf("%s/bigpicture/file-sync.json", schemaPath), msg)) + + badMsg := SyncDataset{ + DatasetID: 
"cd532362-e06e-4460-8490-b9ce64b8d9e7", + DatasetFiles: []DatasetFiles{{}}, + } + + msg, _ = json.Marshal(badMsg) + assert.Error(t, ValidateJSON(fmt.Sprintf("%s/bigpicture/file-sync.json", schemaPath), msg)) +} + +func TestValidateJSONBigpictureMetadtaSync(t *testing.T) { + okMsg := SyncMetadata{ + DatasetID: "cd532362-e06e-4460-8490-b9ce64b8d9e7", + Metadata: Metadata{ + Metadata: "foo", + }, + } + + msg, _ := json.Marshal(okMsg) + assert.Nil(t, ValidateJSON(fmt.Sprintf("%s/bigpicture/metadata-sync.json", schemaPath), msg)) + + badMsg := SyncMetadata{ + DatasetID: "cd532362-e06e-4460-8490-b9ce64b8d9e7", + Metadata: nil, + } + + msg, _ = json.Marshal(badMsg) + assert.Error(t, ValidateJSON(fmt.Sprintf("%s/bigpicture/metadata-sync.json", schemaPath), msg)) +} From 9d68a9b2c2e7ef117ad1b8d577f21620c62f6015 Mon Sep 17 00:00:00 2001 From: Joakim Bygdell Date: Wed, 25 Oct 2023 08:46:30 +0200 Subject: [PATCH 15/34] [sync-api][test] convert to run as suite. * Starts a real MQ and DB using `ory/dockertests` * Replaces mock SQL backend with real DB --- sda/cmd/syncapi/syncapi_test.go | 500 +++++++++++++++++++------------- 1 file changed, 305 insertions(+), 195 deletions(-) diff --git a/sda/cmd/syncapi/syncapi_test.go b/sda/cmd/syncapi/syncapi_test.go index 38024bde7..bb73d6295 100644 --- a/sda/cmd/syncapi/syncapi_test.go +++ b/sda/cmd/syncapi/syncapi_test.go @@ -2,241 +2,354 @@ package main import ( "bytes" + "context" + "crypto/sha256" + "database/sql" "fmt" "net/http" "net/http/httptest" + "os" + "path" + "runtime" + "strconv" "testing" "time" - "github.com/DATA-DOG/go-sqlmock" + "github.com/google/uuid" "github.com/gorilla/mux" "github.com/neicnordic/sensitive-data-archive/internal/broker" "github.com/neicnordic/sensitive-data-archive/internal/config" "github.com/neicnordic/sensitive-data-archive/internal/database" + "github.com/ory/dockertest/v3" + "github.com/ory/dockertest/v3/docker" "github.com/spf13/viper" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/suite" + + log "github.com/sirupsen/logrus" ) -type TestSuite struct { +var dbPort, mqPort int + +type SyncAPITest struct { suite.Suite } -func TestApiTestSuite(t *testing.T) { - suite.Run(t, new(TestSuite)) +func TestSyncAPITestSuite(t *testing.T) { + suite.Run(t, new(SyncAPITest)) } -func TestSetup(t *testing.T) { - viper.Set("log.level", "debug") - viper.Set("log.format", "json") +func TestMain(m *testing.M) { + if _, err := os.Stat("/.dockerenv"); err == nil { + m.Run() + } + _, b, _, _ := runtime.Caller(0) + rootDir := path.Join(path.Dir(b), "../../../") - viper.Set("broker.host", "test") - viper.Set("broker.port", 123) - viper.Set("broker.user", "test") - viper.Set("broker.password", "test") - viper.Set("broker.queue", "test") - viper.Set("broker.routingkey", "test") + // uses a sensible default on windows (tcp/http) and linux/osx (socket) + pool, err := dockertest.NewPool("") + if err != nil { + log.Fatalf("Could not construct pool: %s", err) + } - viper.Set("db.host", "test") - viper.Set("db.port", 123) - viper.Set("db.user", "test") - viper.Set("db.password", "test") - viper.Set("db.database", "test") + // uses pool to try to connect to Docker + err = pool.Client.Ping() + if err != nil { + log.Fatalf("Could not connect to Docker: %s", err) + } - viper.Set("schema.type", "isolated") + // pulls an image, creates a container based on it and runs it + postgres, err := pool.RunWithOptions(&dockertest.RunOptions{ + Repository: "postgres", + Tag: "15.2-alpine3.17", + Env: []string{ + "POSTGRES_PASSWORD=rootpasswd", + 
"POSTGRES_DB=sda", + }, + Mounts: []string{ + fmt.Sprintf("%s/postgresql/initdb.d:/docker-entrypoint-initdb.d", rootDir), + }, + }, func(config *docker.HostConfig) { + // set AutoRemove to true so that stopped container goes away by itself + config.AutoRemove = true + config.RestartPolicy = docker.RestartPolicy{ + Name: "no", + } + }) + if err != nil { + log.Fatalf("Could not start resource: %s", err) + } - conf := config.Config{} - conf.API.Host = "localhost" - conf.API.Port = 8080 - server := setup(&conf) + dbHostAndPort := postgres.GetHostPort("5432/tcp") + dbPort, _ = strconv.Atoi(postgres.GetPort("5432/tcp")) + databaseURL := fmt.Sprintf("postgres://postgres:rootpasswd@%s/sda?sslmode=disable", dbHostAndPort) - assert.Equal(t, "localhost:8080", server.Addr) -} + pool.MaxWait = 120 * time.Second + if err = pool.Retry(func() error { + db, err := sql.Open("postgres", databaseURL) + if err != nil { + log.Println(err) -func (suite *TestSuite) SetupTest() { - viper.Set("log.level", "debug") -} + return err + } -func TestShutdown(t *testing.T) { - Conf = &config.Config{} - Conf.Broker = broker.MQConf{ - Host: "localhost", - Port: 5672, - User: "test", - Password: "test", - RoutingKey: "test", - Exchange: "sda", - Ssl: false, - Vhost: "/test", + query := "SELECT MAX(version) FROM sda.dbschema_version" + var dbVersion int + + return db.QueryRow(query).Scan(&dbVersion) + }); err != nil { + log.Fatalf("Could not connect to postgres: %s", err) } - Conf.API.MQ, err = broker.NewMQ(Conf.Broker) + + // pulls an image, creates a container based on it and runs it + rabbitmq, err := pool.RunWithOptions(&dockertest.RunOptions{ + Repository: "rabbitmq", + Tag: "3-management-alpine", + }, func(config *docker.HostConfig) { + // set AutoRemove to true so that stopped container goes away by itself + config.AutoRemove = true + config.RestartPolicy = docker.RestartPolicy{ + Name: "no", + } + }) if err != nil { - t.Skip("skip TestShutdown since broker not present") + log.Fatalf("Could not start resource: %s", err) } - assert.NoError(t, err) - - Conf.Database = database.DBConf{ - Host: "localhost", - Port: 5432, - User: "lega_in", - Password: "lega_in", - Database: "lega", - SslMode: "disable", - } - Conf.API.DB, err = database.NewSDAdb(Conf.Database) + + mqPort, _ = strconv.Atoi(rabbitmq.GetPort("5672/tcp")) + mqHostAndPort := rabbitmq.GetHostPort("15672/tcp") + + client := http.Client{Timeout: 5 * time.Second} + req, err := http.NewRequest(http.MethodGet, "http://"+mqHostAndPort+"/api/users", http.NoBody) if err != nil { - t.Skip("skip TestShutdown since broker not present") + log.Fatal(err) + } + req.SetBasicAuth("guest", "guest") + + // exponential backoff-retry, because the application in the container might not be ready to accept connections yet + if err := pool.Retry(func() error { + res, err := client.Do(req) + if err != nil { + return err + } + res.Body.Close() + + return nil + }); err != nil { + if err := pool.Purge(rabbitmq); err != nil { + log.Fatalf("Could not purge resource: %s", err) + } + log.Fatalf("Could not connect to rabbitmq: %s", err) + } + + log.Println("starting tests") + _ = m.Run() + + log.Println("tests completed") + if err := pool.Purge(postgres); err != nil { + log.Fatalf("Could not purge resource: %s", err) + } + if err := pool.Purge(rabbitmq); err != nil { + log.Fatalf("Could not purge resource: %s", err) } - assert.NoError(t, err) + pvo := docker.PruneVolumesOptions{Filters: make(map[string][]string), Context: context.Background()} + if _, err := pool.Client.PruneVolumes(pvo); err 
!= nil { + log.Fatalf("could not prune docker volumes: %s", err.Error()) + } +} + +func (suite *SyncAPITest) SetupTest() { + viper.Set("log.level", "debug") + viper.Set("log.format", "json") + + viper.Set("broker.host", "127.0.0.1") + viper.Set("broker.port", mqPort) + viper.Set("broker.user", "guest") + viper.Set("broker.password", "guest") + viper.Set("broker.queue", "mappings") + viper.Set("broker.exchange", "amq.direct") + viper.Set("broker.vhost", "/") + + viper.Set("db.host", "127.0.0.1") + viper.Set("db.port", dbPort) + viper.Set("db.user", "postgres") + viper.Set("db.password", "rootpasswd") + viper.Set("db.database", "sda") + viper.Set("db.sslmode", "disable") + + viper.Set("schema.type", "isolated") + + viper.Set("sync.api.user", "dummy") + viper.Set("sync.api.password", "admin") +} + +func (suite *SyncAPITest) TestSetup() { + suite.SetupTest() + + conf, err := config.NewConfig("sync-api") + assert.NoError(suite.T(), err, "Failed to setup config") + assert.Equal(suite.T(), mqPort, conf.Broker.Port) + assert.Equal(suite.T(), mqPort, viper.GetInt("broker.port")) + + server := setup(conf) + assert.Equal(suite.T(), "0.0.0.0:8080", server.Addr) +} + +func (suite *SyncAPITest) TestShutdown() { + suite.SetupTest() + Conf, err = config.NewConfig("sync-api") + assert.NoError(suite.T(), err) + + Conf.API.MQ, err = broker.NewMQ(Conf.Broker) + assert.NoError(suite.T(), err) + assert.Equal(suite.T(), "127.0.0.1", Conf.API.MQ.Conf.Host) + + Conf.API.DB, err = database.NewSDAdb(Conf.Database) + assert.NoError(suite.T(), err) + assert.Equal(suite.T(), "127.0.0.1", Conf.API.DB.Config.Host) // make sure all conections are alive - assert.Equal(t, false, Conf.API.MQ.Channel.IsClosed()) - assert.Equal(t, false, Conf.API.MQ.Connection.IsClosed()) - assert.Equal(t, nil, Conf.API.DB.DB.Ping()) + assert.Equal(suite.T(), false, Conf.API.MQ.Channel.IsClosed()) + assert.Equal(suite.T(), false, Conf.API.MQ.Connection.IsClosed()) + assert.Equal(suite.T(), nil, Conf.API.DB.DB.Ping()) shutdown() - assert.Equal(t, true, Conf.API.MQ.Channel.IsClosed()) - assert.Equal(t, true, Conf.API.MQ.Connection.IsClosed()) - assert.Equal(t, "sql: database is closed", Conf.API.DB.DB.Ping().Error()) + assert.Equal(suite.T(), true, Conf.API.MQ.Channel.IsClosed()) + assert.Equal(suite.T(), true, Conf.API.MQ.Connection.IsClosed()) + assert.Equal(suite.T(), "sql: database is closed", Conf.API.DB.DB.Ping().Error()) } -func TestReadinessResponse(t *testing.T) { +func (suite *SyncAPITest) TestReadinessResponse() { + suite.SetupTest() + Conf, err = config.NewConfig("sync-api") + assert.NoError(suite.T(), err) + + Conf.API.MQ, err = broker.NewMQ(Conf.Broker) + assert.NoError(suite.T(), err) + + Conf.API.DB, err = database.NewSDAdb(Conf.Database) + assert.NoError(suite.T(), err) + r := mux.NewRouter() r.HandleFunc("/ready", readinessResponse) ts := httptest.NewServer(r) defer ts.Close() - Conf = &config.Config{} - Conf.Broker = broker.MQConf{ - Host: "localhost", - Port: 5672, - User: "test", - Password: "test", - RoutingKey: "test", - Exchange: "sda", - Ssl: false, - Vhost: "/test", - } - Conf.API.MQ, err = broker.NewMQ(Conf.Broker) - if err != nil { - t.Skip("skip TestShutdown since broker not present") - } - assert.NoError(t, err) - - Conf.Database = database.DBConf{ - Host: "localhost", - Port: 5432, - User: "lega_in", - Password: "lega_in", - Database: "lega", - SslMode: "disable", - } - Conf.API.DB, err = database.NewSDAdb(Conf.Database) - assert.NoError(t, err) - res, err := http.Get(ts.URL + "/ready") - assert.NoError(t, err) - 
assert.Equal(t, http.StatusOK, res.StatusCode) + assert.NoError(suite.T(), err) + assert.Equal(suite.T(), http.StatusOK, res.StatusCode) defer res.Body.Close() // close the connection to force a reconneciton Conf.API.MQ.Connection.Close() res, err = http.Get(ts.URL + "/ready") - assert.NoError(t, err) - assert.Equal(t, http.StatusServiceUnavailable, res.StatusCode) + assert.NoError(suite.T(), err) + assert.Equal(suite.T(), http.StatusServiceUnavailable, res.StatusCode) defer res.Body.Close() // reconnect should be fast so now this should pass res, err = http.Get(ts.URL + "/ready") - assert.NoError(t, err) - assert.Equal(t, http.StatusOK, res.StatusCode) + assert.NoError(suite.T(), err) + assert.Equal(suite.T(), http.StatusOK, res.StatusCode) defer res.Body.Close() // close the channel to force a reconneciton Conf.API.MQ.Channel.Close() res, err = http.Get(ts.URL + "/ready") - assert.NoError(t, err) - assert.Equal(t, http.StatusServiceUnavailable, res.StatusCode) + assert.NoError(suite.T(), err) + assert.Equal(suite.T(), http.StatusServiceUnavailable, res.StatusCode) defer res.Body.Close() // reconnect should be fast so now this should pass res, err = http.Get(ts.URL + "/ready") - assert.NoError(t, err) - assert.Equal(t, http.StatusOK, res.StatusCode) + assert.NoError(suite.T(), err) + assert.Equal(suite.T(), http.StatusOK, res.StatusCode) defer res.Body.Close() // close DB connection to force a reconnection Conf.API.DB.Close() res, err = http.Get(ts.URL + "/ready") - assert.NoError(t, err) - assert.Equal(t, http.StatusServiceUnavailable, res.StatusCode) + assert.NoError(suite.T(), err) + assert.Equal(suite.T(), http.StatusServiceUnavailable, res.StatusCode) defer res.Body.Close() // reconnect should be fast so now this should pass res, err = http.Get(ts.URL + "/ready") - assert.NoError(t, err) - assert.Equal(t, http.StatusOK, res.StatusCode) + assert.NoError(suite.T(), err) + assert.Equal(suite.T(), http.StatusOK, res.StatusCode) defer res.Body.Close() } -func TestDatabasePingCheck(t *testing.T) { - database := database.SDAdb{} - assert.Error(t, checkDB(&database, 1*time.Second), "nil DB should fail") +func (suite *SyncAPITest) TestDatabasePingCheck() { + suite.SetupTest() + Conf, err = config.NewConfig("sync-api") + assert.NoError(suite.T(), err) - database.DB, _, err = sqlmock.New() - assert.NoError(t, err) - assert.NoError(t, checkDB(&database, 1*time.Second), "ping should succeed") + noDB := database.SDAdb{} + assert.Error(suite.T(), checkDB(&noDB, 1*time.Second), "nil DB should fail") + + Conf.API.DB, err = database.NewSDAdb(Conf.Database) + assert.NoError(suite.T(), err) + assert.NoError(suite.T(), checkDB(Conf.API.DB, 1*time.Second), "ping should succeed") } -func TestDatasetRoute(t *testing.T) { - Conf = &config.Config{} - Conf.Broker = broker.MQConf{ - Host: "localhost", - Port: 5672, - User: "test", - Password: "test", - RoutingKey: "test", - Exchange: "sda", - Ssl: false, - Vhost: "/test", - SchemasPath: "file://../../schemas/isolated/", - } +func (suite *SyncAPITest) TestDatasetRoute() { + suite.SetupTest() + Conf, err = config.NewConfig("sync-api") + assert.NoError(suite.T(), err) + Conf.API.MQ, err = broker.NewMQ(Conf.Broker) - if err != nil { - t.Skip("skip TestShutdown since broker not present") - } - Conf.Database = database.DBConf{ - Host: "localhost", - Port: 5432, - User: "postgres", - Password: "postgres", - Database: "lega", - SslMode: "disable", - } + assert.NoError(suite.T(), err) + Conf.API.DB, err = database.NewSDAdb(Conf.Database) - if err != nil { - t.Skip("skip 
TestShutdown since broker not present") - } + assert.NoError(suite.T(), err) + + Conf.Broker.SchemasPath = "../../schemas/isolated/" r := mux.NewRouter() r.HandleFunc("/dataset", dataset) ts := httptest.NewServer(r) defer ts.Close() - goodJSON := []byte(`{"user":"test.user@example.com", "dataset_id": "cd532362-e06e-4460-8490-b9ce64b8d9e6", "dataset_files": [{"filepath": "inbox/user/file1.c4gh","file_id": "5fe7b660-afea-4c3a-88a9-3daabf055ebb", "sha256": "82E4e60e7beb3db2e06A00a079788F7d71f75b61a4b75f28c4c942703dabb6d6"}, {"filepath": "inbox/user/file2.c4gh","file_id": "ed6af454-d910-49e3-8cda-488a6f246e76", "sha256": "c967d96e56dec0f0cfee8f661846238b7f15771796ee1c345cae73cd812acc2b"}]}`) + goodJSON := []byte(`{"user": "test.user@example.com", "dataset_id": "cd532362-e06e-4460-8490-b9ce64b8d9e6", "dataset_files": [{"filepath": "inbox/user/file-1.c4gh","file_id": "5fe7b660-afea-4c3a-88a9-3daabf055ebb", "sha256": "82E4e60e7beb3db2e06A00a079788F7d71f75b61a4b75f28c4c942703dabb6d6"}, {"filepath": "inbox/user/file2.c4gh","file_id": "ed6af454-d910-49e3-8cda-488a6f246e76", "sha256": "c967d96e56dec0f0cfee8f661846238b7f15771796ee1c345cae73cd812acc2b"}]}`) good, err := http.Post(ts.URL+"/dataset", "application/json", bytes.NewBuffer(goodJSON)) - assert.NoError(t, err) - assert.Equal(t, http.StatusOK, good.StatusCode) + assert.NoError(suite.T(), err) + assert.Equal(suite.T(), http.StatusOK, good.StatusCode) defer good.Body.Close() badJSON := []byte(`{"dataset_id": "cd532362-e06e-4460-8490-b9ce64b8d9e7", "dataset_files": []}`) bad, err := http.Post(ts.URL+"/dataset", "application/json", bytes.NewBuffer(badJSON)) - assert.NoError(t, err) - assert.Equal(t, http.StatusBadRequest, bad.StatusCode) + assert.NoError(suite.T(), err) + assert.Equal(suite.T(), http.StatusBadRequest, bad.StatusCode) defer bad.Body.Close() + + fileID, err := Conf.API.DB.RegisterFile("/user/file-1.c4gh", "test.user@example.com") + assert.NoError(suite.T(), err, "failed to register file in database") + err = Conf.API.DB.SetAccessionID("5fe7b660-afea-4c3a-88a9-3daabf055ebb", fileID) + assert.NoError(suite.T(), err, "got (%v) when getting file archive information", err) + + fileID, err = Conf.API.DB.RegisterFile("/user/file-2.c4gh", "test.user@example.com") + assert.NoError(suite.T(), err, "failed to register file in database") + err = Conf.API.DB.SetAccessionID("ed6af454-d910-49e3-8cda-488a6f246e76", fileID) + assert.NoError(suite.T(), err, "got (%v) when getting file archive information", err) + + accessions := []string{"5fe7b660-afea-4c3a-88a9-3daabf055ebb", "ed6af454-d910-49e3-8cda-488a6f246e76"} + diSet := map[string][]string{ + "cd532362-e06e-4460-8490-b9ce64b8d9e6": accessions[0:1], + } + + for di, acs := range diSet { + err := Conf.API.DB.MapFilesToDataset(di, acs) + assert.NoError(suite.T(), err, "failed to map file to dataset") + } + + exists, err := http.Post(ts.URL+"/dataset", "application/json", bytes.NewBuffer(goodJSON)) + assert.NoError(suite.T(), err) + assert.Equal(suite.T(), http.StatusAlreadyReported, exists.StatusCode) + defer good.Body.Close() } -func TestMetadataRoute(t *testing.T) { +func (suite *SyncAPITest) TestMetadataRoute() { Conf = &config.Config{} Conf.Broker.SchemasPath = "../../schemas" @@ -247,63 +360,60 @@ func TestMetadataRoute(t *testing.T) { goodJSON := []byte(`{"dataset_id": "cd532362-e06e-4460-8490-b9ce64b8d9e7", "metadata": {"dummy":"data"}}`) good, err := http.Post(ts.URL+"/metadata", "application/json", bytes.NewBuffer(goodJSON)) - assert.NoError(t, err) - assert.Equal(t, http.StatusOK, 
good.StatusCode) + assert.NoError(suite.T(), err) + assert.Equal(suite.T(), http.StatusOK, good.StatusCode) defer good.Body.Close() badJSON := []byte(`{"dataset_id": "phail", "metadata": {}}`) bad, err := http.Post(ts.URL+"/metadata", "application/json", bytes.NewBuffer(badJSON)) - assert.NoError(t, err) - assert.Equal(t, http.StatusBadRequest, bad.StatusCode) + assert.NoError(suite.T(), err) + assert.Equal(suite.T(), http.StatusBadRequest, bad.StatusCode) defer bad.Body.Close() } -func TestBuildJSON(t *testing.T) { - Conf = &config.Config{} - Conf.Database = database.DBConf{ - Host: "localhost", - Port: 5432, - User: "postgres", - Password: "postgres", - Database: "lega", - SslMode: "disable", - } +func (suite *SyncAPITest) TestBuildJSON() { + suite.SetupTest() + Conf, err = config.NewConfig("sync-api") + assert.NoError(suite.T(), err) + + Conf.API.MQ, err = broker.NewMQ(Conf.Broker) + assert.NoError(suite.T(), err) + Conf.API.DB, err = database.NewSDAdb(Conf.Database) - if err != nil { - t.Skip("skip TestShutdown since broker not present") - } + assert.NoError(suite.T(), err) + + m := []byte(`{"type":"mapping", "dataset_id": "cd532362-e06e-4461-8490-b9ce64b8d9e7", "accession_ids": ["ed6af454-d910-49e3-8cda-488a6f246e67"]}`) + _, err := buildSyncDatasetJSON(m) + assert.EqualError(suite.T(), err, "sql: no rows in result set") + + fileID, err := Conf.API.DB.RegisterFile("dummy.user/test/file1.c4gh", "dummy.user") + assert.NoError(suite.T(), err, "failed to register file in database") + err = Conf.API.DB.SetAccessionID("ed6af454-d910-49e3-8cda-488a6f246e67", fileID) + assert.NoError(suite.T(), err) + + checksum := sha256.New() + fileInfo := database.FileInfo{Checksum: sha256.New(), Size: 1234, Path: "dummy.user/test/file1.c4gh", DecryptedChecksum: checksum, DecryptedSize: 999} + corrID := uuid.New().String() + + err = Conf.API.DB.SetArchived(fileInfo, fileID, corrID) + assert.NoError(suite.T(), err, "failed to mark file as Archived") + err = Conf.API.DB.MarkCompleted(fileInfo, fileID, corrID) + assert.NoError(suite.T(), err, "failed to mark file as Verified") + + accessions := []string{"ed6af454-d910-49e3-8cda-488a6f246e67"} + assert.NoError(suite.T(), Conf.API.DB.MapFilesToDataset("cd532362-e06e-4461-8490-b9ce64b8d9e7", accessions), "failed to map file to dataset") - db := Conf.API.DB.DB - - var fileID int64 - const insert = "INSERT INTO local_ega.main(submission_file_path, submission_user, decrypted_file_checksum, status, submission_file_extension) VALUES($1, $2, $3, 'READY', 'c4gh') RETURNING id;" - const accession = "UPDATE local_ega.files SET stable_id = $1 WHERE inbox_path = $2;" - const mapping = "INSERT INTO local_ega_ebi.filedataset(file_id, dataset_stable_id) VALUES ($1, 'cd532362-e06e-4460-8490-b9ce64b8d9e7');" - - err := db.QueryRow(insert, "dummy.user/test/file1.c4gh", "dummy.user", "c967d96e56dec0f0cfee8f661846238b7f15771796ee1c345cae73cd812acc2b").Scan(&fileID) - assert.NoError(t, err) - err = db.QueryRow(accession, "ed6af454-d910-49e3-8cda-488a6f246e76", "dummy.user/test/file1.c4gh").Err() - assert.NoError(t, err) - err = db.QueryRow(mapping, fileID).Err() - assert.NoError(t, err) - - err = db.QueryRow(insert, "dummy.user/test/file2.c4gh", "dummy.user", "82E4e60e7beb3db2e06A00a079788F7d71f75b61a4b75f28c4c942703dabb6d6").Scan(&fileID) - assert.NoError(t, err) - err = db.QueryRow(accession, "5fe7b660-afea-4c3a-88a9-3daabf055ebb", "dummy.user/test/file2.c4gh").Err() - assert.NoError(t, err) - err = db.QueryRow(mapping, fileID).Err() - assert.NoError(t, err) - - m := 
[]byte(`{"type":"mapping", "dataset_id": "cd532362-e06e-4460-8490-b9ce64b8d9e7", "accession_ids": ["5fe7b660-afea-4c3a-88a9-3daabf055ebb", "ed6af454-d910-49e3-8cda-488a6f246e76"]}`) - _, err = buildSyncDatasetJSON(m) - assert.NoError(t, err) + jsonData, err := buildSyncDatasetJSON(m) + assert.NoError(suite.T(), err) + dataset := []byte(`{"dataset_id":"cd532362-e06e-4461-8490-b9ce64b8d9e7","dataset_files":[{"filepath":"dummy.user/test/file1.c4gh","file_id":"ed6af454-d910-49e3-8cda-488a6f246e67","sha256":"e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"}],"user":"dummy.user"}`) + assert.Equal(suite.T(), dataset, jsonData) } -func TestSendPOST(t *testing.T) { +func (suite *SyncAPITest) TestSendPOST() { r := http.NewServeMux() r.HandleFunc("/dataset", func(w http.ResponseWriter, r *http.Request) { _, err = w.Write([]byte(fmt.Sprint(http.StatusOK))) - assert.NoError(t, err) + assert.NoError(suite.T(), err) }) ts := httptest.NewServer(r) defer ts.Close() @@ -316,10 +426,10 @@ func TestSendPOST(t *testing.T) { } syncJSON := []byte(`{"user":"test.user@example.com", "dataset_id": "cd532362-e06e-4460-8490-b9ce64b8d9e7", "dataset_files": [{"filepath": "inbox/user/file1.c4gh","file_id": "5fe7b660-afea-4c3a-88a9-3daabf055ebb", "sha256": "82E4e60e7beb3db2e06A00a079788F7d71f75b61a4b75f28c4c942703dabb6d6"}, {"filepath": "inbox/user/file2.c4gh","file_id": "ed6af454-d910-49e3-8cda-488a6f246e76", "sha256": "c967d96e56dec0f0cfee8f661846238b7f15771796ee1c345cae73cd812acc2b"}]}`) err := sendPOST(syncJSON) - assert.NoError(t, err) + assert.NoError(suite.T(), err) } -func TestCreateHostURL(t *testing.T) { +func (suite *SyncAPITest) TestCreateHostURL() { Conf = &config.Config{} Conf.SyncAPI = config.SyncAPIConf{ RemoteHost: "http://localhost", @@ -327,11 +437,11 @@ func TestCreateHostURL(t *testing.T) { } s, err := createHostURL(Conf.SyncAPI.RemoteHost, Conf.SyncAPI.RemotePort) - assert.NoError(t, err) - assert.Equal(t, "http://localhost:443/dataset", s) + assert.NoError(suite.T(), err) + assert.Equal(suite.T(), "http://localhost:443/dataset", s) } -func TestBasicAuth(t *testing.T) { +func (suite *SyncAPITest) TestBasicAuth() { Conf = &config.Config{} Conf.Broker.SchemasPath = "../../schemas" Conf.SyncAPI = config.SyncAPIConf{ @@ -346,16 +456,16 @@ func TestBasicAuth(t *testing.T) { goodJSON := []byte(`{"dataset_id": "cd532362-e06e-4460-8490-b9ce64b8d9e7", "metadata": {"dummy":"data"}}`) req, err := http.NewRequest("POST", ts.URL+"/metadata", bytes.NewBuffer(goodJSON)) - assert.NoError(t, err) + assert.NoError(suite.T(), err) req.SetBasicAuth(Conf.SyncAPI.APIUser, Conf.SyncAPI.APIPassword) good, err := ts.Client().Do(req) - assert.NoError(t, err) - assert.Equal(t, http.StatusOK, good.StatusCode) + assert.NoError(suite.T(), err) + assert.Equal(suite.T(), http.StatusOK, good.StatusCode) defer good.Body.Close() req.SetBasicAuth(Conf.SyncAPI.APIUser, "wrongpass") bad, err := ts.Client().Do(req) - assert.NoError(t, err) - assert.Equal(t, http.StatusUnauthorized, bad.StatusCode) + assert.NoError(suite.T(), err) + assert.Equal(suite.T(), http.StatusUnauthorized, bad.StatusCode) defer bad.Body.Close() } From a09a80a5c9f8cb34f3342759e3a83f5a612b1fd0 Mon Sep 17 00:00:00 2001 From: Joakim Bygdell Date: Tue, 7 Nov 2023 09:37:38 +0100 Subject: [PATCH 16/34] [sync] switch to read messages from the `mapping_stream` --- .github/integration/sda-s3-integration.yml | 2 +- .../sda/{35_sync_test.sh => 45_sync_test.sh} | 0 sda/cmd/sync/sync.go | 166 ++++++++---------- sda/internal/database/db_functions.go | 29 +++ 
sda/internal/database/db_functions_test.go | 21 +++ 5 files changed, 126 insertions(+), 92 deletions(-) rename .github/integration/tests/sda/{35_sync_test.sh => 45_sync_test.sh} (100%) diff --git a/.github/integration/sda-s3-integration.yml b/.github/integration/sda-s3-integration.yml index 83c4494c9..e47f529c9 100644 --- a/.github/integration/sda-s3-integration.yml +++ b/.github/integration/sda-s3-integration.yml @@ -224,7 +224,7 @@ services: environment: - BROKER_PASSWORD=sync - BROKER_USER=sync - - BROKER_QUEUE=completed_stream + - BROKER_QUEUE=mapping_stream - DB_PASSWORD=sync - DB_USER=sync restart: always diff --git a/.github/integration/tests/sda/35_sync_test.sh b/.github/integration/tests/sda/45_sync_test.sh similarity index 100% rename from .github/integration/tests/sda/35_sync_test.sh rename to .github/integration/tests/sda/45_sync_test.sh diff --git a/sda/cmd/sync/sync.go b/sda/cmd/sync/sync.go index ae6b3973d..cf08b9ddc 100644 --- a/sda/cmd/sync/sync.go +++ b/sda/cmd/sync/sync.go @@ -17,6 +17,12 @@ import ( "golang.org/x/crypto/chacha20poly1305" ) +var ( + key, publicKey *[32]byte + db *database.SDAdb + archive, syncDestination storage.Backend +) + func main() { forever := make(chan bool) conf, err := config.NewConfig("sync") @@ -27,21 +33,19 @@ func main() { if err != nil { log.Fatal(err) } - db, err := database.NewSDAdb(conf.Database) + db, err = database.NewSDAdb(conf.Database) if err != nil { log.Fatal(err) } - syncDestination, err := storage.NewBackend(conf.Sync) + syncDestination, err = storage.NewBackend(conf.Sync) if err != nil { log.Fatal(err) } - archive, err := storage.NewBackend(conf.Archive) + archive, err = storage.NewBackend(conf.Archive) if err != nil { log.Fatal(err) } - var key *[32]byte - var publicKey *[32]byte key, err = config.GetC4GHKey() if err != nil { log.Fatal(err) @@ -69,7 +73,7 @@ func main() { }() log.Info("Starting sync service") - var message schema.IngestionCompletion + var message schema.DatasetMapping go func() { messages, err := mq.GetMessages(conf.Broker.Queue) @@ -81,14 +85,14 @@ func main() { delivered.CorrelationId, delivered.Body) - err := schema.ValidateJSON(fmt.Sprintf("%s/ingestion-completion.json", conf.Broker.SchemasPath), delivered.Body) + err := schema.ValidateJSON(fmt.Sprintf("%s/dataset-mapping.json", conf.Broker.SchemasPath), delivered.Body) if err != nil { - log.Errorf("validation of incoming message (ingestion-completion) failed, reason: (%s)", err.Error()) + log.Errorf("validation of incoming message (dataset-mapping) failed, reason: (%s)", err.Error()) // Send the message to an error queue so it can be analyzed. 
infoErrorMessage := broker.InfoError{ - Error: "Message validation failed", + Error: "Message validation failed in sync service", Reason: err.Error(), - OriginalMessage: message, + OriginalMessage: string(delivered.Body), } body, _ := json.Marshal(infoErrorMessage) @@ -104,103 +108,83 @@ func main() { // we unmarshal the message in the validation step so this is safe to do _ = json.Unmarshal(delivered.Body, &message) - filePath, fileSize, err := db.GetArchived(delivered.CorrelationId) - if err != nil { - log.Errorf("GetArchived failed, reason: %s", err.Error()) - if err := delivered.Nack(false, false); err != nil { - log.Errorf("failed to nack following GetArchived error message") - } - - continue - } - - diskFileSize, err := archive.GetFileSize(filePath) - if err != nil { - log.Errorf("failed to get size info for archived file %s, reason: (%s)", filePath, err.Error()) - if err := delivered.Nack(false, false); err != nil { - log.Errorf("failed to nack following GetFileSize error message") - } - continue - } + for _, aID := range message.AccessionIDs { + if err := syncFiles(aID); err != nil { + log.Errorf("failed to sync archived file %s, reason: (%s)", aID, err.Error()) + if err := delivered.Nack(false, false); err != nil { + log.Errorf("failed to nack following GetFileSize error message") + } - if diskFileSize != int64(fileSize) { - log.Errorf("File size in archive does not match database for archive file %s - archive size is %d, database has %d ", - filePath, diskFileSize, fileSize, - ) - if err := delivered.Nack(false, false); err != nil { - log.Errorf("failed to nack following GetFileSize error message") + continue } - - continue } - file, err := archive.NewFileReader(filePath) - if err != nil { - log.Errorf("failed to open archived file %s, reason: (%s)", filePath, err.Error()) - if err := delivered.Nack(false, false); err != nil { - log.Errorf("failed to nack following open archived file error message") - } - - continue + if err := delivered.Ack(false); err != nil { + log.Errorf("failed to Ack message, reason: (%s)", err.Error()) } + } + }() - dest, err := syncDestination.NewFileWriter(message.FilePath) - if err != nil { - log.Errorf("failed to open destination file %s for writing, reason: (%s)", filePath, err.Error()) - if err := delivered.Nack(false, false); err != nil { - log.Errorf("failed to nack following open destination file error message") - } + <-forever +} - continue - } +func syncFiles(stableID string) error { + log.Debugf("syncing file %s", stableID) + inboxPath, err := db.GetInboxPath(stableID) + if err != nil { + return fmt.Errorf("failed to get inbox path for file with stable ID: %s", stableID) + } - header, err := db.GetHeaderForStableID(message.AccessionID) - if err != nil { - log.Errorf("GetHeaderForStableID %s failed, reason: (%s)", message.AccessionID, err.Error()) - } + archivePath, err := db.GetArchivePath(stableID) + if err != nil { + return fmt.Errorf("failed to get archive path for file with stable ID: %s", stableID) + } - log.Debug("Reencrypt header") - pubkeyList := [][chacha20poly1305.KeySize]byte{} - pubkeyList = append(pubkeyList, *publicKey) - newHeader, err := headers.ReEncryptHeader(header, *key, pubkeyList) - if err != nil { - log.Errorf("failed to reencrypt the header, reason(%s)", err.Error()) - if err := delivered.Nack(false, false); err != nil { - log.Errorf("failed to nack following reencrypt header error message") - } - } + fileSize, err := archive.GetFileSize(archivePath) + if err != nil { + return err + } - _, err = dest.Write(newHeader) - 
if err != nil { - log.Errorf("failed to write the header to destination %s, reason(%s)", message.FilePath, err.Error()) - } + file, err := archive.NewFileReader(archivePath) + if err != nil { + return err + } + defer file.Close() - // Copy the file and check is sizes match - copiedSize, err := io.Copy(dest, file) - if err != nil || copiedSize != int64(fileSize) { - switch { - case err != nil: - log.Errorf("failed to copy the file, reason (%s)", err.Error()) - case copiedSize != int64(fileSize): - log.Errorf("copied size does not match file size") - } + dest, err := syncDestination.NewFileWriter(inboxPath) + if err != nil { + return err + } + defer dest.Close() - if err := delivered.Nack(false, false); err != nil { - log.Errorf("failed to nack following reencrypt header error message") - } + header, err := db.GetHeaderForStableID(stableID) + if err != nil { + return err + } - continue - } + pubkeyList := [][chacha20poly1305.KeySize]byte{} + pubkeyList = append(pubkeyList, *publicKey) + newHeader, err := headers.ReEncryptHeader(header, *key, pubkeyList) + if err != nil { + return err + } - file.Close() - dest.Close() + _, err = dest.Write(newHeader) + if err != nil { + return err + } - if err := delivered.Ack(false); err != nil { - log.Errorf("failed to Ack message, reason: (%s)", err.Error()) - } + // Copy the file and check is sizes match + copiedSize, err := io.Copy(dest, file) + if err != nil || copiedSize != int64(fileSize) { + switch { + case copiedSize != int64(fileSize): + return fmt.Errorf("copied size does not match file size") + default: + return err } - }() + } - <-forever + return nil } diff --git a/sda/internal/database/db_functions.go b/sda/internal/database/db_functions.go index f74216809..540714267 100644 --- a/sda/internal/database/db_functions.go +++ b/sda/internal/database/db_functions.go @@ -593,3 +593,32 @@ func (dbs *SDAdb) checkIfDatasetExists(datasetID string) (bool, error) { return yesNo, nil } + +// GetInboxPath retrieves the submission_fie_path for a file with a given accessionID +func (dbs *SDAdb) GetArchivePath(stableID string) (string, error) { + var ( + err error + count int + archivePath string + ) + + for count == 0 || (err != nil && count < RetryTimes) { + archivePath, err = dbs.getArchivePath(stableID) + count++ + } + + return archivePath, err +} +func (dbs *SDAdb) getArchivePath(stableID string) (string, error) { + dbs.checkAndReconnectIfNeeded() + db := dbs.DB + const getFileID = "SELECT archive_file_path from sda.files WHERE stable_id = $1;" + + var archivePath string + err := db.QueryRow(getFileID, stableID).Scan(&archivePath) + if err != nil { + return "", err + } + + return archivePath, nil +} diff --git a/sda/internal/database/db_functions_test.go b/sda/internal/database/db_functions_test.go index 54656d28e..16dff65b9 100644 --- a/sda/internal/database/db_functions_test.go +++ b/sda/internal/database/db_functions_test.go @@ -445,3 +445,24 @@ func (suite *DatabaseTests) TestCheckIfDatasetExists() { assert.NoError(suite.T(), err, "check if dataset exists failed") assert.Equal(suite.T(), ok, false) } + +func (suite *DatabaseTests) TestGetArchivePath() { + db, err := NewSDAdb(suite.dbConf) + assert.NoError(suite.T(), err, "got (%v) when creating new connection", err) + + fileID, err := db.RegisterFile("/testuser/TestGetArchivePath-001.c4gh", "testuser") + assert.NoError(suite.T(), err, "failed to register file in database") + + checksum := sha256.New() + corrID := uuid.New().String() + fileInfo := FileInfo{sha256.New(), 1234, corrID, checksum, 999} + 
err = db.SetArchived(fileInfo, fileID, corrID) + assert.NoError(suite.T(), err, "failed to mark file as Archived") + + err = db.SetAccessionID("acession-0001", fileID) + assert.NoError(suite.T(), err, "got (%v) when getting file archive information", err) + + path, err := db.getArchivePath("acession-0001") + assert.NoError(suite.T(), err, "getArchivePath failed") + assert.Equal(suite.T(), path, corrID) +} From 56129a70f5bb36b85240e325e0f7f83e498fe875 Mon Sep 17 00:00:00 2001 From: Joakim Bygdell Date: Tue, 7 Nov 2023 12:05:40 +0100 Subject: [PATCH 17/34] [sync] send POST after files have been synced --- sda/cmd/sync/sync.go | 83 +++++++++++- sda/cmd/sync/sync_test.go | 204 +++++++++++++++++++++++++++-- sda/internal/config/config.go | 35 +++-- sda/internal/config/config_test.go | 4 +- 4 files changed, 299 insertions(+), 27 deletions(-) diff --git a/sda/cmd/sync/sync.go b/sda/cmd/sync/sync.go index cf08b9ddc..48d2b9ee8 100644 --- a/sda/cmd/sync/sync.go +++ b/sda/cmd/sync/sync.go @@ -3,9 +3,12 @@ package main import ( + "bytes" "encoding/json" "fmt" "io" + "net/http" + "net/url" "github.com/neicnordic/crypt4gh/model/headers" "github.com/neicnordic/sensitive-data-archive/internal/broker" @@ -18,14 +21,16 @@ import ( ) var ( + err error key, publicKey *[32]byte db *database.SDAdb + conf *config.Config archive, syncDestination storage.Backend ) func main() { forever := make(chan bool) - conf, err := config.NewConfig("sync") + conf, err = config.NewConfig("sync") if err != nil { log.Fatal(err) } @@ -37,7 +42,8 @@ func main() { if err != nil { log.Fatal(err) } - syncDestination, err = storage.NewBackend(conf.Sync) + + syncDestination, err = storage.NewBackend(conf.Sync.Destination) if err != nil { log.Fatal(err) } @@ -120,6 +126,15 @@ func main() { } } + log.Infoln("buildSyncDatasetJSON") + blob, err := buildSyncDatasetJSON(delivered.Body) + if err != nil { + log.Errorf("failed to build SyncDatasetJSON, Reason: %v", err) + } + if err := sendPOST(blob); err != nil { + log.Errorf("failed to send POST, Reason: %v", err) + } + if err := delivered.Ack(false); err != nil { log.Errorf("failed to Ack message, reason: (%s)", err.Error()) } @@ -188,3 +203,67 @@ func syncFiles(stableID string) error { return nil } + +func buildSyncDatasetJSON(b []byte) ([]byte, error) { + var msg schema.DatasetMapping + _ = json.Unmarshal(b, &msg) + + var dataset = schema.SyncDataset{ + DatasetID: msg.DatasetID, + } + + for _, ID := range msg.AccessionIDs { + data, err := db.GetSyncData(ID) + if err != nil { + return nil, err + } + datasetFile := schema.DatasetFiles{ + FilePath: data.FilePath, + FileID: ID, + ShaSum: data.Checksum, + } + dataset.DatasetFiles = append(dataset.DatasetFiles, datasetFile) + dataset.User = data.User + } + + json, err := json.Marshal(dataset) + if err != nil { + return nil, err + } + + return json, nil +} + +func sendPOST(payload []byte) error { + client := &http.Client{} + URL, err := createHostURL(conf.Sync.RemoteHost, conf.Sync.RemotePort) + if err != nil { + return err + } + + req, err := http.NewRequest("POST", URL, bytes.NewBuffer(payload)) + if err != nil { + return err + } + req.SetBasicAuth(conf.Sync.RemoteUser, conf.Sync.RemotePassword) + resp, err := client.Do(req) + if err != nil || resp.StatusCode != http.StatusOK { + return err + } + defer resp.Body.Close() + + return nil +} + +func createHostURL(host string, port int) (string, error) { + url, err := url.ParseRequestURI(host) + if err != nil { + return "", err + } + if url.Port() == "" && port != 0 { + url.Host += fmt.Sprintf(":%d", 
port) + } + url.Path = "/dataset" + + return url.String(), nil +} diff --git a/sda/cmd/sync/sync_test.go b/sda/cmd/sync/sync_test.go index 2fa0bf9ee..59dd8b1d4 100644 --- a/sda/cmd/sync/sync_test.go +++ b/sda/cmd/sync/sync_test.go @@ -1,34 +1,210 @@ package main import ( + "context" + "crypto/sha256" + "database/sql" + "fmt" + "net/http" + "net/http/httptest" + "os" + "path" + "runtime" + "strconv" "testing" + "time" + "github.com/google/uuid" + "github.com/neicnordic/sensitive-data-archive/internal/config" + "github.com/neicnordic/sensitive-data-archive/internal/database" + "github.com/ory/dockertest/v3" + "github.com/ory/dockertest/v3/docker" + log "github.com/sirupsen/logrus" "github.com/spf13/viper" + "github.com/stretchr/testify/assert" "github.com/stretchr/testify/suite" ) -type TestSuite struct { +var dbPort int + +type SyncTest struct { suite.Suite } -func TestBackupTestSuite(t *testing.T) { - suite.Run(t, new(TestSuite)) +func TestSyncTestSuite(t *testing.T) { + suite.Run(t, new(SyncTest)) +} + +func TestMain(m *testing.M) { + if _, err := os.Stat("/.dockerenv"); err == nil { + m.Run() + } + _, b, _, _ := runtime.Caller(0) + rootDir := path.Join(path.Dir(b), "../../../") + + // uses a sensible default on windows (tcp/http) and linux/osx (socket) + pool, err := dockertest.NewPool("") + if err != nil { + log.Fatalf("Could not construct pool: %s", err) + } + + // uses pool to try to connect to Docker + err = pool.Client.Ping() + if err != nil { + log.Fatalf("Could not connect to Docker: %s", err) + } + + // pulls an image, creates a container based on it and runs it + postgres, err := pool.RunWithOptions(&dockertest.RunOptions{ + Repository: "postgres", + Tag: "15.2-alpine3.17", + Env: []string{ + "POSTGRES_PASSWORD=rootpasswd", + "POSTGRES_DB=sda", + }, + Mounts: []string{ + fmt.Sprintf("%s/postgresql/initdb.d:/docker-entrypoint-initdb.d", rootDir), + }, + }, func(config *docker.HostConfig) { + // set AutoRemove to true so that stopped container goes away by itself + config.AutoRemove = true + config.RestartPolicy = docker.RestartPolicy{ + Name: "no", + } + }) + if err != nil { + log.Fatalf("Could not start resource: %s", err) + } + + dbHostAndPort := postgres.GetHostPort("5432/tcp") + dbPort, _ = strconv.Atoi(postgres.GetPort("5432/tcp")) + databaseURL := fmt.Sprintf("postgres://postgres:rootpasswd@%s/sda?sslmode=disable", dbHostAndPort) + + pool.MaxWait = 120 * time.Second + if err = pool.Retry(func() error { + db, err := sql.Open("postgres", databaseURL) + if err != nil { + log.Println(err) + + return err + } + + query := "SELECT MAX(version) FROM sda.dbschema_version" + var dbVersion int + + return db.QueryRow(query).Scan(&dbVersion) + }); err != nil { + log.Fatalf("Could not connect to postgres: %s", err) + } + + log.Println("starting tests") + _ = m.Run() + + log.Println("tests completed") + if err := pool.Purge(postgres); err != nil { + log.Fatalf("Could not purge resource: %s", err) + } + pvo := docker.PruneVolumesOptions{Filters: make(map[string][]string), Context: context.Background()} + if _, err := pool.Client.PruneVolumes(pvo); err != nil { + log.Fatalf("could not prune docker volumes: %s", err.Error()) + } } -func (suite *TestSuite) SetupTest() { +func (suite *SyncTest) SetupTest() { viper.Set("log.level", "debug") + viper.Set("archive.type", "posix") viper.Set("archive.location", "../../dev_utils") - viper.Set("backup.location", "../../dev_utils") + viper.Set("sync.destination.type", "posix") + viper.Set("sync.destination.location", "../../dev_utils") - 
viper.Set("broker.host", "test") + viper.Set("broker.host", "localhost") viper.Set("broker.port", 123) - viper.Set("broker.user", "test") - viper.Set("broker.password", "test") + viper.Set("broker.user", "guest") + viper.Set("broker.password", "guest") viper.Set("broker.queue", "test") - viper.Set("broker.routingkey", "test") - viper.Set("db.host", "test") - viper.Set("db.port", 123) - viper.Set("db.user", "test") - viper.Set("db.password", "test") - viper.Set("db.database", "test") + viper.Set("db.host", "localhost") + viper.Set("db.port", dbPort) + viper.Set("db.user", "postgres") + viper.Set("db.password", "rootpasswd") + viper.Set("db.database", "sda") + viper.Set("db.sslmode", "disable") + + key := "-----BEGIN CRYPT4GH ENCRYPTED PRIVATE KEY-----\nYzRnaC12MQAGc2NyeXB0ABQAAAAAEna8op+BzhTVrqtO5Rx7OgARY2hhY2hhMjBfcG9seTEzMDUAPMx2Gbtxdva0M2B0tb205DJT9RzZmvy/9ZQGDx9zjlObj11JCqg57z60F0KhJW+j/fzWL57leTEcIffRTA==\n-----END CRYPT4GH ENCRYPTED PRIVATE KEY-----" + keyPath, _ := os.MkdirTemp("", "key") + err := os.WriteFile(keyPath+"/c4gh.key", []byte(key), 0600) + assert.NoError(suite.T(), err) + + viper.Set("c4gh.filepath", keyPath+"/c4gh.key") + viper.Set("c4gh.passphrase", "test") + + pubKey := "-----BEGIN CRYPT4GH PUBLIC KEY-----\nuQO46R56f/Jx0YJjBAkZa2J6n72r6HW/JPMS4tfepBs=\n-----END CRYPT4GH PUBLIC KEY-----" + err = os.WriteFile(keyPath+"/c4gh.pub", []byte(pubKey), 0600) + assert.NoError(suite.T(), err) + viper.Set("c4gh.syncPubKeyPath", keyPath+"/c4gh.pub") + + defer os.RemoveAll(keyPath) +} + +func (suite *SyncTest) TestBuildSyncDatasetJSON() { + suite.SetupTest() + Conf, err := config.NewConfig("sync") + assert.NoError(suite.T(), err) + + db, err = database.NewSDAdb(Conf.Database) + assert.NoError(suite.T(), err) + + fileID, err := db.RegisterFile("dummy.user/test/file1.c4gh", "dummy.user") + assert.NoError(suite.T(), err, "failed to register file in database") + err = db.SetAccessionID("ed6af454-d910-49e3-8cda-488a6f246e67", fileID) + assert.NoError(suite.T(), err) + + checksum := sha256.New() + fileInfo := database.FileInfo{Checksum: sha256.New(), Size: 1234, Path: "dummy.user/test/file1.c4gh", DecryptedChecksum: checksum, DecryptedSize: 999} + corrID := uuid.New().String() + + err = db.SetArchived(fileInfo, fileID, corrID) + assert.NoError(suite.T(), err, "failed to mark file as Archived") + err = db.MarkCompleted(fileInfo, fileID, corrID) + assert.NoError(suite.T(), err, "failed to mark file as Verified") + + accessions := []string{"ed6af454-d910-49e3-8cda-488a6f246e67"} + assert.NoError(suite.T(), db.MapFilesToDataset("cd532362-e06e-4461-8490-b9ce64b8d9e7", accessions), "failed to map file to dataset") + + m := []byte(`{"type":"mapping", "dataset_id": "cd532362-e06e-4461-8490-b9ce64b8d9e7", "accession_ids": ["ed6af454-d910-49e3-8cda-488a6f246e67"]}`) + jsonData, err := buildSyncDatasetJSON(m) + assert.NoError(suite.T(), err) + dataset := []byte(`{"dataset_id":"cd532362-e06e-4461-8490-b9ce64b8d9e7","dataset_files":[{"filepath":"dummy.user/test/file1.c4gh","file_id":"ed6af454-d910-49e3-8cda-488a6f246e67","sha256":"e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"}],"user":"dummy.user"}`) + assert.Equal(suite.T(), dataset, jsonData) +} + +func (suite *SyncTest) TestCreateHostURL() { + conf = &config.Config{} + conf.Sync = config.Sync{ + RemoteHost: "http://localhost", + RemotePort: 443, + } + + s, err := createHostURL(conf.Sync.RemoteHost, conf.Sync.RemotePort) + assert.NoError(suite.T(), err) + assert.Equal(suite.T(), "http://localhost:443/dataset", s) +} + +func 
(suite *SyncTest) TestSendPOST() { + r := http.NewServeMux() + r.HandleFunc("/dataset", func(w http.ResponseWriter, r *http.Request) { + _, err = w.Write([]byte(fmt.Sprint(http.StatusOK))) + assert.NoError(suite.T(), err) + }) + ts := httptest.NewServer(r) + defer ts.Close() + + conf = &config.Config{} + conf.Sync = config.Sync{ + RemoteHost: ts.URL, + RemoteUser: "test", + RemotePassword: "test", + } + syncJSON := []byte(`{"user":"test.user@example.com", "dataset_id": "cd532362-e06e-4460-8490-b9ce64b8d9e7", "dataset_files": [{"filepath": "inbox/user/file1.c4gh","file_id": "5fe7b660-afea-4c3a-88a9-3daabf055ebb", "sha256": "82E4e60e7beb3db2e06A00a079788F7d71f75b61a4b75f28c4c942703dabb6d6"}, {"filepath": "inbox/user/file2.c4gh","file_id": "ed6af454-d910-49e3-8cda-488a6f246e76", "sha256": "c967d96e56dec0f0cfee8f661846238b7f15771796ee1c345cae73cd812acc2b"}]}`) + err := sendPOST(syncJSON) + assert.NoError(suite.T(), err) } diff --git a/sda/internal/config/config.go b/sda/internal/config/config.go index 71e528971..e539ee727 100644 --- a/sda/internal/config/config.go +++ b/sda/internal/config/config.go @@ -46,10 +46,18 @@ type Config struct { API APIConf Notify SMTPConf Orchestrator OrchestratorConf - Sync storage.Conf + Sync Sync SyncAPI SyncAPIConf } +type Sync struct { + Destination storage.Conf + RemoteHost string + RemotePassword string + RemotePort int + RemoteUser string +} + type SyncAPIConf struct { APIPassword string APIUser string @@ -270,6 +278,7 @@ func NewConfig(app string) (*Config, error) { viper.Set("inbox.type", S3) case "sync": requiredConfVars = []string{ + "archive.type", "broker.host", "broker.port", "broker.user", @@ -283,6 +292,7 @@ func NewConfig(app string) (*Config, error) { "db.user", "db.password", "db.database", + "sync.destination.type", } switch viper.GetString("archive.type") { @@ -478,7 +488,7 @@ func NewConfig(app string) (*Config, error) { } c.configArchive() - c.configSyncDestination() + c.configSync() c.configSchemas() case "sync-api": @@ -816,18 +826,25 @@ func (c *Config) configSMTP() { } // configSync provides configuration for the sync destination storage -func (c *Config) configSyncDestination() { +func (c *Config) configSync() { switch viper.GetString("sync.destination.type") { case S3: - c.Sync.Type = S3 - c.Sync.S3 = configS3Storage("sync.destination") + c.Sync.Destination.Type = S3 + c.Sync.Destination.S3 = configS3Storage("sync.destination") case SFTP: - c.Sync.Type = SFTP - c.Sync.SFTP = configSFTP("sync.destination") + c.Sync.Destination.Type = SFTP + c.Sync.Destination.SFTP = configSFTP("sync.destination") case POSIX: - c.Sync.Type = POSIX - c.Sync.Posix.Location = viper.GetString("sync.destination.location") + c.Sync.Destination.Type = POSIX + c.Sync.Destination.Posix.Location = viper.GetString("sync.destination.location") + } + + c.Sync.RemoteHost = viper.GetString("sync.remote.host") + if viper.IsSet("sync.remote.port") { + c.Sync.RemotePort = viper.GetInt("sync.remote.port") } + c.Sync.RemotePassword = viper.GetString("sync.remote.pass") + c.Sync.RemoteUser = viper.GetString("sync.remote.user") } // configSync provides configuration for the outgoing sync settings diff --git a/sda/internal/config/config_test.go b/sda/internal/config/config_test.go index fd8bb5fed..bd5cb1a2a 100644 --- a/sda/internal/config/config_test.go +++ b/sda/internal/config/config_test.go @@ -301,8 +301,8 @@ func (suite *ConfigTestSuite) TestSyncConfig() { assert.NotNil(suite.T(), config.Archive.Posix) assert.Equal(suite.T(), "test", config.Archive.Posix.Location) 
assert.NotNil(suite.T(), config.Sync) - assert.NotNil(suite.T(), config.Sync.Posix) - assert.Equal(suite.T(), "test", config.Sync.Posix.Location) + assert.NotNil(suite.T(), config.Sync.Destination.Posix) + assert.Equal(suite.T(), "test", config.Sync.Destination.Posix.Location) } func (suite *ConfigTestSuite) TestGetC4GHPublicKey() { pubKey := "-----BEGIN CRYPT4GH PUBLIC KEY-----\nuQO46R56f/Jx0YJjBAkZa2J6n72r6HW/JPMS4tfepBs=\n-----END CRYPT4GH PUBLIC KEY-----" From 3d5c27f233cbd3641badc887ba70e5a8aef27d7f Mon Sep 17 00:00:00 2001 From: Joakim Bygdell Date: Tue, 7 Nov 2023 15:12:55 +0100 Subject: [PATCH 18/34] [sync] Add dataset ID validation By checking the prefix of the dataset ID in the message we can skip processing datasets that originate from another center. --- sda/cmd/sync/sync.go | 29 ++++++++++++++++++++++++++--- sda/cmd/sync/sync_test.go | 16 ++++++++++++++-- sda/internal/config/config.go | 6 ++++-- sda/internal/config/config_test.go | 1 + 4 files changed, 45 insertions(+), 7 deletions(-) diff --git a/sda/cmd/sync/sync.go b/sda/cmd/sync/sync.go index 48d2b9ee8..4f120a018 100644 --- a/sda/cmd/sync/sync.go +++ b/sda/cmd/sync/sync.go @@ -9,6 +9,8 @@ import ( "io" "net/http" "net/url" + "strings" + "time" "github.com/neicnordic/crypt4gh/model/headers" "github.com/neicnordic/sensitive-data-archive/internal/broker" @@ -115,6 +117,15 @@ func main() { // we unmarshal the message in the validation step so this is safe to do _ = json.Unmarshal(delivered.Body, &message) + if !strings.HasPrefix(message.DatasetID, conf.Sync.CenterPrefix) { + log.Infoln("external dataset") + if err := delivered.Ack(false); err != nil { + log.Errorf("failed to Ack message, reason: (%s)", err.Error()) + } + + continue + } + for _, aID := range message.AccessionIDs { if err := syncFiles(aID); err != nil { log.Errorf("failed to sync archived file %s, reason: (%s)", aID, err.Error()) @@ -133,6 +144,11 @@ func main() { } if err := sendPOST(blob); err != nil { log.Errorf("failed to send POST, Reason: %v", err) + if err := delivered.Nack(false, false); err != nil { + log.Errorf("failed to nack following sendPOST error message") + } + + continue } if err := delivered.Ack(false); err != nil { @@ -235,21 +251,28 @@ func buildSyncDatasetJSON(b []byte) ([]byte, error) { } func sendPOST(payload []byte) error { - client := &http.Client{} + client := &http.Client{ + Timeout: 30 * time.Second, + } + URL, err := createHostURL(conf.Sync.RemoteHost, conf.Sync.RemotePort) if err != nil { return err } - req, err := http.NewRequest("POST", URL, bytes.NewBuffer(payload)) + req, err := http.NewRequest(http.MethodPost, URL, bytes.NewBuffer(payload)) if err != nil { return err } + req.Header.Set("Content-Type", "application/json") req.SetBasicAuth(conf.Sync.RemoteUser, conf.Sync.RemotePassword) resp, err := client.Do(req) - if err != nil || resp.StatusCode != http.StatusOK { + if err != nil { return err } + if resp.StatusCode != http.StatusOK { + return fmt.Errorf("%s", resp.Status) + } defer resp.Body.Close() return nil diff --git a/sda/cmd/sync/sync_test.go b/sda/cmd/sync/sync_test.go index 59dd8b1d4..f53208e92 100644 --- a/sda/cmd/sync/sync_test.go +++ b/sda/cmd/sync/sync_test.go @@ -128,6 +128,7 @@ func (suite *SyncTest) SetupTest() { viper.Set("db.password", "rootpasswd") viper.Set("db.database", "sda") viper.Set("db.sslmode", "disable") + viper.Set("centerPrefix", "prefix") key := "-----BEGIN CRYPT4GH ENCRYPTED PRIVATE 
KEY-----\nYzRnaC12MQAGc2NyeXB0ABQAAAAAEna8op+BzhTVrqtO5Rx7OgARY2hhY2hhMjBfcG9seTEzMDUAPMx2Gbtxdva0M2B0tb205DJT9RzZmvy/9ZQGDx9zjlObj11JCqg57z60F0KhJW+j/fzWL57leTEcIffRTA==\n-----END CRYPT4GH ENCRYPTED PRIVATE KEY-----" keyPath, _ := os.MkdirTemp("", "key") @@ -192,8 +193,12 @@ func (suite *SyncTest) TestCreateHostURL() { func (suite *SyncTest) TestSendPOST() { r := http.NewServeMux() r.HandleFunc("/dataset", func(w http.ResponseWriter, r *http.Request) { - _, err = w.Write([]byte(fmt.Sprint(http.StatusOK))) - assert.NoError(suite.T(), err) + username, _, ok := r.BasicAuth() + if ok && username == "foo" { + w.WriteHeader(http.StatusUnauthorized) + } + + w.WriteHeader(http.StatusOK) }) ts := httptest.NewServer(r) defer ts.Close() @@ -207,4 +212,11 @@ func (suite *SyncTest) TestSendPOST() { syncJSON := []byte(`{"user":"test.user@example.com", "dataset_id": "cd532362-e06e-4460-8490-b9ce64b8d9e7", "dataset_files": [{"filepath": "inbox/user/file1.c4gh","file_id": "5fe7b660-afea-4c3a-88a9-3daabf055ebb", "sha256": "82E4e60e7beb3db2e06A00a079788F7d71f75b61a4b75f28c4c942703dabb6d6"}, {"filepath": "inbox/user/file2.c4gh","file_id": "ed6af454-d910-49e3-8cda-488a6f246e76", "sha256": "c967d96e56dec0f0cfee8f661846238b7f15771796ee1c345cae73cd812acc2b"}]}`) err := sendPOST(syncJSON) assert.NoError(suite.T(), err) + + conf.Sync = config.Sync{ + RemoteHost: ts.URL, + RemoteUser: "foo", + RemotePassword: "bar", + } + assert.EqualError(suite.T(), sendPOST(syncJSON), "401 Unauthorized") } diff --git a/sda/internal/config/config.go b/sda/internal/config/config.go index e539ee727..61ce5ade5 100644 --- a/sda/internal/config/config.go +++ b/sda/internal/config/config.go @@ -51,6 +51,7 @@ type Config struct { } type Sync struct { + CenterPrefix string Destination storage.Conf RemoteHost string RemotePassword string @@ -278,12 +279,12 @@ func NewConfig(app string) (*Config, error) { viper.Set("inbox.type", S3) case "sync": requiredConfVars = []string{ - "archive.type", "broker.host", "broker.port", "broker.user", "broker.password", "broker.queue", + "centerPrefix", "c4gh.filepath", "c4gh.passphrase", "c4gh.syncPubKeyPath", @@ -292,7 +293,6 @@ func NewConfig(app string) (*Config, error) { "db.user", "db.password", "db.database", - "sync.destination.type", } switch viper.GetString("archive.type") { @@ -845,6 +845,8 @@ func (c *Config) configSync() { } c.Sync.RemotePassword = viper.GetString("sync.remote.pass") c.Sync.RemoteUser = viper.GetString("sync.remote.user") + + c.Sync.CenterPrefix = viper.GetString("sync.centerPrefix") } // configSync provides configuration for the outgoing sync settings diff --git a/sda/internal/config/config_test.go b/sda/internal/config/config_test.go index bd5cb1a2a..bb82b322e 100644 --- a/sda/internal/config/config_test.go +++ b/sda/internal/config/config_test.go @@ -275,6 +275,7 @@ func (suite *ConfigTestSuite) TestSyncConfig() { assert.Error(suite.T(), err) assert.Nil(suite.T(), config) + viper.Set("centerPrefix", "prefix") viper.Set("archive.type", "posix") viper.Set("archive.location", "test") viper.Set("sync.destination.type", "posix") From 4356d4910b4745909abb5b4ec033620921f6b174 Mon Sep 17 00:00:00 2001 From: Joakim Bygdell Date: Tue, 7 Nov 2023 15:21:40 +0100 Subject: [PATCH 19/34] [sync-api] remove everything database related --- sda/cmd/syncapi/syncapi.go | 144 +---------------------- sda/cmd/syncapi/syncapi_test.go | 199 +------------------------------- sda/internal/config/config.go | 11 +- 3 files changed, 5 insertions(+), 349 deletions(-) diff --git a/sda/cmd/syncapi/syncapi.go 
b/sda/cmd/syncapi/syncapi.go index 2dd6f7bf5..97d516427 100644 --- a/sda/cmd/syncapi/syncapi.go +++ b/sda/cmd/syncapi/syncapi.go @@ -1,8 +1,6 @@ package main import ( - "bytes" - "context" "crypto/sha256" "crypto/subtle" "crypto/tls" @@ -10,7 +8,6 @@ import ( "fmt" "io" "net/http" - "net/url" "os" "os/signal" "syscall" @@ -19,7 +16,6 @@ import ( "github.com/gorilla/mux" "github.com/neicnordic/sensitive-data-archive/internal/broker" "github.com/neicnordic/sensitive-data-archive/internal/config" - "github.com/neicnordic/sensitive-data-archive/internal/database" "github.com/neicnordic/sensitive-data-archive/internal/schema" log "github.com/sirupsen/logrus" @@ -49,10 +45,6 @@ func main() { if err != nil { log.Fatal(err) } - Conf.API.DB, err = database.NewSDAdb(Conf.Database) - if err != nil { - log.Fatal(err) - } sigc := make(chan os.Signal, 5) signal.Notify(sigc, syscall.SIGHUP, syscall.SIGINT, syscall.SIGTERM, syscall.SIGQUIT) @@ -62,51 +54,6 @@ func main() { os.Exit(0) }() - go func() { - forever := make(chan bool) - messages, err := Conf.API.MQ.GetMessages(Conf.Broker.Queue) - if err != nil { - log.Fatal(err) - } - for m := range messages { - log.Debugf("Received a message (corr-id: %s, message: %s)", m.CorrelationId, m.Body) - err := schema.ValidateJSON(fmt.Sprintf("%s/dataset-mapping.json", Conf.Broker.SchemasPath), m.Body) - if err != nil { - log.Errorf("validation of incoming message (dataset-mapping) failed, reason: (%s)", err.Error()) - // Send the message to an error queue so it can be analyzed. - infoErrorMessage := broker.InfoError{ - Error: "Message validation failed", - Reason: err.Error(), - OriginalMessage: m, - } - - body, _ := json.Marshal(infoErrorMessage) - if err := Conf.API.MQ.SendMessage(m.CorrelationId, Conf.Broker.Exchange, "error", body); err != nil { - log.Errorf("failed to publish message, reason: (%s)", err.Error()) - } - if err := m.Ack(false); err != nil { - log.Errorf("failed to Ack message, reason: (%s)", err.Error()) - } - - continue - } - - log.Infoln("buildSyncDatasetJSON") - blob, err := buildSyncDatasetJSON(m.Body) - if err != nil { - log.Errorf("failed to build SyncDatasetJSON, Reason: %v", err) - } - if err := sendPOST(blob); err != nil { - log.Errorf("failed to send POST, Reason: %v", err) - } - if err := m.Ack(false); err != nil { - log.Errorf("Failed to ack message: reason %v", err) - } - - } - <-forever - }() - srv := setup(Conf) if Conf.API.ServerCert != "" && Conf.API.ServerKey != "" { @@ -157,10 +104,9 @@ func setup(config *config.Config) *http.Server { func shutdown() { defer Conf.API.MQ.Channel.Close() defer Conf.API.MQ.Connection.Close() - defer Conf.API.DB.Close() } -func readinessResponse(w http.ResponseWriter, r *http.Request) { +func readinessResponse(w http.ResponseWriter, _ *http.Request) { statusCocde := http.StatusOK if Conf.API.MQ.Connection.IsClosed() { @@ -184,25 +130,9 @@ func readinessResponse(w http.ResponseWriter, r *http.Request) { } } - if DBRes := checkDB(Conf.API.DB, 5*time.Millisecond); DBRes != nil { - log.Debugf("DB connection error :%v", DBRes) - Conf.API.DB.Connect() - statusCocde = http.StatusServiceUnavailable - } - w.WriteHeader(statusCocde) } -func checkDB(database *database.SDAdb, timeout time.Duration) error { - ctx, cancel := context.WithTimeout(context.Background(), timeout) - defer cancel() - if database.DB == nil { - return fmt.Errorf("database is nil") - } - - return database.DB.PingContext(ctx) -} - func dataset(w http.ResponseWriter, r *http.Request) { b, err := io.ReadAll(r.Body) if err != nil { @@ -234,14 
+164,6 @@ func parseDatasetMessage(msg []byte) error { blob := syncDataset{} _ = json.Unmarshal(msg, &blob) - ds, err := Conf.API.DB.CheckIfDatasetExists(blob.DatasetID) - if err != nil { - return fmt.Errorf("Failed to check dataset existance: Reason %v", err) - } - if ds { - return fmt.Errorf("Dataset exists") - } - var accessionIDs []string for _, files := range blob.DatasetFiles { ingest := schema.IngestionTrigger{ @@ -327,70 +249,6 @@ func metadata(w http.ResponseWriter, r *http.Request) { w.WriteHeader(http.StatusOK) } -func buildSyncDatasetJSON(b []byte) ([]byte, error) { - var msg schema.DatasetMapping - _ = json.Unmarshal(b, &msg) - - var dataset = syncDataset{ - DatasetID: msg.DatasetID, - } - - for _, ID := range msg.AccessionIDs { - data, err := Conf.API.DB.GetSyncData(ID) - if err != nil { - return nil, err - } - datasetFile := datasetFiles{ - FilePath: data.FilePath, - FileID: ID, - ShaSum: data.Checksum, - } - dataset.DatasetFiles = append(dataset.DatasetFiles, datasetFile) - dataset.User = data.User - } - - json, err := json.Marshal(dataset) - if err != nil { - return nil, err - } - - return json, nil -} - -func sendPOST(payload []byte) error { - client := &http.Client{} - URL, err := createHostURL(Conf.SyncAPI.RemoteHost, Conf.SyncAPI.RemotePort) - if err != nil { - return err - } - - req, err := http.NewRequest("POST", URL, bytes.NewBuffer(payload)) - if err != nil { - return err - } - req.SetBasicAuth(Conf.SyncAPI.RemoteUser, Conf.SyncAPI.RemotePassword) - resp, err := client.Do(req) - if err != nil || resp.StatusCode != http.StatusOK { - return err - } - defer resp.Body.Close() - - return nil -} - -func createHostURL(host string, port int) (string, error) { - url, err := url.ParseRequestURI(host) - if err != nil { - return "", err - } - if url.Port() == "" && port != 0 { - url.Host += fmt.Sprintf(":%d", port) - } - url.Path = "/dataset" - - return url.String(), nil -} - func basicAuth(auth http.HandlerFunc) http.HandlerFunc { return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { username, password, ok := r.BasicAuth() diff --git a/sda/cmd/syncapi/syncapi_test.go b/sda/cmd/syncapi/syncapi_test.go index bb73d6295..c80134f92 100644 --- a/sda/cmd/syncapi/syncapi_test.go +++ b/sda/cmd/syncapi/syncapi_test.go @@ -3,23 +3,16 @@ package main import ( "bytes" "context" - "crypto/sha256" - "database/sql" - "fmt" "net/http" "net/http/httptest" "os" - "path" - "runtime" "strconv" "testing" "time" - "github.com/google/uuid" "github.com/gorilla/mux" "github.com/neicnordic/sensitive-data-archive/internal/broker" "github.com/neicnordic/sensitive-data-archive/internal/config" - "github.com/neicnordic/sensitive-data-archive/internal/database" "github.com/ory/dockertest/v3" "github.com/ory/dockertest/v3/docker" "github.com/spf13/viper" @@ -29,7 +22,7 @@ import ( log "github.com/sirupsen/logrus" ) -var dbPort, mqPort int +var mqPort int type SyncAPITest struct { suite.Suite @@ -43,8 +36,6 @@ func TestMain(m *testing.M) { if _, err := os.Stat("/.dockerenv"); err == nil { m.Run() } - _, b, _, _ := runtime.Caller(0) - rootDir := path.Join(path.Dir(b), "../../../") // uses a sensible default on windows (tcp/http) and linux/osx (socket) pool, err := dockertest.NewPool("") @@ -58,49 +49,6 @@ func TestMain(m *testing.M) { log.Fatalf("Could not connect to Docker: %s", err) } - // pulls an image, creates a container based on it and runs it - postgres, err := pool.RunWithOptions(&dockertest.RunOptions{ - Repository: "postgres", - Tag: "15.2-alpine3.17", - Env: []string{ - 
"POSTGRES_PASSWORD=rootpasswd", - "POSTGRES_DB=sda", - }, - Mounts: []string{ - fmt.Sprintf("%s/postgresql/initdb.d:/docker-entrypoint-initdb.d", rootDir), - }, - }, func(config *docker.HostConfig) { - // set AutoRemove to true so that stopped container goes away by itself - config.AutoRemove = true - config.RestartPolicy = docker.RestartPolicy{ - Name: "no", - } - }) - if err != nil { - log.Fatalf("Could not start resource: %s", err) - } - - dbHostAndPort := postgres.GetHostPort("5432/tcp") - dbPort, _ = strconv.Atoi(postgres.GetPort("5432/tcp")) - databaseURL := fmt.Sprintf("postgres://postgres:rootpasswd@%s/sda?sslmode=disable", dbHostAndPort) - - pool.MaxWait = 120 * time.Second - if err = pool.Retry(func() error { - db, err := sql.Open("postgres", databaseURL) - if err != nil { - log.Println(err) - - return err - } - - query := "SELECT MAX(version) FROM sda.dbschema_version" - var dbVersion int - - return db.QueryRow(query).Scan(&dbVersion) - }); err != nil { - log.Fatalf("Could not connect to postgres: %s", err) - } - // pulls an image, creates a container based on it and runs it rabbitmq, err := pool.RunWithOptions(&dockertest.RunOptions{ Repository: "rabbitmq", @@ -146,9 +94,6 @@ func TestMain(m *testing.M) { _ = m.Run() log.Println("tests completed") - if err := pool.Purge(postgres); err != nil { - log.Fatalf("Could not purge resource: %s", err) - } if err := pool.Purge(rabbitmq); err != nil { log.Fatalf("Could not purge resource: %s", err) } @@ -162,6 +107,8 @@ func (suite *SyncAPITest) SetupTest() { viper.Set("log.level", "debug") viper.Set("log.format", "json") + viper.Set("bpPrefix", "PFX") + viper.Set("broker.host", "127.0.0.1") viper.Set("broker.port", mqPort) viper.Set("broker.user", "guest") @@ -170,13 +117,6 @@ func (suite *SyncAPITest) SetupTest() { viper.Set("broker.exchange", "amq.direct") viper.Set("broker.vhost", "/") - viper.Set("db.host", "127.0.0.1") - viper.Set("db.port", dbPort) - viper.Set("db.user", "postgres") - viper.Set("db.password", "rootpasswd") - viper.Set("db.database", "sda") - viper.Set("db.sslmode", "disable") - viper.Set("schema.type", "isolated") viper.Set("sync.api.user", "dummy") @@ -204,19 +144,13 @@ func (suite *SyncAPITest) TestShutdown() { assert.NoError(suite.T(), err) assert.Equal(suite.T(), "127.0.0.1", Conf.API.MQ.Conf.Host) - Conf.API.DB, err = database.NewSDAdb(Conf.Database) - assert.NoError(suite.T(), err) - assert.Equal(suite.T(), "127.0.0.1", Conf.API.DB.Config.Host) - // make sure all conections are alive assert.Equal(suite.T(), false, Conf.API.MQ.Channel.IsClosed()) assert.Equal(suite.T(), false, Conf.API.MQ.Connection.IsClosed()) - assert.Equal(suite.T(), nil, Conf.API.DB.DB.Ping()) shutdown() assert.Equal(suite.T(), true, Conf.API.MQ.Channel.IsClosed()) assert.Equal(suite.T(), true, Conf.API.MQ.Connection.IsClosed()) - assert.Equal(suite.T(), "sql: database is closed", Conf.API.DB.DB.Ping().Error()) } func (suite *SyncAPITest) TestReadinessResponse() { @@ -227,9 +161,6 @@ func (suite *SyncAPITest) TestReadinessResponse() { Conf.API.MQ, err = broker.NewMQ(Conf.Broker) assert.NoError(suite.T(), err) - Conf.API.DB, err = database.NewSDAdb(Conf.Database) - assert.NoError(suite.T(), err) - r := mux.NewRouter() r.HandleFunc("/ready", readinessResponse) ts := httptest.NewServer(r) @@ -265,32 +196,6 @@ func (suite *SyncAPITest) TestReadinessResponse() { assert.NoError(suite.T(), err) assert.Equal(suite.T(), http.StatusOK, res.StatusCode) defer res.Body.Close() - - // close DB connection to force a reconnection - Conf.API.DB.Close() - 
res, err = http.Get(ts.URL + "/ready") - assert.NoError(suite.T(), err) - assert.Equal(suite.T(), http.StatusServiceUnavailable, res.StatusCode) - defer res.Body.Close() - - // reconnect should be fast so now this should pass - res, err = http.Get(ts.URL + "/ready") - assert.NoError(suite.T(), err) - assert.Equal(suite.T(), http.StatusOK, res.StatusCode) - defer res.Body.Close() -} - -func (suite *SyncAPITest) TestDatabasePingCheck() { - suite.SetupTest() - Conf, err = config.NewConfig("sync-api") - assert.NoError(suite.T(), err) - - noDB := database.SDAdb{} - assert.Error(suite.T(), checkDB(&noDB, 1*time.Second), "nil DB should fail") - - Conf.API.DB, err = database.NewSDAdb(Conf.Database) - assert.NoError(suite.T(), err) - assert.NoError(suite.T(), checkDB(Conf.API.DB, 1*time.Second), "ping should succeed") } func (suite *SyncAPITest) TestDatasetRoute() { @@ -301,9 +206,6 @@ func (suite *SyncAPITest) TestDatasetRoute() { Conf.API.MQ, err = broker.NewMQ(Conf.Broker) assert.NoError(suite.T(), err) - Conf.API.DB, err = database.NewSDAdb(Conf.Database) - assert.NoError(suite.T(), err) - Conf.Broker.SchemasPath = "../../schemas/isolated/" r := mux.NewRouter() @@ -322,31 +224,6 @@ func (suite *SyncAPITest) TestDatasetRoute() { assert.NoError(suite.T(), err) assert.Equal(suite.T(), http.StatusBadRequest, bad.StatusCode) defer bad.Body.Close() - - fileID, err := Conf.API.DB.RegisterFile("/user/file-1.c4gh", "test.user@example.com") - assert.NoError(suite.T(), err, "failed to register file in database") - err = Conf.API.DB.SetAccessionID("5fe7b660-afea-4c3a-88a9-3daabf055ebb", fileID) - assert.NoError(suite.T(), err, "got (%v) when getting file archive information", err) - - fileID, err = Conf.API.DB.RegisterFile("/user/file-2.c4gh", "test.user@example.com") - assert.NoError(suite.T(), err, "failed to register file in database") - err = Conf.API.DB.SetAccessionID("ed6af454-d910-49e3-8cda-488a6f246e76", fileID) - assert.NoError(suite.T(), err, "got (%v) when getting file archive information", err) - - accessions := []string{"5fe7b660-afea-4c3a-88a9-3daabf055ebb", "ed6af454-d910-49e3-8cda-488a6f246e76"} - diSet := map[string][]string{ - "cd532362-e06e-4460-8490-b9ce64b8d9e6": accessions[0:1], - } - - for di, acs := range diSet { - err := Conf.API.DB.MapFilesToDataset(di, acs) - assert.NoError(suite.T(), err, "failed to map file to dataset") - } - - exists, err := http.Post(ts.URL+"/dataset", "application/json", bytes.NewBuffer(goodJSON)) - assert.NoError(suite.T(), err) - assert.Equal(suite.T(), http.StatusAlreadyReported, exists.StatusCode) - defer good.Body.Close() } func (suite *SyncAPITest) TestMetadataRoute() { @@ -371,76 +248,6 @@ func (suite *SyncAPITest) TestMetadataRoute() { defer bad.Body.Close() } -func (suite *SyncAPITest) TestBuildJSON() { - suite.SetupTest() - Conf, err = config.NewConfig("sync-api") - assert.NoError(suite.T(), err) - - Conf.API.MQ, err = broker.NewMQ(Conf.Broker) - assert.NoError(suite.T(), err) - - Conf.API.DB, err = database.NewSDAdb(Conf.Database) - assert.NoError(suite.T(), err) - - m := []byte(`{"type":"mapping", "dataset_id": "cd532362-e06e-4461-8490-b9ce64b8d9e7", "accession_ids": ["ed6af454-d910-49e3-8cda-488a6f246e67"]}`) - _, err := buildSyncDatasetJSON(m) - assert.EqualError(suite.T(), err, "sql: no rows in result set") - - fileID, err := Conf.API.DB.RegisterFile("dummy.user/test/file1.c4gh", "dummy.user") - assert.NoError(suite.T(), err, "failed to register file in database") - err = Conf.API.DB.SetAccessionID("ed6af454-d910-49e3-8cda-488a6f246e67", 
fileID) - assert.NoError(suite.T(), err) - - checksum := sha256.New() - fileInfo := database.FileInfo{Checksum: sha256.New(), Size: 1234, Path: "dummy.user/test/file1.c4gh", DecryptedChecksum: checksum, DecryptedSize: 999} - corrID := uuid.New().String() - - err = Conf.API.DB.SetArchived(fileInfo, fileID, corrID) - assert.NoError(suite.T(), err, "failed to mark file as Archived") - err = Conf.API.DB.MarkCompleted(fileInfo, fileID, corrID) - assert.NoError(suite.T(), err, "failed to mark file as Verified") - - accessions := []string{"ed6af454-d910-49e3-8cda-488a6f246e67"} - assert.NoError(suite.T(), Conf.API.DB.MapFilesToDataset("cd532362-e06e-4461-8490-b9ce64b8d9e7", accessions), "failed to map file to dataset") - - jsonData, err := buildSyncDatasetJSON(m) - assert.NoError(suite.T(), err) - dataset := []byte(`{"dataset_id":"cd532362-e06e-4461-8490-b9ce64b8d9e7","dataset_files":[{"filepath":"dummy.user/test/file1.c4gh","file_id":"ed6af454-d910-49e3-8cda-488a6f246e67","sha256":"e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"}],"user":"dummy.user"}`) - assert.Equal(suite.T(), dataset, jsonData) -} - -func (suite *SyncAPITest) TestSendPOST() { - r := http.NewServeMux() - r.HandleFunc("/dataset", func(w http.ResponseWriter, r *http.Request) { - _, err = w.Write([]byte(fmt.Sprint(http.StatusOK))) - assert.NoError(suite.T(), err) - }) - ts := httptest.NewServer(r) - defer ts.Close() - - Conf = &config.Config{} - Conf.SyncAPI = config.SyncAPIConf{ - RemoteHost: ts.URL, - RemoteUser: "test", - RemotePassword: "test", - } - syncJSON := []byte(`{"user":"test.user@example.com", "dataset_id": "cd532362-e06e-4460-8490-b9ce64b8d9e7", "dataset_files": [{"filepath": "inbox/user/file1.c4gh","file_id": "5fe7b660-afea-4c3a-88a9-3daabf055ebb", "sha256": "82E4e60e7beb3db2e06A00a079788F7d71f75b61a4b75f28c4c942703dabb6d6"}, {"filepath": "inbox/user/file2.c4gh","file_id": "ed6af454-d910-49e3-8cda-488a6f246e76", "sha256": "c967d96e56dec0f0cfee8f661846238b7f15771796ee1c345cae73cd812acc2b"}]}`) - err := sendPOST(syncJSON) - assert.NoError(suite.T(), err) -} - -func (suite *SyncAPITest) TestCreateHostURL() { - Conf = &config.Config{} - Conf.SyncAPI = config.SyncAPIConf{ - RemoteHost: "http://localhost", - RemotePort: 443, - } - - s, err := createHostURL(Conf.SyncAPI.RemoteHost, Conf.SyncAPI.RemotePort) - assert.NoError(suite.T(), err) - assert.Equal(suite.T(), "http://localhost:443/dataset", s) -} - func (suite *SyncAPITest) TestBasicAuth() { Conf = &config.Config{} Conf.Broker.SchemasPath = "../../schemas" diff --git a/sda/internal/config/config.go b/sda/internal/config/config.go index 61ce5ade5..49a6c03f8 100644 --- a/sda/internal/config/config.go +++ b/sda/internal/config/config.go @@ -492,12 +492,6 @@ func NewConfig(app string) (*Config, error) { c.configSchemas() case "sync-api": - if viper.IsSet("db.host") { - if err := c.configDatabase(); err != nil { - return nil, err - } - } - if err := c.configBroker(); err != nil { return nil, err } @@ -506,10 +500,7 @@ func NewConfig(app string) (*Config, error) { return nil, err } - if viper.IsSet("sync.api.remote.host") { - c.configSyncAPI() - } - + c.configSyncAPI() c.configSchemas() return c, nil From 5fe6fab4bb724c3008ef32f3bb0100b6386a9e62 Mon Sep 17 00:00:00 2001 From: Joakim Bygdell Date: Tue, 7 Nov 2023 15:33:14 +0100 Subject: [PATCH 20/34] [config] remove remote end from sync api --- sda/internal/config/config_test.go | 8 -------- 1 file changed, 8 deletions(-) diff --git a/sda/internal/config/config_test.go b/sda/internal/config/config_test.go 
index bb82b322e..2f9f19262 100644 --- a/sda/internal/config/config_test.go +++ b/sda/internal/config/config_test.go @@ -351,16 +351,8 @@ func (suite *ConfigTestSuite) TestConfigSyncAPI() { viper.Set("sync.api.user", "user") viper.Set("sync.api.password", "password") - viper.Set("sync.api.remote.host", "remote-host") - viper.Set("sync.api.remote.port", 1234) - viper.Set("sync.api.remote.user", "remote-user") - viper.Set("sync.api.remote.pass", "remote-pass") config, err := NewConfig("sync-api") assert.NoError(suite.T(), err) - assert.Equal(suite.T(), "remote-host", config.SyncAPI.RemoteHost) - assert.Equal(suite.T(), 1234, config.SyncAPI.RemotePort) - assert.Equal(suite.T(), "remote-user", config.SyncAPI.RemoteUser) - assert.Equal(suite.T(), "remote-pass", config.SyncAPI.RemotePassword) assert.Equal(suite.T(), "user", config.SyncAPI.APIUser) assert.Equal(suite.T(), "password", config.SyncAPI.APIPassword) } From 4b39d4b1faaf6ffb8de297a4cca0033ef18eaaaa Mon Sep 17 00:00:00 2001 From: Joakim Bygdell Date: Thu, 9 Nov 2023 09:33:45 +0100 Subject: [PATCH 21/34] [sync-api] cleanup error messages --- sda/cmd/syncapi/syncapi.go | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/sda/cmd/syncapi/syncapi.go b/sda/cmd/syncapi/syncapi.go index 97d516427..7b038c83d 100644 --- a/sda/cmd/syncapi/syncapi.go +++ b/sda/cmd/syncapi/syncapi.go @@ -149,11 +149,7 @@ func dataset(w http.ResponseWriter, r *http.Request) { } if err := parseDatasetMessage(b); err != nil { - if err.Error() == "Dataset exists" { - w.WriteHeader(http.StatusAlreadyReported) - } else { - w.WriteHeader(http.StatusInternalServerError) - } + w.WriteHeader(http.StatusInternalServerError) } w.WriteHeader(http.StatusOK) @@ -161,6 +157,7 @@ func dataset(w http.ResponseWriter, r *http.Request) { // parsemessage parses the JSON blob and sends the relevant messages func parseDatasetMessage(msg []byte) error { + log.Debugf("incoming blob %s", msg) blob := syncDataset{} _ = json.Unmarshal(msg, &blob) @@ -173,11 +170,11 @@ func parseDatasetMessage(msg []byte) error { } ingestMsg, err := json.Marshal(ingest) if err != nil { - return fmt.Errorf("Failed to marshal json messge: Reason %v", err) + return fmt.Errorf("failed to marshal json messge: Reason %v", err) } - if err := Conf.API.MQ.SendMessage(fmt.Sprintf("%v", time.Now().Unix()), Conf.Broker.Exchange, "ingest", ingestMsg); err != nil { - return fmt.Errorf("Failed to send ingest messge: Reason %v", err) + if err := Conf.API.MQ.SendMessage(fmt.Sprintf("%v", time.Now().Unix()), Conf.Broker.Exchange, Conf.SyncAPI.IngestRouting, ingestMsg); err != nil { + return fmt.Errorf("failed to send ingest messge: Reason %v", err) } accessionIDs = append(accessionIDs, files.FileID) @@ -190,11 +187,11 @@ func parseDatasetMessage(msg []byte) error { } finalizeMsg, err := json.Marshal(finalize) if err != nil { - return fmt.Errorf("Failed to marshal json messge: Reason %v", err) + return fmt.Errorf("failed to marshal json messge: Reason %v", err) } - if err := Conf.API.MQ.SendMessage(fmt.Sprintf("%v", time.Now().Unix()), Conf.Broker.Exchange, "accession", finalizeMsg); err != nil { - return fmt.Errorf("Failed to send mapping messge: Reason %v", err) + if err := Conf.API.MQ.SendMessage(fmt.Sprintf("%v", time.Now().Unix()), Conf.Broker.Exchange, Conf.SyncAPI.AccessionRouting, finalizeMsg); err != nil { + return fmt.Errorf("failed to send mapping messge: Reason %v", err) } } @@ -205,11 +202,11 @@ func parseDatasetMessage(msg []byte) error { } mappingMsg, err := 
json.Marshal(mappings) if err != nil { - return fmt.Errorf("Failed to marshal json messge: Reason %v", err) + return fmt.Errorf("failed to marshal json messge: Reason %v", err) } - if err := Conf.API.MQ.SendMessage(fmt.Sprintf("%v", time.Now().Unix()), Conf.Broker.Exchange, "mappings", mappingMsg); err != nil { - return fmt.Errorf("Failed to send mapping messge: Reason %v", err) + if err := Conf.API.MQ.SendMessage(fmt.Sprintf("%v", time.Now().Unix()), Conf.Broker.Exchange, Conf.SyncAPI.MappingRouting, mappingMsg); err != nil { + return fmt.Errorf("failed to send mapping messge: Reason %v", err) } return nil From fa15d0ed662a97e531f478dee82b5831646786d2 Mon Sep 17 00:00:00 2001 From: Joakim Bygdell Date: Thu, 9 Nov 2023 09:34:56 +0100 Subject: [PATCH 22/34] [config] fix sync and sync-api --- sda/internal/config/config.go | 40 +++++++++++++++--------------- sda/internal/config/config_test.go | 10 +++++++- 2 files changed, 29 insertions(+), 21 deletions(-) diff --git a/sda/internal/config/config.go b/sda/internal/config/config.go index 49a6c03f8..91100e3d5 100644 --- a/sda/internal/config/config.go +++ b/sda/internal/config/config.go @@ -9,11 +9,10 @@ import ( "strings" "time" + "github.com/neicnordic/crypt4gh/keys" "github.com/neicnordic/sensitive-data-archive/internal/broker" "github.com/neicnordic/sensitive-data-archive/internal/database" "github.com/neicnordic/sensitive-data-archive/internal/storage" - - "github.com/neicnordic/crypt4gh/keys" "github.com/pkg/errors" log "github.com/sirupsen/logrus" "github.com/spf13/viper" @@ -60,12 +59,11 @@ type Sync struct { } type SyncAPIConf struct { - APIPassword string - APIUser string - RemoteHost string - RemotePassword string - RemotePort int - RemoteUser string + APIPassword string + APIUser string + AccessionRouting string `default:"accession"` + IngestRouting string `default:"ingest"` + MappingRouting string `default:"mappings"` } type APIConf struct { @@ -284,7 +282,6 @@ func NewConfig(app string) (*Config, error) { "broker.user", "broker.password", "broker.queue", - "centerPrefix", "c4gh.filepath", "c4gh.passphrase", "c4gh.syncPubKeyPath", @@ -293,6 +290,10 @@ func NewConfig(app string) (*Config, error) { "db.user", "db.password", "db.database", + "sync.centerPrefix", + "sync.remote.host", + "sync.remote.user", + "sync.remote.password", } switch viper.GetString("archive.type") { @@ -321,7 +322,6 @@ func NewConfig(app string) (*Config, error) { "broker.port", "broker.user", "broker.password", - "broker.queue", "sync.api.user", "sync.api.password", } @@ -490,7 +490,6 @@ func NewConfig(app string) (*Config, error) { c.configArchive() c.configSync() c.configSchemas() - case "sync-api": if err := c.configBroker(); err != nil { return nil, err @@ -502,8 +501,6 @@ func NewConfig(app string) (*Config, error) { c.configSyncAPI() c.configSchemas() - - return c, nil case "verify": c.configArchive() @@ -834,9 +831,8 @@ func (c *Config) configSync() { if viper.IsSet("sync.remote.port") { c.Sync.RemotePort = viper.GetInt("sync.remote.port") } - c.Sync.RemotePassword = viper.GetString("sync.remote.pass") + c.Sync.RemotePassword = viper.GetString("sync.remote.password") c.Sync.RemoteUser = viper.GetString("sync.remote.user") - c.Sync.CenterPrefix = viper.GetString("sync.centerPrefix") } @@ -846,12 +842,16 @@ func (c *Config) configSyncAPI() { c.SyncAPI.APIPassword = viper.GetString("sync.api.password") c.SyncAPI.APIUser = viper.GetString("sync.api.user") - c.SyncAPI.RemoteHost = viper.GetString("sync.api.remote.host") - if 
viper.IsSet("sync.api.remote.port") { - c.SyncAPI.RemotePort = viper.GetInt("sync.api.remote.port") + if viper.IsSet("sync.api.AccessionRouting") { + c.SyncAPI.AccessionRouting = viper.GetString("sync.api.AccessionRouting") } - c.SyncAPI.RemotePassword = viper.GetString("sync.api.remote.pass") - c.SyncAPI.RemoteUser = viper.GetString("sync.api.remote.user") + if viper.IsSet("sync.api.IngestRouting") { + c.SyncAPI.IngestRouting = viper.GetString("sync.api.IngestRouting") + } + if viper.IsSet("sync.api.MappingRouting") { + c.SyncAPI.MappingRouting = viper.GetString("sync.api.MappingRouting") + } + } // GetC4GHKey reads and decrypts and returns the c4gh key diff --git a/sda/internal/config/config_test.go b/sda/internal/config/config_test.go index 2f9f19262..d599a711b 100644 --- a/sda/internal/config/config_test.go +++ b/sda/internal/config/config_test.go @@ -275,11 +275,14 @@ func (suite *ConfigTestSuite) TestSyncConfig() { assert.Error(suite.T(), err) assert.Nil(suite.T(), config) - viper.Set("centerPrefix", "prefix") viper.Set("archive.type", "posix") viper.Set("archive.location", "test") + viper.Set("sync.centerPrefix", "prefix") viper.Set("sync.destination.type", "posix") viper.Set("sync.destination.location", "test") + viper.Set("sync.remote.host", "https://test.org") + viper.Set("sync.remote.user", "test") + viper.Set("sync.remote.password", "test") viper.Set("c4gh.filepath", "/keys/key") viper.Set("c4gh.passphrase", "pass") viper.Set("c4gh.syncPubKeyPath", "/keys/recipient") @@ -355,4 +358,9 @@ func (suite *ConfigTestSuite) TestConfigSyncAPI() { assert.NoError(suite.T(), err) assert.Equal(suite.T(), "user", config.SyncAPI.APIUser) assert.Equal(suite.T(), "password", config.SyncAPI.APIPassword) + + viper.Set("sync.api.AccessionRouting", "wrong") + config, err = NewConfig("sync-api") + assert.NoError(suite.T(), err) + assert.Equal(suite.T(), "wrong", config.SyncAPI.AccessionRouting) } From 41ebaf7654f024fa7ddab05ca5078da4373e4794 Mon Sep 17 00:00:00 2001 From: Joakim Bygdell Date: Thu, 9 Nov 2023 09:35:31 +0100 Subject: [PATCH 23/34] [schemas] encrypted checksums are not mandatory for ingestion-trigger --- sda/internal/schema/schema.go | 7 +++---- sda/internal/schema/schema_test.go | 6 ------ 2 files changed, 3 insertions(+), 10 deletions(-) diff --git a/sda/internal/schema/schema.go b/sda/internal/schema/schema.go index 2f6c4c022..c01440469 100644 --- a/sda/internal/schema/schema.go +++ b/sda/internal/schema/schema.go @@ -139,10 +139,9 @@ type IngestionCompletion struct { } type IngestionTrigger struct { - Type string `json:"type"` - User string `json:"user"` - FilePath string `json:"filepath"` - EncryptedChecksums []Checksums `json:"encrypted_checksums"` + Type string `json:"type"` + User string `json:"user"` + FilePath string `json:"filepath"` } type IngestionUserError struct { diff --git a/sda/internal/schema/schema_test.go b/sda/internal/schema/schema_test.go index 5ea00d7f4..d4c85b647 100644 --- a/sda/internal/schema/schema_test.go +++ b/sda/internal/schema/schema_test.go @@ -261,9 +261,6 @@ func TestValidateJSONIngestionTrigger(t *testing.T) { Type: "ingest", User: "JohnDoe", FilePath: "path/to/file", - EncryptedChecksums: []Checksums{ - {Type: "sha256", Value: "da886a89637d125ef9f15f6d676357f3a9e5e10306929f0bad246375af89c2e2"}, - }, } msg, _ := json.Marshal(okMsg) @@ -273,9 +270,6 @@ func TestValidateJSONIngestionTrigger(t *testing.T) { badMsg := IngestionTrigger{ User: "JohnDoe", FilePath: "path/to file", - EncryptedChecksums: []Checksums{ - {Type: "sha256", Value: 
"da886a89637d125ef9f15f6d676357f3a9e5e10306929f0bad246375af89c2e2"}, - }, } msg, _ = json.Marshal(badMsg) From 788a151ab12d6d581b0aa727776750769e1b3ede Mon Sep 17 00:00:00 2001 From: Joakim Bygdell Date: Thu, 9 Nov 2023 09:36:21 +0100 Subject: [PATCH 24/34] [schemas] set correct url in json files --- sda/schemas/federated/dataset-deprecate.json | 2 +- sda/schemas/federated/dataset-mapping.json | 2 +- sda/schemas/federated/dataset-release.json | 2 +- sda/schemas/federated/inbox-remove.json | 2 +- sda/schemas/federated/inbox-rename.json | 2 +- sda/schemas/federated/inbox-upload.json | 2 +- sda/schemas/federated/info-error.json | 2 +- sda/schemas/federated/ingestion-accession-request.json | 2 +- sda/schemas/federated/ingestion-accession.json | 2 +- sda/schemas/federated/ingestion-completion.json | 2 +- sda/schemas/federated/ingestion-trigger.json | 2 +- sda/schemas/federated/ingestion-user-error.json | 2 +- sda/schemas/federated/ingestion-verification.json | 2 +- sda/schemas/isolated/dataset-deprecate.json | 1 + sda/schemas/isolated/dataset-mapping.json | 2 +- sda/schemas/isolated/dataset-release.json | 1 + sda/schemas/isolated/inbox-remove.json | 2 +- sda/schemas/isolated/inbox-rename.json | 2 +- sda/schemas/isolated/inbox-upload.json | 2 +- sda/schemas/isolated/ingestion-accession.json | 2 +- sda/schemas/isolated/ingestion-completion.json | 2 +- 21 files changed, 21 insertions(+), 19 deletions(-) create mode 120000 sda/schemas/isolated/dataset-deprecate.json create mode 120000 sda/schemas/isolated/dataset-release.json diff --git a/sda/schemas/federated/dataset-deprecate.json b/sda/schemas/federated/dataset-deprecate.json index 3844f0659..620776446 100644 --- a/sda/schemas/federated/dataset-deprecate.json +++ b/sda/schemas/federated/dataset-deprecate.json @@ -1,6 +1,6 @@ { "title": "JSON schema for Local EGA dataset deprecation message interface", - "$id": "https://github.com/EGA-archive/LocalEGA/tree/master/schemas/dataset-deprecate.json", + "$id": "https://github.com/neicnordic/sensitive-data-archive/tree/master/sda/schemas/federated/dataset-deprecate.json", "$schema": "http://json-schema.org/draft-07/schema", "type": "object", "required": [ diff --git a/sda/schemas/federated/dataset-mapping.json b/sda/schemas/federated/dataset-mapping.json index 574e0743d..07a631834 100644 --- a/sda/schemas/federated/dataset-mapping.json +++ b/sda/schemas/federated/dataset-mapping.json @@ -1,6 +1,6 @@ { "title": "JSON schema for Local EGA dataset mapping message interface", - "$id": "https://github.com/EGA-archive/LocalEGA/tree/master/schemas/dataset-mapping.json", + "$id": "https://github.com/neicnordic/sensitive-data-archive/tree/master/sda/schemas/federated/dataset-mapping.json", "$schema": "http://json-schema.org/draft-07/schema", "type": "object", "required": [ diff --git a/sda/schemas/federated/dataset-release.json b/sda/schemas/federated/dataset-release.json index e1e688fb4..0a4ccf2c2 100644 --- a/sda/schemas/federated/dataset-release.json +++ b/sda/schemas/federated/dataset-release.json @@ -1,6 +1,6 @@ { "title": "JSON schema for Local EGA dataset release message interface", - "$id": "https://github.com/EGA-archive/LocalEGA/tree/master/schemas/dataset-release.json", + "$id": "https://github.com/neicnordic/sensitive-data-archive/tree/master/sda/schemas/federated/dataset-release.json", "$schema": "http://json-schema.org/draft-07/schema", "type": "object", "required": [ diff --git a/sda/schemas/federated/inbox-remove.json b/sda/schemas/federated/inbox-remove.json index ed19753e5..23acc6603 100644 --- 
a/sda/schemas/federated/inbox-remove.json +++ b/sda/schemas/federated/inbox-remove.json @@ -1,6 +1,6 @@ { "title": "JSON schema for Local EGA inbox remove message interface", - "$id": "https://github.com/EGA-archive/LocalEGA/tree/master/schemas/inbox-remove.json", + "$id": "https://github.com/neicnordic/sensitive-data-archive/tree/master/sda/schemas/federated/inbox-remove.json", "$schema": "http://json-schema.org/draft-07/schema", "type": "object", "required": [ diff --git a/sda/schemas/federated/inbox-rename.json b/sda/schemas/federated/inbox-rename.json index e8aa5ae4e..a8e185983 100644 --- a/sda/schemas/federated/inbox-rename.json +++ b/sda/schemas/federated/inbox-rename.json @@ -1,6 +1,6 @@ { "title": "JSON schema for Local EGA inbox rename message interface", - "$id": "https://github.com/EGA-archive/LocalEGA/tree/master/schemas/inbox-rename.json", + "$id": "https://github.com/neicnordic/sensitive-data-archive/tree/master/sda/schemas/federated/inbox-rename.json", "$schema": "http://json-schema.org/draft-07/schema", "type": "object", "required": [ diff --git a/sda/schemas/federated/inbox-upload.json b/sda/schemas/federated/inbox-upload.json index 02ab62659..4158a12a9 100644 --- a/sda/schemas/federated/inbox-upload.json +++ b/sda/schemas/federated/inbox-upload.json @@ -1,6 +1,6 @@ { "title": "JSON schema for Local EGA inbox upload message interface", - "$id": "https://github.com/EGA-archive/LocalEGA/tree/master/schemas/inbox-upload.json", + "$id": "https://github.com/neicnordic/sensitive-data-archive/tree/master/sda/schemas/federated/inbox-upload.json", "$schema": "http://json-schema.org/draft-07/schema", "type": "object", "required": [ diff --git a/sda/schemas/federated/info-error.json b/sda/schemas/federated/info-error.json index 881e835cd..5ac8dcf85 100644 --- a/sda/schemas/federated/info-error.json +++ b/sda/schemas/federated/info-error.json @@ -1,6 +1,6 @@ { "title": "JSON schema for Local EGA dataset mapping message interface", - "$id": "https://github.com/neicnordic/sda-pipeline/tree/master/schemas/info-error.json", + "$id": "https://github.com/neicnordic/sensitive-data-archive/tree/master/sda/schemas/federated/info-error.json", "$schema": "http://json-schema.org/draft-07/schema", "type": "object", "required": [ diff --git a/sda/schemas/federated/ingestion-accession-request.json b/sda/schemas/federated/ingestion-accession-request.json index 61bd8ba56..0030e6863 100644 --- a/sda/schemas/federated/ingestion-accession-request.json +++ b/sda/schemas/federated/ingestion-accession-request.json @@ -1,6 +1,6 @@ { "title": "JSON schema for Local EGA message interface for requesting an Accession ID to Central EGA", - "$id": "https://github.com/EGA-archive/LocalEGA/tree/master/schemas/ingestion-accession-request.json", + "$id": "https://github.com/neicnordic/sensitive-data-archive/tree/master/sda/schemas/federated/ingestion-accession-request.json", "$schema": "http://json-schema.org/draft-07/schema", "type": "object", "required": [ diff --git a/sda/schemas/federated/ingestion-accession.json b/sda/schemas/federated/ingestion-accession.json index a8d6b4799..b39de8422 100644 --- a/sda/schemas/federated/ingestion-accession.json +++ b/sda/schemas/federated/ingestion-accession.json @@ -1,6 +1,6 @@ { "title": "JSON schema for Local EGA accession message interface", - "$id": "https://github.com/EGA-archive/LocalEGA/tree/master/schemas/ingestion-accession.json", + "$id": "https://github.com/neicnordic/sensitive-data-archive/tree/master/sda/schemas/federated/ingestion-accession.json", "$schema": 
"http://json-schema.org/draft-07/schema", "type": "object", "required": [ diff --git a/sda/schemas/federated/ingestion-completion.json b/sda/schemas/federated/ingestion-completion.json index 06854fc78..9218a19ce 100644 --- a/sda/schemas/federated/ingestion-completion.json +++ b/sda/schemas/federated/ingestion-completion.json @@ -1,6 +1,6 @@ { "title": "JSON schema for Local EGA message completion to Central EGA", - "$id": "https://github.com/EGA-archive/LocalEGA/tree/master/schemas/ingestion-completion.json", + "$id": "https://github.com/neicnordic/sensitive-data-archive/tree/master/sda/schemas/federated/ingestion-completion.json", "$schema": "http://json-schema.org/draft-07/schema", "type": "object", "required": [ diff --git a/sda/schemas/federated/ingestion-trigger.json b/sda/schemas/federated/ingestion-trigger.json index 404733939..87bb270e6 100644 --- a/sda/schemas/federated/ingestion-trigger.json +++ b/sda/schemas/federated/ingestion-trigger.json @@ -1,6 +1,6 @@ { "title": "JSON schema for Local EGA ingestion trigger message interface", - "$id": "https://github.com/EGA-archive/LocalEGA/tree/master/schemas/ingestion-ingest.json", + "$id": "https://github.com/neicnordic/sensitive-data-archive/tree/master/sda/schemas/federated/ingestion-trigger.json", "$schema": "http://json-schema.org/draft-07/schema", "type": "object", "required": [ diff --git a/sda/schemas/federated/ingestion-user-error.json b/sda/schemas/federated/ingestion-user-error.json index 8599ea147..f94f67386 100644 --- a/sda/schemas/federated/ingestion-user-error.json +++ b/sda/schemas/federated/ingestion-user-error.json @@ -1,6 +1,6 @@ { "title": "JSON schema for Local EGA message interface to Central EGA", - "$id": "https://github.com/EGA-archive/LocalEGA/tree/master/schemas/ingestion-user-error.json", + "$id": "https://github.com/neicnordic/sensitive-data-archive/tree/master/sda/schemas/federated/ingestion-user-error.json", "$schema": "http://json-schema.org/draft-07/schema", "type": "object", "required": [ diff --git a/sda/schemas/federated/ingestion-verification.json b/sda/schemas/federated/ingestion-verification.json index fd63dcbf5..9e645d089 100644 --- a/sda/schemas/federated/ingestion-verification.json +++ b/sda/schemas/federated/ingestion-verification.json @@ -1,6 +1,6 @@ { "title": "JSON schema for SDA verification message interface", - "$id": "https://github.com/neicnordic/sda-pipeline/tree/master/schemas/ingestion-verification.json", + "$id": "https://github.com/neicnordic/sensitive-data-archive/tree/master/sda/schemas/federated/ingestion-verification.json", "$schema": "http://json-schema.org/draft-07/schema", "type": "object", "required": [ diff --git a/sda/schemas/isolated/dataset-deprecate.json b/sda/schemas/isolated/dataset-deprecate.json new file mode 120000 index 000000000..c3fd79a98 --- /dev/null +++ b/sda/schemas/isolated/dataset-deprecate.json @@ -0,0 +1 @@ +../federated/dataset-deprecate.json \ No newline at end of file diff --git a/sda/schemas/isolated/dataset-mapping.json b/sda/schemas/isolated/dataset-mapping.json index 603713c4d..28f736d72 100644 --- a/sda/schemas/isolated/dataset-mapping.json +++ b/sda/schemas/isolated/dataset-mapping.json @@ -1,6 +1,6 @@ { "title": "JSON schema for dataset mapping message interface. 
Derived from Federated EGA schemas.", - "$id": "https://github.com/EGA-archive/LocalEGA/tree/master/schemas/dataset-mapping.json", + "$id": "https://github.com/neicnordic/sensitive-data-archive/tree/master/sda/schemas/isolated/dataset-mapping.json", "$schema": "http://json-schema.org/draft-07/schema", "type": "object", "required": [ diff --git a/sda/schemas/isolated/dataset-release.json b/sda/schemas/isolated/dataset-release.json new file mode 120000 index 000000000..e22bb197c --- /dev/null +++ b/sda/schemas/isolated/dataset-release.json @@ -0,0 +1 @@ +../federated/dataset-release.json \ No newline at end of file diff --git a/sda/schemas/isolated/inbox-remove.json b/sda/schemas/isolated/inbox-remove.json index c371d6b2b..1c71456b0 100644 --- a/sda/schemas/isolated/inbox-remove.json +++ b/sda/schemas/isolated/inbox-remove.json @@ -1,6 +1,6 @@ { "title": "JSON schema for Local EGA inbox remove message interface", - "$id": "https://github.com/EGA-archive/LocalEGA/tree/master/schemas/inbox-remove.json", + "$id": "https://github.com/neicnordic/sensitive-data-archive/tree/master/sda/schemas/isolated/inbox-remove.json", "$schema": "http://json-schema.org/draft-07/schema", "type": "object", "required": [ diff --git a/sda/schemas/isolated/inbox-rename.json b/sda/schemas/isolated/inbox-rename.json index d557b2c87..445376684 100644 --- a/sda/schemas/isolated/inbox-rename.json +++ b/sda/schemas/isolated/inbox-rename.json @@ -1,6 +1,6 @@ { "title": "JSON schema for Local EGA inbox rename message interface", - "$id": "https://github.com/EGA-archive/LocalEGA/tree/master/schemas/inbox-rename.json", + "$id": "https://github.com/neicnordic/sensitive-data-archive/tree/master/sda/schemas/isolated/inbox-rename.json", "$schema": "http://json-schema.org/draft-07/schema", "type": "object", "required": [ diff --git a/sda/schemas/isolated/inbox-upload.json b/sda/schemas/isolated/inbox-upload.json index 05a6585b1..a21ab542b 100644 --- a/sda/schemas/isolated/inbox-upload.json +++ b/sda/schemas/isolated/inbox-upload.json @@ -1,6 +1,6 @@ { "title": "JSON schema for Local EGA inbox upload message interface", - "$id": "https://github.com/EGA-archive/LocalEGA/tree/master/schemas/inbox-upload.json", + "$id": "https://github.com/neicnordic/sensitive-data-archive/tree/master/sda/schemas/isolated/inbox-upload.json", "$schema": "http://json-schema.org/draft-07/schema", "type": "object", "required": [ diff --git a/sda/schemas/isolated/ingestion-accession.json b/sda/schemas/isolated/ingestion-accession.json index 844e363d1..4b3cdfe16 100644 --- a/sda/schemas/isolated/ingestion-accession.json +++ b/sda/schemas/isolated/ingestion-accession.json @@ -1,6 +1,6 @@ { "title": "JSON schema for accession message interface. Derived from Federated EGA schemas.", - "$id": "https://github.com/EGA-archive/LocalEGA/tree/master/schemas/ingestion-accession.json", + "$id": "https://github.com/neicnordic/sensitive-data-archive/tree/master/sda/schemas/isolated/ingestion-accession.json", "$schema": "http://json-schema.org/draft-07/schema", "type": "object", "required": [ diff --git a/sda/schemas/isolated/ingestion-completion.json b/sda/schemas/isolated/ingestion-completion.json index 6e6034381..d57ec3768 100644 --- a/sda/schemas/isolated/ingestion-completion.json +++ b/sda/schemas/isolated/ingestion-completion.json @@ -1,6 +1,6 @@ { "title": "JSON schema for sending message for ingestion completion. 
Derived from Federated EGA schemas.", - "$id": "https://github.com/EGA-archive/LocalEGA/tree/master/schemas/ingestion-completion.json", + "$id": "https://github.com/neicnordic/sensitive-data-archive/tree/master/sda/schemas/isolated/ingestion-completion.json", "$schema": "http://json-schema.org/draft-07/schema", "type": "object", "required": [ From 8187bdc0671c65dcc18cae4ebb1a2af5076d5ed7 Mon Sep 17 00:00:00 2001 From: Joakim Bygdell Date: Thu, 9 Nov 2023 09:37:16 +0100 Subject: [PATCH 25/34] [finalize] nack message if `GetFilestatus` fails --- sda/cmd/finalize/finalize.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sda/cmd/finalize/finalize.go b/sda/cmd/finalize/finalize.go index 7028c6f30..6ab14c25f 100644 --- a/sda/cmd/finalize/finalize.go +++ b/sda/cmd/finalize/finalize.go @@ -90,6 +90,11 @@ func main() { status, err := db.GetFileStatus(delivered.CorrelationId) if err != nil { log.Errorf("failed to get file status, reason: %v", err) + if err := delivered.Nack(false, true); err != nil { + log.Errorf("failed to Nack message, reason: (%v)", err) + } + + continue } if status == "disabled" { log.Infof("file with correlation ID: %s is disabled, stopping work", delivered.CorrelationId) From 1c371f244e3fa86d03f5de97d586584a55f1a7e3 Mon Sep 17 00:00:00 2001 From: Joakim Bygdell Date: Thu, 9 Nov 2023 09:50:59 +0100 Subject: [PATCH 26/34] [Go.mod] add missing library --- sda/go.mod | 1 + sda/go.sum | 2 ++ 2 files changed, 3 insertions(+) diff --git a/sda/go.mod b/sda/go.mod index 7efa8d4ef..064747b59 100644 --- a/sda/go.mod +++ b/sda/go.mod @@ -43,6 +43,7 @@ require ( github.com/gogo/protobuf v1.3.2 // indirect github.com/golang/protobuf v1.5.3 // indirect github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 // indirect + github.com/gorilla/mux v1.8.1 // indirect github.com/hashicorp/hcl v1.0.0 // indirect github.com/imdario/mergo v0.3.12 // indirect github.com/jmespath/go-jmespath v0.4.0 // indirect diff --git a/sda/go.sum b/sda/go.sum index fa409c654..8d84f3085 100644 --- a/sda/go.sum +++ b/sda/go.sum @@ -167,6 +167,8 @@ github.com/googleapis/gax-go/v2 v2.0.4/go.mod h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+ github.com/googleapis/gax-go/v2 v2.0.5/go.mod h1:DWXyrwAJ9X0FpwwEdw+IPEYBICEFu5mhpdKc/us6bOk= github.com/googleapis/google-cloud-go-testing v0.0.0-20200911160855-bcd43fbb19e8/go.mod h1:dvDLG8qkwmyD9a/MJJN3XJcT3xFxOKAvTZGvuZmac9g= github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY= +github.com/gorilla/mux v1.8.1 h1:TuBL49tXwgrFYWhqrNgrUNEY92u81SPhu7sTdzQEiWY= +github.com/gorilla/mux v1.8.1/go.mod h1:AKf9I4AEqPTmMytcMc0KkNouC66V3BtZ4qD5fmWSiMQ= github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= github.com/hashicorp/hcl v1.0.0 h1:0Anlzjpi4vEasTeNFn2mLJgTSwt0+6sfsiTG8qcWGx4= From cbad7934f42a15cb332ea41bd7169fa5d34adfdd Mon Sep 17 00:00:00 2001 From: Joakim Bygdell Date: Thu, 9 Nov 2023 09:53:14 +0100 Subject: [PATCH 27/34] [integration test] update test for data sync --- .github/integration/sda/config.yaml | 27 ++++++++---- .../integration/tests/sda/10_upload_test.sh | 22 +++++----- .../tests/sda/20_ingest-verify_test.sh | 4 +- .../tests/sda/30_backup-finalize_test.sh | 11 +++-- .../integration/tests/sda/40_mapper_test.sh | 44 ++++++++++++++++++- .github/integration/tests/sda/45_sync_test.sh | 6 ++- 6 files changed, 87 insertions(+), 27 deletions(-) diff --git 
a/.github/integration/sda/config.yaml b/.github/integration/sda/config.yaml index 6690b1fbe..bcc69b99c 100644 --- a/.github/integration/sda/config.yaml +++ b/.github/integration/sda/config.yaml @@ -58,12 +58,21 @@ server: jwtpubkeypath: "/shared/keys/pub/" jwtpubkeyurl: "http://oidc:8080/jwk" -sync.destination: - type: "s3" - url: "http://s3" - port: 9000 - readypath: "/minio/health/ready" - accessKey: "access" - secretKey: "secretKey" - bucket: "sync" - region: "us-east-1" +sync: + centerPrefix: "SYNC" + destination: + type: "s3" + url: "http://s3" + port: 9000 + readypath: "/minio/health/ready" + accessKey: "access" + secretKey: "secretKey" + bucket: "sync" + region: "us-east-1" + remote: + host: "http://sync-api" + port: "8080" + password: "pass" + user: "user" + +schema.type: "isolated" \ No newline at end of file diff --git a/.github/integration/tests/sda/10_upload_test.sh b/.github/integration/tests/sda/10_upload_test.sh index bfffa2201..fb3c36b8d 100644 --- a/.github/integration/tests/sda/10_upload_test.sh +++ b/.github/integration/tests/sda/10_upload_test.sh @@ -70,7 +70,7 @@ fi if [ "$STORAGETYPE" = "s3" ]; then pip -q install s3cmd - for file in NA12878.bam NA12878_20k_b37.bam; do + for file in NA12878.bam NA12878_20k_b37.bam NA12878.bai NA12878_20k_b37.bai; do curl -s -L -o /shared/$file "https://github.com/ga4gh/htsget-refserver/raw/main/data/gcp/gatk-test-data/wgs_bam/$file" if [ ! -f "$file.c4gh" ]; then yes | /shared/crypt4gh encrypt -p c4gh.pub.pem -f "$file" @@ -87,7 +87,7 @@ fi echo "waiting for upload to complete" RETRY_TIMES=0 -until [ "$(curl -s -k -u guest:guest $URI/api/queues/sda/inbox | jq -r '."messages_ready"')" -eq 4 ]; do +until [ "$(curl -s -k -u guest:guest $URI/api/queues/sda/inbox | jq -r '."messages_ready"')" -eq 6 ]; do echo "waiting for upload to complete" RETRY_TIMES=$((RETRY_TIMES + 1)) if [ "$RETRY_TIMES" -eq 30 ]; then @@ -99,14 +99,14 @@ done if [ "$STORAGETYPE" = "s3" ]; then num_rows=$(psql -U postgres -h postgres -d sda -At -c "SELECT COUNT(*) from sda.files;") - if [ "$num_rows" -ne 3 ]; then - echo "database queries for register_files failed, expected 3 got $num_rows" + if [ "$num_rows" -ne 5 ]; then + echo "database queries for register_files failed, expected 5 got $num_rows" exit 1 fi num_log_rows=$(psql -U postgres -h postgres -d sda -At -c "SELECT COUNT(*) from sda.file_event_log;") - if [ "$num_log_rows" -ne 8 ]; then - echo "database queries for file_event_logs failed, expected 8 got $num_log_rows" + if [ "$num_log_rows" -ne 12 ]; then + echo "database queries for file_event_logs failed, expected 12 got $num_log_rows" exit 1 fi @@ -120,7 +120,7 @@ if [ "$STORAGETYPE" = "s3" ]; then ## verify that messages exists in MQ echo "waiting for upload to complete" RETRY_TIMES=0 - until [ "$(curl -s -k -u guest:guest $URI/api/queues/sda/inbox | jq -r '."messages_ready"')" -eq 5 ]; do + until [ "$(curl -s -k -u guest:guest $URI/api/queues/sda/inbox | jq -r '."messages_ready"')" -eq 7 ]; do echo "waiting for upload to complete" RETRY_TIMES=$((RETRY_TIMES + 1)) if [ "$RETRY_TIMES" -eq 30 ]; then @@ -131,14 +131,14 @@ if [ "$STORAGETYPE" = "s3" ]; then done num_rows=$(psql -U postgres -h postgres -d sda -At -c "SELECT COUNT(*) from sda.files;") - if [ "$num_rows" -ne 4 ]; then - echo "database queries for register_files failed, expected 4 got $num_rows" + if [ "$num_rows" -ne 6 ]; then + echo "database queries for register_files failed, expected 6 got $num_rows" exit 1 fi num_log_rows=$(psql -U postgres -h postgres -d sda -At -c "SELECT COUNT(*) from 
sda.file_event_log;") - if [ "$num_log_rows" -ne 10 ]; then - echo "database queries for file_event_logs failed, expected 10 got $num_log_rows" + if [ "$num_log_rows" -ne 14 ]; then + echo "database queries for file_event_logs failed, expected 14 got $num_log_rows" exit 1 fi fi diff --git a/.github/integration/tests/sda/20_ingest-verify_test.sh b/.github/integration/tests/sda/20_ingest-verify_test.sh index 2935a817e..a76aaf051 100644 --- a/.github/integration/tests/sda/20_ingest-verify_test.sh +++ b/.github/integration/tests/sda/20_ingest-verify_test.sh @@ -3,7 +3,7 @@ set -e cd shared || true -for file in NA12878.bam NA12878_20k_b37.bam; do +for file in NA12878.bam NA12878_20k_b37.bam NA12878.bai NA12878_20k_b37.bai; do ENC_SHA=$(sha256sum "$file.c4gh" | cut -d' ' -f 1) ENC_MD5=$(md5sum "$file.c4gh" | cut -d' ' -f 1) @@ -59,7 +59,7 @@ done echo "waiting for verify to complete" RETRY_TIMES=0 -until [ "$(curl -su guest:guest http://rabbitmq:15672/api/queues/sda/verified/ | jq -r '.messages_ready')" -eq 2 ]; do +until [ "$(curl -su guest:guest http://rabbitmq:15672/api/queues/sda/verified/ | jq -r '.messages_ready')" -eq 4 ]; do echo "waiting for verify to complete" RETRY_TIMES=$((RETRY_TIMES + 1)) if [ "$RETRY_TIMES" -eq 30 ]; then diff --git a/.github/integration/tests/sda/30_backup-finalize_test.sh b/.github/integration/tests/sda/30_backup-finalize_test.sh index 9e2c30087..330597b0e 100644 --- a/.github/integration/tests/sda/30_backup-finalize_test.sh +++ b/.github/integration/tests/sda/30_backup-finalize_test.sh @@ -28,12 +28,17 @@ while [ $i -le 2 ]; do '$ARGS.named' ) + accession_id=EGAF7490000000$i + if [[ "$filepath" == *.bai.c4gh ]]; then + accession_id="SYNC-123-0000$i" + fi + accession_payload=$( jq -r -c -n \ --arg type accession \ --arg user "$user" \ --arg filepath "$filepath" \ - --arg accession_id "EGAF7490000000$i" \ + --arg accession_id "$accession_id" \ --argjson decrypted_checksums "$decrypted_checksums" \ '$ARGS.named|@base64' ) @@ -58,7 +63,7 @@ done echo "waiting for finalize to complete" -until [ "$(curl -su guest:guest http://rabbitmq:15672/api/queues/sda/completed/ | jq -r '.messages_ready')" -eq 2 ]; do +until [ "$(curl -su guest:guest http://rabbitmq:15672/api/queues/sda/completed/ | jq -r '.messages_ready')" -eq 4 ]; do echo "waiting for finalize to complete" RETRY_TIMES=$((RETRY_TIMES + 1)) if [ "$RETRY_TIMES" -eq 30 ]; then @@ -88,7 +93,7 @@ socket_timeout = 30 EOD # check DB for archive file names -for file in NA12878.bam.c4gh NA12878_20k_b37.bam.c4gh; do +for file in NA12878.bam.c4gh NA12878.bai.c4gh NA12878_20k_b37.bam.c4gh NA12878_20k_b37.bai.c4gh; do archiveName=$(psql -U postgres -h postgres -d sda -At -c "SELECT archive_file_path from sda.files where submission_file_path = 'test_dummy.org/$file';") size=$(s3cmd -c direct ls s3://backup/"$archiveName" | tr -s ' ' | cut -d ' ' -f 3) if [ "$size" -eq 0 ]; then diff --git a/.github/integration/tests/sda/40_mapper_test.sh b/.github/integration/tests/sda/40_mapper_test.sh index 7de50977f..416b2bdd3 100644 --- a/.github/integration/tests/sda/40_mapper_test.sh +++ b/.github/integration/tests/sda/40_mapper_test.sh @@ -153,4 +153,46 @@ until [ "$(psql -U postgres -h postgres -d sda -At -c "select event from sda.dat sleep 2 done -echo "dataset deprecated successfully" \ No newline at end of file +echo "dataset deprecated successfully" + +mappings=$( + jq -c -n \ + '$ARGS.positional' \ + --args "SYNC-123-00003" \ + --args "SYNC-123-00004" +) + +mapping_payload=$( + jq -r -c -n \ + --arg type mapping \ + --arg 
dataset_id SYNC-001-12345 \ + --argjson accession_ids "$mappings" \ + '$ARGS.named|@base64' +) + +mapping_body=$( + jq -c -n \ + --arg vhost test \ + --arg name sda \ + --argjson properties "$properties" \ + --arg routing_key "mappings" \ + --arg payload_encoding base64 \ + --arg payload "$mapping_payload" \ + '$ARGS.named' +) + +curl -s -u guest:guest "http://rabbitmq:15672/api/exchanges/sda/sda/publish" \ + -H 'Content-Type: application/json;charset=UTF-8' \ + -d "$mapping_body" + +# check DB for dataset contents +RETRY_TIMES=0 +until [ "$(psql -U postgres -h postgres -d sda -At -c "select count(id) from sda.file_dataset where dataset_id = (select id from sda.datasets where stable_id = 'SYNC-001-12345')")" -eq 2 ]; do + echo "waiting for mapper to complete" + RETRY_TIMES=$((RETRY_TIMES + 1)) + if [ "$RETRY_TIMES" -eq 30 ]; then + echo "::error::Time out while waiting for dataset to be mapped" + exit 1 + fi + sleep 2 +done \ No newline at end of file diff --git a/.github/integration/tests/sda/45_sync_test.sh b/.github/integration/tests/sda/45_sync_test.sh index 5c0f37422..0d949bd06 100644 --- a/.github/integration/tests/sda/45_sync_test.sh +++ b/.github/integration/tests/sda/45_sync_test.sh @@ -3,8 +3,12 @@ set -e cd shared || true +if [ "$STORAGETYPE" = "posix" ]; then + exit 0 +fi + # check bucket for synced files -for file in NA12878.bam.c4gh NA12878_20k_b37.bam.c4gh; do +for file in NA12878.bai NA12878_20k_b37.bai; do RETRY_TIMES=0 until [ "$(s3cmd -c direct ls s3://sync/test_dummy.org/"$file")" != "" ]; do RETRY_TIMES=$((RETRY_TIMES + 1)) From 546f740693b759814fe27be7b095ccc68c66b541 Mon Sep 17 00:00:00 2001 From: Joakim Bygdell Date: Thu, 9 Nov 2023 09:56:16 +0100 Subject: [PATCH 28/34] [integration test] add case for sync-api --- .github/integration/sda-s3-integration.yml | 21 +++++++++++++++++++ .github/integration/sda/config.yaml | 3 +++ .../integration/tests/sda/40_mapper_test.sh | 8 +++---- .github/integration/tests/sda/45_sync_test.sh | 14 ++++++++++++- 4 files changed, 41 insertions(+), 5 deletions(-) diff --git a/.github/integration/sda-s3-integration.yml b/.github/integration/sda-s3-integration.yml index e47f529c9..ffffd50b6 100644 --- a/.github/integration/sda-s3-integration.yml +++ b/.github/integration/sda-s3-integration.yml @@ -232,6 +232,25 @@ services: - ./sda/config.yaml:/config.yaml - shared:/shared + sync-api: + image: ghcr.io/neicnordic/sensitive-data-archive:PR${PR_NUMBER} + command: [ sda-syncapi ] + container_name: sync-api + depends_on: + credentials: + condition: service_completed_successfully + rabbitmq: + condition: service_healthy + environment: + - BROKER_PASSWORD=sync + - BROKER_USER=sync + - BROKER_EXCHANGE=sda.dead + ports: + - "18080:8080" + restart: always + volumes: + - ./sda/config.yaml:/config.yaml + oidc: container_name: oidc command: @@ -276,6 +295,8 @@ services: condition: service_started sync: condition: service_started + sync-api: + condition: service_started verify: condition: service_started environment: diff --git a/.github/integration/sda/config.yaml b/.github/integration/sda/config.yaml index bcc69b99c..9b44e5d75 100644 --- a/.github/integration/sda/config.yaml +++ b/.github/integration/sda/config.yaml @@ -59,6 +59,9 @@ server: jwtpubkeyurl: "http://oidc:8080/jwk" sync: + api: + password: "pass" + user: "user" centerPrefix: "SYNC" destination: type: "s3" diff --git a/.github/integration/tests/sda/40_mapper_test.sh b/.github/integration/tests/sda/40_mapper_test.sh index 416b2bdd3..dc5234b6a 100644 --- 
a/.github/integration/tests/sda/40_mapper_test.sh +++ b/.github/integration/tests/sda/40_mapper_test.sh @@ -44,7 +44,7 @@ curl -s -u guest:guest "http://rabbitmq:15672/api/exchanges/sda/sda/publish" \ # check DB for dataset contents RETRY_TIMES=0 -until [ "$(psql -U postgres -h postgres -d sda -At -c "select count(id) from sda.file_dataset where dataset_id = (select id from sda.datasets where stable_id = 'EGAD74900000101')")" -eq 2 ]; do +until [ "$(psql -U postgres -h postgres -d sda -At -c "select count(id) from sda.file_dataset where dataset_id = (select id from sda.datasets where stable_id = 'EGAD74900000101');")" -eq 2 ]; do echo "waiting for mapper to complete" RETRY_TIMES=$((RETRY_TIMES + 1)) if [ "$RETRY_TIMES" -eq 30 ]; then @@ -63,7 +63,7 @@ for file in NA12878.bam.c4gh NA12878_20k_b37.bam.c4gh; do fi done -until [ "$(psql -U postgres -h postgres -d sda -At -c "select event from sda.file_event_log where file_id = (select id from sda.files where stable_id = 'EGAF74900000002') order by started_at DESC LIMIT 1")" = "ready" ]; do +until [ "$(psql -U postgres -h postgres -d sda -At -c "select event from sda.file_event_log where file_id = (select id from sda.files where stable_id = 'EGAF74900000002') order by started_at DESC LIMIT 1;")" = "ready" ]; do echo "waiting for files be ready" RETRY_TIMES=$((RETRY_TIMES + 1)) if [ "$RETRY_TIMES" -eq 30 ]; then @@ -73,7 +73,7 @@ until [ "$(psql -U postgres -h postgres -d sda -At -c "select event from sda.fil sleep 2 done -until [ "$(psql -U postgres -h postgres -d sda -At -c "select event from sda.dataset_event_log where dataset_id = 'EGAD74900000101' order by event_date DESC LIMIT 1")" = "registered" ]; do +until [ "$(psql -U postgres -h postgres -d sda -At -c "select event from sda.dataset_event_log where dataset_id = 'EGAD74900000101' order by event_date DESC LIMIT 1;")" = "registered" ]; do echo "waiting for dataset be registered" RETRY_TIMES=$((RETRY_TIMES + 1)) if [ "$RETRY_TIMES" -eq 30 ]; then @@ -108,7 +108,7 @@ curl -s -u guest:guest "http://rabbitmq:15672/api/exchanges/sda/sda/publish" \ -H 'Content-Type: application/json;charset=UTF-8' \ -d "$release_body" -until [ "$(psql -U postgres -h postgres -d sda -At -c "select event from sda.dataset_event_log where dataset_id = 'EGAD74900000101' order by event_date DESC LIMIT 1")" = "released" ]; do +until [ "$(psql -U postgres -h postgres -d sda -At -c "select event from sda.dataset_event_log where dataset_id = 'EGAD74900000101' order by event_date DESC LIMIT 1;")" = "released" ]; do echo "waiting for dataset be released" RETRY_TIMES=$((RETRY_TIMES + 1)) if [ "$RETRY_TIMES" -eq 30 ]; then diff --git a/.github/integration/tests/sda/45_sync_test.sh b/.github/integration/tests/sda/45_sync_test.sh index 0d949bd06..f90a5d014 100644 --- a/.github/integration/tests/sda/45_sync_test.sh +++ b/.github/integration/tests/sda/45_sync_test.sh @@ -20,4 +20,16 @@ for file in NA12878.bai NA12878_20k_b37.bai; do done done -echo "files synced successfully" \ No newline at end of file +echo "files synced successfully" + +echo "waiting for sync-api to send messages" +RETRY_TIMES=0 +until [ "$(curl -su guest:guest http://rabbitmq:15672/api/queues/sda/catch_all.dead/ | jq -r '.messages_ready')" -eq 5 ]; do + echo "waiting for sync-api to send messages" + RETRY_TIMES=$((RETRY_TIMES + 1)) + if [ "$RETRY_TIMES" -eq 30 ]; then + echo "::error::Time out while waiting for sync-api to send messages" + exit 1 + fi + sleep 2 +done \ No newline at end of file From 40fd51228486601e6a1ed0c5204845c025603c2d Mon Sep 17 
00:00:00 2001 From: Joakim Bygdell Date: Thu, 9 Nov 2023 10:50:10 +0100 Subject: [PATCH 29/34] Cleanup after rebase --- sda/cmd/sync/sync_test.go | 13 ++++++++----- sda/internal/database/db_functions_test.go | 10 +++++----- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/sda/cmd/sync/sync_test.go b/sda/cmd/sync/sync_test.go index f53208e92..cc7efd046 100644 --- a/sda/cmd/sync/sync_test.go +++ b/sda/cmd/sync/sync_test.go @@ -128,7 +128,10 @@ func (suite *SyncTest) SetupTest() { viper.Set("db.password", "rootpasswd") viper.Set("db.database", "sda") viper.Set("db.sslmode", "disable") - viper.Set("centerPrefix", "prefix") + viper.Set("sync.centerPrefix", "prefix") + viper.Set("sync.remote.host", "http://remote.example") + viper.Set("sync.remote.user", "user") + viper.Set("sync.remote.password", "pass") key := "-----BEGIN CRYPT4GH ENCRYPTED PRIVATE KEY-----\nYzRnaC12MQAGc2NyeXB0ABQAAAAAEna8op+BzhTVrqtO5Rx7OgARY2hhY2hhMjBfcG9seTEzMDUAPMx2Gbtxdva0M2B0tb205DJT9RzZmvy/9ZQGDx9zjlObj11JCqg57z60F0KhJW+j/fzWL57leTEcIffRTA==\n-----END CRYPT4GH ENCRYPTED PRIVATE KEY-----" keyPath, _ := os.MkdirTemp("", "key") @@ -159,13 +162,13 @@ func (suite *SyncTest) TestBuildSyncDatasetJSON() { err = db.SetAccessionID("ed6af454-d910-49e3-8cda-488a6f246e67", fileID) assert.NoError(suite.T(), err) - checksum := sha256.New() - fileInfo := database.FileInfo{Checksum: sha256.New(), Size: 1234, Path: "dummy.user/test/file1.c4gh", DecryptedChecksum: checksum, DecryptedSize: 999} + checksum := fmt.Sprintf("%x", sha256.New().Sum(nil)) + fileInfo := database.FileInfo{Checksum: fmt.Sprintf("%x", sha256.New().Sum(nil)), Size: 1234, Path: "dummy.user/test/file1.c4gh", DecryptedChecksum: checksum, DecryptedSize: 999} corrID := uuid.New().String() err = db.SetArchived(fileInfo, fileID, corrID) assert.NoError(suite.T(), err, "failed to mark file as Archived") - err = db.MarkCompleted(fileInfo, fileID, corrID) + err = db.SetVerified(fileInfo, fileID, corrID) assert.NoError(suite.T(), err, "failed to mark file as Verified") accessions := []string{"ed6af454-d910-49e3-8cda-488a6f246e67"} @@ -175,7 +178,7 @@ func (suite *SyncTest) TestBuildSyncDatasetJSON() { jsonData, err := buildSyncDatasetJSON(m) assert.NoError(suite.T(), err) dataset := []byte(`{"dataset_id":"cd532362-e06e-4461-8490-b9ce64b8d9e7","dataset_files":[{"filepath":"dummy.user/test/file1.c4gh","file_id":"ed6af454-d910-49e3-8cda-488a6f246e67","sha256":"e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"}],"user":"dummy.user"}`) - assert.Equal(suite.T(), dataset, jsonData) + assert.Equal(suite.T(), string(dataset), string(jsonData)) } func (suite *SyncTest) TestCreateHostURL() { diff --git a/sda/internal/database/db_functions_test.go b/sda/internal/database/db_functions_test.go index 16dff65b9..9420731b4 100644 --- a/sda/internal/database/db_functions_test.go +++ b/sda/internal/database/db_functions_test.go @@ -393,13 +393,13 @@ func (suite *DatabaseTests) TestGetSyncData() { fileID, err := db.RegisterFile("/testuser/TestGetGetSyncData.c4gh", "testuser") assert.NoError(suite.T(), err, "failed to register file in database") - checksum := sha256.New() - fileInfo := FileInfo{sha256.New(), 1234, "/tmp/TestGetGetSyncData.c4gh", checksum, 999} + checksum := fmt.Sprintf("%x", sha256.New().Sum(nil)) + fileInfo := FileInfo{fmt.Sprintf("%x", sha256.New().Sum(nil)), 1234, "/tmp/TestGetGetSyncData.c4gh", checksum, 999} corrID := uuid.New().String() err = db.SetArchived(fileInfo, fileID, corrID) assert.NoError(suite.T(), err, "failed to mark file as 
Archived") - err = db.markCompleted(fileInfo, fileID, corrID) + err = db.SetVerified(fileInfo, fileID, corrID) assert.NoError(suite.T(), err, "failed to mark file as Verified") stableID := "TEST:000-1111-2222" @@ -453,9 +453,9 @@ func (suite *DatabaseTests) TestGetArchivePath() { fileID, err := db.RegisterFile("/testuser/TestGetArchivePath-001.c4gh", "testuser") assert.NoError(suite.T(), err, "failed to register file in database") - checksum := sha256.New() + checksum := fmt.Sprintf("%x", sha256.New()) corrID := uuid.New().String() - fileInfo := FileInfo{sha256.New(), 1234, corrID, checksum, 999} + fileInfo := FileInfo{fmt.Sprintf("%x", sha256.New()), 1234, corrID, checksum, 999} err = db.SetArchived(fileInfo, fileID, corrID) assert.NoError(suite.T(), err, "failed to mark file as Archived") From 8d346ea50f33206bd25c63a160ea8c915296a5dc Mon Sep 17 00:00:00 2001 From: Joakim Bygdell Date: Thu, 9 Nov 2023 12:23:31 +0100 Subject: [PATCH 30/34] [Integration test] fix after rebase --- .github/integration/tests/sda/10_upload_test.sh | 2 +- .github/integration/tests/sda/21_cancel_test.sh | 6 +++--- .github/integration/tests/sda/30_backup-finalize_test.sh | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/integration/tests/sda/10_upload_test.sh b/.github/integration/tests/sda/10_upload_test.sh index fb3c36b8d..49990f7bf 100644 --- a/.github/integration/tests/sda/10_upload_test.sh +++ b/.github/integration/tests/sda/10_upload_test.sh @@ -34,7 +34,7 @@ done psql -U postgres -h postgres -d sda -At -c "TRUNCATE TABLE sda.files CASCADE;" if [ "$STORAGETYPE" = "posix" ]; then - for file in NA12878.bam NA12878_20k_b37.bam; do + for file in NA12878.bam NA12878_20k_b37.bam NA12878.bai NA12878_20k_b37.bai; do echo "downloading $file" curl -s -L -o /shared/$file "https://github.com/ga4gh/htsget-refserver/raw/main/data/gcp/gatk-test-data/wgs_bam/$file" if [ ! 
-f "$file.c4gh" ]; then diff --git a/.github/integration/tests/sda/21_cancel_test.sh b/.github/integration/tests/sda/21_cancel_test.sh index a76ab10a0..87f43fa62 100644 --- a/.github/integration/tests/sda/21_cancel_test.sh +++ b/.github/integration/tests/sda/21_cancel_test.sh @@ -86,11 +86,11 @@ curl -k -u guest:guest "http://rabbitmq:15672/api/exchanges/sda/sda/publish" \ -d "$ingest_body" RETRY_TIMES=0 -until [ "$(curl -su guest:guest http://rabbitmq:15672/api/queues/sda/verified/ | jq -r '.messages_ready')" -eq 3 ]; do - echo "waiting for verify to complete" +until [ "$(curl -su guest:guest http://rabbitmq:15672/api/queues/sda/verified/ | jq -r '.messages_ready')" -eq 5 ]; do + echo "waiting for verify to complete after re-ingestion" RETRY_TIMES=$((RETRY_TIMES + 1)) if [ "$RETRY_TIMES" -eq 30 ]; then - echo "::error::Time out while waiting for verify to complete" + echo "::error::Time out while waiting for verify to complete after re-ingestion" exit 1 fi sleep 2 diff --git a/.github/integration/tests/sda/30_backup-finalize_test.sh b/.github/integration/tests/sda/30_backup-finalize_test.sh index 330597b0e..afb0cef56 100644 --- a/.github/integration/tests/sda/30_backup-finalize_test.sh +++ b/.github/integration/tests/sda/30_backup-finalize_test.sh @@ -4,7 +4,7 @@ set -e cd shared || true i=1 -while [ $i -le 2 ]; do +while [ $i -le 4 ]; do ## get correlation id from upload message MSG=$( curl -s -X POST \ From 3c292b8a69b3de0eff77ae1cc1177787b899ab4b Mon Sep 17 00:00:00 2001 From: Joakim Bygdell Date: Thu, 9 Nov 2023 13:41:44 +0100 Subject: [PATCH 31/34] [sync] [sync-api] Update readme --- sda/cmd/sync/sync.md | 46 ++++++++++++++++++---------- sda/cmd/syncapi/syncapi.md | 63 ++++++++++++++++++++++++++++++++++---- 2 files changed, 87 insertions(+), 22 deletions(-) diff --git a/sda/cmd/sync/sync.md b/sda/cmd/sync/sync.md index 8e6c9323a..84edf4f2c 100644 --- a/sda/cmd/sync/sync.md +++ b/sda/cmd/sync/sync.md @@ -1,5 +1,7 @@ # Sync +The sync service is used in the [Bigpicture](https://bigpicture.eu/) project. + Copies files from the archive to the sync destination, including the header so that the files can be ingested at the remote site. ## Configuration @@ -22,6 +24,14 @@ export LOG_LEVEL="debug" export LOG_FORMAT="json" ``` +### Service settings + +- `SYNC_CENTERPREFIX`: Prefix of the dataset ID to detect if the dataset was minted locally or not +- `SYNC_REMOTE_HOST`: URL to the remote API host +- `SYNC_REMOTE_PORT`: Port for the remote API host, if other than the standard HTTP(S) ports +- `SYNC_REMOTE_USER`: Username for connecting to the remote API +- `SYNC_REMOTE_PASSWORD`: Password for the API user + ### Keyfile settings These settings control which crypt4gh keyfile is loaded. @@ -36,7 +46,7 @@ These settings control how sync connects to the RabbitMQ message broker. - `BROKER_HOST`: hostname of the rabbitmq server - `BROKER_PORT`: rabbitmq broker port (commonly `5671` with TLS and `5672` without) -- `BROKER_QUEUE`: message queueor stream to read messages from (commonly `completed_stream`) +- `BROKER_QUEUE`: message queue or stream to read messages from (commonly `mapping_stream`) - `BROKER_USER`: username to connect to rabbitmq - `BROKER_PASSWORD`: password to connect to rabbitmq - `BROKER_PREFETCHCOUNT`: Number of messages to pull from the message server at the time (default to 2) @@ -112,23 +122,27 @@ and if `*_TYPE` is `SFTP`: The sync service copies files from the archive storage to sync storage. -When running, sync reads messages from the "completed" RabbitMQ queue. 
+When running, sync reads messages from the "mapping_stream" RabbitMQ queue. For each message, these steps are taken (if not otherwise noted, errors halts progress, the message is Nack'ed, and the service moves on to the next message): -1. The message is validated as valid JSON that matches the "ingestion-completion" schema. If the message can’t be validated it is sent to the error queue for later analysis. -2. The archive file path and file size is fetched from the database. -3. The file size on disk is requested from the storage system. -4. The archive file size from the database is compared against the disk file size. -5. A file reader is created for the archive storage file, and a file writer is created for the sync storage file. - 1. The header is read from the database. - 2. The header is decrypted. - 3. The header is reencrypted with the destinations public key. - 4. The header is written to the sync file writer. -6. The file data is copied from the archive file reader to the sync file writer. -7. The message is Ack'ed. +1. The message is validated as valid JSON that matches the "dataset-mapping" schema. If the message can’t be validated it is sent to the error queue for later analysis. +2. Checks whether the dataset was created locally by comparing the center prefix of the dataset ID; if it is a remote ID, processing stops. +3. For each stable ID in the dataset the following is performed: + 1. The archive file path and file size is fetched from the database. + 2. The file size on disk is requested from the storage system. + 3. A file reader is created for the archive storage file, and a file writer is created for the sync storage file. + 1. The header is read from the database. + 2. The header is decrypted. + 3. The header is reencrypted with the destination's public key. + 4. The header is written to the sync file writer. + 4. The file data is copied from the archive file reader to the sync file writer. +4. Once all files have been copied to the destination, a JSON struct is created according to the `file-sync` schema. +5. A POST request is sent to the remote API host with the JSON data. +6. The message is Ack'ed. ## Communication -- Sync reads messages from one rabbitmq stream (`completed_stream`) -- Sync reads file information and headers from the database and can not be started without a database connection. This is done using the `GetArchived`, and `GetHeaderForStableID` functions. -- Sync reads data from archive storage and writes data to sync destination storage. +- Sync reads messages from one rabbitmq stream (`mapping_stream`) +- Sync reads file information and headers from the database and can not be started without a database connection. +- Sync re-encrypts the header with the receiving end's public key. +- Sync reads data from archive storage and writes data to sync destination storage with the re-encrypted headers attached. diff --git a/sda/cmd/syncapi/syncapi.md b/sda/cmd/syncapi/syncapi.md index 915f5dbe0..d650e7612 100644 --- a/sda/cmd/syncapi/syncapi.md +++ b/sda/cmd/syncapi/syncapi.md @@ -1,6 +1,60 @@ # sync-api -The sync service is used in the [Bigpicture](https://bigpicture.eu/) project. +The sync-api service is used in the [Bigpicture](https://bigpicture.eu/) project. + +## Configuration + +There are a number of options that can be set for the sync-api service. +These settings can be set by mounting a yaml-file at `/config.yaml` with settings. + +ex. 
+ +```yaml +log: + level: "debug" + format: "json" +``` + +They may also be set using environment variables like: + +```bash +export LOG_LEVEL="debug" +export LOG_FORMAT="json" +``` + +### Service settings + +- `SYNC_API_PASSWORD`: password for the API user +- `SYNC_API_USER`: User that will be allowed to send POST requests to the API + +### RabbitMQ broker settings + +These settings control how sync-api connects to the RabbitMQ message broker. + +- `BROKER_HOST`: hostname of the rabbitmq server +- `BROKER_PORT`: rabbitmq broker port (commonly `5671` with TLS and `5672` without) +- `BROKER_EXCHANGE`: exchange to send messages to +- `BROKER_USER`: username to connect to rabbitmq +- `BROKER_PASSWORD`: password to connect to rabbitmq +- `BROKER_PREFETCHCOUNT`: Number of messages to pull from the message server at the time (default to 2) + +The default routing keys for sending ingestion, accession and mapping messages can be overridden by setting the following values: + +- `SYNC_API_ACCESSIONROUTING` +- `SYNC_API_INGESTROUTING` +- `SYNC_API_MAPPINGROUTING` + +### Logging settings + +- `LOG_FORMAT` can be set to “json” to get logs in json format. All other values result in text logging +- `LOG_LEVEL` can be set to one of the following, in increasing order of severity: + - `trace` + - `debug` + - `info` + - `warn` (or `warning`) + - `error` + - `fatal` + - `panic` ## Service Description @@ -8,11 +62,8 @@ The sync service facilitates replication of data and metadata between the nodes When enabled the service will perform the following tasks: -1. Read messages from the configured queue (sent by the mapper service upon succesful completion of a dataset maping). - 1. Generate a JSON blob with the required file and dataset information required to start and complete ingestion of a dataset on the recieving node. - 2. Send the JSON blob as POST request to the recieving partner. -2. Upon recieving a POST request with JSON data to the `/dataset` route. - 1. Parse the JSON blob and check if dataset is already registered, exit if true. +1. Upon receiving a POST request with JSON data to the `/dataset` route. + 1. Parse the JSON blob and validate it against the `file-sync` schema. 2. Build and send messages to start ingestion of files. 3. Build and send messages to assign stableIDs to files. 4. Build and send messages to map files to a dataset. From 7fd60da217fa0661379b9e914351148eb6606bf9 Mon Sep 17 00:00:00 2001 From: Joakim Bygdell Date: Wed, 15 Nov 2023 14:25:00 +0100 Subject: [PATCH 32/34] [json] for isolated setup we don't care for MD5 sums --- sda/schemas/isolated/ingestion-completion.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sda/schemas/isolated/ingestion-completion.json b/sda/schemas/isolated/ingestion-completion.json index d57ec3768..f6ce335da 100644 --- a/sda/schemas/isolated/ingestion-completion.json +++ b/sda/schemas/isolated/ingestion-completion.json @@ -117,7 +117,7 @@ "$id": "#/properties/decrypted_checksums", "type": "array", "title": "The checksums of the original file", - "description": "The checksums of the original file. 
The md5 one is required", + "description": "The checksums of the original file.", "examples": [ [ { @@ -134,7 +134,7 @@ "type": "object", "properties": { "type": { - "const": "md5" + "const": "sha256" } }, "required": [ From d8a41cc1a4722e6ac5907a94ce26569ee4561ccd Mon Sep 17 00:00:00 2001 From: Joakim Bygdell Date: Wed, 15 Nov 2023 14:33:11 +0100 Subject: [PATCH 33/34] [sync-api] create correct correlation ids --- sda/cmd/syncapi/syncapi.go | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/sda/cmd/syncapi/syncapi.go b/sda/cmd/syncapi/syncapi.go index 7b038c83d..58d69984a 100644 --- a/sda/cmd/syncapi/syncapi.go +++ b/sda/cmd/syncapi/syncapi.go @@ -13,6 +13,7 @@ import ( "syscall" "time" + "github.com/google/uuid" "github.com/gorilla/mux" "github.com/neicnordic/sensitive-data-archive/internal/broker" "github.com/neicnordic/sensitive-data-archive/internal/config" @@ -172,8 +173,8 @@ func parseDatasetMessage(msg []byte) error { if err != nil { return fmt.Errorf("failed to marshal json messge: Reason %v", err) } - - if err := Conf.API.MQ.SendMessage(fmt.Sprintf("%v", time.Now().Unix()), Conf.Broker.Exchange, Conf.SyncAPI.IngestRouting, ingestMsg); err != nil { + corrID := uuid.New().String() + if err := Conf.API.MQ.SendMessage(corrID, Conf.Broker.Exchange, Conf.SyncAPI.IngestRouting, ingestMsg); err != nil { return fmt.Errorf("failed to send ingest messge: Reason %v", err) } @@ -190,7 +191,7 @@ func parseDatasetMessage(msg []byte) error { return fmt.Errorf("failed to marshal json messge: Reason %v", err) } - if err := Conf.API.MQ.SendMessage(fmt.Sprintf("%v", time.Now().Unix()), Conf.Broker.Exchange, Conf.SyncAPI.AccessionRouting, finalizeMsg); err != nil { + if err := Conf.API.MQ.SendMessage(corrID, Conf.Broker.Exchange, Conf.SyncAPI.AccessionRouting, finalizeMsg); err != nil { return fmt.Errorf("failed to send mapping messge: Reason %v", err) } } From a1db42b21c27982b397aa20154d80794e399ab3b Mon Sep 17 00:00:00 2001 From: Joakim Bygdell Date: Wed, 15 Nov 2023 14:37:38 +0100 Subject: [PATCH 34/34] [sync-api] fix comment --- sda/cmd/syncapi/syncapi.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sda/cmd/syncapi/syncapi.go b/sda/cmd/syncapi/syncapi.go index 58d69984a..3ceaead07 100644 --- a/sda/cmd/syncapi/syncapi.go +++ b/sda/cmd/syncapi/syncapi.go @@ -156,7 +156,7 @@ func dataset(w http.ResponseWriter, r *http.Request) { w.WriteHeader(http.StatusOK) } -// parsemessage parses the JSON blob and sends the relevant messages +// parseDatasetMessage parses the JSON blob and sends the relevant messages func parseDatasetMessage(msg []byte) error { log.Debugf("incoming blob %s", msg) blob := syncDataset{}
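
As a rough, hand-run illustration of the flow this series wires together: the sync service builds a `file-sync` JSON blob (the shape asserted in `TestBuildSyncDatasetJSON` above) and POSTs it to the sync-api `/dataset` route, which then republishes ingest, accession and mapping messages. Below is a minimal sketch of exercising that route against the integration setup; it assumes HTTP basic auth with the `sync.api` credentials from `config.yaml` (`user`/`pass`), the `18080:8080` port mapping from the compose file, and payload values copied from the unit-test fixture — test assumptions, not production values.

```bash
# Hypothetical manual call to the sync-api /dataset endpoint.
# Payload values are taken from the TestBuildSyncDatasetJSON fixture;
# credentials and host port come from the integration-test configuration above.
cat > dataset.json <<'EOF'
{
  "dataset_id": "cd532362-e06e-4461-8490-b9ce64b8d9e7",
  "dataset_files": [
    {
      "filepath": "dummy.user/test/file1.c4gh",
      "file_id": "ed6af454-d910-49e3-8cda-488a6f246e67",
      "sha256": "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
    }
  ],
  "user": "dummy.user"
}
EOF

curl -s -u user:pass \
  -H 'Content-Type: application/json' \
  -X POST --data-binary @dataset.json \
  http://localhost:18080/dataset
```

If the request is accepted, the corresponding ingest, accession and mapping messages should appear on the broker, which is roughly what the `45_sync_test.sh` check against the `catch_all.dead` queue counts in the integration run.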