From 629e37db18429a9198dac9b4e2a91ee24f100e51 Mon Sep 17 00:00:00 2001 From: dbampalikis Date: Thu, 7 Nov 2024 16:30:53 +0100 Subject: [PATCH 01/18] [postgres] Add api user priviledges --- postgresql/initdb.d/01_main.sql | 3 ++- postgresql/initdb.d/04_grants.sql | 3 ++- postgresql/migratedb.d/14.sql | 2 +- postgresql/migratedb.d/15.sql | 22 ++++++++++++++++++++++ 4 files changed, 27 insertions(+), 3 deletions(-) create mode 100644 postgresql/migratedb.d/15.sql diff --git a/postgresql/initdb.d/01_main.sql b/postgresql/initdb.d/01_main.sql index cc27cf3df..abbe307f6 100644 --- a/postgresql/initdb.d/01_main.sql +++ b/postgresql/initdb.d/01_main.sql @@ -28,7 +28,8 @@ VALUES (0, now(), 'Created with version'), (11, now(), 'Grant select permission to download on dataset_event_log'), (12, now(), 'Add key hash'), (13, now(), 'Create API user'), - (14, now(), 'Create Auth user'); + (14, now(), 'Create Auth user'), + (15, now(), 'Give API user insert priviledge in logs table'); -- Datasets are used to group files, and permissions are set on the dataset -- level diff --git a/postgresql/initdb.d/04_grants.sql b/postgresql/initdb.d/04_grants.sql index aa8a423f4..0cc65072d 100644 --- a/postgresql/initdb.d/04_grants.sql +++ b/postgresql/initdb.d/04_grants.sql @@ -164,12 +164,13 @@ GRANT USAGE ON SCHEMA sda TO api; GRANT SELECT ON sda.files TO api; GRANT SELECT ON sda.file_dataset TO api; GRANT SELECT ON sda.checksums TO api; -GRANT SELECT ON sda.file_event_log TO api; +GRANT SELECT, INSERT ON sda.file_event_log TO api; GRANT SELECT ON sda.encryption_keys TO api; GRANT SELECT ON sda.datasets TO api; GRANT SELECT ON sda.dataset_event_log TO api; GRANT INSERT ON sda.encryption_keys TO api; GRANT UPDATE ON sda.encryption_keys TO api; +GRANT USAGE, SELECT ON SEQUENCE sda.file_event_log_id_seq TO api; -- legacy schema GRANT USAGE ON SCHEMA local_ega TO api; diff --git a/postgresql/migratedb.d/14.sql b/postgresql/migratedb.d/14.sql index 24a9cff6e..f39a19af1 100644 --- a/postgresql/migratedb.d/14.sql +++ b/postgresql/migratedb.d/14.sql @@ -45,4 +45,4 @@ BEGIN RAISE NOTICE 'Schema migration from % to % does not apply now, skipping', sourcever, sourcever+1; END IF; END -$$ \ No newline at end of file +$$ diff --git a/postgresql/migratedb.d/15.sql b/postgresql/migratedb.d/15.sql new file mode 100644 index 000000000..2849382c4 --- /dev/null +++ b/postgresql/migratedb.d/15.sql @@ -0,0 +1,22 @@ + +DO +$$ +DECLARE +-- The version we know how to do migration from, at the end of a successful migration +-- we will no longer be at this version. + sourcever INTEGER := 14; + changes VARCHAR := 'Give API user insert priviledge in logs table'; +BEGIN + IF (select max(version) from sda.dbschema_version) = sourcever then + RAISE NOTICE 'Doing migration from schema version % to %', sourcever, sourcever+1; + RAISE NOTICE 'Changes: %', changes; + INSERT INTO sda.dbschema_version VALUES(sourcever+1, now(), changes); + + GRANT INSERT ON sda.file_event_log TO api; + GRANT USAGE, SELECT ON SEQUENCE sda.file_event_log_id_seq TO api; + + ELSE + RAISE NOTICE 'Schema migration from % to % does not apply now, skipping', sourcever, sourcever+1; + END IF; +END +$$ From 4f9fcb9573e687db9eb2fa68ab1f928307dc8a6b Mon Sep 17 00:00:00 2001 From: dbampalikis Date: Thu, 7 Nov 2024 16:32:29 +0100 Subject: [PATCH 02/18] [api] Add api configuration --- sda/internal/config/config.go | 12 ++++++++++++ sda/internal/config/config_test.go | 1 + 2 files changed, 13 insertions(+) diff --git a/sda/internal/config/config.go b/sda/internal/config/config.go index ae08188da..3b5e149fc 100644 --- a/sda/internal/config/config.go +++ b/sda/internal/config/config.go @@ -83,6 +83,7 @@ type APIConf struct { Session SessionConfig DB *database.SDAdb MQ *broker.AMQPBroker + INBOX storage.Backend } type SessionConfig struct { @@ -214,6 +215,15 @@ func NewConfig(app string) (*Config, error) { "db.user", "db.password", "db.database", + "inbox.type", + } + switch viper.GetString("inbox.type") { + case S3: + requiredConfVars = append(requiredConfVars, []string{"inbox.url", "inbox.accesskey", "inbox.secretkey", "inbox.bucket"}...) + case POSIX: + requiredConfVars = append(requiredConfVars, []string{"inbox.location"}...) + default: + return nil, fmt.Errorf("inbox.type not set") } case "auth": requiredConfVars = []string{ @@ -464,6 +474,8 @@ func NewConfig(app string) (*Config, error) { return nil, err } + c.configInbox() + err = c.configAPI() if err != nil { return nil, err diff --git a/sda/internal/config/config_test.go b/sda/internal/config/config_test.go index 3b877cb1b..563fcca3f 100644 --- a/sda/internal/config/config_test.go +++ b/sda/internal/config/config_test.go @@ -65,6 +65,7 @@ func (suite *ConfigTestSuite) SetupTest() { viper.Set("inbox.accesskey", "testaccess") viper.Set("inbox.secretkey", "testsecret") viper.Set("inbox.bucket", "testbucket") + viper.Set("inbox.type", "s3") viper.Set("server.jwtpubkeypath", "testpath") viper.Set("log.level", "debug") } From 2c94aa02bdaa6aa720ba651a718f0c8190e680d4 Mon Sep 17 00:00:00 2001 From: dbampalikis Date: Thu, 7 Nov 2024 16:33:02 +0100 Subject: [PATCH 03/18] [api] Add fileID to api struct --- sda/internal/database/database.go | 1 + 1 file changed, 1 insertion(+) diff --git a/sda/internal/database/database.go b/sda/internal/database/database.go index 651e097c0..eed386a8c 100644 --- a/sda/internal/database/database.go +++ b/sda/internal/database/database.go @@ -46,6 +46,7 @@ type SyncData struct { } type SubmissionFileInfo struct { + FileID string `json:"fileID"` InboxPath string `json:"inboxPath"` Status string `json:"fileStatus"` CreateAt string `json:"createAt"` From 6815248748765f5809f47ed5f6bf1b495041a106 Mon Sep 17 00:00:00 2001 From: dbampalikis Date: Thu, 7 Nov 2024 16:36:07 +0100 Subject: [PATCH 04/18] [api] Change list to return id and skip disabled --- sda/internal/database/db_functions.go | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/sda/internal/database/db_functions.go b/sda/internal/database/db_functions.go index bf8843db7..ca445a403 100644 --- a/sda/internal/database/db_functions.go +++ b/sda/internal/database/db_functions.go @@ -638,7 +638,7 @@ func (dbs *SDAdb) getUserFiles(userID string) ([]*SubmissionFileInfo, error) { db := dbs.DB // select all files (that are not part of a dataset) of the user, each one annotated with its latest event - const query = "SELECT f.submission_file_path, e.event, f.created_at FROM sda.files f " + + const query = "SELECT f.id, f.submission_file_path, e.event, f.created_at FROM sda.files f " + "LEFT JOIN (SELECT DISTINCT ON (file_id) file_id, started_at, event FROM sda.file_event_log ORDER BY file_id, started_at DESC) e ON f.id = e.file_id WHERE f.submission_user = $1 " + "AND f.id NOT IN (SELECT f.id FROM sda.files f RIGHT JOIN sda.file_dataset d ON f.id = d.file_id); " @@ -653,13 +653,15 @@ func (dbs *SDAdb) getUserFiles(userID string) ([]*SubmissionFileInfo, error) { for rows.Next() { // Read rows into struct fi := &SubmissionFileInfo{} - err := rows.Scan(&fi.InboxPath, &fi.Status, &fi.CreateAt) + err := rows.Scan(&fi.FileID, &fi.InboxPath, &fi.Status, &fi.CreateAt) if err != nil { return nil, err } - // Add instance of struct (file) to array - files = append(files, fi) + // Add instance of struct (file) to array if the status is not disabled + if fi.Status != "disabled" { + files = append(files, fi) + } } return files, nil From 8091441789abab0a8704f29ee55fa3fa6eb512c0 Mon Sep 17 00:00:00 2001 From: dbampalikis Date: Thu, 7 Nov 2024 16:37:07 +0100 Subject: [PATCH 05/18] [api] Add disable file event log function --- sda/internal/database/db_functions.go | 31 +++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/sda/internal/database/db_functions.go b/sda/internal/database/db_functions.go index ca445a403..4ddf815c9 100644 --- a/sda/internal/database/db_functions.go +++ b/sda/internal/database/db_functions.go @@ -62,6 +62,37 @@ func (dbs *SDAdb) getFileID(corrID string) (string, error) { return fileID, nil } +// UserFileExists checks if a file exists in the database +// for a given user and fileID +func (dbs *SDAdb) GetFilePathFromID(submission_user, fileID string) (string, error) { + var ( + err error + count int + filePath string + ) + + for count == 0 || (err != nil && count < RetryTimes) { + filePath, err = dbs.getFilePathFromID(submission_user, fileID) + count++ + } + + return filePath, err +} +func (dbs *SDAdb) getFilePathFromID(submission_user, fileID string) (string, error) { + dbs.checkAndReconnectIfNeeded() + db := dbs.DB + + const getFilePath = "SELECT submission_file_path from sda.files where submission_user= $1 and id = $2;" + + var filePath string + err := db.QueryRow(getFilePath, submission_user, fileID).Scan(&filePath) + if err != nil { + return "", err + } + + return filePath, nil +} + // UpdateFileEventLog updates the status in of the file in the database. // The message parameter is the rabbitmq message sent on file upload. func (dbs *SDAdb) UpdateFileEventLog(fileUUID, event, corrID, user, details, message string) error { From 842b5d3c2832ca14d4d4389aecc4e8aae3e460d0 Mon Sep 17 00:00:00 2001 From: dbampalikis Date: Thu, 7 Nov 2024 16:37:48 +0100 Subject: [PATCH 06/18] [api] Add delete file functionality --- sda/cmd/api/api.go | 69 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) diff --git a/sda/cmd/api/api.go b/sda/cmd/api/api.go index c87659f2e..45b2ea13e 100644 --- a/sda/cmd/api/api.go +++ b/sda/cmd/api/api.go @@ -8,6 +8,7 @@ import ( "encoding/hex" "encoding/json" "fmt" + "math" "net/http" "os" "os/signal" @@ -25,6 +26,7 @@ import ( "github.com/neicnordic/sensitive-data-archive/internal/database" "github.com/neicnordic/sensitive-data-archive/internal/jsonadapter" "github.com/neicnordic/sensitive-data-archive/internal/schema" + "github.com/neicnordic/sensitive-data-archive/internal/storage" "github.com/neicnordic/sensitive-data-archive/internal/userauth" log "github.com/sirupsen/logrus" ) @@ -54,6 +56,10 @@ func main() { if err != nil { log.Fatal(err) } + Conf.API.INBOX, err = storage.NewBackend(Conf.Inbox) + if err != nil { + log.Fatal(err) + } if err := setupJwtAuth(); err != nil { log.Fatalf("error when setting up JWT auth, reason %s", err.Error()) @@ -100,6 +106,7 @@ func setup(config *config.Config) *http.Server { r.POST("/c4gh-keys/add", rbac(e), addC4ghHash) // Adds a key hash to the database r.GET("/c4gh-keys/list", rbac(e), listC4ghHashes) // Lists key hashes in the database r.POST("/c4gh-keys/deprecate/*keyHash", rbac(e), deprecateC4ghHash) // Deprecate a given key hash + r.DELETE("/file/:username/*file", rbac(e), deleteFile) // Delete a file from inbox // submission endpoints below here r.POST("/file/ingest", rbac(e), ingestFile) // start ingestion of a file r.POST("/file/accession", rbac(e), setAccession) // assign accession ID to a file @@ -283,6 +290,66 @@ func ingestFile(c *gin.Context) { c.Status(http.StatusOK) } +// The deleteFile function will take the user id and the file id +// because the user id won't be in the path of the file in the future +// therefore we need to make the list files, get the file id and then +// call the delete file +func deleteFile(c *gin.Context) { + + inbox, err := storage.NewBackend(Conf.Inbox) + if err != nil { + log.Fatal(err) + } + + submissionUser := c.Param("username") + log.Warn("submission user:", submissionUser) + + // TODO: Add check for the input file path + fileID := c.Param("file") + fileID = strings.TrimPrefix(fileID, "/") + log.Warn("submission file:", fileID) + if fileID == "" { + c.AbortWithStatusJSON(http.StatusBadRequest, "file ID is required") + } + + filePath := "" + // Get the file path from the fileID and submission user + if filePath, err = Conf.API.DB.GetFilePathFromID(submissionUser, fileID); err != nil { + log.Errorf("getting file from fileID failed, reason: (%v)", err) + c.AbortWithStatusJSON(http.StatusNotFound, "File could not be found in inbox") + + return + } + + // Requires a filepath instead of fileID + // TODO: The remove fails randomly sometimes, maybe we should retry + var RetryTimes = 5 + for count := 1; count <= RetryTimes; count++ { + log.Warn("trying to remove file from inbox, try", count) + err = inbox.RemoveFile(filePath) + if err != nil { + log.Errorf("Remove file from inbox failed, reason: %v", err) + } + + // The GetFileSize returns zero in case of error after retrying a number of times + fileSize, _ := inbox.GetFileSize(filePath) + if fileSize == 0 { + break + } + + time.Sleep(time.Duration(math.Pow(2, float64(count))) * time.Second) + } + + if err := Conf.API.DB.UpdateFileEventLog(fileID, "disabled", fileID, "api", "{}", "{}"); err != nil { + log.Errorf("set status deleted failed, reason: (%v)", err) + c.AbortWithStatusJSON(http.StatusInternalServerError, err.Error()) + + return + } + + c.Status(http.StatusOK) +} + func setAccession(c *gin.Context) { var accession schema.IngestionAccession if err := c.BindJSON(&accession); err != nil { @@ -476,6 +543,8 @@ func listActiveUsers(c *gin.Context) { c.JSON(http.StatusOK, users) } +// listUserFiles returns a list of files for a specific user +// If the file has status disabled, the file will be skipped func listUserFiles(c *gin.Context) { username := c.Param("username") username = strings.TrimPrefix(username, "/") From b7bdca85d70f5e03e0ea222c759324649616206d Mon Sep 17 00:00:00 2001 From: MalinAhlberg Date: Mon, 18 Nov 2024 13:30:16 +0100 Subject: [PATCH 07/18] [api] add integration test --- .../tests/sda/60_api_admin_test.sh | 40 +++++++++++++++++++ sda/internal/config/config.go | 1 - 2 files changed, 40 insertions(+), 1 deletion(-) diff --git a/.github/integration/tests/sda/60_api_admin_test.sh b/.github/integration/tests/sda/60_api_admin_test.sh index 2b01bc525..c02e447b3 100644 --- a/.github/integration/tests/sda/60_api_admin_test.sh +++ b/.github/integration/tests/sda/60_api_admin_test.sh @@ -1,5 +1,6 @@ #!/bin/sh set -e +cd shared || true token="$(curl http://oidc:8080/tokens | jq -r '.[0]')" result="$(curl -sk -L "http://api:8080/users/test@dummy.org/files" -H "Authorization: Bearer $token" | jq '. | length')" @@ -7,4 +8,43 @@ if [ "$result" -ne 2 ]; then echo "wrong number of files returned for user test@dummy.org" echo "expected 4 got $result" exit 1 +fi + + +## reupload a file under a different name, test to delete it +s3cmd -c s3cfg put NA12878.bam.c4gh s3://test_dummy.org/NC12878.bam.c4gh + +echo "waiting for upload to complete" +URI=http://rabbitmq:15672 +if [ -n "$PGSSLCERT" ]; then + URI=https://rabbitmq:15671 +fi +RETRY_TIMES=0 +until [ "$(curl -s -k -u guest:guest $URI/api/queues/sda/inbox | jq -r '."messages_ready"')" -gt 0 ]; do + echo "waiting for upload to complete" + RETRY_TIMES=$((RETRY_TIMES + 1)) + if [ "$RETRY_TIMES" -eq 10 ]; then + echo "::error::Time out while waiting for upload to complete" + exit 1 + fi + echo "read now:" `curl -s -k -u guest:guest $URI/api/queues/sda/inbox | jq -r '."messages_ready"'` + sleep 2 +done + +# get the fileId of the new file +fileid="$(curl -k -L -H "Authorization: Bearer $token" -H "Content-Type: application/json" "http://api:8080/users/test@dummy.org/files" | jq -r '.[] | select(.inboxPath == "test_dummy.org/NC12878.bam.c4gh") | .fileID')" +echo "Found uploaded file " $fileid + +# delete it +resp="$(curl -s -k -L -o /dev/null -w "%{http_code}\n" -H "Authorization: Bearer $token" -H "Content-Type: application/json" -X DELETE "http://api:8080/file/test@dummy.org/$fileid")" +if [ "$resp" != "200" ]; then + echo "Error when deleting the file, expected 200 got: $resp" + exit 1 +fi + +last_event=$(psql -U postgres -h postgres -d sda -At -c "SELECT event FROM sda.file_event_log WHERE file_id='$fileid' order by started_at desc limit 1;") +echo "last event was" $last_event + +if [[ "$last_event" != "disabled" ]]; then + echo "The file $fileid does not have the expected las event 'disabled', but $last_event." fi \ No newline at end of file diff --git a/sda/internal/config/config.go b/sda/internal/config/config.go index 3b5e149fc..cdc1472c5 100644 --- a/sda/internal/config/config.go +++ b/sda/internal/config/config.go @@ -215,7 +215,6 @@ func NewConfig(app string) (*Config, error) { "db.user", "db.password", "db.database", - "inbox.type", } switch viper.GetString("inbox.type") { case S3: From 8aeb2c5c963794f122f6660ac1bfffbb4731c338 Mon Sep 17 00:00:00 2001 From: MalinAhlberg Date: Mon, 18 Nov 2024 14:59:31 +0100 Subject: [PATCH 08/18] [api delete] only search for unarchived files --- sda/cmd/api/api.go | 2 +- sda/internal/database/db_functions.go | 16 ++++++++++------ 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/sda/cmd/api/api.go b/sda/cmd/api/api.go index 45b2ea13e..0068a17b5 100644 --- a/sda/cmd/api/api.go +++ b/sda/cmd/api/api.go @@ -314,7 +314,7 @@ func deleteFile(c *gin.Context) { filePath := "" // Get the file path from the fileID and submission user - if filePath, err = Conf.API.DB.GetFilePathFromID(submissionUser, fileID); err != nil { + if filePath, err = Conf.API.DB.GetInboxFilePathFromID(submissionUser, fileID); err != nil { log.Errorf("getting file from fileID failed, reason: (%v)", err) c.AbortWithStatusJSON(http.StatusNotFound, "File could not be found in inbox") diff --git a/sda/internal/database/db_functions.go b/sda/internal/database/db_functions.go index 4ddf815c9..b42a7bbd7 100644 --- a/sda/internal/database/db_functions.go +++ b/sda/internal/database/db_functions.go @@ -62,9 +62,9 @@ func (dbs *SDAdb) getFileID(corrID string) (string, error) { return fileID, nil } -// UserFileExists checks if a file exists in the database -// for a given user and fileID -func (dbs *SDAdb) GetFilePathFromID(submission_user, fileID string) (string, error) { +// GetInboxFilePathFromID checks if a file exists in the database for a given user and fileID +// and that is not yet archived +func (dbs *SDAdb) GetInboxFilePathFromID(submissionUser, fileID string) (string, error) { var ( err error count int @@ -72,17 +72,21 @@ func (dbs *SDAdb) GetFilePathFromID(submission_user, fileID string) (string, err ) for count == 0 || (err != nil && count < RetryTimes) { - filePath, err = dbs.getFilePathFromID(submission_user, fileID) + filePath, err = dbs.getInboxFilePathFromID(submissionUser, fileID) count++ } return filePath, err } -func (dbs *SDAdb) getFilePathFromID(submission_user, fileID string) (string, error) { +func (dbs *SDAdb) getInboxFilePathFromID(submission_user, fileID string) (string, error) { dbs.checkAndReconnectIfNeeded() db := dbs.DB - const getFilePath = "SELECT submission_file_path from sda.files where submission_user= $1 and id = $2;" + const getFilePath = "SELECT submission_file_path from sda.files where " + + "submission_user= $1 and id = $2 " + + "AND EXISTS (SELECT 1 FROM " + + "(SELECT event from sda.file_event_log where file_id = $2 order by started_at desc limit 1) " + + "as subquery WHERE event in ('registered', 'uploaded', 'submitted', 'ingested'))" var filePath string err := db.QueryRow(getFilePath, submission_user, fileID).Scan(&filePath) From 9cc1d89ab125581c44f9ff10d99fa92152eeb67c Mon Sep 17 00:00:00 2001 From: MalinAhlberg Date: Tue, 19 Nov 2024 11:52:49 +0100 Subject: [PATCH 09/18] [tests] api admin tests --- .../tests/sda/60_api_admin_test.sh | 109 +++++++++++++++--- 1 file changed, 90 insertions(+), 19 deletions(-) diff --git a/.github/integration/tests/sda/60_api_admin_test.sh b/.github/integration/tests/sda/60_api_admin_test.sh index c02e447b3..faab301da 100644 --- a/.github/integration/tests/sda/60_api_admin_test.sh +++ b/.github/integration/tests/sda/60_api_admin_test.sh @@ -3,48 +3,119 @@ set -e cd shared || true token="$(curl http://oidc:8080/tokens | jq -r '.[0]')" +# Upload a file and mke sure it's listed result="$(curl -sk -L "http://api:8080/users/test@dummy.org/files" -H "Authorization: Bearer $token" | jq '. | length')" if [ "$result" -ne 2 ]; then echo "wrong number of files returned for user test@dummy.org" - echo "expected 4 got $result" + echo "expected 2 got $result" exit 1 fi -## reupload a file under a different name, test to delete it -s3cmd -c s3cfg put NA12878.bam.c4gh s3://test_dummy.org/NC12878.bam.c4gh +# Reupload a file under a different name, test to delete it +s3cmd -c s3cfg put "NA12878.bam.c4gh" s3://test_dummy.org/NC12878.bam.c4gh echo "waiting for upload to complete" URI=http://rabbitmq:15672 if [ -n "$PGSSLCERT" ]; then - URI=https://rabbitmq:15671 + URI=https://rabbitmq:15671 fi RETRY_TIMES=0 -until [ "$(curl -s -k -u guest:guest $URI/api/queues/sda/inbox | jq -r '."messages_ready"')" -gt 0 ]; do - echo "waiting for upload to complete" - RETRY_TIMES=$((RETRY_TIMES + 1)) - if [ "$RETRY_TIMES" -eq 10 ]; then - echo "::error::Time out while waiting for upload to complete" - exit 1 - fi - echo "read now:" `curl -s -k -u guest:guest $URI/api/queues/sda/inbox | jq -r '."messages_ready"'` - sleep 2 +until [ "$(curl -s -k -u guest:guest $URI/api/queues/sda/inbox | jq -r '."messages_ready"')" -eq 4 ]; do + echo "waiting for upload to complete" + RETRY_TIMES=$((RETRY_TIMES + 1)) + if [ "$RETRY_TIMES" -eq 30 ]; then + echo "::error::Time out while waiting for upload to complete" + exit 1 + fi + sleep 2 done # get the fileId of the new file fileid="$(curl -k -L -H "Authorization: Bearer $token" -H "Content-Type: application/json" "http://api:8080/users/test@dummy.org/files" | jq -r '.[] | select(.inboxPath == "test_dummy.org/NC12878.bam.c4gh") | .fileID')" -echo "Found uploaded file " $fileid # delete it resp="$(curl -s -k -L -o /dev/null -w "%{http_code}\n" -H "Authorization: Bearer $token" -H "Content-Type: application/json" -X DELETE "http://api:8080/file/test@dummy.org/$fileid")" if [ "$resp" != "200" ]; then - echo "Error when deleting the file, expected 200 got: $resp" - exit 1 + echo "Error when deleting the file, expected 200 got: $resp" + exit 1 fi last_event=$(psql -U postgres -h postgres -d sda -At -c "SELECT event FROM sda.file_event_log WHERE file_id='$fileid' order by started_at desc limit 1;") -echo "last event was" $last_event -if [[ "$last_event" != "disabled" ]]; then - echo "The file $fileid does not have the expected las event 'disabled', but $last_event." +if [ "$last_event" != "disabled" ]; then + echo "The file $fileid does not have the expected las event 'disabled', but $last_event." +fi + + +# Try to delete an unknown file +resp="$(curl -s -k -L -o /dev/null -w "%{http_code}\n" -H "Authorization: Bearer $token" -H "Content-Type: application/json" -X DELETE "http://api:8080/file/test@dummy.org/badfileid")" +if [ "$resp" != "404" ]; then + echo "Error when deleting the file, expected error got: $resp" + exit 1 +fi + + +# Try to delete file of other user +fileid="$(curl -k -L -H "Authorization: Bearer $token" -H "Content-Type: application/json" "http://api:8080/users/requester@demo.org/files" | jq -r '.[0]| .fileID')" +resp="$(curl -s -k -L -o /dev/null -w "%{http_code}\n" -H "Authorization: Bearer $token" -H "Content-Type: application/json" -X DELETE "http://api:8080/file/test@dummy.org/$fileid")" +if [ "$resp" != "404" ]; then + echo "Error when deleting the file, expected 404 got: $resp" + exit 1 +fi + + +# Re-upload the file and use the api to ingest it, then try to delete it +s3cmd -c s3cfg put NA12878.bam.c4gh s3://test_dummy.org/NE12878.bam.c4gh + +URI=http://rabbitmq:15672 +if [ -n "$PGSSLCERT" ]; then + URI=https://rabbitmq:15671 +fi +RETRY_TIMES=0 +until [ "$(curl -s -k -u guest:guest $URI/api/queues/sda/inbox | jq -r '."messages_ready"')" -eq 6 ]; do + echo "waiting for upload to complete" + RETRY_TIMES=$((RETRY_TIMES + 1)) + if [ "$RETRY_TIMES" -eq 3 ]; then + echo "::error::Time out while waiting for upload to complete" + #exit 1 + break + fi + sleep 2 +done + +# Ingest it +new_payload=$( +jq -c -n \ + --arg filepath "test_dummy.org/NE12878.bam.c4gh" \ + --arg user "test@dummy.org" \ + '$ARGS.named' +) + +resp="$(curl -s -k -L -o /dev/null -w "%{http_code}\n" -H "Authorization: Bearer $token" -H "Content-Type: application/json" -X POST -d "$new_payload" "http://api:8080/file/ingest")" +if [ "$resp" != "200" ]; then + echo "Error when requesting to ingesting file, expected 200 got: $resp" + exit 1 +fi + +fileid="$(curl -k -L -H "Authorization: Bearer $token" -H "Content-Type: application/json" "http://api:8080/users/test@dummy.org/files" | jq -r '.[] | select(.inboxPath == "test_dummy.org/NE12878.bam.c4gh") | .fileID')" +# wait for the fail to get the correct status +RETRY_TIMES=0 + +until [ "$(psql -U postgres -h postgres -d sda -At -c "select id from sda.file_events e where e.title in (select event from sda.file_event_log where file_id = '$fileid' order by started_at desc limit 1);")" -gt 30 ]; do + echo "waiting for ingest to complete" + RETRY_TIMES=$((RETRY_TIMES + 1)) + if [ "$RETRY_TIMES" -eq 30 ]; then + echo "::error::Time out while waiting for upload to complete" + exit 1 + fi + sleep 2 +done + +# Try to delete file not in inbox +fileid="$(curl -k -L -H "Authorization: Bearer $token" -H "Content-Type: application/json" "http://api:8080/users/test@dummy.org/files" | jq -r '.[] | select(.inboxPath == "test_dummy.org/NE12878.bam.c4gh") | .fileID')" +resp="$(curl -s -k -L -o /dev/null -w "%{http_code}\n" -H "Authorization: Bearer $token" -H "Content-Type: application/json" -X DELETE "http://api:8080/file/test@dummy.org/$fileid")" +if [ "$resp" != "404" ]; then + echo "Error when deleting the file, expected 404 got: $resp" + exit 1 fi \ No newline at end of file From edc3cf89cbef95656437732589f88295cd3517ea Mon Sep 17 00:00:00 2001 From: MalinAhlberg Date: Mon, 18 Nov 2024 14:59:31 +0100 Subject: [PATCH 10/18] [api delete] only search for unarchived files --- sda/internal/database/db_functions.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sda/internal/database/db_functions.go b/sda/internal/database/db_functions.go index b42a7bbd7..234203f19 100644 --- a/sda/internal/database/db_functions.go +++ b/sda/internal/database/db_functions.go @@ -78,7 +78,8 @@ func (dbs *SDAdb) GetInboxFilePathFromID(submissionUser, fileID string) (string, return filePath, err } -func (dbs *SDAdb) getInboxFilePathFromID(submission_user, fileID string) (string, error) { + +func (dbs *SDAdb) getInboxFilePathFromID(submissionUser, fileID string) (string, error) { dbs.checkAndReconnectIfNeeded() db := dbs.DB @@ -89,7 +90,7 @@ func (dbs *SDAdb) getInboxFilePathFromID(submission_user, fileID string) (string "as subquery WHERE event in ('registered', 'uploaded', 'submitted', 'ingested'))" var filePath string - err := db.QueryRow(getFilePath, submission_user, fileID).Scan(&filePath) + err := db.QueryRow(getFilePath, submissionUser, fileID).Scan(&filePath) if err != nil { return "", err } From 11e6330749fb5ff16793cc0568c2cc1d2cc91dcb Mon Sep 17 00:00:00 2001 From: MalinAhlberg Date: Tue, 19 Nov 2024 14:40:53 +0100 Subject: [PATCH 11/18] [tests] unit test for db function --- sda/internal/database/db_functions_test.go | 25 ++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/sda/internal/database/db_functions_test.go b/sda/internal/database/db_functions_test.go index b71b8e6b8..fbeb82402 100644 --- a/sda/internal/database/db_functions_test.go +++ b/sda/internal/database/db_functions_test.go @@ -981,3 +981,28 @@ func (suite *DatabaseTests) TestUpdateUserInfo_newInfo() { assert.NoError(suite.T(), err) assert.Equal(suite.T(), groups, dbgroups) } + +func (suite *DatabaseTests) TestGetInboxFilePathFromID() { + + db, err := NewSDAdb(suite.dbConf) + assert.NoError(suite.T(), err, "got (%v) when creating new connection", err) + + user := "UserX" + filePath := fmt.Sprintf("/%v/Deletefile1.c4gh", user) + fileID, err := db.RegisterFile(filePath, user) + if err != nil { + suite.FailNow("Failed to register file") + } + err = db.UpdateFileEventLog(fileID, "uploaded", fileID, "User-z", "{}", "{}") + if err != nil { + suite.FailNow("Failed to update file event log") + } + path, err := db.getInboxFilePathFromID(user, fileID) + assert.NoError(suite.T(), err) + assert.Equal(suite.T(), path, filePath) + + err = db.UpdateFileEventLog(fileID, "archived", fileID, user, "{}", "{}") + assert.NoError(suite.T(), err) + _, err = db.getInboxFilePathFromID(user, fileID) + assert.Error(suite.T(), err) +} From d82442bdc4b5b43a7839d38d62c8138082a881df Mon Sep 17 00:00:00 2001 From: MalinAhlberg Date: Thu, 21 Nov 2024 08:24:10 +0100 Subject: [PATCH 12/18] [api] allow deletion of files marked with error --- sda/internal/database/db_functions.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sda/internal/database/db_functions.go b/sda/internal/database/db_functions.go index 234203f19..7682dd55a 100644 --- a/sda/internal/database/db_functions.go +++ b/sda/internal/database/db_functions.go @@ -87,7 +87,7 @@ func (dbs *SDAdb) getInboxFilePathFromID(submissionUser, fileID string) (string, "submission_user= $1 and id = $2 " + "AND EXISTS (SELECT 1 FROM " + "(SELECT event from sda.file_event_log where file_id = $2 order by started_at desc limit 1) " + - "as subquery WHERE event in ('registered', 'uploaded', 'submitted', 'ingested'))" + "as subquery WHERE event in ('registered', 'uploaded', 'submitted', 'ingested', 'error'))" var filePath string err := db.QueryRow(getFilePath, submissionUser, fileID).Scan(&filePath) From 2dba5ca2d1d800ead18abdf74a3527ef3be9fb1e Mon Sep 17 00:00:00 2001 From: MalinAhlberg Date: Thu, 21 Nov 2024 08:40:54 +0100 Subject: [PATCH 13/18] [charts] add inbox conf to api --- charts/sda-svc/templates/api-deploy.yaml | 49 ++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/charts/sda-svc/templates/api-deploy.yaml b/charts/sda-svc/templates/api-deploy.yaml index ea68efe54..5d0486baf 100644 --- a/charts/sda-svc/templates/api-deploy.yaml +++ b/charts/sda-svc/templates/api-deploy.yaml @@ -147,6 +147,40 @@ spec: value: {{ required "A valid DB host is required" .Values.global.db.host | quote }} - name: DB_PORT value: {{ .Values.global.db.port | quote }} + - name: INBOX_TYPE + {{- if eq "s3" .Values.global.inbox.storageType }} + value: "s3" + - name: INBOX_BUCKET + value: {{ required "S3 inbox bucket missing" .Values.global.inbox.s3Bucket }} + {{- if and .Values.global.inbox.s3CaFile .Values.global.tls.enabled }} + - name: INBOX_CACERT + value: {{ template "tlsPath" . }}/ca.crt + {{- end }} + - name: INBOX_REGION + value: {{ default "us-east-1" .Values.global.inbox.s3Region }} + - name: INBOX_URL + value: {{ required "S3 inbox URL missing" .Values.global.inbox.s3Url }} + {{- if .Values.global.inbox.s3Port }} + - name: INBOX_PORT + value: {{ .Values.global.inbox.s3Port | quote }} + {{- end }} + {{- else }} + value: "posix" + - name: INBOX_LOCATION + value: "{{ .Values.global.inbox.path }}/" + {{- end }} + {{- if eq "s3" .Values.global.inbox.storageType }} + - name: INBOX_ACCESSKEY + valueFrom: + secretKeyRef: + name: {{ template "sda.fullname" . }}-s3inbox-keys + key: s3InboxAccessKey + - name: INBOX_SECRETKEY + valueFrom: + secretKeyRef: + name: {{ template "sda.fullname" . }}-s3inbox-keys + key: s3InboxSecretKey + {{- end }} {{- if .Values.global.log.format }} - name: LOG_FORMAT value: {{ .Values.global.log.format | quote }} @@ -204,6 +238,10 @@ spec: {{- if .Values.global.tls.enabled }} - name: tls mountPath: {{ template "tlsPath" . }} + {{- end }} + {{- if eq "posix" .Values.global.inbox.storageType }} + - name: inbox + mountPath: {{ .Values.global.inbox.path | quote }} {{- end }} volumes: {{- if not .Values.global.vaultSecrets }} @@ -238,4 +276,15 @@ spec: secretName: {{ required "An certificate issuer or a TLS secret name is required for api" .Values.api.tls.secretName }} {{- end }} {{- end }} + {{- if eq "posix" .Values.global.inbox.storageType }} + - name: inbox + {{- if .Values.global.inbox.existingClaim }} + persistentVolumeClaim: + claimName: {{ .Values.global.inbox.existingClaim }} + {{- else }} + nfs: + server: {{ required "An inbox NFS server is required" .Values.global.inbox.nfsServer | quote }} + path: {{ if .Values.global.inbox.nfsPath }}{{ .Values.global.inbox.nfsPath | quote }}{{ else }}{{ "/" }}{{ end }} + {{- end }} + {{- end }} {{- end }} From 4e9efe8de015f77d0a997f1ddd48c949892cbc7a Mon Sep 17 00:00:00 2001 From: MalinAhlberg Date: Thu, 21 Nov 2024 14:33:59 +0100 Subject: [PATCH 14/18] [api] add documentation --- sda/cmd/api/api.go | 2 +- sda/cmd/api/api.md | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/sda/cmd/api/api.go b/sda/cmd/api/api.go index 0068a17b5..a17547e1c 100644 --- a/sda/cmd/api/api.go +++ b/sda/cmd/api/api.go @@ -322,7 +322,7 @@ func deleteFile(c *gin.Context) { } // Requires a filepath instead of fileID - // TODO: The remove fails randomly sometimes, maybe we should retry + // Note: The remove fails randomly sometimes var RetryTimes = 5 for count := 1; count <= RetryTimes; count++ { log.Warn("trying to remove file from inbox, try", count) diff --git a/sda/cmd/api/api.md b/sda/cmd/api/api.md index f7d6bf514..d07660143 100644 --- a/sda/cmd/api/api.md +++ b/sda/cmd/api/api.md @@ -74,6 +74,24 @@ Admin endpoints are only available to a set of whitelisted users specified in th curl -H "Authorization: Bearer $token" -H "Content-Type: application/json" -X POST -d '{"accession_id": "my-id-01", "filepath": "/uploads/file.c4gh", "user": "testuser"}' https://HOSTNAME/file/accession ``` +- `/file/:username/*fileid` + - accepts `DELETE` requests + - marks the file as `disabled` in the database, and deletes it from the inbox. + - The file is identfied by its id, returned by `users/:username/:files` + + - Response codes + - `200` Query execute ok. + - `400` File id not provided + - `401` Token user is not in the list of admins. + - `404` File not found + - `500` Internal error due to Inbox, DB or MQ failures. + + Example: + + ```bash + curl -H "Authorization: Bearer $token" -H "Content-Type: application/json" -X POST -d '{"accession_ids": ["my-id-01", "my-id-02"], "dataset_id": "my-dataset-01"}' https://HOSTNAME/dataset/create + ``` + - `/dataset/create` - accepts `POST` requests with JSON data with the format: `{"accession_ids": ["", ""], "dataset_id": "", "user": ""}` - creates a dataset from the list of accession IDs and the dataset ID. From ad71a983be1ef5f8d2a640121075cc13e194a5a8 Mon Sep 17 00:00:00 2001 From: MalinAhlberg Date: Wed, 27 Nov 2024 09:14:28 +0100 Subject: [PATCH 15/18] [api] typos etc --- .github/integration/tests/sda/60_api_admin_test.sh | 2 +- sda/cmd/api/api.go | 1 - sda/cmd/api/api.md | 2 +- 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/integration/tests/sda/60_api_admin_test.sh b/.github/integration/tests/sda/60_api_admin_test.sh index faab301da..30af0219c 100644 --- a/.github/integration/tests/sda/60_api_admin_test.sh +++ b/.github/integration/tests/sda/60_api_admin_test.sh @@ -3,7 +3,7 @@ set -e cd shared || true token="$(curl http://oidc:8080/tokens | jq -r '.[0]')" -# Upload a file and mke sure it's listed +# Upload a file and make sure it's listed result="$(curl -sk -L "http://api:8080/users/test@dummy.org/files" -H "Authorization: Bearer $token" | jq '. | length')" if [ "$result" -ne 2 ]; then echo "wrong number of files returned for user test@dummy.org" diff --git a/sda/cmd/api/api.go b/sda/cmd/api/api.go index a17547e1c..1509c0068 100644 --- a/sda/cmd/api/api.go +++ b/sda/cmd/api/api.go @@ -304,7 +304,6 @@ func deleteFile(c *gin.Context) { submissionUser := c.Param("username") log.Warn("submission user:", submissionUser) - // TODO: Add check for the input file path fileID := c.Param("file") fileID = strings.TrimPrefix(fileID, "/") log.Warn("submission file:", fileID) diff --git a/sda/cmd/api/api.md b/sda/cmd/api/api.md index d07660143..61d2417c6 100644 --- a/sda/cmd/api/api.md +++ b/sda/cmd/api/api.md @@ -89,7 +89,7 @@ Admin endpoints are only available to a set of whitelisted users specified in th Example: ```bash - curl -H "Authorization: Bearer $token" -H "Content-Type: application/json" -X POST -d '{"accession_ids": ["my-id-01", "my-id-02"], "dataset_id": "my-dataset-01"}' https://HOSTNAME/dataset/create + curl -H "Authorization: Bearer $token" -H "Content-Type: application/json" -X DELETE https://HOSTNAME/file/user@demo.org/123abc ``` - `/dataset/create` From fe0a1bed55a19e21078f8ce73d2c6039506a1c37 Mon Sep 17 00:00:00 2001 From: MalinAhlberg Date: Fri, 29 Nov 2024 08:52:02 +0100 Subject: [PATCH 16/18] [api] rephrase comment --- sda/cmd/api/api.go | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/sda/cmd/api/api.go b/sda/cmd/api/api.go index 1509c0068..87c3cce4f 100644 --- a/sda/cmd/api/api.go +++ b/sda/cmd/api/api.go @@ -290,10 +290,8 @@ func ingestFile(c *gin.Context) { c.Status(http.StatusOK) } -// The deleteFile function will take the user id and the file id -// because the user id won't be in the path of the file in the future -// therefore we need to make the list files, get the file id and then -// call the delete file +// The deleteFile function deletes files from the inbox and marks them as +// discarded in the db. Files are identified by their ids and the user id. func deleteFile(c *gin.Context) { inbox, err := storage.NewBackend(Conf.Inbox) From 1d73ae387cf7b19e19e6447fb1f18d67a0e36c2c Mon Sep 17 00:00:00 2001 From: MalinAhlberg Date: Fri, 29 Nov 2024 10:21:31 +0100 Subject: [PATCH 17/18] [tests] add rbac permissions for file deletion endpoint --- .github/integration/sda/rbac.json | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/integration/sda/rbac.json b/.github/integration/sda/rbac.json index d9832f547..92b681119 100644 --- a/.github/integration/sda/rbac.json +++ b/.github/integration/sda/rbac.json @@ -20,6 +20,11 @@ "path": "/file/accession", "action": "POST" }, + { + "role": "admin", + "path": "/file/*", + "action": "(DELETE)" + }, { "role": "submission", "path": "/users", From d690e54ed763a93ad86c2efb8f752fce33c61f28 Mon Sep 17 00:00:00 2001 From: MalinAhlberg Date: Mon, 2 Dec 2024 14:20:40 +0100 Subject: [PATCH 18/18] Apply suggestions from code review --- .github/integration/tests/sda/60_api_admin_test.sh | 10 ++++++++++ sda/cmd/api/api.go | 4 ++-- sda/cmd/api/api.md | 2 +- 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/.github/integration/tests/sda/60_api_admin_test.sh b/.github/integration/tests/sda/60_api_admin_test.sh index 30af0219c..8f089be89 100644 --- a/.github/integration/tests/sda/60_api_admin_test.sh +++ b/.github/integration/tests/sda/60_api_admin_test.sh @@ -34,6 +34,11 @@ done # get the fileId of the new file fileid="$(curl -k -L -H "Authorization: Bearer $token" -H "Content-Type: application/json" "http://api:8080/users/test@dummy.org/files" | jq -r '.[] | select(.inboxPath == "test_dummy.org/NC12878.bam.c4gh") | .fileID')" +output=$(s3cmd -c s3cfg ls s3://test_dummy.org/NC12878.bam.c4gh 2>/dev/null) +if [ -z "$output" ] ; then + echo "Uploaded file not in inbox" + exit 1 +fi # delete it resp="$(curl -s -k -L -o /dev/null -w "%{http_code}\n" -H "Authorization: Bearer $token" -H "Content-Type: application/json" -X DELETE "http://api:8080/file/test@dummy.org/$fileid")" if [ "$resp" != "200" ]; then @@ -47,6 +52,11 @@ if [ "$last_event" != "disabled" ]; then echo "The file $fileid does not have the expected las event 'disabled', but $last_event." fi +output=$(s3cmd -c s3cfg ls s3://test_dummy.org/NC12878.bam.c4gh 2>/dev/null) +if [ -n "$output" ] ; then + echo "Deleted file is still in inbox" + exit 1 +fi # Try to delete an unknown file resp="$(curl -s -k -L -o /dev/null -w "%{http_code}\n" -H "Authorization: Bearer $token" -H "Content-Type: application/json" -X DELETE "http://api:8080/file/test@dummy.org/badfileid")" diff --git a/sda/cmd/api/api.go b/sda/cmd/api/api.go index 87c3cce4f..023cde71e 100644 --- a/sda/cmd/api/api.go +++ b/sda/cmd/api/api.go @@ -300,11 +300,11 @@ func deleteFile(c *gin.Context) { } submissionUser := c.Param("username") - log.Warn("submission user:", submissionUser) + log.Debug("submission user:", submissionUser) fileID := c.Param("file") fileID = strings.TrimPrefix(fileID, "/") - log.Warn("submission file:", fileID) + log.Debug("submission file:", fileID) if fileID == "" { c.AbortWithStatusJSON(http.StatusBadRequest, "file ID is required") } diff --git a/sda/cmd/api/api.md b/sda/cmd/api/api.md index 61d2417c6..d8f9ace15 100644 --- a/sda/cmd/api/api.md +++ b/sda/cmd/api/api.md @@ -77,7 +77,7 @@ Admin endpoints are only available to a set of whitelisted users specified in th - `/file/:username/*fileid` - accepts `DELETE` requests - marks the file as `disabled` in the database, and deletes it from the inbox. - - The file is identfied by its id, returned by `users/:username/:files` + - The file is identified by its id, returned by `users/:username/:files` - Response codes - `200` Query execute ok.