Skip to content

Commit

Permalink
Develop (#9)
Browse files Browse the repository at this point in the history
* update syntax for query

* add test cases for verification, rename query parameters

* bump version

* add parameter for setting timestamp, use deterministic timestamps in tests

* change parameter names in docs

* fixed dependency versions, modified query, updated test
  • Loading branch information
b97pla authored Jun 27, 2023
1 parent 30961b7 commit 340ae83
Show file tree
Hide file tree
Showing 4 changed files with 163 additions and 70 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ Print the N (positive integer) latest uploads from the database:

Query the database for uploads matching specific criteria:

curl -i -X "POST" -d '{"host": "biotank", "before_date": "2023-03-01", "verified": "False"}' http://localhost:8888/api/1.0/query
curl -i -X "POST" -d '{"host": "biotank", "uploaded_before": "2023-03-01", "verified": "False"}' http://localhost:8888/api/1.0/query

Docker container
----------------
Expand Down
82 changes: 50 additions & 32 deletions archive_db/handlers/DbHandlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,14 +37,17 @@ def post(self):
:param path: Path to archive uploaded
:param description: The unique TSM description of the archive
:param host: From which host the archive was uploaded
:param timestamp: (optional) if specified, use this timestamp for the upload instead of
datetime.datetime.utcnow().isoformat()
:return Information about the created object
"""

body = self.decode(required_members=["path", "description", "host"])
tstamp = body.get("timestamp", dt.datetime.utcnow().isoformat())
archive, created = Archive.get_or_create(
description=body["description"], path=body["path"], host=body["host"])

upload = Upload.create(archive=archive, timestamp=dt.datetime.utcnow())
upload = Upload.create(archive=archive, timestamp=tstamp)

self.write_json({"status": "created", "upload":
{"id": upload.id,
Expand All @@ -67,13 +70,19 @@ def post(self):
:param description: The unique TSM description of the archive we've verified.
:param path: The path to the archive that was uploaded
:param host: The host from which the archive was uploaded
:param timestamp: (optional) if specified, use this timestamp for the verification instead
of datetime.datetime.utcnow().isoformat()
:return Information about the created object
"""
body = self.decode(required_members=["description", "path", "host"])
tstamp = body.get("timestamp", dt.datetime.utcnow().isoformat())

archive, created = Archive.get_or_create(description=body["description"], host=body["host"], path=body["path"])
archive, created = Archive.get_or_create(
description=body["description"],
host=body["host"],
path=body["path"])

verification = Verification.create(archive=archive, timestamp=dt.datetime.utcnow())
verification = Verification.create(archive=archive, timestamp=tstamp)

self.write_json({"status": "created", "verification":
{"id": verification.id,
Expand All @@ -82,6 +91,7 @@ def post(self):
"path": verification.archive.path,
"host": verification.archive.host}})


class RandomUnverifiedArchiveHandler(BaseHandler):

@gen.coroutine
Expand All @@ -94,41 +104,47 @@ def get(self):
:param age: Number of days we should look back when picking an unverified archive
:param safety_margin: Number of days we should use as safety buffer
:param today: (optional) if specified, use this timestamp for the reference date instead of
datetime.datetime.utcnow().isoformat()
:return A randomly picked unverified archive within the specified date interval
"""
body = self.decode(required_members=["age", "safety_margin"])
age = int(body["age"])
margin = int(body["safety_margin"])
today = body.get("today", dt.date.today().isoformat())

from_timestamp = dt.datetime.utcnow() - dt.timedelta(days=age+margin)
to_timestamp = dt.datetime.utcnow() - dt.timedelta(days=margin)
from_timestamp = dt.datetime.fromisoformat(today) - dt.timedelta(days=age+margin)
to_timestamp = from_timestamp + dt.timedelta(days=age)

# "Give me a randomly chosen archive that was uploaded between from_timestamp and
# to_timestamp, and has no previous verifications"
query = (Upload
.select()
.join(Verification, JOIN.LEFT_OUTER, on=(
Verification.archive_id == Upload.archive_id))
.where(Upload.timestamp.between(from_timestamp, to_timestamp))
.group_by(Upload.archive_id)
.having(fn.Count(Verification.id) < 1)
.order_by(fn.Random())
.limit(1))
query = Upload\
.select()\
.join(Verification, JOIN.LEFT_OUTER, on=(
Verification.archive_id == Upload.archive_id))\
.where(Upload.timestamp.between(from_timestamp, to_timestamp))\
.group_by(Upload.archive_id)\
.having(fn.Count(Verification.id) < 1)\
.order_by(fn.Random())

result_len = query.count()

if result_len > 0:
upload = next(query.execute())
upload = query.first()
archive_name = os.path.basename(os.path.normpath(upload.archive.path))
self.write_json({"status": "unverified", "archive":
{"timestamp": str(upload.timestamp),
"path": upload.archive.path,
"description": upload.archive.description,
"host": upload.archive.host,
"archive": archive_name}})
self.write_json({
"status": "unverified",
"archive": {
"timestamp": str(upload.timestamp),
"path": upload.archive.path,
"description": upload.archive.description,
"host": upload.archive.host,
"archive": archive_name
}
})
else:
msg = "No unverified archives uploaded between {} and {} was found!".format(
from_timestamp.strftime("%Y-%m-%d %H:%M:%S"), to_timestamp.strftime("%Y-%m-%d %H:%M:%S"))
msg = f"No unverified archives uploaded between {from_timestamp} and {to_timestamp} " \
f"was found!"
self.set_status(204, reason=msg)


Expand Down Expand Up @@ -210,6 +226,8 @@ def _db_query():
).join(
Removal, JOIN.LEFT_OUTER, on=(Removal.archive_id == Archive.id)
).order_by(
Removal.timestamp.desc(),
Verification.timestamp.desc(),
Upload.timestamp.desc(),
Archive.path.asc())
return query
Expand All @@ -221,9 +239,9 @@ def _do_query(self, query):
"host": row["host"],
"path": row["path"],
"description": row["description"],
"uploaded": row["uploaded"].isoformat() if row["uploaded"] else None,
"verified": row["verified"].isoformat() if row["verified"] else None,
"removed": row["removed"].isoformat() if row["removed"] else None}
"uploaded": str(row["uploaded"]) if row["uploaded"] else None,
"verified": str(row["verified"]) if row["verified"] else None,
"removed": str(row["removed"]) if row["removed"] else None}
for row in query
]})
else:
Expand Down Expand Up @@ -272,9 +290,9 @@ def post(self):
partially match this string
:param host: (optional) fetch archives that were uploaded from a host whose hostname fully
or partially match this string
:param before_date: (optional) fetch archives that were uploaded on or before this date,
:param uploaded_before: (optional) fetch archives that were uploaded on or before this date,
formatted as YYYY-MM-DD
:param after_date: (optional) fetch archives that were uploaded on or after this date,
:param uploaded_after: (optional) fetch archives that were uploaded on or after this date,
formatted as YYYY-MM-DD
:param verified: (optional) if True, fetch only archives that have been successfully
verified. If False, fetch only archives that have not been verified. If omitted, fetch
Expand All @@ -294,14 +312,14 @@ def post(self):
query = query.where(Archive.description.contains(body["description"]))
if body.get("host"):
query = query.where(Archive.host.contains(body["host"]))
if body.get("before_date"):
if body.get("uploaded_before"):
query = query.where(
Upload.timestamp <= dt.datetime.strptime(
f"{body['before_date']} 23:59:59",
f"{body['uploaded_before']} 23:59:59",
"%Y-%m-%d %H:%M:%S"))
if body.get("after_date"):
if body.get("uploaded_after"):
query = query.where(
Upload.timestamp >= dt.datetime.strptime(body["after_date"], "%Y-%m-%d"))
Upload.timestamp >= dt.datetime.strptime(body["uploaded_after"], "%Y-%m-%d"))
if body.get("verified") is not None and body["verified"] in ["True", "False"]:
query = query.where(Verification.timestamp.is_null(body["verified"] == "False"))
if body.get("removed") is not None and body["removed"] in ["True", "False"]:
Expand Down
10 changes: 5 additions & 5 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ include = ["archive_db*"]

[project]
name = "archive-db"
version = "1.2.1"
version = "1.3.1"
authors = [
{name = "SNP&SEQ Technology Platform, Uppsala University", email = "[email protected]" },
]
Expand All @@ -23,14 +23,14 @@ classifiers = [
"Topic :: Scientific/Engineering :: Bio-Informatics"
]
dependencies = [
"peewee",
"tornado",
"arteria"
"peewee==3.16.0",
"tornado==6.2",
"arteria==1.1.4"
]

[project.optional-dependencies]
test = [
"nose"
"nose==1.3.7"
]

[project.scripts]
Expand Down
Loading

0 comments on commit 340ae83

Please sign in to comment.