Skip to content

Commit

Permalink
Merge pull request #1 from johanherman/new-service
Browse files Browse the repository at this point in the history
New snpseq-archive-db service for handling archive metadata
  • Loading branch information
Stephan Lohse authored Nov 20, 2017
2 parents 8350158 + 3433a9b commit 954bfd0
Show file tree
Hide file tree
Showing 14 changed files with 518 additions and 2 deletions.
7 changes: 7 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
*.swp
*.pyc
*.egg-info/
build/
dist/
*.egg
*.db
19 changes: 19 additions & 0 deletions Pipfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
[[source]]

verify_ssl = true
name = "pypi"
url = "https://pypi.python.org/simple"


[packages]

tornado = { version = "==4.5.2"}
peewee = { version = "==2.10.2"}
arteria-core = {git = "https://github.com/arteria-project/arteria-core.git", ref = "v1.1.0"}
jsonpickle = { version = ">=0.9.5"}


[dev-packages]

mock = "*"
nose = "*"
88 changes: 88 additions & 0 deletions Pipfile.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

33 changes: 31 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,2 +1,31 @@
# snpseq-archive-db
REST service for storing archive metadata in SQLite db
Arteria Archive DB
==================

A self-contained (Tornado) REST service that serves as a frontend for a simple SQL db that contains the state of our uploads, verifications and removals done by other Arteria archive services.

Trying it out
-------------

python3 -m pip install pipenv
pipenv install --deploy


Try running it:

pipenv run ./archive-db-ws --config=config/ --port=8888 --debug

And then you can find a simple API documentation by going to:

http://localhost:8888/api/1.0

Running tests
-------------

pipenv install --dev
pipenv run nosetests tests/


REST endpoints
--------------

# FIXME: Update example
11 changes: 11 additions & 0 deletions archive-db-ws
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Console entry-point wrapper that launches the archive-db web service."""
import re
import sys

from archive_db.app import start

if __name__ == '__main__':
    # Strip setuptools wrapper suffixes ("-script.pyw" / ".exe") from
    # argv[0] so the service sees a clean program name.
    program_name = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
    sys.argv[0] = program_name
    sys.exit(start())
1 change: 1 addition & 0 deletions archive_db/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
__version__ = "1.0.0"
40 changes: 40 additions & 0 deletions archive_db/app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import datetime

from archive_db.models.Model import init_db, Archive, Upload, Verification, Removal
from archive_db.handlers.DbHandlers import UploadHandler, VerificationHandler, RemovalHandler, VersionHandler

from arteria.web.app import AppService
from peewee import *
from tornado.web import URLSpec as url


def routes(**kwargs):
    """
    Setup routes and feed them any kwargs passed, e.g. `routes(config=app_svc.config_svc)`
    Help will be automatically available at /api, and will be based on the
    doc strings of the get/post/put/delete methods
    :param: **kwargs will be passed when initializing the routes.
    """

    return [
        url(r"/api/1.0/version", VersionHandler, name="version", kwargs=kwargs),
        # Pass kwargs here as well so UploadHandler is initialized the same
        # way as every other handler (it was the only route without them).
        # NOTE(review): this route has no capture group, yet
        # UploadHandler.get() takes an `archive` argument — confirm intended
        # URL shape once the GET stub is implemented.
        url(r"/api/1.0/upload/", UploadHandler, name="upload", kwargs=kwargs),
        # Fixed typo: the pattern was "verifification", which made the
        # documented /api/1.0/verification/<archive> endpoint unreachable.
        url(r"/api/1.0/verification/([\w_-]+)",
            VerificationHandler, name="verification", kwargs=kwargs),
        url(r"/api/1.0/removal/([\w_-]+)", RemovalHandler, name="removal", kwargs=kwargs)
    ]


def start():
    """
    Start the archive-db-ws app: load config, initialize the SQLite
    database, then hand the routes to the Arteria app service.
    """
    app_svc = AppService.create(__package__)

    # The db file location comes from the service configuration;
    # init_db creates any missing tables (safe=True).
    database_path = app_svc.config_svc["archive_db_path"]
    init_db(database_path)

    app_svc.start(routes(config=app_svc.config_svc))


if __name__ == '__main__':
    start()
136 changes: 136 additions & 0 deletions archive_db/handlers/DbHandlers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
import datetime as dt

from arteria.web.handlers import BaseRestHandler

from archive_db.models.Model import Archive, Upload, Verification, Removal
from archive_db import __version__ as version

from tornado import gen
from tornado.web import RequestHandler, HTTPError
from tornado.escape import json_decode, json_encode

# TODO: Shall we implement a handler for something like:
# "Has any package been verified since date `bar`?"
# At the moment this can be solved in the client by comparing
# with `last verified date`.

"""
Our handlers are supposed to work as following:
POST /upload/ - create a new archive entry + upload entry
GET /upload/ - get last global upload
GET /upload/<archive> - get last upload for <archive>
POST /verification/<archive> - create a new verification entry for <archive>
GET /verification/ - get last global verification
GET /verification/<archive> - get last verification for <archive>
POST /removal/<archive> - create a new removal entry for <archive>
"""


class BaseHandler(BaseRestHandler):
    # BaseRestHandler.body_as_object() does not work well
    # in Python 3 due to string vs byte strings, so requests
    # are decoded here instead.

    def decode(self, required_members=()):
        """
        Decode the JSON request body and validate its contents.

        Fixed: the default was a mutable list (`[]`), the classic Python
        mutable-default-argument pitfall; an immutable tuple is safe and
        behaves identically for iteration.

        :param required_members: iterable of keys that must be present in the body
        :return: dict parsed from the JSON request body
        :raises HTTPError: 400 if a required member is missing
        """
        obj = json_decode(self.request.body)

        for member in required_members:
            if member not in obj:
                raise HTTPError(400, "Expecting '{0}' in the JSON body".format(member))
        return obj


class UploadHandler(BaseHandler):

    @gen.coroutine
    def post(self):
        """
        Archive `path` was just now uploaded (not?) OK.
        :param path: Path to archive uploaded
        :param description: The TSM description of the archive
        :param host: From which host the archive was uploaded
        """
        body = self.decode(required_members=["path", "description", "host"])

        # Re-use an existing Archive row when one matches, otherwise create it.
        archive, _ = Archive.get_or_create(
            description=body["description"], path=body["path"], host=body["host"])

        upload = Upload.create(archive=archive, timestamp=dt.datetime.utcnow())

        upload_info = {
            "id": upload.id,
            "timestamp": str(upload.timestamp),
            "description": upload.archive.description,
            "path": upload.archive.path,
            "host": upload.archive.host,
        }
        self.write_json({"status": "created", "upload": upload_info})

    @gen.coroutine
    def get(self, archive):
        """
        Archive `foo` was last uploaded OK at date `bar`.
        :param archive: archive to look up; when empty the last global
                        upload should be returned
        :return: the last upload of `archive` (not yet implemented)
        """
        # Step 1 - get date when archive was last updated
        pass

# TODO: We might have to add logic in some of the services
# that adds a file with the description inside the archive,
# so we can verify that we're operating on the correct
# archive before verifying/removing.


class VerificationHandler(BaseHandler):
    # Records and queries verification events for archives.
    # NOTE(review): both methods are unimplemented stubs.

    @gen.coroutine
    def post(self, archive):
        """
        Archive `foo` was verified (not) OK at date `bar`.
        :param archive: Path to archive verified
        :param description: The TSM description of the archive we verified
        """
        pass
        # Step 1 - set date when archive was verified OK

    @gen.coroutine
    def get(self, archive):
        """
        Give me the date for when any archive was last verified (OK).
        :param archive: Path to archive we want to check
        :return The `archive` when it was last verified. If no `archive` specified, then it will
                return the last globally verified archive.
        """
        pass


class RemovalHandler(BaseHandler):
    # Records removal-from-disk events for archives.
    # NOTE(review): unimplemented stub.

    @gen.coroutine
    def post(self, archive):
        """
        Archive `foo` was removed from disk at date `bar`.
        :param archive: Path to archive removed from disk
        :param description: The TSM description of the archive we removed
        """
        pass


class VersionHandler(BaseHandler):

    """
    Get the version of the service
    """

    def get(self):
        """
        Returns the version of the archive-db service
        (archive_db.__version__).
        """
        self.write_object({"version": version})
Empty file added archive_db/handlers/__init__.py
Empty file.
55 changes: 55 additions & 0 deletions archive_db/models/Model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
from peewee import *

# TODO: Shall we log failed operations? (not uploaded OK, not verified OK)
# TODO: Shall we have anything to do with staging operations?

db_proxy = Proxy()


def init_db(mydb="archives.db"):
    """
    Open (or create) the SQLite database at `mydb`, bind it to the
    module-level proxy and create any missing tables (safe=True).
    """
    database = SqliteDatabase(mydb)
    db_proxy.initialize(database)
    database.create_tables([Archive, Upload, Verification, Removal], safe=True)


class BaseModel(Model):
    # Abstract base: binds every model to the proxied database so the
    # actual SQLite file can be chosen at runtime via init_db().

    class Meta:
        database = db_proxy


class ChildModel(BaseModel):
    # Shared repr for event models (Upload/Verification/Removal), which
    # all carry an `archive` foreign key and a `timestamp` field.

    def __repr__(self):
        return "ID: {}, Archive ID: {}, Timestamp: {}".format(self.id, self.archive, self.timestamp)


class Archive(BaseModel):
    # One row per archived package. Field order is unchanged so the
    # generated table schema stays identical.
    description = CharField(index=True, unique=True)
    path = CharField(index=True)
    host = CharField()

    def __repr__(self):
        return "ID: {}, Description: {}, Path: {}, Host: {}".format(
            self.id, self.description, self.path, self.host)


class Upload(ChildModel):
    # One row per upload event of an archive.
    archive = ForeignKeyField(Archive, related_name="uploads")
    timestamp = DateTimeField()


class Verification(ChildModel):
    # One row per verification event of an archive.
    archive = ForeignKeyField(Archive, related_name="verifications")
    timestamp = DateTimeField()


class Removal(ChildModel):
    # One row per removal-from-disk event of an archive.
    archive = ForeignKeyField(Archive, related_name="removals")
    timestamp = DateTimeField()

# For schema migrations, see http://docs.peewee-orm.com/en/latest/peewee/database.html#schema-migrations
# and http://docs.peewee-orm.com/en/latest/peewee/playhouse.html#migrate
#
# Make sure that we *always*, as extra security, take a backup of the previous
# db before doing a migration. We should also take continuous backups
Empty file added archive_db/models/__init__.py
Empty file.
Loading

0 comments on commit 954bfd0

Please sign in to comment.