-
Notifications
You must be signed in to change notification settings - Fork 0
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Feature/mtchbx 45 #24
base: main
Are you sure you want to change the base?
Changes from all commits
e7024fb
ab5ecbc
3b1ced5
de47420
9c5fde0
1199d5e
b4ab949
0cf1d7b
f054b2f
2d00496
725048e
e73215a
6f6332f
88deb28
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,17 +1,18 @@ | ||
from binascii import hexlify | ||
from enum import StrEnum | ||
from typing import Annotated | ||
|
||
from dotenv import find_dotenv, load_dotenv | ||
from fastapi import Depends, FastAPI, HTTPException | ||
from fastapi import Depends, FastAPI, Form, HTTPException, UploadFile | ||
from pydantic import BaseModel | ||
|
||
from matchbox.common.graph import ResolutionGraph | ||
from matchbox.server.base import BackendManager, MatchboxDBAdapter | ||
from matchbox.server.utils.s3 import upload_to_s3 | ||
|
||
dotenv_path = find_dotenv(usecwd=True) | ||
load_dotenv(dotenv_path) | ||
|
||
|
||
app = FastAPI( | ||
title="matchbox API", | ||
version="0.2.0", | ||
|
@@ -45,6 +46,21 @@ class CountResult(BaseModel): | |
entities: dict[BackendEntityType, int] | ||
|
||
|
||
class SourceItem(BaseModel): | ||
"""Response model for source""" | ||
|
||
schema: str | ||
table: str | ||
id: str | ||
resolution: str | None = None | ||
|
||
|
||
class Sources(BaseModel): | ||
"""Response model for sources""" | ||
|
||
sources: list[SourceItem] | ||
Comment on lines
+58
to
+61
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. do we need a separate pydantic object? Can't we just return list[Sources]? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
|
||
|
||
def get_backend() -> MatchboxDBAdapter: | ||
return BackendManager.get_backend() | ||
|
||
|
@@ -76,18 +92,49 @@ async def clear_backend(): | |
|
||
|
||
@app.get("/sources") | ||
async def list_sources(): | ||
raise HTTPException(status_code=501, detail="Not implemented") | ||
async def list_sources( | ||
backend: Annotated[MatchboxDBAdapter, Depends(get_backend)], | ||
) -> Sources: | ||
datasets = backend.datasets.list() | ||
result = [] | ||
for dataset in datasets: | ||
result.append( | ||
SourceItem( | ||
table=dataset.table, | ||
id=dataset.id, | ||
schema=dataset.schema, | ||
resolution=hexlify(dataset.resolution).decode("ascii"), | ||
) | ||
) | ||
return Sources(sources=result) | ||
|
||
|
||
@app.get("/sources/{hash}") | ||
async def get_source(hash: str): | ||
raise HTTPException(status_code=501, detail="Not implemented") | ||
|
||
|
||
@app.post("/sources/{hash}") | ||
async def add_source(hash: str): | ||
raise HTTPException(status_code=501, detail="Not implemented") | ||
async def get_source( | ||
hash: str, backend: Annotated[MatchboxDBAdapter, Depends(get_backend)] | ||
) -> dict[str, SourceItem] | str: | ||
datasets = backend.datasets.list() | ||
for dataset in datasets: | ||
resolution = hexlify(dataset.resolution).decode("ascii") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can we use base64? |
||
if resolution == hash: | ||
result_obj = SourceItem( | ||
table=dataset.table, | ||
id=dataset.id, | ||
schema=dataset.schema, | ||
resolution=resolution, | ||
) | ||
return {"source": result_obj} | ||
return "Source not found" | ||
Comment on lines
+126
to
+127
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. if the source is not found we need to return a 404, and use a response with a proper schema, probably a codifier error message. And it is found, let's just return the object, no need to wrap it in a dict. |
||
|
||
|
||
@app.post("/sources/uploadFile") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. camelCase? Are we sure? We should also follow the endpoint convention we'd set |
||
async def add_source_to_s3( | ||
file: UploadFile, bucket_name: str = Form(...), object_name: str = Form(...) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. we shouldn't allow specifying a bucket name, nor an object name (directly) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. From a quick look at the docs, they're using annotations for form parameters |
||
): | ||
is_file_uploaded = upload_to_s3(file.file, bucket_name, object_name) | ||
if is_file_uploaded: | ||
return "File was successfully uplaoded" | ||
return "File could not be uplaoded" | ||
Comment on lines
+135
to
+137
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. HTTP status codes please, and Pydantic schemas for the response |
||
|
||
|
||
@app.get("/models") | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
from typing import BinaryIO | ||
|
||
import boto3 | ||
from botocore.exceptions import NoCredentialsError, PartialCredentialsError | ||
|
||
|
||
def upload_to_s3( | ||
file: BinaryIO, bucket_name: str, object_name: str | None = None | ||
) -> bool: | ||
""" | ||
Upload a file to an S3 bucket. | ||
|
||
Args: | ||
file (BinaryIO): File to upload. | ||
bucket_name (str): Target S3 bucket. | ||
object_name (str): S3 object name. If not specified, file_name is used. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What's |
||
|
||
Returns: | ||
bool: True if the file was uploaded, else False. | ||
""" | ||
s3_client = boto3.client("s3") | ||
try: | ||
s3_client.upload_fileobj(file, bucket_name, object_name) | ||
return True | ||
except NoCredentialsError: | ||
return False | ||
except PartialCredentialsError: | ||
return False |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,6 +5,7 @@ | |
ResolutionGraph, | ||
) | ||
from matchbox.server import app | ||
from matchbox.server.postgresql.orm import Sources | ||
|
||
client = TestClient(app) | ||
|
||
|
@@ -51,13 +52,40 @@ def test_count_backend_item(self, get_backend): | |
# response = client.post("/testing/clear") | ||
# assert response.status_code == 200 | ||
|
||
# def test_list_sources(): | ||
# response = client.get("/sources") | ||
# assert response.status_code == 200 | ||
@patch("matchbox.server.base.BackendManager.get_backend") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The tests will need to be updated to reflect requested changes in the API |
||
def test_list_sources(self, get_backend): | ||
hash_hex = "5eb63bbbe01eeed093cb22bb8f5acdc3" | ||
byte_arr = bytearray.fromhex(hash_hex) | ||
obj_mock = Sources( | ||
table="mock table", schema="mock_schema", id="mock_id", resolution=byte_arr | ||
) | ||
mock_backend = Mock() | ||
mock_backend.datasets.list = Mock(return_value=[obj_mock]) | ||
get_backend.return_value = mock_backend | ||
response = client.get("/sources") | ||
assert response.status_code == 200 | ||
|
||
# def test_get_source(): | ||
# response = client.get("/sources/test_source") | ||
# assert response.status_code == 200 | ||
@patch("matchbox.server.base.BackendManager.get_backend") | ||
def test_get_source(self, get_backend): | ||
hash_hex = "5eb63bbbe01eeed093cb22bb8f5acdc3" | ||
byte_arr = bytearray.fromhex(hash_hex) | ||
obj_mock = Sources( | ||
table="mock_table", schema="mock_schema", id="mock_id", resolution=byte_arr | ||
) | ||
mock_backend = Mock() | ||
mock_backend.datasets.list = Mock(return_value=[obj_mock]) | ||
get_backend.return_value = mock_backend | ||
|
||
response = client.get(f"/sources/{hash_hex}") | ||
assert response.status_code == 200 | ||
assert response.json() == { | ||
"source": { | ||
"schema": "mock_schema", | ||
"table": "mock_table", | ||
"id": "mock_id", | ||
"resolution": hash_hex, | ||
} | ||
} | ||
|
||
# def test_add_source(): | ||
# response = client.post("/sources") | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
should we start moving this stuff to common?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
See below. I think my favoured approach is separate
Response*
classes for now?