Backend schema refactor (#835)
* added chatgpt func

* added check on api key

* init ai drive refactor

* ai_drive refactor

* added file upload failed message

* completed ai_drive refactor

* fixed old git repo links

* kernel fix

* minor improv

* msg using json

* using if main

* fixed train types

* kernel fix on using TrainSpace data schema

* fixed trainspace data schema in ai_drive

* input_df can be None issue

* moved input_df up

* enum read name

* obj detection

* zip_file -> img_file

* removed **trainspace_data where unneeded

* fixed dict

* minor

* removed createExecution

* removed writetoq in backend

* 🎨 Format Python code with psf/black

* Prettified Code!

* parse deep user arch fix

* 🎨 Format Python code with psf/black

* mapping for nn layers

* generalizing make train bucket path

* 🎨 Format Python code with psf/black

* minor pydocs

* check on layers length for ML

* minor pydocs

* added logger to env

* minor tuple mismatch fix

* added logger to kernel

* 🎨 Format Python code with psf/black

* added logger to ai drive and driver

* abstraction in kernel's router() function

* 🎨 Format Python code with psf/black

* using main() func

Co-authored-by: karkir0003 <[email protected]>

* 🎨 Format Python code with psf/black

* using if main

* 🎨 Format Python code with psf/black

* fixed obj detection format

* removed all execution_db

* added colored logs

* fix to python test parse arch

* minor comment fix

* dynamo db util fix for json objects

---------

Co-authored-by: farisdurrani <[email protected]>
Co-authored-by: karkir0003 <[email protected]>
3 people authored Jul 12, 2023
1 parent ad5ea3d commit d5ad36c
Showing 30 changed files with 453 additions and 516 deletions.
2 changes: 1 addition & 1 deletion .github/CODE_OF_CONDUCT.md
@@ -60,7 +60,7 @@ representative at an online or offline event.

 Instances of abusive, harassing, or otherwise unacceptable behavior may be
 reported to the community leaders responsible for enforcement at
-https://github.com/karkir0003.
+https://github.com/DSGT-DLP/Deep-Learning-Playground.
 All complaints will be reviewed and investigated promptly and fairly.

 All community leaders are obligated to respect the privacy and security of the
3 changes: 3 additions & 0 deletions .gitignore
@@ -208,3 +208,6 @@ dlp-terraform/**/.terraform/

 # SSH keys
 dlp-ssh.key
+
+# Firebase Admin API key
+backend/dlp-firebase-admin.json
2 changes: 1 addition & 1 deletion LICENSE
@@ -1,6 +1,6 @@
 MIT License

-Copyright (c) 2022 karkir0003
+Copyright (c) 2023 DSGT Deep Learning Playground

 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
55 changes: 34 additions & 21 deletions backend/aws_helpers/dynamo_db_utils/DynamoUnitTests.md
@@ -31,27 +31,6 @@ if __name__ == "__main__":
     print(6, delete_dynamo_item("trainspace", "ergsdf"))
 ```

-## execution_db.py
-
-```py
-if __name__ == "__main__":
-    print(1)
-    print(2, getAllUserExecutionData("8hDeAbdZ9Lg301QFGdEYYeAq4Kw2"))
-    print(3, getExecutionData("exfddc9ad2666d31cae1790167aefc9aa34eb5d06a28e1805e8fa8881845d463a8"))
-    print(3, updateExecutionData("exfddc9ad2666d31cae1790167aefc9aa34eb5d06a28e1805e8fa8881845d463a8", {
-        "timestamp": datetime.now().isoformat(),
-    }))
-    print(4, createExecutionData(
-        ExecutionData(
-            execution_id=str(random.random()),
-            data_source='TABULAR',
-            name='hola',
-            status='QUEUED',
-            timestamp=str(datetime.now().isoformat()),
-            user_id='bleh'
-        )))
-```
-
 ## trainspace.py

 ```py
@@ -128,6 +107,40 @@ if __name__ == "__main__":
             )
         ),
     )
+    data = {
+        "trainspace_id": "000033",
+        "uid": "00001",
+        "name": "My Trainspace",
+        "data_source": "TABULAR",
+        "dataset_data": {"name": "IRIS", "is_default_dataset": True},
+        "parameters_data": {
+            "target_col": "target",
+            "features": [
+                "sepal length (cm)",
+                "sepal width (cm)",
+                "petal length (cm)",
+                "petal width (cm)",
+            ],
+            "problem_type": "CLASSIFICATION",
+            "criterion": "CELOSS",
+            "optimizer_name": "SGD",
+            "shuffle": True,
+            "epochs": 5,
+            "test_size": 0.2,
+            "batch_size": 20,
+            "layers": [
+                {"value": "LINEAR", "parameters": [10, 3]},
+                {"value": "RELU", "parameters": []},
+                {"value": "LINEAR", "parameters": [3, 10]},
+                {"value": "SOFTMAX", "parameters": [-1]},
+            ],
+        },
+        "review_data": {
+            "notification_email": "[email protected]",
+            "notification_phone_number": "",
+        },
+    }
+    print(6, TrainspaceData(**(data)))
 ```

 ## userprogress_db.py
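The `TrainspaceData(**(data))` smoke test above relies on ordinary dataclass keyword unpacking. A minimal standalone sketch of the same pattern (the field set here is trimmed to a hypothetical subset of the real schema):

```py
from dataclasses import dataclass


@dataclass
class TrainspaceData:  # hypothetical subset of the real dataclass fields
    trainspace_id: str
    uid: str = ""
    data_source: str = ""
    dataset_data: dict = None


data = {
    "trainspace_id": "000033",
    "uid": "00001",
    "data_source": "TABULAR",
    "dataset_data": {"name": "IRIS", "is_default_dataset": True},
}

# ** unpacks each dict key as a keyword argument, so every key must match a
# field name exactly; an unknown key raises TypeError instead of being dropped.
print(TrainspaceData(**data))
```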
3 changes: 3 additions & 0 deletions backend/aws_helpers/dynamo_db_utils/dynamo_db_utils.py
@@ -1,3 +1,5 @@
+from decimal import Decimal
+import json
 import boto3
 from backend.aws_helpers.dynamo_db_utils.constants import ALL_DYANMODB_TABLES
 from backend.common.constants import AWS_REGION
@@ -119,6 +121,7 @@ def create_dynamo_item(table_name: str, input_item: dict) -> bool:
     if input_item.get(partition_key) is None:
         raise ValueError("Item must have the partition key: " + partition_key)
     gsi_key = ALL_DYANMODB_TABLES[table_name].get("gsi")
+    input_item = json.loads(json.dumps(input_item), parse_float=Decimal)
    if gsi_key and input_item.get(gsi_key) is None:
         raise ValueError("Item must have the gsi key: " + gsi_key)
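The new `parse_float=Decimal` round-trip exists because DynamoDB (via boto3) does not accept Python floats in items; numbers must be `Decimal`. A minimal sketch of what the added line does (the item shown is made up):

```py
import json
from decimal import Decimal

# A hypothetical item whose nested dict contains a float, which DynamoDB
# would reject on write.
item = {"trainspace_id": "abc123", "parameters_data": {"test_size": 0.2}}

# Serializing to JSON and parsing back with parse_float=Decimal converts
# every float to Decimal, no matter how deeply it is nested.
safe_item = json.loads(json.dumps(item), parse_float=Decimal)

print(safe_item["parameters_data"]["test_size"])  # Decimal('0.2')
```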
86 changes: 0 additions & 86 deletions backend/aws_helpers/dynamo_db_utils/execution_db.py

This file was deleted.

2 changes: 1 addition & 1 deletion backend/aws_helpers/dynamo_db_utils/trainspace_db.py
@@ -27,7 +27,7 @@ class TrainspaceData:
     name: str = ""
     parameters_data: dict = None
     review_data: str = ""
-    status: TrainStatus = TrainStatus.QUEUED
+    status: str = TrainStatus.QUEUED.name


 def getTrainspaceData(trainspace_id: str) -> dict:
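Typing `status` as a plain string (`TrainStatus.QUEUED.name`) keeps the dataclass directly JSON- and DynamoDB-serializable. A sketch of the difference, using an assumed two-member `TrainStatus`:

```py
import json
from enum import Enum


class TrainStatus(Enum):  # members assumed for illustration
    QUEUED = "QUEUED"
    TRAINING = "TRAINING"


# Raw enum members are not JSON serializable...
try:
    json.dumps({"status": TrainStatus.QUEUED})
except TypeError as err:
    print(err)  # Object of type TrainStatus is not JSON serializable

# ...but the .name string round-trips cleanly and can be parsed back by name.
payload = json.dumps({"status": TrainStatus.QUEUED.name})
print(TrainStatus[json.loads(payload)["status"]])  # TrainStatus.QUEUED
```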
6 changes: 3 additions & 3 deletions backend/aws_helpers/dynamo_db_utils/userprogress_db.py
@@ -41,12 +41,12 @@ def updateUserProgressData(uid: str, requestData: dict) -> bool:
     return update_dynamo_item(TABLE_NAME, uid, requestData)


-def createUserProgressData(execution_data: UserProgressData) -> bool:
+def createUserProgressData(user_progress_data: UserProgressData) -> bool:
     """
     Create a new entry or replaces an existing entry table according to the `uid`.
-    @param execution_data: uid and other table attributes to be created or updated if the entry already exists
+    @param user_progress_data: uid and other table attributes to be created or updated if the entry already exists
     @return: True if the creation or update is successful
     """

-    return create_dynamo_item(TABLE_NAME, execution_data.__dict__)
+    return create_dynamo_item(TABLE_NAME, user_progress_data.__dict__)
37 changes: 35 additions & 2 deletions backend/aws_helpers/s3_utils/s3_client.py
@@ -1,7 +1,9 @@
 import datetime
+from backend.aws_helpers.dynamo_db_utils.trainspace_db import TrainspaceData
 import boto3
 import os
 import shutil
+import pandas as pd
+import io

 from backend.aws_helpers.s3_utils.s3_bucket_names import FILE_UPLOAD_BUCKET_NAME

@@ -28,7 +30,7 @@ def write_to_bucket(file_path: str, bucket_name: str, bucket_path: str):

 def read_from_bucket(
     bucket_name: str, bucket_path: str, output_file_name: str, output_file_path: str
-):
+) -> None:
     """
     Given S3 URI, read the file from the S3 bucket
@@ -48,6 +50,37 @@
     )


+def read_df_from_bucket(bucket_name: str, bucket_path: str) -> pd.DataFrame:
+    """
+    Given S3 URI, read the file from the S3 bucket and return a pandas dataframe
+
+    Args:
+        bucket_name (str): name of s3 bucket
+        bucket_path (str): path within s3 bucket where the file resides
+    """
+    s3 = boto3.client("s3")
+    obj = s3.get_object(Bucket=bucket_name, Key=bucket_path)
+    df = pd.read_csv(io.BytesIO(obj["Body"].read()))
+    return df
+
+
+def make_train_bucket_path(trainspace_data: TrainspaceData) -> str:
+    """
+    Given a TrainspaceData object, return the path to the bucket where the training data will be stored
+
+    Args:
+        trainspace_data (TrainspaceData): object containing data about the training data
+
+    Returns:
+        bucket_path (str): path to bucket where training data will be stored
+    """
+    uid = trainspace_data.uid
+    data_source = trainspace_data.data_source.lower()
+    filename = trainspace_data.dataset_data["name"]
+    return f"{uid}/{data_source}/{filename}"
+
+
 def get_presigned_url_from_bucket(bucket_name: str, bucket_path: str):
     """
     Given S3 URI, read the file from the S3 bucket
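A hypothetical end-to-end use of the two new helpers, assuming `TrainspaceData` accepts these fields as keyword arguments and that a CSV actually exists at the derived key:

```py
from backend.aws_helpers.dynamo_db_utils.trainspace_db import TrainspaceData
from backend.aws_helpers.s3_utils.s3_bucket_names import FILE_UPLOAD_BUCKET_NAME
from backend.aws_helpers.s3_utils.s3_client import (
    make_train_bucket_path,
    read_df_from_bucket,
)

# Hypothetical trainspace; only the fields make_train_bucket_path reads are set.
trainspace = TrainspaceData(
    trainspace_id="000033",
    uid="00001",
    data_source="TABULAR",
    dataset_data={"name": "iris.csv"},
)

key = make_train_bucket_path(trainspace)  # "00001/tabular/iris.csv"
df = read_df_from_bucket(FILE_UPLOAD_BUCKET_NAME, key)  # needs AWS credentials
print(df.head())
```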