Backend schema refactor (#835)
* added chatgpt func

* added check on api key

* init ai drive refactor

* ai_drive refactor

* added file upload failed message

* completed ai_drive refactor

* fixed old git repo links

* kernel fix

* minor improv

* msg using json

* using if main

* fixed train types

* kernel fix on using TrainSpace data schema

* fixed trainspace data schema in ai_drive

* input_df can be None issue

* moved input_df up

* enum read name

* obj detection

* zip_file -> img_file

* removed **trainspace_data where unneeded

* fixed dict

* minor

* removed createExecution

* removed writetoq in backend

* 🎨 Format Python code with psf/black

* Prettified Code!

* parse deep user arch fix

* 🎨 Format Python code with psf/black

* mapping for nn layers

* generalizing make train bucket path

* 🎨 Format Python code with psf/black

* minor pydocs

* check on layers length for ML

* minor pydocs

* added logger to env

* minor tuple mismatch fix

* added logger to kernel

* 🎨 Format Python code with psf/black

* added logger to ai drive and driver

* abstraction in kernel's router() function

* 🎨 Format Python code with psf/black

* using main() func

Co-authored-by: karkir0003 <[email protected]>

* 🎨 Format Python code with psf/black

* using if main

* 🎨 Format Python code with psf/black

* fixed obj detection format

* removed all execution_db

* added colored logs

* fix to python test parse arch

* minor comment fix

* dynamo db util fix for json objects

---------

Co-authored-by: farisdurrani <[email protected]>
Co-authored-by: karkir0003 <[email protected]>
3 people authored Jul 12, 2023
1 parent ad5ea3d commit d5ad36c
Showing 30 changed files with 453 additions and 516 deletions.
2 changes: 1 addition & 1 deletion .github/CODE_OF_CONDUCT.md
@@ -60,7 +60,7 @@ representative at an online or offline event.

 Instances of abusive, harassing, or otherwise unacceptable behavior may be
 reported to the community leaders responsible for enforcement at
-https://github.com/karkir0003.
+https://github.com/DSGT-DLP/Deep-Learning-Playground.
 All complaints will be reviewed and investigated promptly and fairly.

 All community leaders are obligated to respect the privacy and security of the
3 changes: 3 additions & 0 deletions .gitignore
@@ -208,3 +208,6 @@ dlp-terraform/**/.terraform/

 # SSH keys
 dlp-ssh.key
+
+# Firebase Admin API key
+backend/dlp-firebase-admin.json
2 changes: 1 addition & 1 deletion LICENSE
@@ -1,6 +1,6 @@
 MIT License

-Copyright (c) 2022 karkir0003
+Copyright (c) 2023 DSGT Deep Learning Playground

 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
55 changes: 34 additions & 21 deletions backend/aws_helpers/dynamo_db_utils/DynamoUnitTests.md
@@ -31,27 +31,6 @@ if __name__ == "__main__":
     print(6, delete_dynamo_item("trainspace", "ergsdf"))
 ```

-## execution_db.py
-
-```py
-if __name__ == "__main__":
-    print(1)
-    print(2, getAllUserExecutionData("8hDeAbdZ9Lg301QFGdEYYeAq4Kw2"))
-    print(3, getExecutionData("exfddc9ad2666d31cae1790167aefc9aa34eb5d06a28e1805e8fa8881845d463a8"))
-    print(3, updateExecutionData("exfddc9ad2666d31cae1790167aefc9aa34eb5d06a28e1805e8fa8881845d463a8", {
-        "timestamp": datetime.now().isoformat(),
-    }))
-    print(4, createExecutionData(
-        ExecutionData(
-            execution_id=str(random.random()),
-            data_source='TABULAR',
-            name='hola',
-            status='QUEUED',
-            timestamp=str(datetime.now().isoformat()),
-            user_id='bleh'
-        )))
-```
-
 ## trainspace.py

 ```py
@@ -128,6 +107,40 @@ if __name__ == "__main__":
             )
         ),
     )
+    data = {
+        "trainspace_id": "000033",
+        "uid": "00001",
+        "name": "My Trainspace",
+        "data_source": "TABULAR",
+        "dataset_data": {"name": "IRIS", "is_default_dataset": True},
+        "parameters_data": {
+            "target_col": "target",
+            "features": [
+                "sepal length (cm)",
+                "sepal width (cm)",
+                "petal length (cm)",
+                "petal width (cm)",
+            ],
+            "problem_type": "CLASSIFICATION",
+            "criterion": "CELOSS",
+            "optimizer_name": "SGD",
+            "shuffle": True,
+            "epochs": 5,
+            "test_size": 0.2,
+            "batch_size": 20,
+            "layers": [
+                {"value": "LINEAR", "parameters": [10, 3]},
+                {"value": "RELU", "parameters": []},
+                {"value": "LINEAR", "parameters": [3, 10]},
+                {"value": "SOFTMAX", "parameters": [-1]},
+            ],
+        },
+        "review_data": {
+            "notification_email": "[email protected]",
+            "notification_phone_number": "",
+        },
+    }
+    print(6, TrainspaceData(**(data)))
 ```

 ## userprogress_db.py
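The `TrainspaceData(**(data))` smoke test above relies on ordinary dataclass keyword unpacking. A minimal standalone sketch of the same pattern (the field set here is trimmed to a hypothetical subset of the real schema):

```py
from dataclasses import dataclass


@dataclass
class TrainspaceData:  # hypothetical subset of the real dataclass fields
    trainspace_id: str
    uid: str = ""
    data_source: str = ""
    dataset_data: dict = None


data = {
    "trainspace_id": "000033",
    "uid": "00001",
    "data_source": "TABULAR",
    "dataset_data": {"name": "IRIS", "is_default_dataset": True},
}

# ** unpacks each dict key as a keyword argument, so every key must match a
# field name exactly; an unknown key raises TypeError instead of being dropped.
print(TrainspaceData(**data))
```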
3 changes: 3 additions & 0 deletions backend/aws_helpers/dynamo_db_utils/dynamo_db_utils.py
@@ -1,3 +1,5 @@
+from decimal import Decimal
+import json
 import boto3
 from backend.aws_helpers.dynamo_db_utils.constants import ALL_DYANMODB_TABLES
 from backend.common.constants import AWS_REGION
@@ -119,6 +121,7 @@ def create_dynamo_item(table_name: str, input_item: dict) -> bool:
     if input_item.get(partition_key) is None:
         raise ValueError("Item must have the partition key: " + partition_key)
     gsi_key = ALL_DYANMODB_TABLES[table_name].get("gsi")
+    input_item = json.loads(json.dumps(input_item), parse_float=Decimal)
    if gsi_key and input_item.get(gsi_key) is None:
         raise ValueError("Item must have the gsi key: " + gsi_key)
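The new `parse_float=Decimal` round-trip exists because DynamoDB (via boto3) does not accept Python floats in items; numbers must be `Decimal`. A minimal sketch of what the added line does (the item shown is made up):

```py
import json
from decimal import Decimal

# A hypothetical item whose nested dict contains a float, which DynamoDB
# would reject on write.
item = {"trainspace_id": "abc123", "parameters_data": {"test_size": 0.2}}

# Serializing to JSON and parsing back with parse_float=Decimal converts
# every float to Decimal, no matter how deeply it is nested.
safe_item = json.loads(json.dumps(item), parse_float=Decimal)

print(safe_item["parameters_data"]["test_size"])  # Decimal('0.2')
```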
86 changes: 0 additions & 86 deletions backend/aws_helpers/dynamo_db_utils/execution_db.py

This file was deleted.

2 changes: 1 addition & 1 deletion backend/aws_helpers/dynamo_db_utils/trainspace_db.py
@@ -27,7 +27,7 @@ class TrainspaceData:
     name: str = ""
     parameters_data: dict = None
     review_data: str = ""
-    status: TrainStatus = TrainStatus.QUEUED
+    status: str = TrainStatus.QUEUED.name


 def getTrainspaceData(trainspace_id: str) -> dict:
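Typing `status` as a plain string (`TrainStatus.QUEUED.name`) keeps the dataclass directly JSON- and DynamoDB-serializable. A sketch of the difference, using an assumed two-member `TrainStatus`:

```py
import json
from enum import Enum


class TrainStatus(Enum):  # members assumed for illustration
    QUEUED = "QUEUED"
    TRAINING = "TRAINING"


# Raw enum members are not JSON serializable...
try:
    json.dumps({"status": TrainStatus.QUEUED})
except TypeError as err:
    print(err)  # Object of type TrainStatus is not JSON serializable

# ...but the .name string round-trips cleanly and can be parsed back by name.
payload = json.dumps({"status": TrainStatus.QUEUED.name})
print(TrainStatus[json.loads(payload)["status"]])  # TrainStatus.QUEUED
```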
6 changes: 3 additions & 3 deletions backend/aws_helpers/dynamo_db_utils/userprogress_db.py
@@ -41,12 +41,12 @@ def updateUserProgressData(uid: str, requestData: dict) -> bool:
     return update_dynamo_item(TABLE_NAME, uid, requestData)


-def createUserProgressData(execution_data: UserProgressData) -> bool:
+def createUserProgressData(user_progress_data: UserProgressData) -> bool:
     """
     Create a new entry or replaces an existing entry table according to the `uid`.
-    @param execution_data: uid and other table attributes to be created or updated if the entry already exists
+    @param user_progress_data: uid and other table attributes to be created or updated if the entry already exists
     @return: True if the creation or update is successful
     """

-    return create_dynamo_item(TABLE_NAME, execution_data.__dict__)
+    return create_dynamo_item(TABLE_NAME, user_progress_data.__dict__)
37 changes: 35 additions & 2 deletions backend/aws_helpers/s3_utils/s3_client.py
@@ -1,7 +1,9 @@
 import datetime
+from backend.aws_helpers.dynamo_db_utils.trainspace_db import TrainspaceData
 import boto3
 import os
 import shutil
+import pandas as pd
+import io

 from backend.aws_helpers.s3_utils.s3_bucket_names import FILE_UPLOAD_BUCKET_NAME

@@ -28,7 +30,7 @@ def write_to_bucket(file_path: str, bucket_name: str, bucket_path: str):

 def read_from_bucket(
     bucket_name: str, bucket_path: str, output_file_name: str, output_file_path: str
-):
+) -> None:
     """
     Given S3 URI, read the file from the S3 bucket
@@ -48,6 +50,37 @@
     )


+def read_df_from_bucket(bucket_name: str, bucket_path: str) -> pd.DataFrame:
+    """
+    Given S3 URI, read the file from the S3 bucket and return a pandas dataframe
+
+    Args:
+        bucket_name (str): name of s3 bucket
+        bucket_path (str): path within s3 bucket where the file resides
+    """
+    s3 = boto3.client("s3")
+    obj = s3.get_object(Bucket=bucket_name, Key=bucket_path)
+    df = pd.read_csv(io.BytesIO(obj["Body"].read()))
+    return df
+
+
+def make_train_bucket_path(trainspace_data: TrainspaceData) -> str:
+    """
+    Given a TrainspaceData object, return the path to the bucket where the training data will be stored
+
+    Args:
+        trainspace_data (TrainspaceData): object containing data about the training data
+
+    Returns:
+        bucket_path (str): path to bucket where training data will be stored
+    """
+    uid = trainspace_data.uid
+    data_source = trainspace_data.data_source.lower()
+    filename = trainspace_data.dataset_data["name"]
+    return f"{uid}/{data_source}/{filename}"
+
+
 def get_presigned_url_from_bucket(bucket_name: str, bucket_path: str):
     """
     Given S3 URI, read the file from the S3 bucket
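A hypothetical end-to-end use of the two new helpers, assuming `TrainspaceData` accepts these fields as keyword arguments and that a CSV actually exists at the derived key:

```py
from backend.aws_helpers.dynamo_db_utils.trainspace_db import TrainspaceData
from backend.aws_helpers.s3_utils.s3_bucket_names import FILE_UPLOAD_BUCKET_NAME
from backend.aws_helpers.s3_utils.s3_client import (
    make_train_bucket_path,
    read_df_from_bucket,
)

# Hypothetical trainspace; only the fields make_train_bucket_path reads are set.
trainspace = TrainspaceData(
    trainspace_id="000033",
    uid="00001",
    data_source="TABULAR",
    dataset_data={"name": "iris.csv"},
)

key = make_train_bucket_path(trainspace)  # "00001/tabular/iris.csv"
df = read_df_from_bucket(FILE_UPLOAD_BUCKET_NAME, key)  # needs AWS credentials
print(df.head())
```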