Skip to content

Commit

Permalink
docs: add s3 documentation
Browse files Browse the repository at this point in the history
  • Loading branch information
PaulKalho committed Nov 20, 2024
1 parent a0c0dbf commit 39d8666
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 4 deletions.
6 changes: 5 additions & 1 deletion scystream/sdk/config/config_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@

CONFIG_FILE_DEFAULT_NAME = "cbc.yaml"
UNNAMED_APP_NAME = "unnamed_compute_block"
# TODO: is that right? Is ComputeBlock in Docker Network?
# In production, the ComputeBlock must be within the same docker network
# as the spark-master & workers!
COMPUTE_BLOCK_SPARK_DEFAULT_MASTER = "spark://spark-master:7077"


Expand Down Expand Up @@ -40,6 +41,9 @@ def set_config_path(self, config_path: str):
def get_config_path(self) -> str:
    """
    Return the configuration file path stored on this instance.

    :return: The current value of ``self.config_path``.
    """
    return self.config_path

def get_cb_spark_master(self) -> str:
    """
    Return the value of ``self.cb_spark_master``.

    NOTE(review): presumably the Spark master URL used by the compute
    block; where the attribute is assigned is not visible here - confirm.

    :return: The current value of ``self.cb_spark_master``.
    """
    return self.cb_spark_master


def _compare_configs(
config_from_yaml: Union[ComputeBlock, Entrypoint, InputOutputModel],
Expand Down
21 changes: 19 additions & 2 deletions scystream/sdk/file_handling/s3_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,7 @@ def __init__(
):
self.boto_client = boto3.client(
"s3",
endpoint_url=f"{config.endpoint}:{
config.port}",
endpoint_url=f"{config.endpoint}:{config.port}",
aws_access_key_id=config.access_key,
aws_secret_access_key=config.secret_key,
)
Expand All @@ -35,6 +34,15 @@ def upload_file(
bucket_name: str,
target_name: str
):
"""
Uploads a file from a local directory to the specified S3 bucket.
:param path_to_file: Path to the local file.
:param bucket_name: The name of the bucket where the file will be
uploaded. If the bucket does not already exist, it will be created.
:param target_name: The name of the file after uploading.
"""

# TODO: Validate target_name to be not dangerous/invalid
self._create_bucket_if_not_exists(bucket_name)
self.boto_client.upload_file(
Expand All @@ -46,5 +54,14 @@ def download_file(
s3_object_name: str,
local_file_path: str
):
"""
Downloads a file from the specified S3 bucket to a local path.
:param bucket_name: The bucket from where the file will be downloaded.
:param s3_object_name: The name of the file on the S3 bucket, which
will be downloaded.
:param local_file_path: The path to where the downloaded file will be
placed.
"""
self.boto_client.download_file(
bucket_name, s3_object_name, local_file_path)
3 changes: 2 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@
"PyYAML>=6.0.2",
"pydantic-settings>=2.6.1",
"pyspark>=3.5.3",
"setuptools>=75.5.0"
"setuptools>=75.5.0",
"boto3>=1.35.65"
],
classifiers=[
"Programming Language :: Python :: 3",
Expand Down

0 comments on commit 39d8666

Please sign in to comment.