Skip to content

Commit

Permalink
Implement basic functionality for Upload API (#380)
Browse files Browse the repository at this point in the history
Summary:
Pull Request resolved: #380

* Add an `upload_file` API to storage service.
* Add an optional metadata param to upload APIs in all relevant interfaces. Most cloud providers support metadata param in their upload APIs, so it should be a fairly generic feature.

Some rationales behind these changes:
* Since object metadata is a generic feature provided by cloud providers, it makes sense to enable it in the storage service layer
* It does not really make sense to add object metadata in the `copy` API, because we normally don't expect the files to be changed during copying
* So I'm adding a new upload_file to support this. We should be calling this API directly if we are specifically uploading files from local to the storage service.

reference: relevant [boto3 docs](https://boto3.amazonaws.com/v1/documentation/api/latest/guide/s3-uploading-files.html) for metadata

**Next Step**: enable the upload API to record a checksum associated with each file

Differential Revision: D38120426

fbshipit-source-id: 2e86ec3083b7d1d7d9b7193d937f16b2cd510492
  • Loading branch information
Yige Zhu authored and facebook-github-bot committed Jul 26, 2022
1 parent 6c81d67 commit 365ce5e
Show file tree
Hide file tree
Showing 9 changed files with 98 additions and 11 deletions.
10 changes: 9 additions & 1 deletion fbpcp/gateway/s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,13 +47,21 @@ def delete_bucket(self, bucket: str) -> None:
self.client.delete_bucket(Bucket=bucket)

@error_handler
def upload_file(self, file_name: str, bucket: str, key: str) -> None:
def upload_file(
    self,
    file_name: str,
    bucket: str,
    key: str,
    metadata: Optional[Dict[str, Any]] = None,
) -> None:
    """Upload a local file to an S3 object, reporting progress via ProgressPercentage.

    Args:
        file_name: path of the local file to upload.
        bucket: destination S3 bucket name.
        key: destination object key within the bucket.
        metadata: optional user-defined object metadata to attach to the
            uploaded object (stored by S3 as x-amz-meta-* headers).
    """
    file_size = os.path.getsize(file_name)
    # Only attach a Metadata entry when metadata was actually supplied;
    # sending ExtraArgs={"Metadata": {}} is unnecessary and diverges from the
    # pre-metadata behavior for callers that pass no metadata.
    extra_args = {"Metadata": metadata} if metadata else {}
    self.client.upload_file(
        file_name,
        bucket,
        key,
        Callback=self.ProgressPercentage(file_name, file_size),
        ExtraArgs=extra_args,
    )

@error_handler
Expand Down
8 changes: 7 additions & 1 deletion fbpcp/service/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import abc
import re
from enum import Enum
from typing import List
from typing import Any, Dict, List, Optional

from fbpcp.entity.file_information import FileInfo
from fbpcp.entity.policy_statement import PolicyStatement, PublicAccessBlockConfig
Expand Down Expand Up @@ -75,3 +75,9 @@ def get_bucket_public_access_block(self, bucket: str) -> PublicAccessBlockConfig
@abc.abstractmethod
def list_files(self, dirPath: str) -> List[str]:
pass

@abc.abstractmethod
def upload_file(
    self, source: str, destination: str, metadata: Optional[Dict[str, Any]] = None
) -> None:
    """Upload a single local file to the storage service.

    Args:
        source: path of the local file to upload.
        destination: storage path to upload to (e.g. an S3 URL in the S3
            implementation — presumably provider-specific; see subclasses).
        metadata: optional key/value object metadata to attach to the
            uploaded object; defaults to None (no metadata).
    """
    pass
5 changes: 5 additions & 0 deletions fbpcp/service/storage_gcs.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,11 @@ def copy(self, source: str, destination: str, recursive: bool = False) -> None:
dest_key=destination_gcs_path.key,
)

def upload_file(
    self, source: str, destination: str, metadata: Optional[Dict[str, Any]] = None
) -> None:
    """Upload a local file to GCS. Not implemented yet.

    ``metadata`` defaults to None so the signature stays consistent with the
    abstract StorageService.upload_file declaration and the S3 implementation
    (the original override omitted the default, making the override's
    signature stricter than the interface).
    """
    raise NotImplementedError

def upload_dir(self, source: str, gcs_path_bucket: str, gcs_path_key: str) -> None:
"""Upload a directory from the filesystem to GCS
Expand Down
8 changes: 7 additions & 1 deletion fbpcp/service/storage_s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ def copy(self, source: str, destination: str, recursive: bool = False) -> None:
raise ValueError(f"Source {source} is a folder. Use --recursive")
self.upload_dir(source, s3_path.bucket, s3_path.key)
else:
self.s3_gateway.upload_file(source, s3_path.bucket, s3_path.key)
self.upload_file(source, destination)
else:
source_s3_path = S3Path(source)
if StorageService.path_type(destination) == PathType.S3:
Expand Down Expand Up @@ -111,6 +111,12 @@ def copy(self, source: str, destination: str, recursive: bool = False) -> None:
source_s3_path.bucket, source_s3_path.key, destination
)

def upload_file(
    self, source: str, destination: str, metadata: Optional[Dict[str, Any]] = None
) -> None:
    """Upload a single local file to S3, optionally attaching object metadata.

    Args:
        source: path of the local file to upload.
        destination: S3 destination path (parsed into bucket and key).
        metadata: optional key/value metadata forwarded to the S3 gateway.
    """
    dest_path = S3Path(destination)
    self.s3_gateway.upload_file(source, dest_path.bucket, dest_path.key, metadata)

def upload_dir(self, source: str, s3_path_bucket: str, s3_path_key: str) -> None:
for root, dirs, files in os.walk(source):
for file in files:
Expand Down
12 changes: 9 additions & 3 deletions onedocker/repository/onedocker_package.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
# LICENSE file in the root directory of this source tree.

# pyre-strict
from typing import List
from typing import Any, Dict, List, Optional

from fbpcp.service.storage import StorageService
from onedocker.entity.package_info import PackageInfo
Expand All @@ -19,9 +19,15 @@ def __init__(self, storage_svc: StorageService, repository_path: str) -> None:
def _build_package_path(self, package_name: str, version: str) -> str:
return f"{self.repository_path}{package_name}/{version}/{package_name.split('/')[-1]}"

def upload(self, package_name: str, version: str, source: str) -> None:
def upload(
    self,
    package_name: str,
    version: str,
    source: str,
    metadata: Optional[Dict[str, Any]] = None,
) -> None:
    """Upload a package binary from a local *source* path to its repository location.

    Args:
        package_name: repository-relative package name (may contain slashes).
        version: package version used to build the destination path.
        source: local file path to upload.
        metadata: optional object metadata forwarded to the storage service.
    """
    destination = self._build_package_path(package_name, version)
    self.storage_svc.upload_file(source, destination, metadata)

def download(self, package_name: str, version: str, destination: str) -> None:
package_path = self._build_package_path(package_name, version)
Expand Down
9 changes: 7 additions & 2 deletions onedocker/repository/onedocker_repository_service.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,18 @@
#!/usr/bin/env python3
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

from datetime import datetime
from typing import Optional

from fbpcp.service.storage import StorageService
from onedocker.repository.onedocker_checksum import OneDockerChecksumRepository
from onedocker.repository.onedocker_package import OneDockerPackageRepository


class OnedockerRepositoryService:
class OneDockerRepositoryService:
def __init__(
self,
storage_svc: StorageService,
Expand All @@ -32,7 +34,10 @@ def upload(
source: str,
metadata: Optional[dict] = None,
) -> None:
raise NotImplementedError
today = datetime.today().strftime("%Y-%m-%d")
metadata = metadata if metadata else {}
metadata["upload_date"] = today
self.package_repo.upload(package_name, version, source, metadata)

def download(self, package_name: str, version: str, destination: str) -> None:
    """Download a package version from the repository to *destination*.

    Not implemented yet — this change only wires up the upload path.
    """
    raise NotImplementedError
Expand Down
4 changes: 2 additions & 2 deletions onedocker/tests/repository/test_onedocker_package.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,8 @@ def test_onedockerrepo_upload(self):
)

# Assert
self.onedocker_repository.storage_svc.copy.assert_called_with(
source, self.expected_s3_dest
self.onedocker_repository.storage_svc.upload_file.assert_called_with(
source, self.expected_s3_dest, None
)

def test_onedockerrepo_download(self):
Expand Down
51 changes: 51 additions & 0 deletions onedocker/tests/repository/test_onedocker_repository_service.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
#!/usr/bin/env python3
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import unittest
from datetime import datetime
from unittest.mock import MagicMock, patch

from onedocker.repository.onedocker_repository_service import OneDockerRepositoryService


class TestOneDockerRepositoryService(unittest.TestCase):
    """Tests for OneDockerRepositoryService with its repositories mocked out."""

    TEST_PACKAGE_PATH = "private_lift/lift"
    TEST_PACKAGE_NAME = TEST_PACKAGE_PATH.split("/")[-1]
    TEST_PACKAGE_VERSION = "latest"

    @patch(
        "onedocker.repository.onedocker_repository_service.OneDockerChecksumRepository"
    )
    @patch(
        "onedocker.repository.onedocker_repository_service.OneDockerPackageRepository"
    )
    @patch("fbpcp.service.storage_s3.S3StorageService")
    def setUp(
        self, mock_storage_svc, mock_package_repo_cls, mock_checksum_repo_cls
    ) -> None:
        # Patch decorators apply bottom-up, so the storage-service mock is the
        # first injected argument, then the package-repo and checksum-repo
        # class mocks.
        self.package_repo = MagicMock()
        mock_package_repo_cls.return_value = self.package_repo
        self.repo_service = OneDockerRepositoryService(
            mock_storage_svc, "/package_repo_path/", "/checksum_repo_path/"
        )

    def test_onedocker_repo_service_upload(self) -> None:
        # Arrange
        source_path = "test_source_path"
        expected_metadata = {"upload_date": datetime.today().strftime("%Y-%m-%d")}

        # Act
        self.repo_service.upload(
            self.TEST_PACKAGE_PATH, self.TEST_PACKAGE_VERSION, source_path
        )

        # Assert: the service stamps today's date into the metadata and
        # delegates the upload to the package repository.
        self.package_repo.upload.assert_called_with(
            self.TEST_PACKAGE_PATH,
            self.TEST_PACKAGE_VERSION,
            source_path,
            expected_metadata,
        )
2 changes: 1 addition & 1 deletion tests/service/test_storage_s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ def test_copy_local_to_s3(self, MockS3Gateway):
service.s3_gateway.upload_file = MagicMock(return_value=None)
service.copy(self.LOCAL_FILE, self.S3_FILE)
service.s3_gateway.upload_file.assert_called_with(
str(self.LOCAL_FILE), "bucket", "test_file"
str(self.LOCAL_FILE), "bucket", "test_file", None
)

def test_copy_local_dir_to_s3_recursive_false(self):
Expand Down

0 comments on commit 365ce5e

Please sign in to comment.