Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for including file path for collation #30

Merged
merged 1 commit into from
May 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions harvest/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,12 @@ def _init_arguments(self):
metavar="YYYY-MM-DD or YYYYMMDD",
default=False,
)
self.add_argument(
"--include-file-path",
help="Should the file path be included in the saved file names",
action="store_true",
dest="include_file_path",
)

def _validate_arguments(self, args):
if not args.end:
Expand Down Expand Up @@ -137,6 +143,7 @@ def _run(self, args):
"master",
args.repo_path,
args.no_validate,
include_file_path=args.include_file_path,
)

for file in args.filepath:
Expand Down
18 changes: 16 additions & 2 deletions harvest/collator.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,15 @@
class Collator(object):
"""Harvest collator to retrieve Git repository content."""

def __init__(self, repo_url, creds, branch, repo_path=None, validate=True):
def __init__(
self,
repo_url,
creds,
branch,
repo_path=None,
validate=True,
include_file_path=False,
):
"""Construct the Collator object."""
parsed = urlparse(repo_url)
self.scheme = parsed.scheme
Expand All @@ -38,6 +46,7 @@ def __init__(self, repo_url, creds, branch, repo_path=None, validate=True):
self.repo_path = repo_path
self.git_repo = None
self.validate = validate
self.include_file_path = include_file_path

@property
def local_path(self):
Expand Down Expand Up @@ -84,16 +93,21 @@ def read(self, filepath, from_dt, until_dt):
raise FileMissingError(f"{filepath} not found between {since} and {until}")
return commits

def write(self, filepath, commits):
def write(self, filepath: str, commits):
"""
Create file artifacts.

:param str filepath: The relative path to the file within the repo
:param list commits: A list of commits for a given file and date range
"""
file_path_include = ""
if self.include_file_path:
file_path_include = "_".join(filepath.rsplit("/")[:-1]) + "_"

for commit in commits:
file_name = (
f"./{self._ts_to_str(commit.committed_date)}_"
f"{file_path_include}"
f'{filepath.rsplit("/", 1).pop()}'
)
with open(file_name, "w+") as f:
Expand Down
24 changes: 24 additions & 0 deletions test/test_cli_collate.py
Original file line number Diff line number Diff line change
Expand Up @@ -307,3 +307,27 @@ def test_collate_local(self, mock_read, mock_write):
datetime(today.year, today.month, today.day),
)
mock_write.assert_called_once_with("my/path/baz.json", ["commit-foo"])

@patch("harvest.collator.Collator.write")
@patch("harvest.collator.Collator.read")
def test_collate_include_file_path(self, mock_read, mock_write):
    """Check 'collate local' accepts '--include-file-path' and still reads/writes the file."""
    target = "my/path/baz.json"
    mock_read.return_value = ["commit-foo"]

    cli_args = ["collate", "local", target]
    cli_args += ["--include-file-path"]
    cli_args += ["--repo-path", "os/repo/path"]
    self.harvest.run(cli_args)

    # With no explicit date range given, read is expected to be called
    # with today's date (at midnight) for both ends of the range.
    now = datetime.today()
    midnight = datetime(now.year, now.month, now.day)
    mock_read.assert_called_once_with(target, midnight, midnight)

    # NOTE(review): only the read/write calls are checked here; nothing in
    # this test asserts that the flag value itself reaches the Collator.
    mock_write.assert_called_once_with(target, ["commit-foo"])
12 changes: 12 additions & 0 deletions test/test_collator.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,18 @@ def test_write_functionality(self):
self.assertIn(call("./20191105_foo.json", "w+"), m.mock_calls)
self.assertIn(call("./20191101_foo.json", "w+"), m.mock_calls)

def test_write_includes_file_path(self):
    """Check write() puts the underscore-joined directory path into each file name."""
    opener = mock_open()
    with patch("builtins.open", opener):
        Collator(*self.args, include_file_path=True).write(
            "raw/foo/foo.json", self.commits
        )
        handle = opener()

    # One write per commit in the fixture.
    self.assertEqual(handle.write.call_count, 3)

    # "raw/foo/foo.json" should be opened as "<date>_raw_foo_foo.json".
    expected_names = (
        "./20191106_raw_foo_foo.json",
        "./20191105_raw_foo_foo.json",
        "./20191101_raw_foo_foo.json",
    )
    for name in expected_names:
        self.assertIn(call(name, "w+"), opener.mock_calls)

@patch("harvest.collator.git.Repo.clone_from")
@patch("harvest.collator.os.path.isdir")
def test_checkout_clone(self, is_dir_mock, clone_from_mock):
Expand Down
Loading