Skip to content

Commit

Permalink
Add support for including file path for collation (#30)
Browse files Browse the repository at this point in the history
Add support for including the file path in the names of collated files. This
solves the problem where file name clashes occur when you are collecting files
from multiple agents. Adding the --include-file-path option ensures
that the file names are unique.
  • Loading branch information
rhyshort authored May 30, 2024
1 parent 04d3495 commit 190cbc2
Show file tree
Hide file tree
Showing 4 changed files with 59 additions and 2 deletions.
7 changes: 7 additions & 0 deletions harvest/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,12 @@ def _init_arguments(self):
metavar="YYYY-MM-DD or YYYYMMDD",
default=False,
)
self.add_argument(
"--include-file-path",
help="Should the file path be included in the saved file names",
action="store_true",
dest="include_file_path",
)

def _validate_arguments(self, args):
if not args.end:
Expand Down Expand Up @@ -137,6 +143,7 @@ def _run(self, args):
"master",
args.repo_path,
args.no_validate,
include_file_path=args.include_file_path,
)

for file in args.filepath:
Expand Down
18 changes: 16 additions & 2 deletions harvest/collator.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,15 @@
class Collator(object):
"""Harvest collator to retrieve Git repository content."""

def __init__(self, repo_url, creds, branch, repo_path=None, validate=True):
def __init__(
self,
repo_url,
creds,
branch,
repo_path=None,
validate=True,
include_file_path=False,
):
"""Construct the Collator object."""
parsed = urlparse(repo_url)
self.scheme = parsed.scheme
Expand All @@ -38,6 +46,7 @@ def __init__(self, repo_url, creds, branch, repo_path=None, validate=True):
self.repo_path = repo_path
self.git_repo = None
self.validate = validate
self.include_file_path = include_file_path

@property
def local_path(self):
Expand Down Expand Up @@ -84,16 +93,21 @@ def read(self, filepath, from_dt, until_dt):
raise FileMissingError(f"{filepath} not found between {since} and {until}")
return commits

def write(self, filepath, commits):
def write(self, filepath: str, commits):
"""
Create file artifacts.
:param str filepath: The relative path to the file within the repo
:param list commits: A list of commits for a given file and date range
"""
file_path_include = ""
if self.include_file_path:
file_path_include = "_".join(filepath.rsplit("/")[:-1]) + "_"

for commit in commits:
file_name = (
f"./{self._ts_to_str(commit.committed_date)}_"
f"{file_path_include}"
f'{filepath.rsplit("/", 1).pop()}'
)
with open(file_name, "w+") as f:
Expand Down
24 changes: 24 additions & 0 deletions test/test_cli_collate.py
Original file line number Diff line number Diff line change
Expand Up @@ -307,3 +307,27 @@ def test_collate_local(self, mock_read, mock_write):
datetime(today.year, today.month, today.day),
)
mock_write.assert_called_once_with("my/path/baz.json", ["commit-foo"])

@patch("harvest.collator.Collator.write")
@patch("harvest.collator.Collator.read")
def test_collate_include_file_path(self, mock_read, mock_write):
    """Check that 'collate local' runs when '--include-file-path' is passed.

    Collator.read and Collator.write are patched out, so this only checks
    the arguments the CLI hands to them, not the file names produced.
    """
    target = "my/path/baz.json"
    mock_read.return_value = ["commit-foo"]

    argv = [
        "collate",
        "local",
        target,
        "--include-file-path",
        "--repo-path",
        "os/repo/path",
    ]
    self.harvest.run(argv)

    # The date is sampled after the run, and both range bounds are
    # expected to be today's date at midnight.
    now = datetime.today()
    midnight = datetime(now.year, now.month, now.day)

    mock_read.assert_called_once_with(target, midnight, midnight)
    mock_write.assert_called_once_with(target, ["commit-foo"])
12 changes: 12 additions & 0 deletions test/test_collator.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,18 @@ def test_write_functionality(self):
self.assertIn(call("./20191105_foo.json", "w+"), m.mock_calls)
self.assertIn(call("./20191101_foo.json", "w+"), m.mock_calls)

def test_write_includes_file_path(self):
    """Check that write() puts the directory parts into the output file names.

    With include_file_path=True, "raw/foo/foo.json" is expected to be
    opened as "./<date>_raw_foo_foo.json" for each of the fixture commits.
    """
    opener = mock_open()
    with patch("builtins.open", opener):
        Collator(*self.args, include_file_path=True).write(
            "raw/foo/foo.json", self.commits
        )
        file_handle = opener()

    self.assertEqual(file_handle.write.call_count, 3)

    expected_names = (
        "./20191106_raw_foo_foo.json",
        "./20191105_raw_foo_foo.json",
        "./20191101_raw_foo_foo.json",
    )
    for expected in expected_names:
        self.assertIn(call(expected, "w+"), opener.mock_calls)

@patch("harvest.collator.git.Repo.clone_from")
@patch("harvest.collator.os.path.isdir")
def test_checkout_clone(self, is_dir_mock, clone_from_mock):
Expand Down

0 comments on commit 190cbc2

Please sign in to comment.