
Commit

Pipeline class in ruffus requires a name argument; added one
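
The change in a nutshell: every ruffus Pipeline now gets an explicit name, and the S3 decorator tests write real temporary files instead of poking the mocked S3 store. A minimal sketch of the resulting pattern follows (the import lines are assumptions for illustration; the test file itself only shows Pipeline(...), suffix(...) and a module referred to as P that provides s3_transform/s3_merge):

    # Sketch only -- the imports below are assumed, not taken from the diff.
    from ruffus import Pipeline, suffix      # assumed source of Pipeline/suffix
    from cgatcore import pipeline as P       # assumed source of the s3_* decorators

    p = Pipeline("example_pipeline")         # the name argument is now required

    @P.s3_transform("input.txt", suffix(".txt"), ".processed")
    def process_file(infile, outfile):
        # Upper-case the input into the output, as the test task does.
        with open(infile) as f_in, open(outfile, "w") as f_out:
            f_out.write(f_in.read().upper())

    p.run()
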
Acribbs committed Oct 29, 2024
1 parent aadd91e commit 0637d49
Showing 1 changed file with 26 additions and 11 deletions.
37 changes: 26 additions & 11 deletions tests/test_s3_decorators.py
@@ -26,44 +26,59 @@ def setUp(self):
         self.patcher = patch('cgatcore.remote.aws.boto3.resource')
         self.mock_resource = self.patcher.start()
         self.mock_resource.return_value = self.mock_s3
+        self.temp_dir = tempfile.mkdtemp()
 
     def tearDown(self):
         self.patcher.stop()
+        for file in os.listdir(self.temp_dir):
+            os.remove(os.path.join(self.temp_dir, file))
+        os.rmdir(self.temp_dir)
 
     def test_s3_transform(self):
-        p = Pipeline()
+        p = Pipeline("test_s3_transform")
 
-        @P.s3_transform("s3://my-bucket/input.txt", suffix(".txt"), ".processed")
+        input_path = os.path.join(self.temp_dir, "input.txt")
+        output_path = os.path.join(self.temp_dir, "input.processed")
+
+        @P.s3_transform(input_path, suffix(".txt"), ".processed")
         def process_file(infile, outfile):
             with open(infile, 'r') as f_in, open(outfile, 'w') as f_out:
                 f_out.write(f_in.read().upper())
 
         # Simulate input file
-        self.mock_s3.storage["my-bucket/input.txt"] = "hello world"
+        with open(input_path, 'w') as f:
+            f.write("hello world")
 
         p.run()
 
-        self.assertIn("my-bucket/input.processed", self.mock_s3.storage)
-        self.assertEqual(self.mock_s3.storage["my-bucket/input.processed"], "HELLO WORLD")
+        self.assertTrue(os.path.exists(output_path))
+        with open(output_path, 'r') as f:
+            self.assertEqual(f.read(), "HELLO WORLD")
 
     def test_s3_merge(self):
-        p = Pipeline()
+        p = Pipeline("test_s3_merge")
 
+        input_files = [os.path.join(self.temp_dir, f"file{i}.txt") for i in range(1, 3)]
+        output_file = os.path.join(self.temp_dir, "merged.txt")
+
-        @P.s3_merge(["s3://my-bucket/file1.txt", "s3://my-bucket/file2.txt"], "s3://my-bucket/merged.txt")
+        @P.s3_merge(input_files, output_file)
         def merge_files(infiles, outfile):
             with open(outfile, 'w') as f_out:
                 for infile in infiles:
                     with open(infile, 'r') as f_in:
                         f_out.write(f_in.read() + '\n')
 
         # Simulate input files
-        self.mock_s3.storage["my-bucket/file1.txt"] = "content1"
-        self.mock_s3.storage["my-bucket/file2.txt"] = "content2"
+        for i, file in enumerate(input_files, 1):
+            with open(file, 'w') as f:
+                f.write(f"content{i}")
 
         p.run()
 
-        self.assertIn("my-bucket/merged.txt", self.mock_s3.storage)
-        self.assertEqual(self.mock_s3.storage["my-bucket/merged.txt"], "content1\ncontent2\n")
+        self.assertTrue(os.path.exists(output_file))
+        with open(output_file, 'r') as f:
+            content = f.read().strip().split('\n')
+        self.assertEqual(content, ["content1", "content2"])
 
 
 if __name__ == '__main__':
