Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

⚡ update manifest format #10

Merged
merged 2 commits into from
Jul 30, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 20 additions & 8 deletions d3b_dff_cli/modules/validation/check_manifest.py
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
import json
import argparse
import csv
import pandas as pd

# Define a function to perform validation
def validate_row(row, rules):
Expand Down Expand Up @@ -60,18 +60,30 @@ def validate_row(row, rules):
def main(args):
rule_type = args.rule_type
rules_json = args.rules
manifest_data = []
with open(args.manifest_file, "r") as csv_file:
csv_reader = csv.DictReader(csv_file)
for row in csv_reader:
manifest_data.append(row)

manifest = args.manifest_file

file_extension = manifest.split('.')[-1].lower()
if file_extension == 'csv':
manifest_data = pd.read_csv(manifest)
elif file_extension == 'tsv':
manifest_data = pd.read_csv(manifest, delimiter='\t')
elif file_extension in ['xls', 'xlsx']:
xlsx = pd.ExcelFile(manifest)
if len(xlsx.sheet_names) == 1:
manifest_data = pd.read_excel(xlsx)
elif "Genomics_Manifest" in xlsx.sheet_names:
manifest_data = pd.read_excel(xlsx, "Genomics_Manifest")
else:
raise ValueError(f"Genomics_Manifest sheet not found in {manifest}")
else:
raise ValueError("Unsupported file format. Please provide a CSV, TSV, or Excel file.")

with open(rules_json, "r") as json_file:
validation_rules = json.load(json_file)[rule_type]

# Iterate through each row in the DataFrame and perform validation
validation_failed = False
for index, row in enumerate(manifest_data):
for index, row in manifest_data.iterrows():
is_valid, messages = validate_row(row, validation_rules)
if not is_valid:
error_message = "Validation Failed For Row {0}:\n{1}".format(index + 1, '\n'.join(messages))
Expand Down