Sample IDs are now used as the column names in outputs
mattheww95 committed Nov 21, 2024
1 parent 899e35b commit a084717
Showing 3 changed files with 33 additions and 11 deletions.
32 changes: 26 additions & 6 deletions bin/report_summaries.py
@@ -65,7 +65,6 @@ def write_table(self, table_data: Dict[str, Dict[str, str]]):
         """
         keys = set([k for k in table_data])
         ordered_keys = []
-
         # Get the wanted information to the top of the page
         poisoned_keys = set()
         for option in self.__key_order:
@@ -80,7 +79,6 @@ def write_table(self, table_data: Dict[str, Dict[str, str]]):
         ordered_keys.extend(scalar_keys)
         ordered_keys.extend(sorted([i for i in keys if i not in ordered_keys and i not in poisoned_keys]))
         row_labels = sorted([i for i in next(iter(table_data.values()))])
-
         self.write_tsv(table_data, row_labels, ordered_keys)
         self.write_transposed_tsv(table_data, row_labels, ordered_keys)

@@ -135,6 +133,29 @@ def update_table_labels(self, table, keys, info: CleaningInfo):
                 del table[previous]
         return sorted(processed_keys), poisoned_keys

+    def key_saver(self, sample_name, keys):
+        """
+        As we split on the period delimiter, and periods are allowed in
+        sample names, special care is needed when splitting keys so that
+        characters are not accidentally dropped from the sample names.
+
+        sample_name str: The sample name to preserve when splitting
+        keys list[str]: List of keys to split
+        """
+        return_values = []
+        for k in keys:
+            if k.startswith(sample_name):
+                sample_name_len = len(sample_name)
+                if sample_name.endswith(self.__key_delimiter):
+                    # Need to remove the next delimiter as well if the sample
+                    # name ends with one, or else an empty string is inserted
+                    sample_name_len += 1
+                split_string = k[sample_name_len:]
+                sample_keys = [sample_name, *[i for i in split_string.split(self.__key_delimiter)]]
+                return_values.append(sample_keys)
+                continue
+            return_values.append(k.split(self.__key_delimiter))
+        return return_values

     def make_table(self, data):
         """Create an aggregated table of report data from mikrokondo
@@ -146,7 +167,7 @@ def make_table(self, data):

         sample_data = defaultdict(list)
         for k, v in data.items():
-            keys = [i.split(self.__key_delimiter) for i in v.keys()]
+            keys = self.key_saver(k, v.keys())
             copy_keys = []
             tool_keys = set()
             for i in keys:
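Why the new key_saver call matters: the flattened report keys are built from the sample name plus the period delimiter, and sample names may themselves contain periods, so the old comprehension could shear a name like "sample.1" into "sample" and "1". A simplified standalone Python sketch of the idea (not the committed implementation; the key below is hypothetical):

    DELIM = "."

    def split_preserving_sample(sample_name, key, delimiter=DELIM):
        # Treat the sample name as an atomic prefix; split only the remainder.
        if key.startswith(sample_name):
            rest = key[len(sample_name):].lstrip(delimiter)
            return [sample_name, *rest.split(delimiter)] if rest else [sample_name]
        return key.split(delimiter)

    key = "sample.1.checkm.Completeness"

    print(key.split(DELIM))                          # old behaviour
    # ['sample', '1', 'checkm', 'Completeness']      <- sample name broken apart

    print(split_preserving_sample("sample.1", key))  # new behaviour
    # ['sample.1', 'checkm', 'Completeness']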
@@ -266,11 +287,11 @@ def output_indv_json(self, flattened_data):
         for k, v in flattened_data.items():
             out_key = k
             sample_dir = k
-            if dir_name := v.get(self.__inx_irida_key) != k:
+            dir_name = v.get(self.__inx_irida_key)
+            if k != dir_name:
                 sample_dir = dir_name
                 #! this field affects the identification of the irida next id being passed out of the pipeline
                 out_key = sample_dir # this field must be overwritten for iridanext to identify the correct metadata field
-
             out_dir = os.path.join(self.output_dir, sample_dir)
             out_path = os.path.join(out_dir, k + self.flat_sample_string)
             if not os.path.isdir(out_dir): # Check for directory existence, as it will still exist on pipeline resumes
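The removed line was an operator-precedence bug: in `dir_name := v.get(...) != k` the comparison binds tighter than `:=`, so dir_name captured a boolean instead of the looked-up ID. A minimal demonstration (the key and values are hypothetical stand-ins for one flattened record):

    v = {"irida_id": "ABC-123"}
    k = "sample_1"

    # Old form: ':=' assigns the comparison result, not the lookup
    if dir_name := v.get("irida_id") != k:
        print(dir_name)  # True -- so sample_dir would have become a bool

    # Fixed form: look the value up first, then compare
    dir_name = v.get("irida_id")
    if k != dir_name:
        print(dir_name)  # ABC-123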
@@ -298,7 +319,6 @@ def to_file(self):
                     out_file.write(f'"{val_write}"')
                 else:
                     out_file.write(val_write)
-                # out_file.write(str(ii[1][i]).replace('\n', ' \\'))
                 out_file.write(self.__delimiter)
             out_file.write("\n")

7 changes: 4 additions & 3 deletions modules/local/report.nf
@@ -47,6 +47,7 @@ process REPORT{
         }

         update_map_values(sample_data, meta_data, "metagenomic")
+        update_map_values(sample_data, meta_data, "id")
         update_map_values(sample_data, meta_data, "sample")
         update_map_values(sample_data, meta_data, "external_id")
         update_map_values(sample_data, meta_data, "assembly")
@@ -55,8 +56,8 @@ process REPORT{
         update_map_values(sample_data, meta_data, "merge")
         update_map_values(sample_data, meta_data, "downsampled")

-        if(!sample_data[meta_data.sample].containsKey(meta_data.external_id)){
-            sample_data[meta_data.sample][meta_data.external_id] = [:]
+        if(!sample_data[meta_data.sample].containsKey(meta_data.id)){
+            sample_data[meta_data.sample][meta_data.id] = [:]
         }

         if(report_value instanceof Path){
@@ -70,7 +71,7 @@ process REPORT{
             }
         }

-        sample_data[meta_data.sample][meta_data.external_id][report_tag.report_tag] = report_value
+        sample_data[meta_data.sample][meta_data.id][report_tag.report_tag] = report_value
     }


5 changes: 3 additions & 2 deletions subworkflows/local/input_check.nf
@@ -29,7 +29,7 @@ workflow INPUT_CHECK {
         // hidden files causing odd issues later on in the pipeline

         if(meta[0].id == null){
-            // Remove any unallowed charactars in the meta.id field
+            // Remove any unallowed characters in the meta.id field
             meta[0].id = meta[0].external_id.replaceAll(/^\./, '_')
             meta[0].id = meta[0].id.replaceAll(/[^A-Za-z0-9_\.\-]/, '_')
         }else {
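For reference, the two replaceAll calls above first replace a leading period (which would otherwise produce a hidden file or directory) and then replace every character outside [A-Za-z0-9_.-] with an underscore. A small Python sketch mirroring that logic (the input ID is hypothetical):

    import re

    def sanitize_id(external_id: str) -> str:
        # Replace a leading '.' so the ID cannot name a hidden file/directory,
        # then replace anything outside [A-Za-z0-9_.-] with '_'.
        cleaned = re.sub(r"^\.", "_", external_id)
        return re.sub(r"[^A-Za-z0-9_.\-]", "_", cleaned)

    print(sanitize_id(".sample 1/a"))  # _sample_1_a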
@@ -133,7 +133,8 @@ def format_reads(ArrayList sheet_data){
     def meta = [:]
     def error_occured = false
     meta.id = sheet_data[0] // id is first value
-    meta.sample = sheet_data[1].external_id
+    //meta.sample = sheet_data[1].external_id
+    meta.sample = sheet_data[0]
     meta.external_id = sheet_data[1].external_id

     meta.hybrid = false
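With this change both meta.sample and meta.id come from the first samplesheet column, while the external ID is kept in its own field. A Python sketch of the mapping (the row layout is inferred from the code above and the second element is modelled as a dict; real rows may differ):

    def format_reads_sketch(sheet_data):
        meta = {}
        meta["id"] = sheet_data[0]                        # id is first value
        meta["sample"] = sheet_data[0]                    # was sheet_data[1]["external_id"]
        meta["external_id"] = sheet_data[1]["external_id"]
        return meta

    row = ["sample_1", {"external_id": "EXT 001"}]
    print(format_reads_sketch(row))
    # {'id': 'sample_1', 'sample': 'sample_1', 'external_id': 'EXT 001'}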
