Skip to content

Commit

Permalink
consider case where no key exists in labels
Browse files (browse the repository at this point in the history)
  • Loading branch information
dsethz committed Nov 1, 2024
1 parent 50193f1 commit 02007a9
Showing 1 changed file with 9 additions and 0 deletions.
9 changes: 9 additions & 0 deletions src/nuclai/preprocessing/generate_cls_input.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -99,11 +99,19 @@ def main():

# create dictionary
data = {}
unlabeled_ids = []
i, j = 0, 0
while i < len(features):
path = features[i]
idx = os.path.basename(path).split(".")[0].split("_")[-2]

# check if labels[idx] exists
if idx not in labels:
unlabeled_ids.append(path)
i += 1
continue

# if label exists and is in class_pos or class_neg add to data
if labels[idx]["ctype"] == class_pos:
tmp = {"path": path, "label": 1}
data[j] = tmp
Expand All @@ -119,6 +127,7 @@ def main():

print(f"Number of kept samples: {j}")
print(f"Number of removed samples: {i - j}")
print(f"Unlabeled samples: {unlabeled_ids}")

with open(os.path.join(path_out, f"{prefix}_data.json"), "w") as f:
json.dump(data, f)
Expand Down

0 comments on commit 02007a9

Please sign in to comment.