Skip to content

Commit

Permalink
need sanity
Browse files Browse the repository at this point in the history
  • Loading branch information
Parry-Parry committed Nov 12, 2024
1 parent 517cb1b commit fc4382e
Showing 1 changed file with 1 addition and 7 deletions.
8 changes: 1 addition & 7 deletions rankers/datasets/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,13 +51,6 @@ def _get_line_by_index(self, idx):
f.seek(self.line_offsets[idx])
return json.loads(f.readline())

def _data_generator(self):
    """Yield one parsed JSON record per line of ``self.training_dataset_file``.

    The file is read as UTF-8 JSON Lines. Paths ending in ``.gz`` are opened
    through ``gzip`` so compressed files work as well as plain ones; every
    other path is opened as ordinary text. Blank lines are skipped rather
    than handed to ``json.loads``, which would raise on them.

    Yields:
        The object decoded from each non-blank line, in file order.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        OSError: If the file cannot be opened or read.
    """
    # Local import: the file's top-level import block is not visible here.
    import gzip

    path = self.training_dataset_file
    # str() so that both plain strings and path-like objects are accepted.
    opener = gzip.open if str(path).endswith(".gz") else open
    with opener(path, "rt", encoding="utf-8") as f:
        for line in f:
            # A trailing newline or separator line is not a record.
            if line.strip():
                yield json.loads(line)

def __post_init__(self):
assert self.corpus is not None, "Cannot instantiate a text-based dataset without a lookup"

Expand Down Expand Up @@ -114,6 +107,7 @@ def __getitem__(self, idx):
texts, scores = zip(*sorted(zip(texts, scores), key=lambda x: x[1], reverse=True))
return (query, texts[:self.group_size], scores[:self.group_size])
else:
breakpoint()
texts, scores = zip(*random.sample(list(zip(texts, scores)), self.group_size))
return (query, texts, scores)
else:
Expand Down

0 comments on commit fc4382e

Please sign in to comment.