Skip to content

Commit

Permalink
Resolve error where make_single_content_qa generates duplicate qids (#562)
Browse files Browse the repository at this point in the history
Co-authored-by: jeffrey <[email protected]>
  • Loading branch information
vkehfdl1 and jeffrey authored Jul 8, 2024
1 parent e018191 commit 10c48b8
Show file tree
Hide file tree
Showing 2 changed files with 2 additions and 1 deletion.
2 changes: 1 addition & 1 deletion autorag/data/qacreation/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,11 +46,11 @@ def make_single_content_qa(corpus_df: pd.DataFrame,

qa = qa_creation_func(contents=sampled_corpus['contents'].tolist(), **kwargs)
qa_data = pd.DataFrame({
- 'qid': [str(uuid.uuid4()) for _ in range(len(qa))],
'qa': qa,
'retrieval_gt': sampled_corpus['doc_id'].tolist(),
})
qa_data = qa_data.explode('qa', ignore_index=True)
+ qa_data['qid'] = [str(uuid.uuid4()) for _ in range(len(qa_data))]

def make_query_generation_gt(row):
return row['qa']['query'], row['qa']['generation_gt']
Expand Down
1 change: 1 addition & 0 deletions tests/autorag/data/qacreation/test_base_qacreation.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ def test_single_content_qa(qa_parquet_filepath):
upsert=True,
)
validate_qa_dataset(qa_df)
+ assert len(qa_df) == qa_df['qid'].nunique()
assert len(qa_df) == 6
assert qa_df['retrieval_gt'].tolist()[0] == qa_df['retrieval_gt'].tolist()[1]

Expand Down

0 comments on commit 10c48b8

Please sign in to comment.