From f39696de4a68ada9ad6040f173f5b6b19a24eebc Mon Sep 17 00:00:00 2001 From: fzayguler <65349101+fzayguler@users.noreply.github.com> Date: Fri, 10 May 2024 16:39:09 +0000 Subject: [PATCH] investigate datasets --- authorship-verification-submission/run.py | 2 +- authorship-verification-submission/train.py | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/authorship-verification-submission/run.py b/authorship-verification-submission/run.py index 268f45e..4e664ff 100644 --- a/authorship-verification-submission/run.py +++ b/authorship-verification-submission/run.py @@ -20,7 +20,7 @@ predictions = model.predict(df["text"]) df["generated"] = predictions df = df[["id", "generated"]] - + print(len(df.index)) # Save the predictions output_directory = get_output_directory(str(Path(__file__).parent)) diff --git a/authorship-verification-submission/train.py b/authorship-verification-submission/train.py index afd9363..ba520fc 100644 --- a/authorship-verification-submission/train.py +++ b/authorship-verification-submission/train.py @@ -25,7 +25,10 @@ df = text.join(labels.set_index("id")) # print the first 5 rows - #print(df.head()) + print(df.head()) + #print the number of rows + print(len(df.index)) + # export the data to a json file with indent=4 #df.to_json(Path(__file__).parent / "data.json", indent=4)