Skip to content

Commit

Permalink
add inter-annotator agreement on simplified labels
Browse files Browse the repository at this point in the history
  • Loading branch information
adamjanovsky committed Nov 24, 2023
1 parent 0dbcefe commit 844ae8a
Showing 1 changed file with 42 additions and 9 deletions.
51 changes: 42 additions & 9 deletions notebooks/cc/reference_annotations/inter_annotator_agreement.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -2,23 +2,36 @@
"cells": [
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 37,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Cohen's Kappa: 0.7101271765978729\n",
"Percentage agreement: 0.8225\n"
"Results on 5 classes:\n",
"\t- Cohen's Kappa: 0.7101271765978729\n",
"\t- Percentage agreement: 0.8225\n",
"Results on simplified 2 classes:\n",
"\t- Cohen's Kappa: 0.8424203759140207\n",
"\t- Percentage agreement: 0.9437869822485208\n"
]
}
],
"source": [
"import pandas as pd\n",
"from pathlib import Path\n",
"\n",
"import pandas as pd\n",
"from sklearn.metrics import cohen_kappa_score\n",
"\n",
"label_mapping = {\n",
" \"COMPONENT_USED\": \"COMPONENT_USED\",\n",
" \"RE-EVALUATION\": \"PREVIOUS_VERSION\",\n",
" \"EVALUATION_REUSED\": \"COMPONENT_USED\",\n",
" \"PREVIOUS_VERSION\": \"PREVIOUS_VERSION\",\n",
" \"COMPONENT_SHARED\": \"COMPONENT_USED\",\n",
"}\n",
"\n",
"\n",
"def load_all_dataframes(base_folder: Path) -> pd.DataFrame:\n",
" splits = [\"train\", \"valid\", \"test\"]\n",
Expand All @@ -33,17 +46,37 @@
" else:\n",
" df_test = df\n",
"\n",
" return pd.concat([df_train, df_valid, df_test])\n",
" df_to_return = pd.concat([df_train, df_valid, df_test])\n",
" return df_to_return.assign(label=lambda df_: df_.label.fillna(\"unknown\")).assign(\n",
" label=lambda df_: df_.label.str.upper(),\n",
" simplified_label=lambda df_: df_.label.map(label_mapping),\n",
" )\n",
"\n",
"\n",
"REPO_ROOT = Path(\".\")\n",
"REPO_ROOT = Path()\n",
"\n",
"\n",
"adam_df = load_all_dataframes(REPO_ROOT / \"src/sec_certs/data/reference_annotations/adam\")\n",
"jano_df = load_all_dataframes(REPO_ROOT / \"src/sec_certs/data/reference_annotations/jano\")\n",
"agreement_series = adam_df.label == jano_df.label\n",
"\n",
"print(f\"Cohen's Kappa: {cohen_kappa_score(adam_df.label, jano_df.label)}\")\n",
"print(f\"Percentage agreement: {agreement_series.loc[agreement_series == True].count() / agreement_series.count()}\")\n"
"print(\"Results on 5 classes:\")\n",
"print(f\"\\t- Cohen's Kappa: {cohen_kappa_score(adam_df.label, jano_df.label)}\")\n",
"print(f\"\\t- Percentage agreement: {agreement_series.loc[agreement_series == True].count() / agreement_series.count()}\")\n",
"\n",
"indices_to_drop = set(adam_df.loc[adam_df.simplified_label.isnull()].index.tolist()) | set(\n",
" jano_df.loc[jano_df.simplified_label.isnull()].index.tolist()\n",
")\n",
"adam_df_simplified = adam_df.drop(indices_to_drop)\n",
"jano_df_simplified = jano_df.drop(indices_to_drop)\n",
"agreement_series = adam_df_simplified.simplified_label == jano_df_simplified.simplified_label\n",
"\n",
"\n",
"print(\"Results on simplified 2 classes:\")\n",
"print(\n",
" f\"\\t- Cohen's Kappa: {cohen_kappa_score(adam_df_simplified.simplified_label, jano_df_simplified.simplified_label)}\"\n",
")\n",
"print(f\"\\t- Percentage agreement: {agreement_series.loc[agreement_series == True].count() / agreement_series.count()}\")\n"
]
}
],
Expand All @@ -63,7 +96,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.13"
"version": "3.11.6"
},
"orig_nbformat": 4
},
Expand Down

0 comments on commit 844ae8a

Please sign in to comment.