Skip to content

Commit

Permalink
recertification -> reevaluation in code
Browse files Browse the repository at this point in the history
  • Loading branch information
adamjanovsky committed Oct 20, 2023
1 parent d21539e commit c7ad844
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 5 deletions.
2 changes: 1 addition & 1 deletion notebooks/cc/reference_annotations/prediction.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@
"df = df.merge(all_annotations.loc[:, [\"dgst\", \"referenced_cert_id\", \"name_similarity_stripped_version\", \"name_len_diff\", \"cert_name\", \"referenced_cert_name\"]], on=[\"dgst\", \"referenced_cert_id\"])\n",
"\n",
"# Simplified binary labels\n",
"# label_mapping = {\"COMPONENT_USED\": \"COMPONENT_SHARED\", \"RECERTIFICATION\": \"PREVIOUS_VERSION\"}\n",
"# label_mapping = {\"COMPONENT_USED\": \"COMPONENT_SHARED\", \"REEVALUATION\": \"PREVIOUS_VERSION\"}\n",
"# df.label = df.label.map(lambda x: label_mapping[x] if x in label_mapping else x)\n"
]
},
Expand Down
8 changes: 4 additions & 4 deletions src/sec_certs/model/references/annotator.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ def predict_df(self, df: pd.DataFrame) -> pd.DataFrame:
Will read df.segments and populate the dataframe with predictions.
"""

def matches_recertification(segments: list[str]) -> bool:
def matches_reevaluation(segments: list[str]) -> bool:
regex_a = r"This is a re-?\s?certification based on (the\s){1,2}referenced product"
regex_b = r"Re-?\s?Zertifizierung basierend auf (the\s){1,2}referenced product"
return any(
Expand All @@ -113,12 +113,12 @@ def matches_recertification(segments: list[str]) -> bool:
df_new.loc[
(df_new.name_similarity == 100)
& (df_new.name_len_diff < 5)
& ((df_new.y_pred != "RECERTIFICATION") & (df_new.y_pred != "PREVIOUS_VERSION")),
& ((df_new.y_pred != "RE-EVALUATION") & (df_new.y_pred != "PREVIOUS_VERSION")),
["y_pred"],
] = "PREVIOUS_VERSION"

df_new["matches_recertification"] = df_new.segments.map(matches_recertification)
df_new.loc[df_new.matches_recertification, ["y_pred"]] = "RECERTIFICATION"
df_new["maches_reevaluation"] = df_new.segments.map(matches_reevaluation)
df_new.loc[df_new.maches_reevaluation, ["y_pred"]] = "RE-EVALUATION"

df_new["correct"] = df_new.apply(
lambda row: row["y_pred"] == row["label"] if not pd.isnull(row["label"]) else np.NaN, axis=1
Expand Down

0 comments on commit c7ad844

Please sign in to comment.