fix example file to pass mypy

safurrier · Apr 14, 2024 · a19a5ca
1 parent d85325b
commit a19a5ca
Show file tree

Hide file tree

Showing 3 changed files with 46 additions and 10 deletions.
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -0,0 +1,27 @@
+repos:
+-   repo: local
+    hooks:
+    -   id: mypy
+        name: Run MyPy
+        entry: make mypy
+        language: system
+        always_run: true
+        pass_filenames: false
+    -   id: lint
+        name: Run Linter
+        entry: make lint
+        language: system
+        always_run: true
+        pass_filenames: false
+    -   id: format
+        name: Run Formatter
+        entry: make format
+        language: system
+        always_run: true
+        pass_filenames: false
+    -   id: test
+        name: Run Tests
+        entry: make test
+        language: system
+        always_run: true
+        pass_filenames: false
diff --git a/src/data_prep/categorical.py b/src/data_prep/categorical.py
@@ -1,8 +1,10 @@
 """
 Data preparation methods for categorical variables.
 """
+
 import pandas as pd
 import numpy as np
+from typing import Optional, Dict, Any
 
 
 def lowercase_string(string: str) -> str:
@@ -31,8 +33,12 @@ def lowercase_column(df: pd.DataFrame, col: str) -> pd.DataFrame:
     return df
 
 
-def extract_title(df: pd.DataFrame, col: str, replace_dict: dict = None,
-                  title_col: str = 'title') -> pd.DataFrame:
+def extract_title(
+    df: pd.DataFrame,
+    col: str,
+    replace_dict: Optional[Dict[str, Any]] = None,
+    title_col: str = "title",
+) -> pd.DataFrame:
     """Extracts titles into a new title column
 
     Args:
@@ -44,11 +50,13 @@ def extract_title(df: pd.DataFrame, col: str, replace_dict: dict = None,
     Returns:
         A DataFrame with an additional column of extracted titles
     """
-    df[title_col] = df[col].str.extract(r' ([A-Za-z]+)\.', expand=False)
+    df[title_col] = df[col].str.extract(r" ([A-Za-z]+)\.", expand=False)
 
     if replace_dict:
-        df[title_col] = np.where(df[title_col].isin(replace_dict.keys()),
-                                 df[title_col].map(replace_dict),
-                                 df[title_col])
+        df[title_col] = np.where(
+            df[title_col].isin(replace_dict.keys()),
+            df[title_col].map(replace_dict),
+            df[title_col],
+        )
 
     return df
diff --git a/src/data_prep/continuous.py b/src/data_prep/continuous.py
@@ -1,10 +1,11 @@
 """
 Data preparation methods for continuous variables.
 """
+
 import pandas as pd
 
 
-def fill_numeric(df: pd.DataFrame, col: str, fill_type: str = 'median') -> pd.DataFrame:
+def fill_numeric(df: pd.DataFrame, col: str, fill_type: str = "median") -> pd.DataFrame:
     """Fills missing values in numeric column specified.
 
     Args:
@@ -15,11 +16,11 @@ def fill_numeric(df: pd.DataFrame, col: str, fill_type: str = 'median') -> pd.Da
     Returns:
         A DataFrame with numeric_col filled.
     """
-    if fill_type == 'median':
+    if fill_type == "median":
         fill_value = df[col].median()  # type: float
-    elif fill_type == 'mean':
+    elif fill_type == "mean":
         fill_value = df[col].mean()
-    elif fill_type == '-1':
+    elif fill_type == "-1":
         fill_value = -1
     else:
         raise NotImplementedError('Valid fill_type options are "mean", "median", "-1')