Skip to content

Commit

Permalink
fix example file to pass mypy
Browse files (browse the repository at this point in the history)
  • Loading branch information
safurrier committed Apr 14, 2024
1 parent d85325b commit a19a5ca
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 10 deletions.
27 changes: 27 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
repos:
  - repo: local  # hooks are declared in this repository, not fetched from a remote hook repo
    hooks:
      - id: mypy
        name: Run MyPy
        entry: make mypy  # command executed for this hook
        language: system  # run the entry with tools already installed on the machine
        always_run: true  # run even when no staged file matches the hook
        pass_filenames: false  # do not append staged filenames to the command
      - id: lint
        name: Run Linter
        entry: make lint
        language: system
        always_run: true
        pass_filenames: false
      - id: format
        name: Run Formatter
        entry: make format
        language: system
        always_run: true
        pass_filenames: false
      - id: test
        name: Run Tests
        entry: make test
        language: system
        always_run: true
        pass_filenames: false
20 changes: 14 additions & 6 deletions src/data_prep/categorical.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
"""
Data preparation methods for categorical variables.
"""

import pandas as pd
import numpy as np
from typing import Optional, Dict, Any


def lowercase_string(string: str) -> str:
Expand Down Expand Up @@ -31,8 +33,12 @@ def lowercase_column(df: pd.DataFrame, col: str) -> pd.DataFrame:
return df


def extract_title(df: pd.DataFrame, col: str, replace_dict: dict = None,
title_col: str = 'title') -> pd.DataFrame:
def extract_title(
df: pd.DataFrame,
col: str,
replace_dict: Optional[Dict[str, Any]] = None,
title_col: str = "title",
) -> pd.DataFrame:
"""Extracts titles into a new title column
Args:
Expand All @@ -44,11 +50,13 @@ def extract_title(df: pd.DataFrame, col: str, replace_dict: dict = None,
Returns:
A DataFrame with an additional column of extracted titles
"""
df[title_col] = df[col].str.extract(r' ([A-Za-z]+)\.', expand=False)
df[title_col] = df[col].str.extract(r" ([A-Za-z]+)\.", expand=False)

if replace_dict:
df[title_col] = np.where(df[title_col].isin(replace_dict.keys()),
df[title_col].map(replace_dict),
df[title_col])
df[title_col] = np.where(
df[title_col].isin(replace_dict.keys()),
df[title_col].map(replace_dict),
df[title_col],
)

return df
9 changes: 5 additions & 4 deletions src/data_prep/continuous.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
"""
Data preparation methods for continuous variables.
"""

import pandas as pd


def fill_numeric(df: pd.DataFrame, col: str, fill_type: str = 'median') -> pd.DataFrame:
def fill_numeric(df: pd.DataFrame, col: str, fill_type: str = "median") -> pd.DataFrame:
"""Fills missing values in numeric column specified.
Args:
Expand All @@ -15,11 +16,11 @@ def fill_numeric(df: pd.DataFrame, col: str, fill_type: str = 'median') -> pd.Da
Returns:
A DataFrame with numeric_col filled.
"""
if fill_type == 'median':
if fill_type == "median":
fill_value = df[col].median() # type: float
elif fill_type == 'mean':
elif fill_type == "mean":
fill_value = df[col].mean()
elif fill_type == '-1':
elif fill_type == "-1":
fill_value = -1
else:
raise NotImplementedError('Valid fill_type options are "mean", "median", "-1')
Expand Down

0 comments on commit a19a5ca

Please sign in to comment.