Skip to content

Commit

Permalink
Consistency: 'missing_values' -> 'missing_val_coding'
Browse files Browse the repository at this point in the history
  • Loading branch information
smmaurer committed Aug 11, 2016
1 parent 3087259 commit 591b534
Showing 1 changed file with 19 additions and 19 deletions.
38 changes: 19 additions & 19 deletions orca_test/orca_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -496,37 +496,37 @@ def assert_column_is_numeric(table_name, column_name):
return


def strip_missing_values(series, missing_val_coding=np.nan):
    """
    Helper function. Returns a pd.Series with missing values stripped.

    Parameters
    ----------
    series : pandas.Series
    missing_val_coding : {0, -1, np.nan}, optional
        Value that indicates missing entries. Any null-like scalar (np.nan,
        None) causes null entries to be dropped; any other value causes
        entries equal to that value to be dropped.

    Returns
    -------
    series : pandas.Series
        A new Series without the entries flagged as missing.

    """
    # pd.isnull() accepts any scalar, whereas np.isnan() raises a TypeError
    # for non-numeric codings such as None or a string like 'NA'.
    if pd.isnull(missing_val_coding):
        return series.dropna()

    # Null entries compare unequal to the coding value, so they are retained
    # here; only entries equal to the coding value are removed.
    return series[series != missing_val_coding].copy()


def assert_column_missing_value_coding(table_name, column_name, missing_val_coding):
    """
    Asserts that a column's missing entries are all coded with a particular value.

    Parameters
    ----------
    table_name : str
    column_name : str
    missing_val_coding : {0, -1}
        Value that indicates missing entries.

    Returns
    -------
    None

    Raises
    ------
    OrcaAssertionError
        If null entries remain in the column after the entries equal to
        `missing_val_coding` have been stripped.

    """
    assert_column_can_be_generated(table_name, column_name)
    ds = get_column_or_index(table_name, column_name)
    ds = strip_missing_values(ds, missing_val_coding)

    # Explicit check instead of `assert` wrapped in a bare `except`: an
    # `assert` is removed when Python runs with -O, and a bare `except` would
    # relabel unrelated errors as a missing-value-coding failure.
    if pd.isnull(ds).any():
        msg = "Column '%s' has null entries that are not coded as %s" \
            % (column_name, str(missing_val_coding))
        raise OrcaAssertionError(msg)
    return


def assert_column_max(table_name, column_name, max, missing_values=np.nan):
def assert_column_max(table_name, column_name, max, missing_val_coding=np.nan):
"""
Asserts a maximum value for a numeric column, ignoring missing values.
Expand All @@ -557,7 +557,7 @@ def assert_column_max(table_name, column_name, max, missing_values=np.nan):
column_name : str
max : int or float
Maximum value.
missing_values : {0, -1, np.nan}, optional
missing_val_coding : {0, -1, np.nan}, optional
Value that indicates missing entries.
Returns
Expand All @@ -567,7 +567,7 @@ def assert_column_max(table_name, column_name, max, missing_values=np.nan):
"""
assert_column_is_numeric(table_name, column_name)
ds = get_column_or_index(table_name, column_name)
ds = strip_missing_values(ds, missing_values)
ds = strip_missing_values(ds, missing_val_coding)

try:
assert ds.max() <= max
Expand All @@ -578,7 +578,7 @@ def assert_column_max(table_name, column_name, max, missing_values=np.nan):
return


def assert_column_min(table_name, column_name, min, missing_values=np.nan):
def assert_column_min(table_name, column_name, min, missing_val_coding=np.nan):
"""
Asserts a minimum value for a numeric column, ignoring missing values.
Expand All @@ -588,7 +588,7 @@ def assert_column_min(table_name, column_name, min, missing_values=np.nan):
column_name : str
min : int or float
Minimum value.
missing_values : {0, -1, np.nan}, optional
missing_val_coding : {0, -1, np.nan}, optional
Value that indicates missing entries.
Returns
Expand All @@ -598,7 +598,7 @@ def assert_column_min(table_name, column_name, min, missing_values=np.nan):
"""
assert_column_is_numeric(table_name, column_name)
ds = get_column_or_index(table_name, column_name)
ds = strip_missing_values(ds, missing_values)
ds = strip_missing_values(ds, missing_val_coding)

try:
assert ds.min() >= min
Expand All @@ -609,7 +609,7 @@ def assert_column_min(table_name, column_name, min, missing_values=np.nan):
return


def assert_column_max_portion_missing(table_name, column_name, portion, missing_values=np.nan):
def assert_column_max_portion_missing(table_name, column_name, portion, missing_val_coding=np.nan):
"""
Assert the maximum portion of a column's entries that may be missing.
Expand All @@ -619,7 +619,7 @@ def assert_column_max_portion_missing(table_name, column_name, portion, missing_
column_name : str
portion : float from 0 to 1
Maximum portion of entries that may be missing.
missing_values : {0, -1, np.nan}, optional
missing_val_coding : {0, -1, np.nan}, optional
Value that indicates missing entries.
Returns
Expand All @@ -629,7 +629,7 @@ def assert_column_max_portion_missing(table_name, column_name, portion, missing_
"""
assert_column_can_be_generated(table_name, column_name)
ds = get_column_or_index(table_name, column_name)
missing = len(ds) - len(strip_missing_values(ds, missing_values))
missing = len(ds) - len(strip_missing_values(ds, missing_val_coding))
missing_portion = float(missing) / len(ds)

# Format as percentages for output
Expand All @@ -645,10 +645,10 @@ def assert_column_max_portion_missing(table_name, column_name, portion, missing_
return


def assert_column_no_missing_values(table_name, column_name, missing_val_coding=np.nan):
    """
    Asserts that a column has no missing entries.

    Delegates to assert_column_max_portion_missing() with a maximum missing
    portion of zero.

    Parameters
    ----------
    table_name : str
    column_name : str
    missing_val_coding : {0, -1, np.nan}, optional
        Value that indicates missing entries.

    Returns
    -------
    None

    """
    # A maximum allowed portion of 0 means no entry may be missing.
    max_missing_portion = 0
    assert_column_max_portion_missing(
        table_name, column_name, max_missing_portion, missing_val_coding)


Expand Down

0 comments on commit 591b534

Please sign in to comment.