Skip to content

Commit

Permalink
First pass of validation rules for FOCUS 1.0
Browse files Browse the repository at this point in the history
Signed-off-by: Joshua Kwan <[email protected]>
  • Loading branch information
joshk0 committed Nov 22, 2023
1 parent b3df7cb commit 30b645d
Show file tree
Hide file tree
Showing 74 changed files with 456 additions and 23 deletions.
1 change: 1 addition & 0 deletions focus_validator/config_objects/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ class DataTypes(Enum):
DECIMAL = "decimal"
DATETIME = "datetime"
CURRENCY_CODE = "currency-code"
STRINGIFIED_JSON_OBJECT = "stringified-json-object"


class DataTypeCheck(BaseModel):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,14 @@ def __generate_column_definition__(
error=f"{rule.check_id}:::Ensures that column is of {data_type.value} type.",
)
)
elif data_type == DataTypes.STRINGIFIED_JSON_OBJECT:
pandera_type = None
column_checks.append(
pa.Check.check_stringified_json_object_dtype(
ignore_na=True,
error=f"{rule.check_id}:::Ensures that column is of {data_type.value} type.",
)
)
else:
pandera_type = pa.String

Expand Down
2 changes: 1 addition & 1 deletion focus_validator/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def main():
help="Allow transitional rules in validation",
)
parser.add_argument(
"--validate-version", default="0.5", help="Version of FOCUS to validate against"
"--validate-version", default="1.0", help="Version of FOCUS to validate against"
)
parser.add_argument(
"--rule-set-path",
Expand Down
34 changes: 18 additions & 16 deletions focus_validator/outputter/outputter_console.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import math

import pandas as pd
from tabulate import tabulate

from focus_validator.config_objects import Rule
from focus_validator.rules.spec_rules import ValidationResult
Expand Down Expand Up @@ -56,33 +55,36 @@ def __restructure_check_list__(result_set: ValidationResult):

def write(self, result_set: ValidationResult):
self.result_set = result_set

checklist = self.__restructure_check_list__(result_set)
print("Checklist:")
print(tabulate(checklist, headers="keys", tablefmt="psql"))

if result_set.failure_cases is not None:
aggregated_failures = result_set.failure_cases.groupby(
by=["Check Name", "Column", "Description"], as_index=False
by=["Check Name", "Description"], as_index=False
).aggregate(lambda x: collapse_occurrence_range(x.unique().tolist()))

print("Checks summary:")
print(
tabulate(
tabular_data=aggregated_failures, # type: ignore
headers="keys",
tablefmt="psql",
print("Errors encountered:")
for _, fail in aggregated_failures.iterrows():
print(
f'{fail["Check Name"]} failed:\n\tDescription: {fail["Description"]}\n\tRows: {fail["Row #"] if fail["Row #"] else "(whole file)"}\n\tExample values: {fail["Values"] if fail["Values"] else "(none)"}\n'
)
)
print("Validation failed!")
else:
print("Validation succeeded.")


def collapse_occurrence_range(occurrence_range: list):
start = None
i = None
collapsed = []

# Edge case
if len(occurrence_range) == 1:
if isinstance(occurrence_range[0], float) and math.isnan(occurrence_range[0]):
return ""
if occurrence_range[0] is None:
return ""

for n in sorted(occurrence_range):
if not isinstance(n, int) and not (isinstance(n, float) and not math.isnan(n)):
return occurrence_range
return ",".join([str(x) for x in occurrence_range])
elif i is None:
start = i = int(n)
elif n == i + 1:
Expand All @@ -100,4 +102,4 @@ def collapse_occurrence_range(occurrence_range: list):
else:
collapsed.append(f"{start}-{i}")

return collapsed
return ",".join(collapsed)
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: ChargeDescription
check:
data_type: string
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: ChargeDescription
check:
allow_nulls: false
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
column_id: ChargeDescription
check: column_required
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
column_id: ChargeFrequency
check:
value_in:
- "One-Time"
- "Recurring"
- "Usage-Based"
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: ChargeFrequency
check:
data_type: string
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: ChargeFrequency
check:
allow_nulls: false
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
column_id: ChargeFrequency
check: column_required
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
column_id: ChargeSubcategory
check:
value_in:
# Allowed when ChargeType=Usage
- "On-Demand"
- "Used Commitment"
- "Unused Commitment"
- "Usage"
# Allowed when ChargeType=Adjustment
- "Refund"
- "Credit"
- "Rounding Error"
- "General Adjustment"
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: ChargeSubcategory
check:
data_type: string
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: ChargeSubcategory
check:
allow_nulls: false
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
column_id: ChargeSubcategory
check: column_required
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
column_id: CommitmentDiscountCategory
check:
value_in:
- "Spend"
- "Usage"
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: CommitmentDiscountCategory
check:
data_type: string
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: CommitmentDiscountCategory
check:
allow_nulls: false
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: CommitmentDiscountCategory
check:
column_required
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: CommitmentDiscountId
check:
data_type: string
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: CommitmentDiscountId
check:
allow_nulls: true
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: CommitmentDiscountId
check:
column_required
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: CommitmentDiscountName
check:
data_type: string
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: CommitmentDiscountName
check:
allow_nulls: true
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
column_id: CommitmentDiscountName
check: column_required
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: CommitmentDiscountType
check:
data_type: string
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: CommitmentDiscountType
check:
allow_nulls: true
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: EffectiveCost
check:
data_type: decimal
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: EffectiveCost
check:
allow_nulls: false
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: EffectiveCost
check:
column_required
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: ListCost
check:
data_type: decimal
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: ListCost
check:
allow_nulls: false
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: ListCost
check:
column_required
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: ListUnitPrice
check:
data_type: decimal
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: ListUnitPrice
check:
allow_nulls: true
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: ListUnitPrice
check:
column_required
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
column_id: PricingCategory
check:
value_in:
- "On-Demand"
- "Dynamic"
- "Commitment-Based"
- "Other"
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: PricingCategory
check:
data_type: string
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: PricingCategory
check:
allow_nulls: true
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
column_id: PricingCategory
check: column_required
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: PricingQuantity
check:
data_type: decimal
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: PricingQuantity
check:
allow_nulls: true
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: PricingQuantity
check:
column_required
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: PricingUnit
check:
data_type: string
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: PricingUnit
check:
allow_nulls: true
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
column_id: PricingUnit
check: column_required
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: ResourceType
check:
data_type: string
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: ResourceType
check:
allow_nulls: true
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
column_id: ResourceType
check: column_required
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: SkuId
check:
data_type: string
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: SkuId
check:
allow_nulls: true
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
column_id: SkuId
check: column_required
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: SkuPriceId
check:
data_type: string
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
column_id: SkuPriceId
check_friendly_name: SkuPriceId must be set for certain values of ChargeType
check:
sql_query: |
SELECT CASE
WHEN ChargeType IN ('Purchase', 'Usage', 'Refund') AND SkuPriceId IS NULL THEN FALSE
ELSE TRUE
END AS check_output
FROM df;
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
column_id: SkuPriceId
check: column_required
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: SubAccountId
check:
data_type: string
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: SubAccountId
check:
allow_nulls: true
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
column_id: SubAccountId
check: column_required
Original file line number Diff line number Diff line change
@@ -1,3 +1,2 @@
column_id: SubAccountName
check:
column_required
check: column_required
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: Tags
check:
data_type: stringified-json-object
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: Tags
check:
allow_nulls: true
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: Tags
check:
column_required
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: UsageQuantity
check:
data_type: decimal
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: UsageQuantity
check:
allow_nulls: true
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: UsageQuantity
check:
column_required
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: UsageUnit
check:
data_type: string
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: UsageUnit
check:
allow_nulls: true
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
column_id: UsageUnit
check: column_required
13 changes: 13 additions & 0 deletions focus_validator/rules/checks.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import json
from datetime import datetime
from typing import Union

Expand Down Expand Up @@ -108,3 +109,15 @@ def check_currency_code_dtype(pandas_obj: pd.Series):
return pd.Series(
map(lambda v: isinstance(v, str) and v in currency_codes, pandas_obj.values)
)


@extensions.register_check_method()
def check_stringified_json_object_dtype(pandas_obj: pd.Series):
    """Check that every value is a string containing a serialized JSON object.

    Args:
        pandas_obj: Column values to validate.

    Returns:
        A boolean ``pd.Series`` with the same index as ``pandas_obj``. An
        entry is True only when the value parses as JSON and the top-level
        parsed result is a ``dict``; anything else (non-string values,
        malformed JSON, JSON arrays/scalars) yields False.
    """

    def __validate_stringified_json_object__(value: str) -> bool:
        try:
            parsed = json.loads(value)
        except (TypeError, ValueError, RecursionError):
            # TypeError: value is not str/bytes (e.g. a float NaN).
            # ValueError: malformed JSON (json.JSONDecodeError) or
            #   undecodable bytes (UnicodeDecodeError).
            # RecursionError: pathologically deep nesting.
            return False
        return isinstance(parsed, dict)

    # Reuse the caller's index so the boolean result lines up row-for-row
    # with the checked column; a fresh RangeIndex would misalign whenever
    # the input index is not 0..n-1. dtype=bool keeps an empty column's
    # result boolean as well.
    return pd.Series(
        [__validate_stringified_json_object__(v) for v in pandas_obj.values],
        index=pandas_obj.index,
        dtype=bool,
    )
Loading

0 comments on commit 30b645d

Please sign in to comment.