Skip to content

Commit

Permalink
First pass of validation rules for FOCUS 1.0
Browse files Browse the repository at this point in the history
Signed-off-by: Joshua Kwan <[email protected]>
  • Loading branch information
joshk0 committed Nov 20, 2023
1 parent 4b2c595 commit 79f1a97
Show file tree
Hide file tree
Showing 74 changed files with 451 additions and 11 deletions.
1 change: 1 addition & 0 deletions focus_validator/config_objects/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ class DataTypes(Enum):
DECIMAL = "decimal"
DATETIME = "datetime"
CURRENCY_CODE = "currency-code"
STRINGIFIED_JSON_OBJECT = "stringified-json-object"


class DataTypeCheck(BaseModel):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,14 @@ def __generate_column_definition__(
error=f"{rule.check_id}:::Ensures that column is of {data_type.value} type.",
)
)
elif data_type == DataTypes.STRINGIFIED_JSON_OBJECT:
pandera_type = None
column_checks.append(
pa.Check.check_stringified_json_object_dtype(
ignore_na=True,
error=f"{rule.check_id}:::Ensures that column is of {data_type.value} type.",
)
)
else:
pandera_type = pa.String

Expand Down
2 changes: 1 addition & 1 deletion focus_validator/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def main():
help="Allow transitional rules in validation",
)
parser.add_argument(
"--validate-version", default="0.5", help="Version of FOCUS to validate against"
"--validate-version", default="1.0", help="Version of FOCUS to validate against"
)
parser.add_argument(
"--rule-set-path",
Expand Down
17 changes: 13 additions & 4 deletions focus_validator/outputter/outputter_console.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ def write(self, result_set: ValidationResult):

checklist = self.__restructure_check_list__(result_set)
print("Checklist:")
print(tabulate(checklist, headers="keys", tablefmt="psql"))
print(tabulate(checklist, headers="keys", showindex=False))

if result_set.failure_cases is not None:
aggregated_failures = result_set.failure_cases.groupby(
Expand All @@ -71,7 +71,8 @@ def write(self, result_set: ValidationResult):
tabulate(
tabular_data=aggregated_failures, # type: ignore
headers="keys",
tablefmt="psql",
showindex=False,
maxcolwidths=60
)
)

Expand All @@ -80,9 +81,17 @@ def collapse_occurrence_range(occurrence_range: list):
start = None
i = None
collapsed = []

# Edge case
if len(occurrence_range) == 1:
if isinstance(occurrence_range[0], float) and math.isnan(occurrence_range[0]):
return ''
if occurrence_range[0] is None:
return ''

for n in sorted(occurrence_range):
if not isinstance(n, int) and not (isinstance(n, float) and not math.isnan(n)):
return occurrence_range
return ','.join([str(x) for x in occurrence_range])
elif i is None:
start = i = int(n)
elif n == i + 1:
Expand All @@ -100,4 +109,4 @@ def collapse_occurrence_range(occurrence_range: list):
else:
collapsed.append(f"{start}-{i}")

return collapsed
return ','.join(collapsed)
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: ChargeDescription
check:
data_type: string
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: ChargeDescription
check:
allow_nulls: false
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
column_id: ChargeDescription
check: column_required
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
column_id: ChargeFrequency
check:
value_in:
- "One-Time"
- "Recurring"
- "Usage-Based"
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: ChargeFrequency
check:
data_type: string
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: ChargeFrequency
check:
allow_nulls: false
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
column_id: ChargeFrequency
check: column_required
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
column_id: ChargeSubcategory
check:
value_in:
# Allowed when ChargeType=Usage
- "On-Demand"
- "Used Commitment"
- "Unused Commitment"
- "Usage"
# Allowed when ChargeType=Adjustment
- "Refund"
- "Credit"
- "Rounding Error"
- "General Adjustment"
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: ChargeSubcategory
check:
data_type: string
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: ChargeSubcategory
check:
allow_nulls: false
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
column_id: ChargeSubcategory
check: column_required
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
column_id: CommitmentDiscountCategory
check:
value_in:
- "Spend"
- "Usage"
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: CommitmentDiscountCategory
check:
data_type: string
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: CommitmentDiscountCategory
check:
allow_nulls: false
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: CommitmentDiscountCategory
check:
column_required
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: CommitmentDiscountId
check:
data_type: string
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: CommitmentDiscountId
check:
allow_nulls: true
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: CommitmentDiscountId
check:
column_required
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: CommitmentDiscountName
check:
data_type: string
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: CommitmentDiscountName
check:
allow_nulls: true
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
column_id: CommitmentDiscountName
check: column_required
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: CommitmentDiscountType
check:
data_type: string
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: CommitmentDiscountType
check:
allow_nulls: true
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: EffectiveCost
check:
data_type: decimal
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: EffectiveCost
check:
allow_nulls: false
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: EffectiveCost
check:
column_required
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: ListCost
check:
data_type: decimal
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: ListCost
check:
allow_nulls: false
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: ListCost
check:
column_required
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: ListUnitPrice
check:
data_type: decimal
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: ListUnitPrice
check:
allow_nulls: true
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: ListUnitPrice
check:
column_required
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
column_id: PricingCategory
check:
value_in:
- "On-Demand"
- "Dynamic"
- "Commitment-Based"
- "Other"
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: PricingCategory
check:
data_type: string
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: PricingCategory
check:
allow_nulls: true
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
column_id: PricingCategory
check: column_required
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: PricingQuantity
check:
data_type: decimal
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: PricingQuantity
check:
allow_nulls: true
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: PricingQuantity
check:
column_required
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: PricingUnit
check:
data_type: string
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: PricingUnit
check:
allow_nulls: true
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
column_id: PricingUnit
check: column_required
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: ResourceType
check:
data_type: string
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: ResourceType
check:
allow_nulls: true
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
column_id: ResourceType
check: column_required
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: SkuId
check:
data_type: string
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: SkuId
check:
allow_nulls: true
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
column_id: SkuId
check: column_required
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: SkuPriceId
check:
data_type: string
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
column_id: SkuPriceId
check_friendly_name: SkuPriceId must be set for certain values of ChargeType
check:
sql_query: |
SELECT CASE
WHEN ChargeType IN ('Purchase', 'Usage', 'Refund') AND SkuPriceId IS NULL THEN FALSE
ELSE TRUE
END AS check_output
FROM df;
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
column_id: SkuPriceId
check: column_required
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: SubAccountId
check:
data_type: string
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: SubAccountId
check:
allow_nulls: true
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
column_id: SubAccountId
check: column_required
Original file line number Diff line number Diff line change
@@ -1,3 +1,2 @@
column_id: SubAccountName
check:
column_required
check: column_required
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: Tags
check:
data_type: stringified-json-object
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: Tags
check:
allow_nulls: true
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: Tags
check:
column_required
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: UsageQuantity
check:
data_type: decimal
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: UsageQuantity
check:
allow_nulls: true
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: UsageQuantity
check:
column_required
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: UsageUnit
check:
data_type: string
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: UsageUnit
check:
allow_nulls: true
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
column_id: UsageUnit
check: column_required
13 changes: 13 additions & 0 deletions focus_validator/rules/checks.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import json
from datetime import datetime
from typing import Union

Expand Down Expand Up @@ -63,3 +64,15 @@ def check_currency_code_dtype(pandas_obj: pd.Series):
return pd.Series(
map(lambda v: isinstance(v, str) and v in currency_codes, pandas_obj.values)
)


@extensions.register_check_method()
def check_stringified_json_object_dtype(pandas_obj: pd.Series) -> pd.Series:
    """Flag which values in a column are strings holding a JSON object.

    A value passes only if it is a ``str`` that ``json.loads`` can parse and
    whose top-level parsed value is a ``dict`` (a JSON object). JSON arrays,
    scalars, malformed text and non-string values (including missing values
    such as ``None``/``NaN``) all evaluate to ``False``.

    Args:
        pandas_obj: The column to validate.

    Returns:
        A boolean Series with one entry per input value, carrying the same
        index as ``pandas_obj`` so each result lines up label-for-label with
        the value it describes.
    """

    def _is_json_object(value: object) -> bool:
        # json.loads also accepts bytes/bytearray; require a real str so the
        # check matches its "stringified" contract.
        if not isinstance(value, str):
            return False
        try:
            parsed = json.loads(value)
        except (ValueError, RecursionError):
            # ValueError covers json.JSONDecodeError (malformed input);
            # RecursionError covers pathologically deep nesting.
            return False
        return isinstance(parsed, dict)

    return pd.Series(
        [_is_json_object(value) for value in pandas_obj.values],
        index=pandas_obj.index,  # keep alignment with the checked column
        dtype=bool,  # stay boolean even when the column is empty
    )
Loading

0 comments on commit 79f1a97

Please sign in to comment.