Skip to content

Commit

Permalink
First pass of validation rules for FOCUS 1.0
Browse files Browse the repository at this point in the history
Signed-off-by: Joshua Kwan <[email protected]>
  • Loading branch information
joshk0 committed Nov 14, 2023
1 parent a595826 commit 9d0bc11
Show file tree
Hide file tree
Showing 60 changed files with 333 additions and 6 deletions.
1 change: 1 addition & 0 deletions focus_validator/config_objects/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ class DataTypes(Enum):
DECIMAL = "decimal"
DATETIME = "datetime"
CURRENCY_CODE = "currency-code"
STRINGIFIED_JSON_OBJECT = "stringified-json-object"


class DataTypeCheck(BaseModel):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,14 @@ def __generate_column_definition__(
error=f"{rule.check_id}:::Ensures that column is of {data_type.value} type.",
)
)
elif data_type == DataTypes.STRINGIFIED_JSON_OBJECT:
pandera_type = None
column_checks.append(
pa.Check.check_stringified_json_object_dtype(
ignore_na=True,
error=f"{rule.check_id}:::Ensures that column is of {data_type.value} type.",
)
)
else:
pandera_type = pa.String

Expand Down
2 changes: 1 addition & 1 deletion focus_validator/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def main():
help="Allow transitional rules in validation",
)
parser.add_argument(
"--validate-version", default="0.5", help="Version of FOCUS to validate against"
"--validate-version", default="1.0", help="Version of FOCUS to validate against"
)
parser.add_argument(
"--rule-set-path",
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: ChargeDescription
check:
data_type: string
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: ChargeDescription
check:
allow_nulls: false
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
column_id: ChargeDescription
check: column_required
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
column_id: ChargeFrequency
check:
value_in:
- "One-Time"
- "Recurring"
- "Usage-Based"
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: ChargeFrequency
check:
data_type: string
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: ChargeFrequency
check:
allow_nulls: false
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
column_id: ChargeFrequency
check: column_required
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
column_id: ChargeSubcategory
check:
value_in:
# Allowed when ChargeType=Usage
- "On-Demand"
- "Used Commitment"
- "Unused Commitment"
- "Usage"
# Allowed when ChargeType=Adjustment
- "Refund"
- "Credit"
- "Rounding Error"
- "General Adjustment"
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: ChargeSubcategory
check:
data_type: string
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: ChargeSubcategory
check:
allow_nulls: false
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
column_id: ChargeSubcategory
check: column_required
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
column_id: CommitmentDiscountCategory
check:
value_in:
- "Spend"
- "Usage"
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: CommitmentDiscountCategory
check:
data_type: string
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: CommitmentDiscountCategory
check:
allow_nulls: false
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: CommitmentDiscountCategory
check:
column_required
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: CommitmentDiscountId
check:
data_type: string
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: CommitmentDiscountId
check:
allow_nulls: true
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: CommitmentDiscountId
check:
column_required
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: CommitmentDiscountName
check:
data_type: string
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: CommitmentDiscountName
check:
allow_nulls: true
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
column_id: CommitmentDiscountName
check: column_required
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: CommitmentDiscountType
check:
data_type: string
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: CommitmentDiscountType
check:
allow_nulls: true
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
column_id: PricingCategory
check:
value_in:
- "On-Demand"
- "Dynamic"
- "Commitment-Based"
- "Other"
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: PricingCategory
check:
data_type: string
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: PricingCategory
check:
allow_nulls: true
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
column_id: PricingCategory
check: column_required
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: PricingUnit
check:
data_type: string
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: PricingUnit
check:
allow_nulls: true
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
column_id: PricingUnit
check: column_required
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: ResourceType
check:
data_type: string
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: ResourceType
check:
allow_nulls: true
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
column_id: ResourceType
check: column_required
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: SkuId
check:
data_type: string
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: SkuId
check:
allow_nulls: true
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
column_id: SkuId
check: column_required
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: SkuPriceId
check:
data_type: string
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: SkuPriceId
check:
allow_nulls: true
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
column_id: SkuPriceId
check: column_required
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: SubAccountId
check:
data_type: string
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: SubAccountId
check:
allow_nulls: true
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
column_id: SubAccountId
check: column_required
Original file line number Diff line number Diff line change
@@ -1,3 +1,2 @@
column_id: SubAccountName
check:
column_required
check: column_required
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: Tags
check:
data_type: stringified-json-object
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: Tags
check:
allow_nulls: true
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: Tags
check:
column_required
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: UsageQuantity
check:
data_type: decimal
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: UsageQuantity
check:
allow_nulls: true
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: UsageQuantity
check:
column_required
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: UsageUnit
check:
data_type: string
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
column_id: UsageUnit
check:
allow_nulls: true
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
column_id: UsageUnit
check: column_required
14 changes: 14 additions & 0 deletions focus_validator/rules/checks.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from datetime import datetime
from typing import Union

import json
import numpy as np
import pandas as pd
from pandera import extensions
Expand Down Expand Up @@ -63,3 +64,16 @@ def check_currency_code_dtype(pandas_obj: pd.Series):
return pd.Series(
map(lambda v: isinstance(v, str) and v in currency_codes, pandas_obj.values)
)

@extensions.register_check_method()
def check_stringified_json_object_dtype(pandas_obj: pd.Series):
    """Check that each value is a string holding a serialized JSON object.

    A value passes only when it is a ``str`` that ``json.loads`` can parse
    and whose parsed result is a ``dict`` (i.e. a JSON object). JSON arrays,
    scalars, malformed JSON and any non-string value (``bytes``, numbers,
    ``None``, NaN, ...) all fail.

    Args:
        pandas_obj: The column to validate.

    Returns:
        A new boolean ``pd.Series`` with one entry per element of
        ``pandas_obj.values``, in the same order; ``True`` marks a valid
        stringified JSON object.
    """

    def __validate_stringified_json_object__(value: str) -> bool:
        # json.loads also accepts bytes/bytearray, which are not
        # "stringified" values; require a real str, as the currency-code
        # check in this module does.
        if not isinstance(value, str):
            return False
        try:
            parsed = json.loads(value)
        except (ValueError, RecursionError):
            # ValueError covers json.JSONDecodeError (malformed JSON);
            # RecursionError covers pathologically deep nesting. Either way
            # the value is reported as invalid rather than aborting the run.
            return False
        return isinstance(parsed, dict)

    return pd.Series(
        map(__validate_stringified_json_object__, pandas_obj.values)
    )
66 changes: 66 additions & 0 deletions focus_validator/rules/version_sets.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,69 @@
'1.0':
- ChargeDescription_IsString.yaml
- ChargeDescription_Required.yaml
- ChargeDescription_NotNull.yaml

### Unmodified 0.5 content follows
- AmortizedCost_IsDecimal.yaml
- AmortizedCost_NotNull.yaml
- AmortizedCost_Required.yaml
- AvailabilityZone_IsString.yaml
- AvailabilityZone_Nullable.yaml
- BilledCost_IsDecimal.yaml
- BilledCost_NotNull.yaml
- BilledCost_Required.yaml
- BilledCurrency_IsCurrencyCode.yaml
- BilledCurrency_NotNull.yaml
- BilledCurrency_Required.yaml
- BillingAccountId_IsString.yaml
- BillingAccountId_NotNull.yaml
- BillingAccountId_Required.yaml
- BillingAccountName_IsString.yaml
- BillingAccountName_Nullable.yaml
- BillingAccountName_Required.yaml
- BillingPeriodEnd_IsDateTime.yaml
- BillingPeriodEnd_NotNull.yaml
- BillingPeriodEnd_Required.yaml
- BillingPeriodStart_IsDateTime.yaml
- BillingPeriodStart_NotNull.yaml
- BillingPeriodStart_Required.yaml
- ChargePeriodEnd_IsDateTime.yaml
- ChargePeriodEnd_NotNull.yaml
- ChargePeriodEnd_Required.yaml
- ChargePeriodStart_IsDateTime.yaml
- ChargePeriodStart_NotNull.yaml
- ChargePeriodStart_Required.yaml
- ChargeType_Enum.yaml
- ChargeType_IsString.yaml
- ChargeType_NotNull.yaml
- ChargeType_Required.yaml
- InvoiceIssuer_IsString.yaml
- InvoiceIssuer_NotNull.yaml
- Provider_IsString.yaml
- Provider_NotNull.yaml
- Provider_Required.yaml
- Publisher_IsString.yaml
- Publisher_NotNull.yaml
- Publisher_Required.yaml
- Region_IsString.yaml
- Region_NotNull.yaml
- Region_Required.yaml
- ResourceID_IsString.yaml
- ResourceID_Nullable.yaml
- ResourceName_IsString.yaml
- ResourceName_Nullable.yaml
- ResourceName_Required.yaml
- ServiceCategory_Enum.yaml
- ServiceCategory_IsString.yaml
- ServiceCategory_NotNull.yaml
- ServiceCategory_Required.yaml
- ServiceName_IsString.yaml
- ServiceName_NotNull.yaml
- ServiceName_Required.yaml
- SubAccountName_IsString.yaml
- SubAccountName_Nullable.yaml
- SubAccountName_Required.yaml

'0.5':
- AmortizedCost_IsDecimal.yaml
- AmortizedCost_NotNull.yaml
Expand Down
4 changes: 2 additions & 2 deletions focus_validator/utils/download_currency_codes.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
CURRENCY_CODE_CSV_PATH = "focus_validator/utils/currency_codes.csv"


def download_currency_codes():
def download_currency_codes(): # pragma: no cover
r = requests.get(DATAHUB_URL)
root = ET.fromstring(r.content.decode())

Expand All @@ -25,5 +25,5 @@ def get_currency_codes():
return set(df["currency_codes"].values)


if __name__ == "__main__":
if __name__ == "__main__": # pragma: no cover
download_currency_codes()
Loading

0 comments on commit 9d0bc11

Please sign in to comment.