diff --git a/dbt_project/macros/averagea.sql b/dbt_project/macros/averagea.sql
new file mode 100644
index 0000000..0f14a45
--- /dev/null
+++ b/dbt_project/macros/averagea.sql
@@ -0,0 +1,60 @@
+{#
+Returns the average (arithmetic mean) of the values in a column, coercing
+text and boolean-like values to numbers first (uses BigQuery functions).
+
+Each value is cast to a string, lower-cased and classified as follows:
+
+  * 'y', 'yes', 't', 'true' -> 1.0
+  * 'n', 'no', 'f', 'false' -> 0.0
+  * a decimal number with optional sign, fraction and exponent
+    (e.g. '10', '-3', '0.5', '2.5e3') -> safe_cast to float64
+  * any other text containing a character that is not a digit or a dot
+    (e.g. '10s') -> 0.0
+  * anything else (NULL, '', or digit/dot-only text such as '1.2.3')
+    -> NULL, which avg() ignores
+
+Example:
+
+    Input: `example`
+
+    | example |
+    |---------|
+    | 10      |
+    | 10s     |
+    | 30      |
+    | True    |
+
+    select
+        {{ averagea('example') }}
+    from public.test
+
+    Output:
+
+    10.25
+
+Arguments:
+    column: Column name, required
+#}
+
+{% macro averagea(column) %}
+    {# Normalised text form of the value, built once and reused by every branch. #}
+    {% set text_value = 'lower(cast(' ~ column ~ ' as string))' %}
+    avg(
+        case
+            -- Boolean-like text counts as 1 (true) or 0 (false).
+            when {{ text_value }} in ('y', 'yes', 't', 'true') then 1.0
+            when {{ text_value }} in ('n', 'no', 'f', 'false') then 0.0
+
+            -- Numeric text: optional sign, digits and/or fraction, optional exponent.
+            -- safe_cast returns NULL instead of failing if the value cannot be converted.
+            when regexp_contains({{ text_value }}, r'^[-+]?([0-9]+(\.[0-9]*)?|\.[0-9]+)(e[-+]?[0-9]+)?$')
+                then safe_cast({{ column }} as float64)
+
+            -- Other text with a character that is neither a digit nor a dot counts as 0.
+            when regexp_contains({{ text_value }}, r'[^0-9.]') then 0.0
+
+            -- NULL, '' and digit/dot-only text that is not a number are skipped by avg().
+            else null
+        end
+    )
+{% endmacro %}
diff --git a/dbt_project/macros/averagex.sql b/dbt_project/macros/averagex.sql
new file mode 100644
index 0000000..ab6e7f5
--- /dev/null
+++ b/dbt_project/macros/averagex.sql
@@ -0,0 +1,31 @@
+{#
+Calculates the average (arithmetic mean) of an expression evaluated for each
+row of a table. Renders a parenthesised scalar subquery.
+
+Example:
+
+    Input: 'Orders', 'Quantity * Sales'
+
+    | Quantity | Sales |
+    |----------|-------|
+    | 2        | 10    |
+    | 1        | 20    |
+    | 3        | 5     |
+
+    select {{ averagex('Orders', 'Quantity * Sales') }}
+
+    Output:
+
+    18.33
+
+Arguments:
+    table: The table containing the rows for which the expression will be evaluated, required.
+    expression: The expression to be evaluated for each row of the table, required.
+#}
+
+{% macro averagex(table, expression) %}
+    (
+        select avg({{ expression }})
+        from {{ table }}
+    )
+{% endmacro %}
diff --git a/dbt_project/macros/remove_partial_duplicates.sql b/dbt_project/macros/remove_partial_duplicates.sql
new file mode 100644
index 0000000..8192d4d
--- /dev/null
+++ b/dbt_project/macros/remove_partial_duplicates.sql
@@ -0,0 +1,41 @@
+{#
+Removes partial duplicates with a row_number() window: renders a `qualify`
+clause that keeps a single row per distinct value of the given column.
+
+Without `order_by` the row kept in each group is arbitrary, so results are
+not deterministic. Pass `order_by` to control which row survives.
+
+Example:
+
+    Input: `size`
+
+    | size | color |
+    |------+-------|
+    | S    | red   |
+    | S    | blue  |
+    | S    | red   |
+    | M    | red   |
+
+    select *
+    from public.test
+    {{ remove_partial_duplicates('size') }}
+
+    Output (one possible result):
+
+    | size | color |
+    |------+-------|
+    | S    | red   |
+    | M    | red   |
+
+Arguments:
+    column_name: Column name to partition by, required
+    order_by: Ordering expression (e.g. 'updated_at desc') that decides which
+        row of each group is kept. Optional; omitted means an arbitrary row.
+#}
+
+{% macro remove_partial_duplicates(column_name, order_by=none) %}
+qualify row_number() over (
+    partition by {{ column_name }}
+    {% if order_by %}order by {{ order_by }}{% endif %}
+) = 1
+{% endmacro %}