Skip to content

Commit

Permalink
revert sql changes, moved to #581
Browse files Browse the repository at this point in the history
  • Loading branch information
aaronsteers committed Feb 27, 2023
1 parent 70e72fd commit bc58684
Show file tree
Hide file tree
Showing 3 changed files with 182 additions and 198 deletions.
Original file line number Diff line number Diff line change
@@ -1,9 +1,3 @@
{{
config(
materialized='table'
)
}}

WITH reparse_1 AS (
-- Incident: new schema not registered to SnowcatCloud versions 2.14.0 and
-- 2.15.0 (partial)
Expand Down
188 changes: 182 additions & 6 deletions data/transform/models/staging/snowplow/stg_snowplow__events.sql
Original file line number Diff line number Diff line change
@@ -1,13 +1,39 @@
{{
config(
materialized='table'
materialized='incremental'
)
}}

WITH blended_source AS (

SELECT *
FROM {{ ref('stg_snowplow__events_union_all') }}
SELECT
*,
FALSE AS snowplow_bad_parsed
{% if env_var("MELTANO_ENVIRONMENT") == "cicd" %}

FROM raw.snowplow.events
WHERE derived_tstamp::TIMESTAMP >= DATEADD('day', -3, CURRENT_DATE)
{% else %}

FROM {{ source('snowplow', 'events') }}

{% if is_incremental() %}

WHERE UPLOADED_AT >= (SELECT max(UPLOADED_AT) FROM {{ this }} WHERE snowplow_bad_parsed = FALSE)

{% endif %}
{% endif %}

UNION ALL

SELECT
*,
TRUE AS snowplow_bad_parsed
FROM {{ ref('snowplow_bad_parsed') }}
{% if is_incremental() %}

WHERE event_id NOT IN (SELECT DISTINCT event_id FROM {{ this }} WHERE snowplow_bad_parsed = TRUE)

{% endif %}

),

Expand All @@ -18,7 +44,7 @@ source AS (
ROW_NUMBER() OVER (
PARTITION BY
event_id
ORDER BY event_created_at::TIMESTAMP DESC
ORDER BY derived_tstamp::TIMESTAMP DESC
) AS row_num
FROM blended_source

Expand All @@ -29,8 +55,158 @@ clean_new_source AS (
SELECT *
FROM source
WHERE row_num = 1
{% if is_incremental() %}

AND event_id NOT IN (
SELECT DISTINCT event_id FROM {{ this }}
)
{% endif %}

),

renamed AS (

SELECT -- noqa: L034
-- only meltano events. For the first ~6 months no app_id was
-- sent from Meltano. So nulls are from meltano.
COALESCE(app_id, 'meltano') AS app_id,
platform,
etl_tstamp::TIMESTAMP AS etl_enriched_at,
collector_tstamp::TIMESTAMP AS collector_received_at,
dvce_created_tstamp::TIMESTAMP AS device_created_at,
dvce_sent_tstamp::TIMESTAMP AS device_sent_at,
refr_dvce_tstamp::TIMESTAMP AS refr_device_processed_at,
derived_tstamp::TIMESTAMP AS event_created_at,
derived_tstamp::DATE AS event_created_date,
true_tstamp::TIMESTAMP AS true_sent_at,
uploaded_at,
event,
event_id,
txn_id,
name_tracker,
v_tracker,
v_collector,
v_etl,
user_id,
user_ipaddress,
user_fingerprint,
domain_userid,
domain_sessionidx,
network_userid,
geo_country,
geo_region,
geo_city,
geo_zipcode,
geo_latitude,
geo_longitude,
geo_region_name,
ip_isp,
ip_organization,
ip_domain,
ip_netspeed,
page_url,
page_title,
page_referrer,
page_urlscheme,
page_urlhost,
page_urlport,
page_urlpath,
page_urlquery,
page_urlfragment,
refr_urlscheme,
refr_urlhost,
refr_urlport,
refr_urlpath,
refr_urlquery,
refr_urlfragment,
refr_medium,
refr_source,
refr_term,
mkt_medium,
mkt_source,
mkt_term,
mkt_content,
mkt_campaign,
contexts,
se_category,
se_action,
se_label,
se_property,
se_value,
unstruct_event,
tr_orderid,
tr_affiliation,
tr_total,
tr_tax,
tr_shipping,
tr_city,
tr_state,
tr_country,
ti_orderid,
ti_sku,
ti_name,
ti_category,
ti_price,
ti_quantity,
pp_xoffset_min,
pp_xoffset_max,
pp_yoffset_min,
pp_yoffset_max,
useragent,
br_name,
br_family,
br_version,
br_type,
br_renderengine,
br_lang,
br_features_pdf,
br_features_flash,
br_features_java,
br_features_director,
br_features_quicktime,
br_features_realplayer,
br_features_windowsmedia,
br_features_gears,
br_features_silverlight,
br_cookies,
br_colordepth,
br_viewwidth,
br_viewheight,
os_name,
os_family,
os_manufacturer,
os_timezone,
dvce_type,
dvce_ismobile,
dvce_screenwidth,
dvce_screenheight,
doc_charset,
doc_width,
doc_height,
tr_currency,
tr_total_base,
tr_tax_base,
tr_shipping_base,
ti_currency,
ti_price_base,
base_currency,
geo_timezone,
mkt_clickid,
mkt_network,
etl_tags,
refr_domain_userid,
derived_contexts::VARIANT AS derived_contexts,
domain_sessionid,
event_vendor,
event_name,
event_format,
event_version,
event_fingerprint,
MD5(user_ipaddress) AS ip_address_hash,
snowplow_bad_parsed
FROM clean_new_source

)

SELECT *
FROM clean_new_source
FROM renamed
Loading

0 comments on commit bc58684

Please sign in to comment.