From 63630074d04045053ad8e25baf9b5c4f3ac820c7 Mon Sep 17 00:00:00 2001 From: georgewoodhead <50772749+georgewoodhead@users.noreply.github.com> Date: Mon, 13 Nov 2023 11:28:54 +0000 Subject: [PATCH] Add missing primary key to media_ad_views --- CHANGELOG | 7 ++++--- docs/markdown/snowplow_media_player_common_cols.md | 6 +++--- docs/markdown/snowplow_media_player_macro_docs.md | 4 ++-- models/base/scratch/base_scratch.yml | 2 +- models/media_ad_views/media_ad_views.yml | 7 +++++++ models/media_ad_views/scratch/base_scratch.yml | 7 +++++++ ...nowplow_media_player_media_ad_views_this_run.sql | 13 ++++++++----- .../snowplow_media_player_media_ad_views.sql | 1 + 8 files changed, 33 insertions(+), 14 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 1f28456..e55a186 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,10 +1,11 @@ snowplow-media-player 0.7.0 (2023-xx-xx) --------------------------------------- ## Summary -This release adds a more robust unique media identifier. This fixes an issue where duplicate `media_id` values could occur in the media stats table as a result of incorrect tracking implementation (e.g. sharing the same media label across different media types). +This release adds a more robust unique media identifier. This fixes an issue where duplicate `media_id` values could occur in the media stats table as a result of incorrect tracking implementation (e.g. sharing the same media label across different media types). This release also fixes the incremental materialization of the media_ad_views table by adding a unique primary key. -## Features -Add unique media identifier (close #59) +## Fixes +- Add unique media identifier (close #59) +- Add missing primary key to media_ad_views ## Under the hood diff --git a/docs/markdown/snowplow_media_player_common_cols.md b/docs/markdown/snowplow_media_player_common_cols.md index 22bf5c3..2c993c7 100644 --- a/docs/markdown/snowplow_media_player_common_cols.md +++ b/docs/markdown/snowplow_media_player_common_cols.md @@ -3,7 +3,7 @@ A UUID for each event e.g. `c6ef3124-b53a-4b13-a233-0088f79dcbcb`. {% enddocs %} {% docs col_media_identifier %} -The surrogate key generated from `media_id`, `media_label`, `media_type` and `media_player_type` to create a unique media element identifier. +The surrogate key generated from `player_id`, `media_label`, `media_type` and `media_player_type` to create a unique media element identifier. {% enddocs %} {% docs col_player_id %} @@ -11,7 +11,7 @@ The HTML id attribute of the media content. It is the `player_id` in case of You {% enddocs %} {% docs col_play_id %} -The surrogate key generated from `page_view_id`, `media_id`, `media_label`, `media_type` and `media_player_type` to create a unique play event identifier. +The surrogate key generated from `page_view_id`, `player_id`, `media_label`, `media_type` and `media_player_type` to create a unique play event identifier. {% enddocs %} {% docs col_page_view_id %} @@ -301,7 +301,7 @@ The number of pageviews with audio plays of any duration. {% enddocs %} {% docs col_last_base_tstamp %} -The start_tstamp of the last processed page_view across all media_ids to be used as a lower limit for subsequent incremental runs. +The start_tstamp of the last processed page_view across all media_identifiers to be used as a lower limit for subsequent incremental runs. {% enddocs %} {% docs col_player_current_time %} diff --git a/docs/markdown/snowplow_media_player_macro_docs.md b/docs/markdown/snowplow_media_player_macro_docs.md index ce48e71..feacf77 100644 --- a/docs/markdown/snowplow_media_player_macro_docs.md +++ b/docs/markdown/snowplow_media_player_macro_docs.md @@ -137,10 +137,10 @@ The query for the player_id column. ```sql select ..., - {{ media_id_field( + {{ player_id_field( youtube_player_id='a.contexts_com_youtube_youtube_1[0]:playerId', media_player_id='a.contexts_org_whatwg_media_element_1[0]:htmlId::varchar' - ) }} as media_id + ) }} as player_id from {{ var('snowplow__events') }} as a ``` diff --git a/models/base/scratch/base_scratch.yml b/models/base/scratch/base_scratch.yml index 020e259..ac8cbbc 100644 --- a/models/base/scratch/base_scratch.yml +++ b/models/base/scratch/base_scratch.yml @@ -44,7 +44,7 @@ models: description: '{{ doc("col_domain_userid") }}' - name: media_identifier description: '{{ doc("col_media_identifier") }}' - - name: media_id + - name: player_id description: '{{ doc("col_player_id") }}' - name: media_label description: '{{ doc("col_media_label") }}' diff --git a/models/media_ad_views/media_ad_views.yml b/models/media_ad_views/media_ad_views.yml index b9b3d41..fd3fe2c 100644 --- a/models/media_ad_views/media_ad_views.yml +++ b/models/media_ad_views/media_ad_views.yml @@ -5,6 +5,13 @@ models: +tags: "snowplow_media_player_incremental" description: '{{ doc("table_base") }}' columns: + - name: media_ad_view_id + description: The primary key of this table + tags: + - primary-key + tests: + - unique + - not_null - name: media_ad_id description: '{{ doc("col_media_ad_id") }}' tests: diff --git a/models/media_ad_views/scratch/base_scratch.yml b/models/media_ad_views/scratch/base_scratch.yml index 65e7130..dac1429 100644 --- a/models/media_ad_views/scratch/base_scratch.yml +++ b/models/media_ad_views/scratch/base_scratch.yml @@ -4,6 +4,13 @@ models: - name: snowplow_media_player_media_ad_views_this_run description: '{{ doc("table_media_ad_views_this_run") }}' columns: + - name: media_ad_view_id + description: The primary key of this table + tags: + - primary-key + tests: + - unique + - not_null - name: media_ad_id description: '{{ doc("col_media_ad_id") }}' tests: diff --git a/models/media_ad_views/scratch/snowplow_media_player_media_ad_views_this_run.sql b/models/media_ad_views/scratch/snowplow_media_player_media_ad_views_this_run.sql index 89dd133..90c4fc0 100644 --- a/models/media_ad_views/scratch/snowplow_media_player_media_ad_views_this_run.sql +++ b/models/media_ad_views/scratch/snowplow_media_player_media_ad_views_this_run.sql @@ -70,8 +70,11 @@ events_this_run as ( ) -select * - {% if target.type in ['databricks', 'spark'] -%} - , date(prep.viewed_at) as viewed_at_date - {%- endif %} - from prep +select + {{ dbt_utils.generate_surrogate_key(['p.play_id', 'p.ad_break_id', 'p.media_ad_id']) }} as media_ad_view_id + , p.* + {% if target.type in ['databricks', 'spark'] -%} + , date(p.viewed_at) as viewed_at_date + {%- endif %} + +from prep as p diff --git a/models/media_ad_views/snowplow_media_player_media_ad_views.sql b/models/media_ad_views/snowplow_media_player_media_ad_views.sql index 2555c44..71c8d78 100644 --- a/models/media_ad_views/snowplow_media_player_media_ad_views.sql +++ b/models/media_ad_views/snowplow_media_player_media_ad_views.sql @@ -8,6 +8,7 @@ You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 {{ config( materialized= "incremental", + unique_key= 'media_ad_view_id', upsert_date_key='last_event', sort = 'last_event', dist = 'media_ad_id',