From a25ebd6c6d9da224dcb08f573ea5639dcdfff2ca Mon Sep 17 00:00:00 2001 From: georgewoodhead <50772749+georgewoodhead@users.noreply.github.com> Date: Mon, 13 Nov 2023 11:28:54 +0000 Subject: [PATCH] Add missing primary key to media_ad_views --- CHANGELOG | 7 ++++--- models/media_ad_views/media_ad_views.yml | 7 +++++++ models/media_ad_views/scratch/base_scratch.yml | 7 +++++++ ...nowplow_media_player_media_ad_views_this_run.sql | 13 ++++++++----- .../snowplow_media_player_media_ad_views.sql | 1 + 5 files changed, 27 insertions(+), 8 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 1f28456..e55a186 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,10 +1,11 @@ snowplow-media-player 0.7.0 (2023-xx-xx) --------------------------------------- ## Summary -This release adds a more robust unique media identifier. This fixes an issue where duplicate `media_id` values could occur in the media stats table as a result of incorrect tracking implementation (e.g. sharing the same media label across different media types). +This release adds a more robust unique media identifier. This fixes an issue where duplicate `media_id` values could occur in the media stats table as a result of incorrect tracking implementation (e.g. sharing the same media label across different media types). This release also fixes the incremental materialization of the media_ad_views table by adding a unique primary key. -## Features -Add unique media identifier (close #59) +## Fixes +- Add unique media identifier (close #59) +- Add missing primary key to media_ad_views ## Under the hood diff --git a/models/media_ad_views/media_ad_views.yml b/models/media_ad_views/media_ad_views.yml index b9b3d41..fd3fe2c 100644 --- a/models/media_ad_views/media_ad_views.yml +++ b/models/media_ad_views/media_ad_views.yml @@ -5,6 +5,13 @@ models: +tags: "snowplow_media_player_incremental" description: '{{ doc("table_base") }}' columns: + - name: media_ad_view_id + description: The primary key of this table + tags: + - primary-key + tests: + - unique + - not_null - name: media_ad_id description: '{{ doc("col_media_ad_id") }}' tests: diff --git a/models/media_ad_views/scratch/base_scratch.yml b/models/media_ad_views/scratch/base_scratch.yml index 65e7130..dac1429 100644 --- a/models/media_ad_views/scratch/base_scratch.yml +++ b/models/media_ad_views/scratch/base_scratch.yml @@ -4,6 +4,13 @@ models: - name: snowplow_media_player_media_ad_views_this_run description: '{{ doc("table_media_ad_views_this_run") }}' columns: + - name: media_ad_view_id + description: The primary key of this table + tags: + - primary-key + tests: + - unique + - not_null - name: media_ad_id description: '{{ doc("col_media_ad_id") }}' tests: diff --git a/models/media_ad_views/scratch/snowplow_media_player_media_ad_views_this_run.sql b/models/media_ad_views/scratch/snowplow_media_player_media_ad_views_this_run.sql index 89dd133..90c4fc0 100644 --- a/models/media_ad_views/scratch/snowplow_media_player_media_ad_views_this_run.sql +++ b/models/media_ad_views/scratch/snowplow_media_player_media_ad_views_this_run.sql @@ -70,8 +70,11 @@ events_this_run as ( ) -select * - {% if target.type in ['databricks', 'spark'] -%} - , date(prep.viewed_at) as viewed_at_date - {%- endif %} - from prep +select + {{ dbt_utils.generate_surrogate_key(['p.play_id', 'p.ad_break_id', 'p.media_ad_id']) }} as media_ad_view_id + , p.* + {% if target.type in ['databricks', 'spark'] -%} + , date(p.viewed_at) as viewed_at_date + {%- endif %} + +from prep as p diff --git a/models/media_ad_views/snowplow_media_player_media_ad_views.sql b/models/media_ad_views/snowplow_media_player_media_ad_views.sql index 2555c44..71c8d78 100644 --- a/models/media_ad_views/snowplow_media_player_media_ad_views.sql +++ b/models/media_ad_views/snowplow_media_player_media_ad_views.sql @@ -8,6 +8,7 @@ You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 {{ config( materialized= "incremental", + unique_key= 'media_ad_view_id', upsert_date_key='last_event', sort = 'last_event', dist = 'media_ad_id',