diff --git a/CHANGELOG b/CHANGELOG index 3b49c01..2f66e2a 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -3,10 +3,13 @@ snowplow-media-player 0.7.0 (2023-xx-xx) ## Summary This version adds new features powered by a complete refactor of the core processing of the package by moving it out to the new `base` macro functionality provided in `snowplow_utils`. This enables users to now specify custom fields for sessionization and user identification, to add custom entities/SDEs fields to the base events table for redshift/postgres, and to add passthrough fields to the derived tables so you can now more easily add your own fields to our tables. +The default session identifier has been changed from the domain_sessionid to the media session id (or the page/screen view id if the media session entity is not set). Previously, media events from a play that overlapped into a new domain_sessionid were discarded; this update ensures the complete media play is modeled. It is still possible to perform the original session-level analysis using the new `domain_sessionid_array` field. + In addition this release adds a more robust unique media identifier. This fixes an issue where duplicate `media_id` values could occur in the media stats table as a result of incorrect tracking implementation (e.g. sharing the same media label across different media types). This release also fixes the incremental materialization of the media_ad_views table by adding a unique primary key. 
## Features - Migrate base models to the new `base` macros for flexibility and consistency +- Updated the default session identifier to be the media session id (or page/screen view id if the media session entity is not set) - Add ability to pass fields through to derived media base and ad views tables - Add new field `domain_sessionid_array` to derived tables (where applicable) diff --git a/docs/markdown/snowplow_media_player_common_cols.md b/docs/markdown/snowplow_media_player_common_cols.md index ccd64b2..97d4e41 100644 --- a/docs/markdown/snowplow_media_player_common_cols.md +++ b/docs/markdown/snowplow_media_player_common_cols.md @@ -22,6 +22,10 @@ A UUID for each page view e.g. `c6ef3124-b53a-4b13-a233-0088f79dcbcb`. The session identifier as defined in your project variables. Default to the media_session_id, or to page_view_id if the media session entity is not enabled. {% enddocs %} +{% docs col_original_session_identifier %} +The session identifier set by Snowplow using a 1st party cookie. This is the domain_sessionid, or the session_id from the mobile session context. +{% enddocs %} + {% docs col_domain_sessionid_array %} All domain_sessionids seen for a play_id. 
{% enddocs %} diff --git a/macros/identifiers.sql b/macros/identifiers.sql index 9c4298e..0e8c09c 100644 --- a/macros/identifiers.sql +++ b/macros/identifiers.sql @@ -21,29 +21,24 @@ You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 {{ return(var('snowplow__session_identifiers')) }} {% else %} + {% set identifiers = [] %} {% if var('snowplow__enable_media_session') %} - {{ return([ - {'schema': 'contexts_com_snowplowanalytics_snowplow_media_session_1', 'field': 'media_session_id', 'prefix': 'media_session_'}, - {'schema': 'contexts_com_snowplowanalytics_mobile_screen_1', 'field': 'id', 'prefix': 'mobile_screen_'}, - {'schema': 'contexts_com_snowplowanalytics_snowplow_web_page_1', 'field': 'id', 'prefix': 'web_page_'} - ]) }} - - {% elif var('snowplow__enable_web_events') and var('snowplow__enable_mobile_events') %} - {{ return([ - {'schema': 'contexts_com_snowplowanalytics_mobile_screen_1', 'field': 'id', 'prefix': 'mobile_screen_'}, - {'schema': 'contexts_com_snowplowanalytics_snowplow_web_page_1', 'field': 'id', 'prefix': 'web_page_'} - ]) }} - - {% elif var('snowplow__enable_mobile_events') %} - {{ return([{'schema': 'contexts_com_snowplowanalytics_mobile_screen_1', 'field': 'id', 'prefix': 'mobile_screen_'}]) }} + {% do identifiers.append({'schema': 'contexts_com_snowplowanalytics_snowplow_media_session_1', 'field': 'media_session_id', 'prefix': 'media_session_'}) %} + {% endif %} - {% else %} - {{ return([{'schema': 'contexts_com_snowplowanalytics_snowplow_web_page_1', 'field': 'id', 'prefix': 'web_page_'}] )}} + {% if var('snowplow__enable_mobile_events') %} + {% do identifiers.append({'schema': 'contexts_com_snowplowanalytics_mobile_screen_1', 'field': 'id', 'prefix': 'mobile_screen_'}) %} + {% endif %} + {% if var('snowplow__enable_web_events') %} + {% do identifiers.append({'schema': 'contexts_com_snowplowanalytics_snowplow_web_page_1', 'field': 'id', 'prefix': 'web_page_'}) %} {% endif %} + {% endif %} + {{ 
return(identifiers) }} + {% endmacro %} @@ -53,91 +48,77 @@ You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 {{ return(var('snowplow__session_identifiers')) }} {% else %} + {% set identifiers = [] %} {% if var('snowplow__enable_media_session') %} - {{ return([ - {'schema': 'contexts_com_snowplowanalytics_snowplow_media_session_1', 'field': 'mediaSessionId', 'prefix': 'media_session_'}, - {'schema': 'contexts_com_snowplowanalytics_mobile_screen_1', 'field': 'id', 'prefix': 'mobile_screen_'}, - {'schema': 'contexts_com_snowplowanalytics_snowplow_web_page_1', 'field': 'id', 'prefix': 'web_page_'} - ]) }} - - {% elif var('snowplow__enable_web_events') and var('snowplow__enable_mobile_events') %} - {{ return([ - {'schema': 'contexts_com_snowplowanalytics_mobile_screen_1', 'field': 'id', 'prefix': 'mobile_screen_'}, - {'schema': 'contexts_com_snowplowanalytics_snowplow_web_page_1', 'field': 'id', 'prefix': 'web_page_'} - ]) }} - - {% elif var('snowplow__enable_mobile_events') %} - {{ return([{'schema': 'contexts_com_snowplowanalytics_mobile_screen_1', 'field': 'id', 'prefix': 'mobile_screen_'}]) }} + {% do identifiers.append({'schema': 'contexts_com_snowplowanalytics_snowplow_media_session_1', 'field': 'mediaSessionId', 'prefix': 'media_session_'}) %} + {% endif %} - {% else %} - {{ return([{'schema': 'contexts_com_snowplowanalytics_snowplow_web_page_1', 'field': 'id', 'prefix': 'web_page_'}] )}} + {% if var('snowplow__enable_mobile_events') %} + {% do identifiers.append({'schema': 'contexts_com_snowplowanalytics_mobile_screen_1', 'field': 'id', 'prefix': 'mobile_screen_'}) %} + {% endif %} + {% if var('snowplow__enable_web_events') %} + {% do identifiers.append({'schema': 'contexts_com_snowplowanalytics_snowplow_web_page_1', 'field': 'id', 'prefix': 'web_page_'}) %} {% endif %} + {% endif %} + {{ return(identifiers) }} + {% endmacro %} + {% macro bigquery__session_identifiers() %} {% if var('snowplow__session_identifiers') %} {{ 
return(var('snowplow__session_identifiers')) }} {% else %} + {% set identifiers = [] %} {% if var('snowplow__enable_media_session') %} - {{ return([ - {'schema': 'contexts_com_snowplowanalytics_snowplow_media_session_1_*', 'field': 'media_session_id', 'prefix': 'media_session_'}, - {'schema': 'contexts_com_snowplowanalytics_mobile_screen_1_*', 'field': 'id', 'prefix': 'mobile_screen_'}, - {'schema': 'contexts_com_snowplowanalytics_snowplow_web_page_1_*', 'field': 'id', 'prefix': 'web_page_'} - ]) }} - - {% elif var('snowplow__enable_web_events') and var('snowplow__enable_mobile_events') %} - {{ return([ - {'schema': 'contexts_com_snowplowanalytics_mobile_screen_1_*', 'field': 'id', 'prefix': 'mobile_screen_'}, - {'schema': 'contexts_com_snowplowanalytics_snowplow_web_page_1_*', 'field': 'id', 'prefix': 'web_page_'} - ]) }} - - {% elif var('snowplow__enable_mobile_events') %} - {{ return([{'schema': 'contexts_com_snowplowanalytics_mobile_screen_1_*', 'field': 'id', 'prefix': 'mobile_screen_'}]) }} + {% do identifiers.append({'schema': 'contexts_com_snowplowanalytics_snowplow_media_session_1_*', 'field': 'media_session_id', 'prefix': 'media_session_'}) %} + {% endif %} - {% else %} - {{ return([{'schema': 'contexts_com_snowplowanalytics_snowplow_web_page_1_*', 'field': 'id', 'prefix': 'web_page_'}] )}} + {% if var('snowplow__enable_mobile_events') %} + {% do identifiers.append({'schema': 'contexts_com_snowplowanalytics_mobile_screen_1_*', 'field': 'id', 'prefix': 'mobile_screen_'}) %} + {% endif %} + {% if var('snowplow__enable_web_events') %} + {% do identifiers.append({'schema': 'contexts_com_snowplowanalytics_snowplow_web_page_1_*', 'field': 'id', 'prefix': 'web_page_'}) %} {% endif %} {% endif %} + {{ return(identifiers) }} + {% endmacro %} + {% macro postgres__session_identifiers() %} {% if var('snowplow__session_identifiers') %} {{ return(var('snowplow__session_identifiers')) }} {% else %} + {% set identifiers = [] %} {% if var('snowplow__enable_media_session') 
%} - {{ return([ - {'schema': 'com_snowplowanalytics_snowplow_media_session_1', 'field': 'media_session_id', 'prefix': 'media_session_'}, - {'schema': 'com_snowplowanalytics_mobile_screen_1', 'field': 'id', 'prefix': 'mobile_screen_'}, - {'schema': 'com_snowplowanalytics_snowplow_web_page_1', 'field': 'id', 'prefix': 'web_page_'} - ]) }} - - {% elif var('snowplow__enable_web_events') and var('snowplow__enable_mobile_events') %} - {{ return([ - {'schema': 'com_snowplowanalytics_mobile_screen_1', 'field': 'id', 'prefix': 'mobile_screen_'}, - {'schema': 'com_snowplowanalytics_snowplow_web_page_1', 'field': 'id', 'prefix': 'web_page_'} - ]) }} - - {% elif var('snowplow__enable_mobile_events') %} - {{ return([{'schema': 'com_snowplowanalytics_mobile_screen_1', 'field': 'id', 'prefix': 'mobile_screen_'}]) }} + {% do identifiers.append({'schema': 'com_snowplowanalytics_snowplow_media_session_1', 'field': 'media_session_id', 'prefix': 'media_session_'}) %} + {% endif %} - {% else %} - {{ return([{'schema': 'com_snowplowanalytics_snowplow_web_page_1', 'field': 'id', 'prefix': 'web_page_'}] )}} + {% if var('snowplow__enable_mobile_events') %} + {% do identifiers.append({'schema': 'com_snowplowanalytics_mobile_screen_1', 'field': 'id', 'prefix': 'mobile_screen_'}) %} + {% endif %} + {% if var('snowplow__enable_web_events') %} + {% do identifiers.append({'schema': 'com_snowplowanalytics_snowplow_web_page_1', 'field': 'id', 'prefix': 'web_page_'}) %} {% endif %} + {% endif %} + {{ return(identifiers) }} + {% endmacro %} @@ -157,20 +138,18 @@ You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 {{ return(var('snowplow__user_identifiers')) }} {% else %} + {% set identifiers = [] %} - {% if var('snowplow__enable_web_events') and var('snowplow__enable_mobile_events') %} - {{ return([ - {'schema': 'contexts_com_snowplowanalytics_snowplow_client_session_1', 'field': 'user_id', 'prefix': 'user_'}, - {'schema': 'atomic', 'field': 'domain_userid', 'prefix': 
'user_'} - ]) }} + {% if var('snowplow__enable_web_events') %} + {% do identifiers.append( {'schema': 'atomic', 'field': 'domain_userid', 'prefix': 'user_'}) %} + {% endif %} - {% elif var('snowplow__enable_mobile_events') %} - {{ return([{'schema': 'contexts_com_snowplowanalytics_snowplow_client_session_1', 'field': 'user_id', 'prefix': 'user_'}]) }} + {% if var('snowplow__enable_mobile_events') %} + {% do identifiers.append({'schema': 'contexts_com_snowplowanalytics_snowplow_client_session_1', 'field': 'user_id', 'prefix': 'user_'}) %} + {% endif %} - {% else %} - {{ return([{'schema': 'atomic', 'field': 'domain_userid', 'prefix': 'user_'}] )}} + {{ return(identifiers) }} - {% endif %} {% endif %} {% endmacro %} @@ -182,20 +161,18 @@ You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 {{ return(var('snowplow__user_identifiers')) }} {% else %} + {% set identifiers = [] %} - {% if var('snowplow__enable_web_events') and var('snowplow__enable_mobile_events') %} - {{ return([ - {'schema': 'contexts_com_snowplowanalytics_snowplow_client_session_1', 'field': 'userId', 'prefix': 'user_'}, - {'schema': 'atomic', 'field': 'domain_userid', 'prefix': 'user_'} - ]) }} + {% if var('snowplow__enable_web_events') %} + {% do identifiers.append( {'schema': 'atomic', 'field': 'domain_userid', 'prefix': 'user_'}) %} + {% endif %} - {% elif var('snowplow__enable_mobile_events') %} - {{ return([{'schema': 'contexts_com_snowplowanalytics_snowplow_client_session_1', 'field': 'userId', 'prefix': 'user_'}]) }} + {% if var('snowplow__enable_mobile_events') %} + {% do identifiers.append({'schema': 'contexts_com_snowplowanalytics_snowplow_client_session_1', 'field': 'userId', 'prefix': 'user_'}) %} + {% endif %} - {% else %} - {{ return([{'schema': 'atomic', 'field': 'domain_userid', 'prefix': 'user_'}] )}} + {{ return(identifiers) }} - {% endif %} {% endif %} {% endmacro %} @@ -206,20 +183,18 @@ You may obtain a copy of the Snowplow Personal and Academic License 
Version 1.0 {{ return(var('snowplow__user_identifiers')) }} {% else %} + {% set identifiers = [] %} - {% if var('snowplow__enable_web_events') and var('snowplow__enable_mobile_events') %} - {{ return([ - {'schema': 'contexts_com_snowplowanalytics_snowplow_client_session_1_*', 'field': 'user_id', 'prefix': 'user_'}, - {'schema': 'atomic', 'field': 'domain_userid', 'prefix': 'user_'} - ]) }} + {% if var('snowplow__enable_web_events') %} + {% do identifiers.append( {'schema': 'atomic', 'field': 'domain_userid', 'prefix': 'user_'}) %} + {% endif %} - {% elif var('snowplow__enable_mobile_events') %} - {{ return([{'schema': 'contexts_com_snowplowanalytics_snowplow_client_session_1_*', 'field': 'user_id', 'prefix': 'user_'}]) }} + {% if var('snowplow__enable_mobile_events') %} + {% do identifiers.append({'schema': 'contexts_com_snowplowanalytics_snowplow_client_session_1_*', 'field': 'user_id', 'prefix': 'user_'}) %} + {% endif %} - {% else %} - {{ return([{'schema': 'atomic', 'field': 'domain_userid', 'prefix': 'user_'}] )}} + {{ return(identifiers) }} - {% endif %} {% endif %} {% endmacro %} @@ -230,20 +205,18 @@ You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 {{ return(var('snowplow__user_identifiers')) }} {% else %} + {% set identifiers = [] %} - {% if var('snowplow__enable_web_events') and var('snowplow__enable_mobile_events') %} - {{ return([ - {'schema': 'com_snowplowanalytics_snowplow_client_session_1', 'field': 'user_id', 'prefix': 'user_'}, - {'schema': 'atomic', 'field': 'domain_userid', 'prefix': 'user_'} - ]) }} + {% if var('snowplow__enable_web_events') %} + {% do identifiers.append( {'schema': 'atomic', 'field': 'domain_userid', 'prefix': 'user_'}) %} + {% endif %} - {% elif var('snowplow__enable_mobile_events') %} - {{ return([{'schema': 'com_snowplowanalytics_snowplow_client_session_1', 'field': 'user_id', 'prefix': 'user_'}]) }} + {% if var('snowplow__enable_mobile_events') %} + {% do identifiers.append({'schema': 
'com_snowplowanalytics_snowplow_client_session_1', 'field': 'user_id', 'prefix': 'user_'}) %} + {% endif %} - {% else %} - {{ return([{'schema': 'atomic', 'field': 'domain_userid', 'prefix': 'user_'}] )}} + {{ return(identifiers) }} - {% endif %} {% endif %} {% endmacro %} diff --git a/models/base/scratch/base_scratch.yml b/models/base/scratch/base_scratch.yml index 6a703cc..3819e09 100644 --- a/models/base/scratch/base_scratch.yml +++ b/models/base/scratch/base_scratch.yml @@ -38,6 +38,8 @@ models: - not_null - name: page_view_id description: '{{ doc("col_page_view_id") }}' + - name: original_session_identifier + description: '{{ doc("col_original_session_identifier") }}' - name: session_identifier description: '{{ doc("col_session_identifier") }}' - name: user_identifier diff --git a/models/media_ad_views/media_ad_views.yml b/models/media_ad_views/media_ad_views.yml index 6d141f0..c86f5ba 100644 --- a/models/media_ad_views/media_ad_views.yml +++ b/models/media_ad_views/media_ad_views.yml @@ -66,3 +66,5 @@ models: - not_null - name: last_event description: '{{ doc("col_last_event") }}' + - name: domain_sessionid_array + description: '{{ doc("col_domain_sessionid_array") }}' diff --git a/models/media_base/media_base.yml b/models/media_base/media_base.yml index 605dfdd..ced5a15 100644 --- a/models/media_base/media_base.yml +++ b/models/media_base/media_base.yml @@ -22,8 +22,12 @@ models: description: '{{ doc("col_media_label") }}' - name: session_identifier description: '{{ doc("col_session_identifier") }}' + - name: domain_sessionid_array + description: '{{ doc("col_domain_sessionid_array") }}' - name: user_identifier description: '{{ doc("col_user_identifier") }}' + - name: user_id + description: '{{ doc("col_user_id") }}' - name: page_referrer description: '{{ doc("col_page_referrer") }}' - name: page_url diff --git a/models/media_plays/media_plays.yml b/models/media_plays/media_plays.yml index 2e5e4c7..0d63cf9 100644 --- a/models/media_plays/media_plays.yml +++ 
b/models/media_plays/media_plays.yml @@ -22,8 +22,12 @@ models: description: '{{ doc("col_media_label") }}' - name: session_identifier description: '{{ doc("col_session_identifier") }}' + - name: domain_sessionid_array + description: '{{ doc("col_domain_sessionid_array") }}' - name: user_identifier description: '{{ doc("col_user_identifier") }}' + - name: user_id + description: '{{ doc("col_user_id") }}' - name: page_referrer description: '{{ doc("col_page_referrer") }}' - name: page_url