From b5f665c8e5492de07b24825134e83e836b4b3f60 Mon Sep 17 00:00:00 2001 From: Trevor Hutto Date: Tue, 21 Nov 2023 08:27:40 -0500 Subject: [PATCH] prefix all vars with fullstory, add descriptions to README, general hygiene (#10) A couple things here: 1. Prefixing all of our vars with the standard `fullstory_` will make sure that our vars will not collide with any other DBT package. 2. When using this DBT package, I found it necessary to be able to provide the database, schema and table name explicitly so that we don't need to run the consuming DBT project in the same dataset as the raw events table. To support this, I introduced: `fullstory_events_database`, `fullstory_events_schema` and `fullstory_events_table`. This allows us to sync the events table once using Data Destinations, then use the same table across many projects or datasets (Big Query). 3. Updated variable names to be more consistent / better indicate their purpose. @craigrmccown this also addresses feedback [here](https://github.com/dbt-labs/hubcap/pull/281#pullrequestreview-1619059947). --- README.md | 68 ++++++++++++++++++++--------- dbt_project.yml | 2 +- models/_session_models.yml | 2 +- models/_user_models.yml | 2 +- models/staging/_events__sources.yml | 6 +-- 5 files changed, 53 insertions(+), 27 deletions(-) diff --git a/README.md b/README.md index d21c129..4fb0d05 100644 --- a/README.md +++ b/README.md @@ -7,25 +7,47 @@ This dbt package contains models, macros, seeds, and tests for [FullStory](https | sessions | Session-level aggregations, including event counts broken down by type, location and device information, duration, FullStory session replay links, etc. | users | User-level aggregations, including email addresses, location and device information, session counts, etc. +## Vars +| var | description | +| - | - | +| fullstory_events_database | The database where your FullStory events table lives. 
| +| fullstory_events_schema | The schema inside of your database where your FullStory events table lives. | +| fullstory_events_table | The name of your FullStory events table inside that schema. | +| fullstory_replay_host | The hostname to use when building links to session replay. | +| fullstory_sessions_model_name | The name of the model for the canonical list of sessions. | +| fullstory_users_model_name | The name of the model for the canonical list of users. | +| fullstory_min_event_time | All events before this date will not be considered for analysis. Use this option to limit table size. | +| fullstory_event_types | A list of event types to auto-generate rollups for in the `users` and `sessions` models. | + +> We **highly recommend** using `fullstory_events_database`, `fullstory_events_schema` and `fullstory_events_table` to indicate the location of the FullStory events table that is synced from Data Destinations. Using these variables allows you to keep the FullStory events table in a different database or schema than the one your dbt project runs in. + +#### Example use of vars for BigQuery +```yaml +vars: + fullstory_events_database: my-gcp-project + fullstory_events_schema: my-big-query-dataset + fullstory_events_table: fullstory_events_[my-org-id] +``` + +#### Example use of vars for Snowflake +```yaml +vars: + fullstory_events_database: my_database + fullstory_events_schema: my_schema + fullstory_events_table: my_table +``` + ## Supported Warehouses - BigQuery - Snowflake -## Quick Start -To deploy or update this package in your warehouse, follow these steps: -- Clone this project: `git clone https://github.com/fullstorydev/dbt_fullstory.git && cd ./dbt_fullstory` -- Create a new profile called `dbt_fullstory` in `~/.dbt/profiles.yml`. 
You can find instructions for this step for [BigQuery](https://docs.getdbt.com/docs/core/connect-data-platform/bigquery-setup) and [Snowflake](https://docs.getdbt.com/docs/core/connect-data-platform/snowflake-setup) in the official dbt documentaion. -- Install dependencies, seed, and run: `dbt deps && dbt seed && dbt run` - ### Example Profile Configurations #### BigQuery ```yaml dbt_fullstory: - target: o-abcd-na1 # Your FullStory org ID + target: prod outputs: - staging: - # The project and dataset should match Data Destinations settings - # configured within FullStory + prod: type: bigquery method: oauth project: my-gcp-project @@ -36,14 +58,9 @@ dbt_fullstory: #### SnowFlake ```yaml dbt_fullstory: - target: o-abcd-na1 # Your FullStory org ID + target: prod outputs: - staging: - # The account, database, and warehouse should match Data Destinations - # settings configured within FullStory. The schema is automatically - # created in your warehouse the first time FullStory syncs data. The - # user, password, and role should have permission to create objects - # within the specified schema. + prod: type: snowflake account: xy12345.us-east-1.aws user: my_admin_user @@ -51,7 +68,7 @@ dbt_fullstory: role: my_admin_role database: fullstory warehouse: compute_wh - schema: fullstory_o_abcd_na1 + schema: my_schema threads: 1 client_session_keep_alive: False query_tag: [fullstory_dbt] @@ -63,7 +80,11 @@ General information about dbt packages can be found [here](https://docs.getdbt.c ### Requirements - dbt version >= 1.6.0 - FullStory Data Destination events table - - In BigQuery, this table will be named `fullstory_events_o_123_na1`. + - In BigQuery, this table will be named `fullstory_events_o_123_na1` where `o-123-na1` is your org ID. + - Your org ID can be found in the URL when logged in to FullStory. + ``` + app.fullstory.com/ui/<your-org-id>/... + ``` - In Snowflake, this table will be named `events`. 
- The events table will be created the first time that FullStory syncs event data to your warehouse. @@ -71,8 +92,13 @@ General information about dbt packages can be found [here](https://docs.getdbt.c Include the following into your packages.yml file: ```yaml - - git: fullstorydev/dbt_fullstory - revision: 0.1.0 + - package: fullstorydev/dbt_fullstory + version: 0.2.0 ``` Then, run `dbt deps` to install the package. We highly recommend pinning to a specific release. Pinning your version helps prevent unintended changes to your warehouse. + +To load the seed tables, which contain information about common types, run: +```sh +dbt seed +``` diff --git a/dbt_project.yml b/dbt_project.yml index a581e4e..1eb121b 100644 --- a/dbt_project.yml +++ b/dbt_project.yml @@ -1,7 +1,7 @@ name: "dbt_fullstory" version: "0.1.0" config-version: 2 -require-dbt-version: [">=1.2.0", "<2.0.0"] +require-dbt-version: [">=1.6.0", "<2.0.0"] # This setting configures which "profile" dbt uses for this project. profile: "dbt_fullstory" diff --git a/models/_session_models.yml b/models/_session_models.yml index 1fa77ed..fcff8b6 100644 --- a/models/_session_models.yml +++ b/models/_session_models.yml @@ -423,7 +423,7 @@ models: - name: sessions access: public config: - alias: "{% if var('sessions_identifier', '') %}{{ var('sessions_identifier') }}{% else %}{% if target.type == 'bigquery' %}fullstory_sessions_{{ modules.re.sub('[^a-zA-Z\\d_\\-]', '_', target.name) }}{% else %}sessions{% endif %}{% endif %}" + alias: "{% if var('fullstory_sessions_model_name', '') %}{{ var('fullstory_sessions_model_name') }}{% else %}{% if target.type == 'bigquery' %}fullstory_sessions_{{ modules.re.sub('[^a-zA-Z\\d_\\-]', '_', target.name) }}{% else %}sessions{% endif %}{% endif %}" contract: enforce: true description: |- diff --git a/models/_user_models.yml b/models/_user_models.yml index e28dfd1..3549b35 100644 --- a/models/_user_models.yml +++ b/models/_user_models.yml @@ -376,7 +376,7 @@ models: - name: users access: 
public config: - alias: "{% if var('users_identifier', '') %}{{ var('users_identifier') }}{% else %}{% if target.type == 'bigquery' %}fullstory_users_{{ modules.re.sub('[^a-zA-Z\\d_\\-]', '_', target.name) }}{% else %}users{% endif %}{% endif %}" + alias: "{% if var('fullstory_users_model_name', '') %}{{ var('fullstory_users_model_name') }}{% else %}{% if target.type == 'bigquery' %}fullstory_users_{{ modules.re.sub('[^a-zA-Z\\d_\\-]', '_', target.name) }}{% else %}users{% endif %}{% endif %}" contract: enforce: true description: |- diff --git a/models/staging/_events__sources.yml b/models/staging/_events__sources.yml index fc50b3f..3c9318d 100644 --- a/models/staging/_events__sources.yml +++ b/models/staging/_events__sources.yml @@ -2,12 +2,12 @@ version: 2 sources: - name: fullstory - database: "{{ target.database }}" - schema: "{{ target.schema or target.dataset}}" + database: "{{ var('fullstory_events_database', target.database) }}" + schema: "{{ var('fullstory_events_schema', target.schema or target.dataset) }}" tables: - name: events description: The FullStory events table. - identifier: "{% if var('events_table', '') %}{{ var('events_table') }}{% else %}{% if target.type == 'bigquery' %}fullstory_events_{{ modules.re.sub('[^a-zA-Z\\d_\\-]', '_', target.name) }}{% else %}events{% endif %}{% endif %}" + identifier: "{% if var('fullstory_events_table', '') %}{{ var('fullstory_events_table') }}{% else %}{% if target.type == 'bigquery' %}fullstory_events_{{ modules.re.sub('[^a-zA-Z\\d_\\-]', '_', target.name) }}{% else %}events{% endif %}{% endif %}" columns: - name: event_id - name: device_id