-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
keegansmith21
committed
Dec 13, 2023
1 parent
5e6e417
commit c4b65be
Showing
21 changed files
with
1,033 additions
and
1,055 deletions.
There are no files selected for viewing
172 changes: 172 additions & 0 deletions
172
oaebu_workflows/google_analytics3_telescope/sql/bp_body_google_analytics3.sql.jinja2
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,172 @@ | ||
-- Organises the Google Analytics 3 metrics so that downstream queries can consume them easily.
-- 'google_analytics3_table_id' is defined in the create_oaebu_book_product_table method, in onix_workflow.py.
-- It points either to 'empty_google_analytics3' (defined above as an empty row) or to the real data table in BigQuery.
-- An empty row is selected because some partners have no corresponding data to query. Providing an empty row
-- keeps the downstream queries simple and means the resulting schema is the same across all publishers.
--
-- Rows are grouped by (publication_id, release_date, publication_whole_or_part, publication_format).
-- Each *_country column carries the regrouped per-country items only for groups matching its condition;
-- every other group yields NULL-valued placeholder structs (one per input row) so the column type stays
-- ARRAY<STRUCT<name STRING, value INT64>> in both branches.
-- ga3_views_field is a template variable naming the column whose .country array is aggregated.
google_analytics3_grouped_metrics AS (
    SELECT
        publication_id,
        release_date,
        publication_whole_or_part,
        publication_format,
        -- Downloads, split by format and by whole book ('whole') versus chapter ('part').
        IF(
            publication_format = 'PDF' AND publication_whole_or_part = 'whole',
            group_items_google_analytics3(ARRAY_CONCAT_AGG({{ ga3_views_field }}.country)),
            ARRAY_AGG(STRUCT(CAST(NULL AS STRING) AS name, CAST(NULL AS INT64) AS value))
        ) AS pdf_book_country,
        IF(
            publication_format = 'PDF' AND publication_whole_or_part = 'part',
            group_items_google_analytics3(ARRAY_CONCAT_AGG({{ ga3_views_field }}.country)),
            ARRAY_AGG(STRUCT(CAST(NULL AS STRING) AS name, CAST(NULL AS INT64) AS value))
        ) AS pdf_chapter_country,
        IF(
            publication_format = 'HTML' AND publication_whole_or_part = 'whole',
            group_items_google_analytics3(ARRAY_CONCAT_AGG({{ ga3_views_field }}.country)),
            ARRAY_AGG(STRUCT(CAST(NULL AS STRING) AS name, CAST(NULL AS INT64) AS value))
        ) AS html_book_country,
        IF(
            publication_format = 'HTML' AND publication_whole_or_part = 'part',
            group_items_google_analytics3(ARRAY_CONCAT_AGG({{ ga3_views_field }}.country)),
            ARRAY_AGG(STRUCT(CAST(NULL AS STRING) AS name, CAST(NULL AS INT64) AS value))
        ) AS html_chapter_country,
        IF(
            publication_format = 'EPUB' AND publication_whole_or_part = 'whole',
            group_items_google_analytics3(ARRAY_CONCAT_AGG({{ ga3_views_field }}.country)),
            ARRAY_AGG(STRUCT(CAST(NULL AS STRING) AS name, CAST(NULL AS INT64) AS value))
        ) AS epub_book_country,
        IF(
            publication_format = 'EPUB' AND publication_whole_or_part = 'part',
            group_items_google_analytics3(ARRAY_CONCAT_AGG({{ ga3_views_field }}.country)),
            ARRAY_AGG(STRUCT(CAST(NULL AS STRING) AS name, CAST(NULL AS INT64) AS value))
        ) AS epub_chapter_country,
        IF(
            publication_format = 'MOBI' AND publication_whole_or_part = 'whole',
            group_items_google_analytics3(ARRAY_CONCAT_AGG({{ ga3_views_field }}.country)),
            ARRAY_AGG(STRUCT(CAST(NULL AS STRING) AS name, CAST(NULL AS INT64) AS value))
        ) AS mobi_book_country,
        IF(
            publication_format = 'MOBI' AND publication_whole_or_part = 'part',
            group_items_google_analytics3(ARRAY_CONCAT_AGG({{ ga3_views_field }}.country)),
            ARRAY_AGG(STRUCT(CAST(NULL AS STRING) AS name, CAST(NULL AS INT64) AS value))
        ) AS mobi_chapter_country,
        -- All downloads: any of the four formats, whole book or chapter.
        IF(
            publication_format IN ('PDF', 'HTML', 'EPUB', 'MOBI')
            AND publication_whole_or_part IN ('whole', 'part'),
            group_items_google_analytics3(ARRAY_CONCAT_AGG({{ ga3_views_field }}.country)),
            ARRAY_AGG(STRUCT(CAST(NULL AS STRING) AS name, CAST(NULL AS INT64) AS value))
        ) AS downloads_total_country,
        -- Views: rows whose publication_whole_or_part is the literal '(citation)'.
        IF(
            publication_whole_or_part = '(citation)',
            group_items_google_analytics3(ARRAY_CONCAT_AGG({{ ga3_views_field }}.country)),
            ARRAY_AGG(STRUCT(CAST(NULL AS STRING) AS name, CAST(NULL AS INT64) AS value))
        ) AS views_total_country
    FROM
        `{{ google_analytics3_table_id }}`
    WHERE
        publication_type = "book"
    GROUP BY
        publication_id,
        release_date,
        publication_whole_or_part,
        publication_format
),
-- Collapses the per-format / per-part groups of google_analytics3_grouped_metrics into a single row
-- per (publication_id, release_date), exposing publication_id as ISBN13 and packing every country
-- breakdown into one `metrics` struct.
google_analytics3_metrics AS (
    SELECT
        publication_id AS ISBN13,
        release_date,
        STRUCT(
            -- NOTE(review): unlike downloads_total_country, this array is concatenated without being
            -- regrouped by name - confirm that is intended.
            ARRAY_CONCAT_AGG(views_total_country) AS views_total_country,
            -- The concatenated downloads array is regrouped so that each name appears once with a summed value.
            group_items_google_analytics3(
                ARRAY_CONCAT_AGG(downloads_total_country)
            ) AS downloads_total_country,
            ARRAY_CONCAT_AGG(pdf_book_country) AS downloads_pdf_book_country,
            ARRAY_CONCAT_AGG(pdf_chapter_country) AS downloads_pdf_chapter_country,
            ARRAY_CONCAT_AGG(html_book_country) AS downloads_html_book_country,
            ARRAY_CONCAT_AGG(html_chapter_country) AS downloads_html_chapter_country,
            ARRAY_CONCAT_AGG(epub_book_country) AS downloads_epub_book_country,
            ARRAY_CONCAT_AGG(epub_chapter_country) AS downloads_epub_chapter_country,
            ARRAY_CONCAT_AGG(mobi_book_country) AS downloads_mobi_book_country,
            ARRAY_CONCAT_AGG(mobi_chapter_country) AS downloads_mobi_chapter_country
        ) AS metrics
    FROM
        google_analytics3_grouped_metrics
    GROUP BY
        publication_id,
        release_date
)
16 changes: 16 additions & 0 deletions
16
oaebu_workflows/google_analytics3_telescope/sql/bp_functions_google_analytics3.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
-- Merges the entries of `items` that share the same name, summing their values.
--
-- Output Schema (one element per distinct name):
-- name     STRING      NULLABLE
-- value    INTEGER     NULLABLE
CREATE TEMP FUNCTION group_items_google_analytics3(items ARRAY<STRUCT<name STRING, value INT64>>) AS (
    ARRAY(
        SELECT AS STRUCT
            name,
            SUM(value) AS value
        FROM
            UNNEST(items)
        GROUP BY
            name
    )
);
35 changes: 35 additions & 0 deletions
35
oaebu_workflows/google_books_telescope/sql/bp_body_google_books_sales.sql.jinja2
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
-- Organises the Google Books sales metrics so that downstream queries can consume them easily.
-- 'google_books_sales_table_id' is defined in the create_oaebu_book_product_table method, in onix_workflow.py.
-- It points either to 'empty_google_books_sales' (defined above as an empty row) or to the real data table in BigQuery.
-- An empty row is selected because some partners have no corresponding data to query. Providing an empty row
-- keeps the downstream queries simple and means the resulting schema is the same across all publishers.
--
-- Produces one row per (Primary_ISBN, release_date): the total qty plus qty summed per Country_of_Sale.
google_books_sales_metrics AS (
    SELECT
        Primary_ISBN AS ISBN13,
        release_date,
        STRUCT(
            SUM(qty) AS qty,
            group_items_google_books_sales(
                ARRAY_AGG(STRUCT(Country_of_Sale, qty))
            ) AS countries
        ) AS metrics
    FROM
        `{{ google_books_sales_table_id }}`
    GROUP BY
        Primary_ISBN,
        release_date
),
-- Organises the Google Books sales metadata so that downstream queries can consume it easily.
-- 'google_books_sales_table_id' is defined in the create_oaebu_book_product_table method, in onix_workflow.py.
-- It points either to 'empty_google_books_sales' (defined above as an empty row) or to the real data table in BigQuery.
-- An empty row is selected because some partners have no corresponding data to query. Providing an empty row
-- keeps the downstream queries simple and means the resulting schema is the same across all publishers.
--
-- Produces one row per Primary_ISBN; MAX() picks a single value for each metadata field within the group.
google_books_sales_metadata AS (
    SELECT
        Primary_ISBN AS ISBN13,
        MAX(Imprint_Name) AS Imprint_Name,
        MAX(Title) AS Title,
        MAX(Author) AS Author
    FROM
        `{{ google_books_sales_table_id }}`
    GROUP BY
        Primary_ISBN
)
37 changes: 37 additions & 0 deletions
37
oaebu_workflows/google_books_telescope/sql/bp_body_google_books_traffic.sql.jinja2
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
-- Organises the Google Books traffic metrics so that downstream queries can consume them easily.
-- 'google_books_traffic_table_id' is defined in the create_oaebu_book_product_table method, in onix_workflow.py.
-- It points either to 'empty_google_books_traffic' (defined above as an empty row) or to the real data table in BigQuery.
-- An empty row is selected because some partners have no corresponding data to query. Providing an empty row
-- keeps the downstream queries simple and means the resulting schema is the same across all publishers.
--
-- Produces one row per (Primary_ISBN, release_date) with every traffic measure summed over the group.
google_books_traffic_metrics AS (
    SELECT
        Primary_ISBN AS ISBN13,
        release_date,
        STRUCT(
            SUM(Book_Visits_BV_) AS Book_Visits_BV_,
            SUM(BV_with_Pages_Viewed) AS BV_with_Pages_Viewed,
            SUM(Non_Unique_Buy_Clicks) AS Non_Unique_Buy_Clicks,
            SUM(BV_with_Buy_Clicks) AS BV_with_Buy_Clicks,
            -- NOTE(review): Buy_Link_CTR looks like a rate; confirm that summing it is intended.
            SUM(Buy_Link_CTR) AS Buy_Link_CTR,
            SUM(Pages_Viewed) AS Pages_Viewed
        ) AS metrics
    FROM
        `{{ google_books_traffic_table_id }}`
    GROUP BY
        Primary_ISBN,
        release_date
),
-- Organises the Google Books traffic metadata so that downstream queries can consume it easily.
-- 'google_books_traffic_table_id' is defined in the create_oaebu_book_product_table method, in onix_workflow.py.
-- It points either to 'empty_google_books_traffic' (defined above as an empty row) or to the real data table in BigQuery.
-- An empty row is selected because some partners have no corresponding data to query. Providing an empty row
-- keeps the downstream queries simple and means the resulting schema is the same across all publishers.
--
-- Produces one row per Primary_ISBN; MAX() picks a single title within the group.
google_books_traffic_metadata AS (
    SELECT
        Primary_ISBN AS ISBN13,
        MAX(title) AS Title
    FROM
        `{{ google_books_traffic_table_id }}`
    GROUP BY
        Primary_ISBN
)
19 changes: 19 additions & 0 deletions
19
oaebu_workflows/google_books_telescope/sql/bp_functions_google_books_sales.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
-- Merges the entries of `items` that share the same Country_of_Sale, summing their qty.
--
-- Output Schema (one element per distinct Country_of_Sale):
-- Country_of_Sale      STRING      NULLABLE
-- qty                  INTEGER     NULLABLE
CREATE TEMP FUNCTION group_items_google_books_sales(
    items ARRAY<STRUCT<Country_of_Sale STRING, qty INT64>>
) AS (
    ARRAY(
        SELECT AS STRUCT
            Country_of_Sale,
            SUM(qty) AS qty
        FROM
            UNNEST(items)
        GROUP BY
            Country_of_Sale
    )
);
37 changes: 37 additions & 0 deletions
37
oaebu_workflows/irus_fulcrum_telescope/sql/bp_body_irus_fulcrum.sql.jinja2
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
-- Organises the IRUS Fulcrum metrics so that downstream queries can consume them easily.
-- 'irus_fulcrum_table_id' is defined in the create_oaebu_book_product_table method, in onix_workflow.py.
-- It points either to 'empty_irus_fulcrum' (defined above as an empty row) or to the real data table in BigQuery.
-- An empty row is selected because some partners have no corresponding data to query. Providing an empty row
-- keeps the downstream queries simple and means the resulting schema is the same across all publishers.
--
-- Produces one row per (ISBN, release_date): the four usage counters summed over the group, plus the
-- concatenated country arrays regrouped by group_items_irus_fulcrum_country.
irus_fulcrum_metrics AS (
    SELECT
        ISBN AS ISBN13,
        release_date,
        STRUCT(
            SUM(total_item_investigations) AS total_item_investigations,
            SUM(total_item_requests) AS total_item_requests,
            SUM(unique_item_investigations) AS unique_item_investigations,
            SUM(unique_item_requests) AS unique_item_requests,
            group_items_irus_fulcrum_country(
                ARRAY_CONCAT_AGG(country)
            ) AS country
        ) AS metrics
    FROM
        `{{ irus_fulcrum_table_id }}`
    GROUP BY
        ISBN,
        release_date
),
-- Organises the IRUS Fulcrum metadata so that downstream queries can consume it easily.
-- 'irus_fulcrum_table_id' is defined in the create_oaebu_book_product_table method, in onix_workflow.py.
-- It points either to 'empty_irus_fulcrum' (defined above as an empty row) or to the real data table in BigQuery.
-- An empty row is selected because some partners have no corresponding data to query. Providing an empty row
-- keeps the downstream queries simple and means the resulting schema is the same across all publishers.
--
-- Produces one row per ISBN; MAX() picks a single value for each metadata field within the group.
irus_fulcrum_metadata AS (
    SELECT
        ISBN AS ISBN13,
        MAX(book_title) AS book_title,
        MAX(publisher) AS publisher
    FROM
        `{{ irus_fulcrum_table_id }}`
    GROUP BY
        ISBN
)
31 changes: 31 additions & 0 deletions
31
oaebu_workflows/irus_fulcrum_telescope/sql/bp_functions_irus_fulcrum.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
-- Merges the entries of `items` that share the same name: the four usage counters are summed and
-- MAX(code) keeps a single code per name.
--
-- Output Schema (one element per distinct name):
-- name                         STRING      NULLABLE
-- code                         STRING      NULLABLE
-- total_item_investigations    INTEGER     NULLABLE
-- total_item_requests          INTEGER     NULLABLE
-- unique_item_investigations   INTEGER     NULLABLE
-- unique_item_requests         INTEGER     NULLABLE
CREATE TEMP FUNCTION group_items_irus_fulcrum_country(
    items ARRAY<
        STRUCT<
            name STRING,
            code STRING,
            total_item_investigations INT64,
            total_item_requests INT64,
            unique_item_investigations INT64,
            unique_item_requests INT64
        >
    >
) AS (
    ARRAY(
        SELECT AS STRUCT
            name,
            MAX(code) AS code,
            SUM(total_item_investigations) AS total_item_investigations,
            SUM(total_item_requests) AS total_item_requests,
            SUM(unique_item_investigations) AS unique_item_investigations,
            SUM(unique_item_requests) AS unique_item_requests
        FROM
            UNNEST(items)
        GROUP BY
            name
    )
);
Oops, something went wrong.