Skip to content

Commit

Permalink
First pass modular SQL
Browse files Browse the repository at this point in the history
  • Loading branch information
keegansmith21 committed Dec 18, 2023
1 parent 3305b70 commit 89b1bad
Show file tree
Hide file tree
Showing 40 changed files with 231 additions and 304 deletions.
Original file line number Diff line number Diff line change
@@ -1,8 +1,5 @@
-- The purpose of this block of SQL is to organise the metrics from google analytics for easier consumption of downstream queries.
-- Defined in the create_oaebu_book_product_table method, in onix_workflow.py, is the value of 'google_analytics3_table_id'.
-- This will either point to 'empty_google_analytics3' (defined above as an empty row) or the name of the real data table in bigquery.
-- The reason for the choice of selecting an empty row, is that some partners will not have corresponding data to query.
-- Providng an empty row enable simplicity of the downstream queries and also means the resulting schema across all publishers is the same.
# The purpose of this block of SQL is to organise the metrics from google analytics for easier consumption of downstream queries.
# Defined in the create_oaebu_book_product_table method, in onix_workflow.py, is the value of 'google_analytics3_table_id'.
google_analytics3_grouped_metrics AS(
SELECT
publication_id,
Expand Down Expand Up @@ -148,6 +145,7 @@ google_analytics3_grouped_metrics AS(
publication_whole_or_part,
publication_format
),

google_analytics3_metrics AS (
SELECT
publication_id AS ISBN13,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,4 +35,4 @@ STRUCT(
month.google_analytics.downloads_mobi_book_country
)
) as downloads_mobi_book
) as google_analytics,
) as google_analytics
Original file line number Diff line number Diff line change
Expand Up @@ -157,4 +157,4 @@ google_analytics_downloads_mobi_book_month_country as (
UNNEST(google_analytics.downloads_mobi_book_country)
) as google
LEFT JOIN `{{ country_table_id }}` as country on country.google_analytics_name = google.country_name
),
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Google Analytics per-country metrics for the aggregated period.
# Each field concatenates the per-month country arrays across the grouped rows
# (ARRAY_CONCAT_AGG) and hands the combined array to group_counts.
# NOTE(review): group_counts is defined elsewhere; presumably it merges entries
# per country - confirm against its definition.
STRUCT(
    group_counts(ARRAY_CONCAT_AGG(month.google_analytics.views_total_country)) AS page_views,
    group_counts(ARRAY_CONCAT_AGG(month.google_analytics.downloads_total_country)) AS downloads,
    group_counts(ARRAY_CONCAT_AGG(month.google_analytics.downloads_pdf_book_country)) AS downloads_pdf_book,
    group_counts(ARRAY_CONCAT_AGG(month.google_analytics.downloads_pdf_chapter_country)) AS downloads_pdf_chapter,
    group_counts(ARRAY_CONCAT_AGG(month.google_analytics.downloads_html_chapter_country)) AS downloads_html_chapter,
    group_counts(ARRAY_CONCAT_AGG(month.google_analytics.downloads_epub_book_country)) AS downloads_epub_book,
    group_counts(ARRAY_CONCAT_AGG(month.google_analytics.downloads_epub_chapter_country)) AS downloads_epub_chapter,
    group_counts(ARRAY_CONCAT_AGG(month.google_analytics.downloads_mobi_book_country)) AS downloads_mobi_book
) AS google_analytics
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
-- The purpose of this block of SQL is to organise the metrics from google book sales for easier consumption of downstream queries.
-- Defined in the create_oaebu_book_product_table method, in onix_workflow.py, is the value of 'google_books_sales_table_id'.
-- This will either point to 'empty_google_books_sales' (defined above as an empty row) or the name of the real data table in bigquery.
-- The reason for the choice of selecting an empty row, is that some partners will not have corresponding data to query.
-- Providng an empty row enable simplicity of the downstream queries and also means the resulting schema across all publishers is the same.
# The purpose of this block of SQL is to organise the metrics from google book sales for easier consumption of downstream queries.
# Defined in the create_oaebu_book_product_table method, in onix_workflow.py, is the value of 'google_books_sales_table_id'.
# This will either point to 'empty_google_books_sales' (defined above as an empty row) or the name of the real data table in bigquery.
# The reason for selecting an empty row is that some partners will not have corresponding data to query.
# Providing an empty row simplifies the downstream queries and also means the resulting schema is the same across all publishers.
google_books_sales_metrics as (
SELECT
Primary_ISBN as ISBN13,
Expand All @@ -17,11 +17,12 @@ google_books_sales_metrics as (
Primary_ISBN,
release_date
),
-- The purpose of this block of SQL is to organise the Metadata from google book sales for easier consumption of downstream queries.
-- Defined in the create_oaebu_book_product_table method, in onix_workflow.py, is the value of 'google_books_sales_table_id'.
-- This will either point to 'empty_google_books_sales' (defined above as an empty row) or the name of the real data table in bigquery.
-- The reason for the choice of selecting an empty row, is that some partners will not have corresponding data to query.
-- Providng an empty row enable simplicity of the downstream queries and also means the resulting schema across all publishers is the same.

# The purpose of this block of SQL is to organise the Metadata from google book sales for easier consumption of downstream queries.
# Defined in the create_oaebu_book_product_table method, in onix_workflow.py, is the value of 'google_books_sales_table_id'.
# This will either point to 'empty_google_books_sales' (defined above as an empty row) or the name of the real data table in bigquery.
# The reason for selecting an empty row is that some partners will not have corresponding data to query.
# Providing an empty row simplifies the downstream queries and also means the resulting schema is the same across all publishers.
google_books_sales_metadata as (
SELECT
Primary_ISBN as ISBN13,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,5 @@
-- The purpose of this block of SQL is to organise the metrics from google book traffic for easier consumption of downstream queries.
-- Defined in the create_oaebu_book_product_table method, in onix_workflow.py, is the value of 'google_books_traffic_table_id'.
-- This will either point to 'empty_google_books_traffic' (defined above as an empty row) or the name of the real data table in bigquery.
-- The reason for the choice of selecting an empty row, is that some partners will not have corresponding data to query.
-- Providng an empty row enable simplicity of the downstream queries and also means the resulting schema across all publishers is the same.
# The purpose of this block of SQL is to organise the metrics from google book traffic for easier consumption of downstream queries.
# Defined in the create_oaebu_book_product_table method, in onix_workflow.py, is the value of 'google_books_traffic_table_id'.
google_books_traffic_metrics as (
SELECT
Primary_ISBN as ISBN13,
Expand All @@ -21,11 +18,9 @@ google_books_traffic_metrics as (
Primary_ISBN,
release_date
),
-- The purpose of this block of SQL is to organise the Metadata from google book traffic for easier consumption of downstream queries.
-- Defined in the create_oaebu_book_product_table method, in onix_workflow.py, is the value of 'google_books_traffic_table_id'.
-- This will either point to 'empty_google_books_traffic' (defined above as an empty row) or the name of the real data table in bigquery.
-- The reason for the choice of selecting an empty row, is that some partners will not have corresponding data to query.
-- Providng an empty row enable simplicity of the downstream queries and also means the resulting schema across all publishers is the same.

# The purpose of this block of SQL is to organise the Metadata from google book traffic for easier consumption of downstream queries.
# Defined in the create_oaebu_book_product_table method, in onix_workflow.py, is the value of 'google_books_traffic_table_id'.
google_books_traffic_metadata as (
SELECT
Primary_ISBN as ISBN13,
Expand All @@ -34,4 +29,4 @@ google_books_traffic_metadata as (
`{{ google_books_traffic_table_id }}`
GROUP BY
Primary_ISBN
),
)
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,4 @@ google_books_month_country as (
FROM
months,
UNNEST(google_books_sales.countries)
),
)
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
STRUCT(
SUM(month.google_books_sales.qty) as qty
) as google_books_sales,
) as google_books_sales
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,4 @@ STRUCT(
SUM(month.google_books_traffic.BV_with_Buy_Clicks) as BV_with_Buy_Clicks,
SUM(month.google_books_traffic.Buy_Link_CTR) as Buy_Link_CTR,
SUM(month.google_books_traffic.Pages_Viewed) as Pages_Viewed
) as google_books_traffic,
) as google_books_traffic
Original file line number Diff line number Diff line change
@@ -1,8 +1,5 @@
-- The purpose of this block of SQL is to organise the metrics from IRUS Fulcrum for easier consumption of downstream queries.
-- Defined in the create_oaebu_book_product_table method, in onix_workflow.py, is the value of 'irus_fulcrum_table_id'.
-- This will either point to 'empty_irus_fulcrum' (defined above as an empty row) or the name of the real data table in bigquery.
-- The reason for the choice of selecting an empty row, is that some partners will not have corresponding data to query.
-- Providng an empty row enable simplicity of the downstream queries and also means the resulting schema across all publishers is the same.
# The purpose of this block of SQL is to organise the metrics from IRUS Fulcrum for easier consumption of downstream queries.
# Defined in the create_oaebu_book_product_table method, in onix_workflow.py, is the value of 'irus_fulcrum_table_id'.
irus_fulcrum_metrics as (
SELECT
ISBN as ISBN13,
Expand All @@ -20,11 +17,9 @@ irus_fulcrum_metrics as (
ISBN,
release_date
),
-- The purpose of this block of SQL is to organise the Metadata from IRUS OAPEN for easier consumption of downstream queries.
-- Defined in the create_oaebu_book_product_table method, in onix_workflow.py, is the value of 'irus_fulcrum_table_id'.
-- This will either point to 'empty_irus_oapen' (defined above as an empty row) or the name of the real data table in bigquery.
-- The reason for the choice of selecting an empty row, is that some partners will not have corresponding data to query.
-- Providng an empty row enable simplicity of the downstream queries and also means the resulting schema across all publishers is the same.

# The purpose of this block of SQL is to organise the Metadata from IRUS Fulcrum for easier consumption of downstream queries.
# Defined in the create_oaebu_book_product_table method, in onix_workflow.py, is the value of 'irus_fulcrum_table_id'.
irus_fulcrum_metadata as (
SELECT
ISBN as ISBN13,
Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@ STRUCT(
month.irus_fulcrum.total_item_requests,
month.irus_fulcrum.unique_item_investigations,
month.irus_fulcrum.unique_item_requests
) AS irus_fulcrum,
) AS irus_fulcrum
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,4 @@ irus_fulcrum_month_country as (
FROM
months,
UNNEST(irus_fulcrum.country)
),
)
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
irus_fulcrum.total_item_requests IS NOT NULL
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@ STRUCT(
SUM(month.irus_fulcrum.total_item_requests) as total_item_requests,
SUM(month.irus_fulcrum.unique_item_investigations) as unique_item_investigations,
SUM(month.irus_fulcrum.unique_item_requests) as unique_item_requests
) as irus_fulcrum,
) as irus_fulcrum
Original file line number Diff line number Diff line change
@@ -1 +1 @@
irus_fulcrum.total_item_requests IS NOT NULL
month.irus_fulcrum IS NOT NULL
Original file line number Diff line number Diff line change
@@ -1,8 +1,5 @@
-- The purpose of this block of SQL is to organise the metrics from IRUS OAPEN for easier consumption of downstream queries.
-- Defined in the create_oaebu_book_product_table method, in onix_workflow.py, is the value of 'oapen_table_id'.
-- This will either point to 'empty_irus_oapen' (defined above as an empty row) or the name of the real data table in bigquery.
-- The reason for the choice of selecting an empty row, is that some partners will not have corresponding data to query.
-- Providng an empty row enable simplicity of the downstream queries and also means the resulting schema across all publishers is the same.
# The purpose of this block of SQL is to organise the metrics from IRUS OAPEN for easier consumption of downstream queries.
# Defined in the create_oaebu_book_product_table method, in onix_workflow.py, is the value of 'irus_oapen_table_id'.
irus_oapen_metrics as (
SELECT
ISBN as ISBN13,
Expand All @@ -23,11 +20,9 @@ irus_oapen_metrics as (
ISBN,
release_date
),
-- The purpose of this block of SQL is to organise the Metadata from IRUS OAPEN for easier consumption of downstream queries.
-- Defined in the create_oaebu_book_product_table method, in onix_workflow.py, is the value of 'irus_oapen_table_id'.
-- This will either point to 'empty_irus_oapen' (defined above as an empty row) or the name of the real data table in bigquery.
-- The reason for the choice of selecting an empty row, is that some partners will not have corresponding data to query.
-- Providng an empty row enable simplicity of the downstream queries and also means the resulting schema across all publishers is the same.

# The purpose of this block of SQL is to organise the Metadata from IRUS OAPEN for easier consumption of downstream queries.
# Defined in the create_oaebu_book_product_table method, in onix_workflow.py, is the value of 'irus_oapen_table_id'.
irus_oapen_metadata as (
SELECT
ISBN as ISBN13,
Expand All @@ -37,4 +32,4 @@ irus_oapen_metadata as (
`{{ irus_oapen_table_id }}`
GROUP BY
ISBN
),
)

This file was deleted.

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,4 @@ irus_oapen_month_country as (
FROM
months,
UNNEST(irus_oapen.country)
),
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
irus_oapen.title_requests IS NOT NULL
OR irus_oapen.total_item_requests IS NOT NULL
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@ STRUCT(
SUM(month.irus_oapen.total_item_requests) as total_item_requests,
SUM(month.irus_oapen.unique_item_investigations) as unique_item_investigations,
SUM(month.irus_oapen.unique_item_requests) as unique_item_requests
) as irus_oapen,
) as irus_oapen
Original file line number Diff line number Diff line change
@@ -1,2 +1 @@
irus_oapen.title_requests IS NOT NULL
OR irus_oapen.total_item_requests IS NOT NULL
month.irus_oapen IS NOT NULL
Original file line number Diff line number Diff line change
@@ -1,8 +1,5 @@
-- The purpose of this block of SQL is to organise the metrics from JSTOR country for easier consumption of downstream queries.
-- Defined in the create_oaebu_book_product_table method, in onix_workflow.py, is the value of 'jstor_country_table_id'.
-- This will either point to 'empty_jstor_country' (defined above as an empty row) or the name of the real data table in bigquery.
-- The reason for the choice of selecting an empty row, is that some partners will not have corresponding data to query.
-- Providng an empty row enable simplicity of the downstream queries and also means the resulting schema across all publishers is the same.
# The purpose of this block of SQL is to organise the metrics from JSTOR country for easier consumption of downstream queries.
# Defined in the create_oaebu_book_product_table method, in onix_workflow.py, is the value of 'jstor_country_table_id'.
jstor_country_metrics as (
SELECT
eISBN as ISBN13,
Expand All @@ -16,11 +13,9 @@ jstor_country_metrics as (
eISBN,
release_date
),
-- The purpose of this block of SQL is to organise the Metadata from JSTOR country for easier consumption of downstream queries.
-- Defined in the create_oaebu_book_product_table method, in onix_workflow.py, is the value of 'jstor_country_table_id'.
-- This will either point to 'empty_jstor_country' (defined above as an empty row) or the name of the real data table in bigquery.
-- The reason for the choice of selecting an empty row, is that some partners will not have corresponding data to query.
-- Providng an empty row enable simplicity of the downstream queries and also means the resulting schema across all publishers is the same.

# The purpose of this block of SQL is to organise the Metadata from JSTOR country for easier consumption of downstream queries.
# Defined in the create_oaebu_book_product_table method, in onix_workflow.py, is the value of 'jstor_country_table_id'.
jstor_country_metadata as (
SELECT
eISBN as ISBN13,
Expand All @@ -36,4 +31,4 @@ jstor_country_metadata as (
`{{ jstor_country_table_id }}`
GROUP BY
eISBN
),
)
Original file line number Diff line number Diff line change
@@ -1,8 +1,5 @@
-- The purpose of this block of SQL is to organise the metrics from JSTOR institution for easier consumption of downstream queries.
-- Defined in the create_oaebu_book_product_table method, in onix_workflow.py, is the value of 'jstor_institution_table_id'.
-- This will either point to 'empty_jstor_institution' (defined above as an empty row) or the name of the real data table in bigquery.
-- The reason for the choice of selecting an empty row, is that some partners will not have corresponding data to query.
-- Providng an empty row enable simplicity of the downstream queries and also means the resulting schema across all publishers is the same.
# The purpose of this block of SQL is to organise the metrics from JSTOR institution for easier consumption of downstream queries.
# Defined in the create_oaebu_book_product_table method, in onix_workflow.py, is the value of 'jstor_institution_table_id'.
jstor_institution_metrics as (
SELECT
eISBN as ISBN13,
Expand All @@ -16,11 +13,9 @@ jstor_institution_metrics as (
eISBN,
release_date
),
-- The purpose of this block of SQL is to organise the Metadata from JSTOR institution for easier consumption of downstream queries.
-- Defined in the create_oaebu_book_product_table method, in onix_workflow.py, is the value of 'jstor_institution_table_id'.
-- This will either point to 'empty_jstor_institution' (defined above as an empty row) or the name of the real data table in bigquery.
-- The reason for the choice of selecting an empty row, is that some partners will not have corresponding data to query.
-- Providng an empty row enable simplicity of the downstream queries and also means the resulting schema across all publishers is the same.

# The purpose of this block of SQL is to organise the Metadata from JSTOR institution for easier consumption of downstream queries.
# Defined in the create_oaebu_book_product_table method, in onix_workflow.py, is the value of 'jstor_institution_table_id'.
jstor_institution_metadata as (
SELECT
eISBN as ISBN13,
Expand All @@ -36,4 +31,4 @@ jstor_institution_metadata as (
`{{ jstor_institution_table_id }}`
GROUP BY
eISBN
),
)
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,4 @@ STRUCT(
FROM
UNNEST(month.jstor_country)
) AS Total_Item_Requests
) AS jstor,
) AS jstor
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,4 @@ jstor_month_country as (
FROM
months,
UNNEST(jstor_country)
),
)
Loading

0 comments on commit 89b1bad

Please sign in to comment.