From 0eb145022d36372d96faa02e199fc6f67b6eb5e0 Mon Sep 17 00:00:00 2001 From: Bill Date: Wed, 3 Jul 2024 08:37:45 +0100 Subject: [PATCH 1/3] =?UTF-8?q?=E2=9C=A8=20docs:=20add=20metric=20correlat?= =?UTF-8?q?ion=20question?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../challenging-sql-problems.md | 17 +-- .../metric-correlation--sample-input.sql | 15 +++ .../metric-correlation--sample-output.sql | 7 ++ .../problems/silver/metric-correlation.md | 72 +++++++++++ .../problems/silver/metric-correlation.sql | 114 ++++++++++++++++++ .../silver/metric-correlation--duckdb.sql | 35 ++++++ .../solutions/silver/metric-correlation.md | 39 ++++++ .../solutions/silver/metric-correlation.sql | 16 +++ mkdocs.yml | 2 + 9 files changed, 309 insertions(+), 8 deletions(-) create mode 100644 docs/challenging-sql-problems/problems/silver/metric-correlation--sample-input.sql create mode 100644 docs/challenging-sql-problems/problems/silver/metric-correlation--sample-output.sql create mode 100644 docs/challenging-sql-problems/problems/silver/metric-correlation.md create mode 100644 docs/challenging-sql-problems/problems/silver/metric-correlation.sql create mode 100644 docs/challenging-sql-problems/solutions/silver/metric-correlation--duckdb.sql create mode 100644 docs/challenging-sql-problems/solutions/silver/metric-correlation.md create mode 100644 docs/challenging-sql-problems/solutions/silver/metric-correlation.sql diff --git a/docs/challenging-sql-problems/challenging-sql-problems.md b/docs/challenging-sql-problems/challenging-sql-problems.md index f1993e7..ba034c2 100644 --- a/docs/challenging-sql-problems/challenging-sql-problems.md +++ b/docs/challenging-sql-problems/challenging-sql-problems.md @@ -29,14 +29,15 @@ As long as you know your database features and query patterns, these are fairly These require a bit more thinking. -1. [Funnel analytics](problems/silver/funnel-analytics.md) -2. 
[Bannable login activity](problems/silver/bannable-login-activity.md) -3. [Bus routes](problems/silver/bus-routes.md) -4. [Decoding datelist ints](problems/silver/decoding-datelist-ints.md) -5. [Region precipitation](problems/silver/region-precipitation.md) -6. [Predicting values](problems/silver/predicting-values.md) -7. [Mandelbrot set](problems/silver/mandelbrot-set.md) -8. [Customer sales running totals](problems/silver/customer-sales-running-totals.md) +1. [Metric correlation](problems/silver/metric-correlation.md) +2. [Funnel analytics](problems/silver/funnel-analytics.md) +3. [Bannable login activity](problems/silver/bannable-login-activity.md) +4. [Bus routes](problems/silver/bus-routes.md) +5. [Decoding datelist ints](problems/silver/decoding-datelist-ints.md) +6. [Region precipitation](problems/silver/region-precipitation.md) +7. [Predicting values](problems/silver/predicting-values.md) +8. [Mandelbrot set](problems/silver/mandelbrot-set.md) +9. [Customer sales running totals](problems/silver/customer-sales-running-totals.md) ### 🟡 Gold Tier diff --git a/docs/challenging-sql-problems/problems/silver/metric-correlation--sample-input.sql b/docs/challenging-sql-problems/problems/silver/metric-correlation--sample-input.sql new file mode 100644 index 0000000..4385c9e --- /dev/null +++ b/docs/challenging-sql-problems/problems/silver/metric-correlation--sample-input.sql @@ -0,0 +1,15 @@ +```sql +with metrics(customer_id, segment, metric_1, metric_2, metric_3, metric_4, metric_5) as ( + values + ( 1, 1, 21, 58, 66, 79, 29), + ( 2, 0, 70, 55, 79, 125, 2), + ( 3, 1, 68, 55, 10, 123, 70), + ( 4, 1, 20, 62, 59, 82, 25), + ( 5, 0, 42, 9, 80, 51, 13), + ( 6, 1, 26, 89, 17, 115, 66), + ( 7, 1, 45, 51, 90, 96, 17), + ( 8, 0, 4, 52, 47, 56, 61), + ( 9, 0, 57, 48, 82, 105, 40), + (10, 1, 17, 93, 45, 109, 76) +) +``` diff --git a/docs/challenging-sql-problems/problems/silver/metric-correlation--sample-output.sql 
b/docs/challenging-sql-problems/problems/silver/metric-correlation--sample-output.sql new file mode 100644 index 0000000..de2eb60 --- /dev/null +++ b/docs/challenging-sql-problems/problems/silver/metric-correlation--sample-output.sql @@ -0,0 +1,7 @@ +```sql +solution(segment, metric_pair, correlation) as ( + values + (0, 'metric_1, metric_3', 0.9051), + (1, 'metric_4, metric_5', 0.8357) +) +``` diff --git a/docs/challenging-sql-problems/problems/silver/metric-correlation.md b/docs/challenging-sql-problems/problems/silver/metric-correlation.md new file mode 100644 index 0000000..7afabdc --- /dev/null +++ b/docs/challenging-sql-problems/problems/silver/metric-correlation.md @@ -0,0 +1,72 @@ +# Metric correlation 🔀 + +> [!SUCCESS] Scenario +> +> A company has banded their customers into segments and calculated several metrics for each customer. +> +> The company wants to know which pairs of metrics are most correlated within each segment. + +> [!QUESTION] +> +> For each customer segment, find the highest correlated pair of metrics. +> +> The correlation should be rounded to four decimal places, and the output should keep ties in the (rounded) correlation. Don't compare a metric to itself 😄 +> +> The output should have a row per segment and metric pair, with the columns: +> +> - `segment` +> - `metric_pair` as the metric pair in the format `metric_1, metric_2`. Put the ([lexicographically](https://www.reddit.com/r/explainlikeimfive/comments/wf5s2e/eli5_what_are_lexicographical_order_in_computer/)) lower metric name on the left using your database's default [collation](https://stackoverflow.com/a/4538738/8213085) +> - `correlation` as the correlation between the two metrics, rounded to four decimal places +> +> Order the output by `segment` and `metric_pair`. + +
+Expand for the DDL +--8<-- "docs/challenging-sql-problems/problems/silver/metric-correlation.sql" +
+ +The solution can be found at: + +- [metric-correlation.md](../../solutions/silver/metric-correlation.md) + +--- + + +>? INFO: **Sample input** +> +> | customer_id | segment | metric_1 | metric_2 | metric_3 | metric_4 | metric_5 | +> |------------:|--------:|---------:|---------:|---------:|---------:|---------:| +> | 1 | 1 | 21 | 58 | 66 | 79 | 29 | +> | 2 | 0 | 70 | 55 | 79 | 125 | 2 | +> | 3 | 1 | 68 | 55 | 10 | 123 | 70 | +> | 4 | 1 | 20 | 62 | 59 | 82 | 25 | +> | 5 | 0 | 42 | 9 | 80 | 51 | 13 | +> | 6 | 1 | 26 | 89 | 17 | 115 | 66 | +> | 7 | 1 | 45 | 51 | 90 | 96 | 17 | +> | 8 | 0 | 4 | 52 | 47 | 56 | 61 | +> | 9 | 0 | 57 | 48 | 82 | 105 | 40 | +> | 10 | 1 | 17 | 93 | 45 | 109 | 76 | +> +--8<-- "docs/challenging-sql-problems/problems/silver/metric-correlation--sample-input.sql" + + +>? INFO: **Sample output** +> +> | segment | metric_pair | correlation | +> |--------:|:-------------------|------------:| +> | 0 | metric_1, metric_3 | 0.9051 | +> | 1 | metric_4, metric_5 | 0.8357 | +> +--8<-- "docs/challenging-sql-problems/problems/silver/metric-correlation--sample-output.sql" + + +>? TIP: **Hint 1** +> +> Use a correlation function, usually called `CORR`, to calculate the correlation between two metrics. +> +> If you're using a database that doesn't have a built-in correlation function, you can try to calculate it manually -- but I'd instead recommend skipping this question. + + +>? TIP: **Hint 2** +> +> To get every pair of metrics "side by side" for the `CORR` function, unpivot the table so that each metric is in its own row and then join the resulting table to itself on the `segment` and `customer_id` columns. 
diff --git a/docs/challenging-sql-problems/problems/silver/metric-correlation.sql b/docs/challenging-sql-problems/problems/silver/metric-correlation.sql new file mode 100644 index 0000000..5acc90d --- /dev/null +++ b/docs/challenging-sql-problems/problems/silver/metric-correlation.sql @@ -0,0 +1,114 @@ +```sql +create table metrics ( + customer_id integer primary key, + segment integer not null, + metric_1 integer not null, + metric_2 integer not null, + metric_3 integer not null, + metric_4 integer not null, + metric_5 integer not null +); +insert into metrics +values + ( 1, 7, 17, 93, 23, 110, 41), + ( 2, 4, 22, 67, 38, 89, 37), + ( 3, 6, 62, 30, 90, 92, 178), + ( 4, 1, 20, 82, 87, 102, 37), + ( 5, 1, 7, 34, 50, 41, 77), + ( 6, 8, 49, 6, 24, 55, 19), + ( 7, 2, 19, 64, 63, 83, 9), + ( 8, 4, 46, 59, 21, 105, 64), + ( 9, 1, 28, 74, 10, 103, 59), + ( 10, 6, 92, 77, 28, 170, 52), + ( 11, 7, 27, 78, 33, 106, 77), + ( 12, 4, 10, 17, 97, 28, 88), + ( 13, 6, 47, 74, 37, 122, 76), + ( 14, 1, 93, 7, 69, 100, 8), + ( 15, 4, 93, 38, 71, 131, 71), + ( 16, 7, 21, 58, 43, 80, 74), + ( 17, 1, 37, 7, 54, 44, 15), + ( 18, 2, 58, 94, 62, 152, 99), + ( 19, 7, 89, 52, 88, 142, 71), + ( 20, 5, 100, 7, 48, 107, 99), + ( 21, 7, 50, 89, 31, 139, 69), + ( 22, 5, 2, 28, 40, 29, 89), + ( 23, 2, 78, 74, 52, 153, 39), + ( 24, 3, 61, 65, 27, 126, 51), + ( 25, 10, 48, 98, 93, 145, 93), + ( 26, 9, 60, 21, 54, 81, 85), + ( 27, 4, 19, 48, 19, 67, 25), + ( 28, 3, 56, 26, 53, 83, 98), + ( 29, 4, 32, 43, 86, 75, 67), + ( 30, 6, 41, 1, 66, 43, 132), + ( 31, 7, 97, 32, 91, 129, 4), + ( 32, 0, 91, 85, 52, 176, 65), + ( 33, 10, 59, 66, 18, 125, 88), + ( 34, 5, 83, 35, 77, 118, 153), + ( 35, 2, 38, 52, 32, 89, 24), + ( 36, 0, 10, 0, 75, 10, 62), + ( 37, 6, 19, 92, 31, 111, 53), + ( 38, 7, 99, 3, 24, 102, 71), + ( 39, 2, 54, 41, 61, 96, 17), + ( 40, 6, 75, 61, 50, 136, 108), + ( 41, 4, 88, 46, 68, 133, 38), + ( 42, 0, 87, 60, 32, 146, 27), + ( 43, 9, 41, 25, 91, 65, 34), + ( 44, 7, 76, 28, 39, 103, 56), + ( 
45, 2, 18, 14, 13, 31, 29), + ( 46, 8, 26, 38, 1, 64, 53), + ( 47, 10, 90, 53, 30, 143, 56), + ( 48, 4, 3, 20, 57, 23, 32), + ( 49, 7, 28, 5, 71, 34, 98), + ( 50, 0, 32, 37, 30, 69, 82), + ( 51, 3, 69, 35, 63, 105, 125), + ( 52, 4, 67, 70, 41, 136, 70), + ( 53, 8, 53, 68, 67, 122, 58), + ( 54, 8, 14, 71, 95, 85, 13), + ( 55, 5, 55, 88, 67, 143, 133), + ( 56, 7, 42, 80, 8, 121, 2), + ( 57, 10, 69, 79, 30, 148, 99), + ( 58, 3, 87, 57, 69, 144, 132), + ( 59, 7, 59, 68, 80, 128, 58), + ( 60, 5, 34, 36, 59, 70, 118), + ( 61, 6, 54, 94, 25, 148, 44), + ( 62, 10, 62, 45, 68, 107, 7), + ( 63, 6, 97, 98, 99, 195, 201), + ( 64, 0, 64, 25, 43, 89, 25), + ( 65, 7, 15, 38, 51, 53, 21), + ( 66, 3, 37, 15, 1, 51, 2), + ( 67, 8, 34, 64, 12, 98, 68), + ( 68, 3, 85, 58, 30, 143, 58), + ( 69, 7, 21, 90, 79, 111, 55), + ( 70, 5, 43, 64, 18, 107, 41), + ( 71, 5, 53, 85, 22, 138, 45), + ( 72, 10, 67, 49, 70, 117, 11), + ( 73, 10, 97, 5, 6, 102, 11), + ( 74, 7, 26, 59, 42, 85, 55), + ( 75, 6, 5, 25, 92, 30, 176), + ( 76, 4, 76, 26, 3, 102, 95), + ( 77, 1, 18, 53, 95, 71, 8), + ( 78, 0, 79, 51, 82, 130, 74), + ( 79, 1, 72, 63, 3, 136, 48), + ( 80, 9, 45, 24, 5, 70, 47), + ( 81, 0, 46, 40, 19, 86, 73), + ( 82, 8, 34, 72, 17, 107, 54), + ( 83, 7, 36, 54, 47, 90, 3), + ( 84, 3, 50, 57, 8, 107, 15), + ( 85, 2, 66, 11, 7, 77, 53), + ( 86, 0, 0, 82, 95, 83, 5), + ( 87, 4, 13, 38, 16, 51, 14), + ( 88, 6, 61, 10, 31, 72, 56), + ( 89, 7, 21, 17, 34, 38, 34), + ( 90, 3, 77, 31, 96, 108, 197), + ( 91, 3, 90, 27, 44, 117, 79), + ( 92, 0, 87, 24, 64, 110, 38), + ( 93, 6, 90, 32, 98, 122, 193), + ( 94, 10, 82, 65, 19, 147, 48), + ( 95, 1, 58, 93, 68, 152, 9), + ( 96, 7, 42, 43, 16, 84, 27), + ( 97, 5, 29, 31, 1, 60, 9), + ( 98, 2, 49, 70, 58, 119, 6), + ( 99, 6, 51, 51, 86, 102, 162), + (100, 4, 18, 35, 85, 53, 30) +; +``` diff --git a/docs/challenging-sql-problems/solutions/silver/metric-correlation--duckdb.sql b/docs/challenging-sql-problems/solutions/silver/metric-correlation--duckdb.sql new file 
mode 100644 index 0000000..b048240 --- /dev/null +++ b/docs/challenging-sql-problems/solutions/silver/metric-correlation--duckdb.sql @@ -0,0 +1,35 @@ +```sql +with + +unpivoted as ( + unpivot metrics + on columns(* exclude (segment, customer_id)) + into + name metric_name + value metric_value +), + +joined as ( + select + l.customer_id, + l.segment, + l.metric_name as l_metric_name, + r.metric_name as r_metric_name, + l.metric_value as l_metric_value, + r.metric_value as r_metric_value + from unpivoted as l + inner join unpivoted as r + on l.customer_id = r.customer_id + and l.segment = r.segment + and l.metric_name < r.metric_name /* No point in pairing metrics twice */ +) + +select + segment, + l_metric_name || ', ' || r_metric_name as metric_pair, + round(corr(l_metric_value, r_metric_value), 4) as correlation +from joined +group by segment, l_metric_name, r_metric_name +qualify correlation = max(correlation) over (partition by segment) +order by segment, metric_pair +``` diff --git a/docs/challenging-sql-problems/solutions/silver/metric-correlation.md b/docs/challenging-sql-problems/solutions/silver/metric-correlation.md new file mode 100644 index 0000000..96e6bb1 --- /dev/null +++ b/docs/challenging-sql-problems/solutions/silver/metric-correlation.md @@ -0,0 +1,39 @@ +# Metric correlation 🔀 + +> [!TIP] +> +> Solution to the following problem: +> +> - [metric-correlation.md](../../problems/silver/metric-correlation.md) + +## Result Set + +Regardless of the database (since they should all use a similar collation), the result set should look like: + +| segment | metric_pair | correlation | +| ------: | :----------------- | ----------: | +| 0 | metric_1, metric_4 | 0.8195 | +| 1 | metric_2, metric_4 | 0.6741 | +| 2 | metric_2, metric_4 | 0.8539 | +| 3 | metric_3, metric_5 | 0.9975 | +| 4 | metric_1, metric_4 | 0.9122 | +| 5 | metric_3, metric_5 | 0.9985 | +| 6 | metric_3, metric_5 | 0.9961 | +| 7 | metric_2, metric_4 | 0.5686 | +| 8 | metric_2, metric_4 | 0.8405 | 
+| 9 | metric_1, metric_5 | 0.9989 | +| 10 | metric_2, metric_5 | 0.8042 | + +
+Expand for the DDL +--8<-- "docs/challenging-sql-problems/solutions/silver/metric-correlation.sql" +
+ +## Solution + +Some SQL solutions per database are provided below. + + +> SUCCESS: **DuckDB** +> +--8<-- "docs/challenging-sql-problems/solutions/silver/metric-correlation--duckdb.sql" diff --git a/docs/challenging-sql-problems/solutions/silver/metric-correlation.sql b/docs/challenging-sql-problems/solutions/silver/metric-correlation.sql new file mode 100644 index 0000000..e1f8d76 --- /dev/null +++ b/docs/challenging-sql-problems/solutions/silver/metric-correlation.sql @@ -0,0 +1,16 @@ +```sql +solution(segment, metric_pair, correlation) as ( + values + ( 0, 'metric_1, metric_4', 0.8195), + ( 1, 'metric_2, metric_4', 0.6741), + ( 2, 'metric_2, metric_4', 0.8539), + ( 3, 'metric_3, metric_5', 0.9975), + ( 4, 'metric_1, metric_4', 0.9122), + ( 5, 'metric_3, metric_5', 0.9985), + ( 6, 'metric_3, metric_5', 0.9961), + ( 7, 'metric_2, metric_4', 0.5686), + ( 8, 'metric_2, metric_4', 0.8405), + ( 9, 'metric_1, metric_5', 0.9989), + (10, 'metric_2, metric_5', 0.8042) +) +``` diff --git a/mkdocs.yml b/mkdocs.yml index 9e1866e..77f0707 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -85,6 +85,7 @@ nav: - challenging-sql-problems/problems/bronze/fibonacci-sequence.md # recursive CTE - challenging-sql-problems/problems/bronze/uk-bank-holidays.md # unpivot and unnest - ⚪ Silver Tier: + - challenging-sql-problems/problems/silver/metric-correlation.md # unpivot, corr - challenging-sql-problems/problems/silver/funnel-analytics.md # custom axis - challenging-sql-problems/problems/silver/bannable-login-activity.md # window functions (gaps and islands, range between) - challenging-sql-problems/problems/silver/bus-routes.md # recursive CTE @@ -108,6 +109,7 @@ nav: - challenging-sql-problems/solutions/bronze/fibonacci-sequence.md - challenging-sql-problems/solutions/bronze/uk-bank-holidays.md - ⚪ Silver Tier: + - challenging-sql-problems/solutions/silver/metric-correlation.md - challenging-sql-problems/solutions/silver/funnel-analytics.md - 
challenging-sql-problems/solutions/silver/bannable-login-activity.md - challenging-sql-problems/solutions/silver/bus-routes.md From 70443a191fc8f24660ba043593b0ba0303dae563 Mon Sep 17 00:00:00 2001 From: Bill Date: Sat, 6 Jul 2024 10:37:11 +0100 Subject: [PATCH 2/3] =?UTF-8?q?=E2=99=BB=20refactor:=20avoid=20duplicating?= =?UTF-8?q?=20tables?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../problems/gold/encoding-datelist-ints.md | 17 +---------------- .../problems/gold/loan-repayment-schedule.md | 7 +------ 2 files changed, 2 insertions(+), 22 deletions(-) diff --git a/docs/challenging-sql-problems/problems/gold/encoding-datelist-ints.md b/docs/challenging-sql-problems/problems/gold/encoding-datelist-ints.md index 7030c20..3b7ce49 100644 --- a/docs/challenging-sql-problems/problems/gold/encoding-datelist-ints.md +++ b/docs/challenging-sql-problems/problems/gold/encoding-datelist-ints.md @@ -84,22 +84,7 @@ A worked example is provided below to help illustrate the encoding. ### Worked example -To help illustrate the encoding, consider the following events: - -| event_id | user_id | event_datetime | event_type | -| -------: | ------: | :------------------ | :--------- | -| 1 | 1 | 2024-01-01 01:03:00 | login | -| 2 | 1 | 2024-01-04 01:02:00 | login | -| 3 | 1 | 2024-01-05 01:01:00 | login | -| 4 | 1 | 2024-01-06 01:00:00 | logout | -| 5 | 1 | 2024-01-07 01:05:00 | logout | -| 6 | 1 | 2024-01-07 01:06:00 | logout | -| 7 | 2 | 2024-01-08 01:07:00 | login | -| 8 | 2 | 2024-01-09 01:08:00 | login | -| 9 | 2 | 2024-01-10 01:09:00 | login | -| 10 | 2 | 2024-01-10 01:10:00 | logout | -| 11 | 2 | 2024-01-11 01:11:00 | logout | -| 12 | 2 | 2024-01-12 01:12:00 | logout | +To help illustrate the encoding, consider the following events in the **Sample input**. We'll walk through each of the events and how they contribute to different sessions. 
diff --git a/docs/challenging-sql-problems/problems/gold/loan-repayment-schedule.md b/docs/challenging-sql-problems/problems/gold/loan-repayment-schedule.md index a4e26e5..f71eeb1 100644 --- a/docs/challenging-sql-problems/problems/gold/loan-repayment-schedule.md +++ b/docs/challenging-sql-problems/problems/gold/loan-repayment-schedule.md @@ -101,12 +101,7 @@ A worked example is provided below to help illustrate the loan calculations. ### Worked example -To help illustrate the loan calculations, consider the following loan: - -- `loan_value`: 10,000.00 -- `interest_rate`: 0.01 -- `repayments`: 6 -- `start_date`: 2024-01-01 +To help illustrate the loan calculations, consider the loan in the **Sample input**. A loan with these details will have a monthly repayment value of 1,725.48 (rounded to 2 decimal places). From bfc41b4c8a9338ad87f4af43a40c78eb3f5382ca Mon Sep 17 00:00:00 2001 From: Bill Date: Sat, 6 Jul 2024 10:37:45 +0100 Subject: [PATCH 3/3] =?UTF-8?q?=E2=9C=A8=20docs:=20add=20supply=20chain=20?= =?UTF-8?q?network=20question?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../challenging-sql-problems.md | 7 +- .../supply-chain-network--sample-input.sql | 39 ++++ .../supply-chain-network--sample-output.sql | 9 + .../problems/gold/supply-chain-network.md | 207 ++++++++++++++++++ .../problems/gold/supply-chain-network.sql | 89 ++++++++ .../solutions/gold/supply-chain-network.md | 27 +++ mkdocs.yml | 6 +- 7 files changed, 379 insertions(+), 5 deletions(-) create mode 100644 docs/challenging-sql-problems/problems/gold/supply-chain-network--sample-input.sql create mode 100644 docs/challenging-sql-problems/problems/gold/supply-chain-network--sample-output.sql create mode 100644 docs/challenging-sql-problems/problems/gold/supply-chain-network.md create mode 100644 docs/challenging-sql-problems/problems/gold/supply-chain-network.sql create mode 100644 docs/challenging-sql-problems/solutions/gold/supply-chain-network.md 
diff --git a/docs/challenging-sql-problems/challenging-sql-problems.md b/docs/challenging-sql-problems/challenging-sql-problems.md index ba034c2..2c554cf 100644 --- a/docs/challenging-sql-problems/challenging-sql-problems.md +++ b/docs/challenging-sql-problems/challenging-sql-problems.md @@ -43,9 +43,10 @@ These require a bit more thinking. Expect to spend a bit of time on these. -1. [Encoding datelist ints](problems/gold/encoding-datelist-ints.md) -2. [Loan repayment schedules](problems/gold/loan-repayment-schedule.md) -3. [Travel plans](problems/gold/travel-plans.md) +1. [Loan repayment schedules](problems/gold/loan-repayment-schedule.md) +2. [Supply chain network](problems/gold/supply-chain-network.md) +3. [Encoding datelist ints](problems/gold/encoding-datelist-ints.md) +4. [Travel plans](problems/gold/travel-plans.md) --- diff --git a/docs/challenging-sql-problems/problems/gold/supply-chain-network--sample-input.sql b/docs/challenging-sql-problems/problems/gold/supply-chain-network--sample-input.sql new file mode 100644 index 0000000..e5e2e99 --- /dev/null +++ b/docs/challenging-sql-problems/problems/gold/supply-chain-network--sample-input.sql @@ -0,0 +1,39 @@ +```sql +with + +locations(location_id, location_type) as ( + values + (1, 'supplier'), + (2, 'supplier'), + (3, 'depot'), + (4, 'depot'), + (5, 'store') +), + +deliveries(delivery_date, from_location_id, to_location_id, product_id, quantity) as ( + values + ('2024-01-01 01:23:53'::timestamp, 1, 3, 123, 25), + ('2024-01-01 06:27:54'::timestamp, 2, 4, 123, 25), + ('2024-01-01 12:27:39'::timestamp, 4, 5, 123, 25), + ('2024-01-01 17:12:59'::timestamp, 1, 3, 123, 30), + ('2024-01-02 01:27:57'::timestamp, 3, 5, 123, 25), + ('2024-01-02 05:16:08'::timestamp, 3, 4, 123, 30), + ('2024-01-02 05:40:53'::timestamp, 2, 3, 123, 20), + ('2024-01-02 07:29:53'::timestamp, 1, 4, 123, 30), + ('2024-01-02 09:22:53'::timestamp, 3, 5, 123, 20), + ('2024-01-02 18:28:39'::timestamp, 4, 5, 123, 60) +), + +sales(sale_datetime, 
store_id, product_id, quantity) as ( + values + ('2024-01-01 14:56:12'::timestamp, 5, 123, 5), + ('2024-01-01 16:28:24'::timestamp, 5, 123, 3), + ('2024-01-01 16:35:38'::timestamp, 5, 123, 4), + ('2024-01-01 20:13:46'::timestamp, 5, 123, 2), + ('2024-01-02 09:37:11'::timestamp, 5, 123, 12), + ('2024-01-02 14:02:57'::timestamp, 5, 123, 30), + ('2024-01-02 14:21:39'::timestamp, 5, 123, 3), + ('2024-01-02 16:44:26'::timestamp, 5, 123, 8), + ('2024-01-02 18:28:37'::timestamp, 5, 123, 2) +) +``` diff --git a/docs/challenging-sql-problems/problems/gold/supply-chain-network--sample-output.sql b/docs/challenging-sql-problems/problems/gold/supply-chain-network--sample-output.sql new file mode 100644 index 0000000..62b842e --- /dev/null +++ b/docs/challenging-sql-problems/problems/gold/supply-chain-network--sample-output.sql @@ -0,0 +1,9 @@ +```sql +solution(stock_date, store_id, supplier_id, stock_volume, stock_proportion) as ( + values + ('2024-01-01'::date, 5, 1, 0, 0.00), + ('2024-01-01'::date, 5, 2, 11, 100.00), + ('2024-01-02'::date, 5, 1, 30, 49.18), + ('2024-01-02'::date, 5, 2, 31, 50.82) +) +``` diff --git a/docs/challenging-sql-problems/problems/gold/supply-chain-network.md b/docs/challenging-sql-problems/problems/gold/supply-chain-network.md new file mode 100644 index 0000000..082cd42 --- /dev/null +++ b/docs/challenging-sql-problems/problems/gold/supply-chain-network.md @@ -0,0 +1,207 @@ +# Supply chain network 🚛 + +> [!SUCCESS] Scenario +> +> A supermarket's supply chain has three main components: stores, depots, and suppliers. +> +> In general, stock is sent from a supplier to a depot, and then from the depot to a store; however, there are cases where suppliers send stock directly to stores and depots send stock to other depots. 
+> +> For example: +> +> ```mermaid +> graph LR +> supplier_2 ----> store_6 +> supplier_2 ---> depot_5 +> supplier_2 ---> depot_4 +> supplier_1 --> depot_3 +> supplier_1 ---> depot_4 +> depot_5 --> depot_4 +> depot_5 ---> store_6 +> depot_5 ---> store_7 +> depot_5 ---> store_8 +> depot_4 ---> store_6 +> depot_4 ---> store_7 +> depot_4 ---> store_8 +> depot_3 --> depot_5 +> depot_3 ---> store_6 +> depot_3 ---> store_7 +> depot_3 ---> store_8 +> ``` +> +> Although the supermarket knows how much stock is transported between locations, it doesn't know how much of each stock came from each supplier. +> +> This makes it difficult to report various metrics to the suppliers, like the stock balances and sales volumes of their products. + +> [!QUESTION] +> +> Determine what the most likely proportion of stock in a store at the end of each day is from each supplier. +> +> Assume that stock moves in a queue ([first in, first out](https://en.wikipedia.org/wiki/Stock_rotation)) in both the depots and the stores. +> +> The output should have a row per store per supplier per day, with the columns: +> +> - `stock_date` +> - `store_id` as the ID of the store +> - `supplier_id` as the ID of the supplier +> - `stock_volume` as the derived volume of stock in the store from the supplier at the end of the day +> - `stock_proportion` as the derived proportion of stock in the store from the supplier. Express this as a percentage rounded to two decimal places +> +> Order the output by `stock_date`, `store_id`, and `supplier_id`. +> +> You can choose to show stores that have no stock from a supplier on a given day (i.e., you can show a row with a `stock_volume` of 0 or not show the row at all, whatever is easiest for you). + +
+Expand for the DDL +--8<-- "docs/challenging-sql-problems/problems/gold/supply-chain-network.sql" +
+ +The solution can be found at: + +- [supply-chain-network.md](../../solutions/gold/supply-chain-network.md) + +A worked example is provided below to help illustrate the "shuffling" within the locations. + +--- + + +>? INFO: **Sample input** +> +> **Locations** +> +> | location_id | location_type | +> |------------:|:--------------| +> | 1 | supplier | +> | 2 | supplier | +> | 3 | depot | +> | 4 | depot | +> | 5 | store | +> +> **Deliveries** +> +> | delivery_date | from_location_id | to_location_id | product_id | quantity | +> |:--------------------|-----------------:|----------------:|-----------:|---------:| +> | 2024-01-01 01:23:53 | 1 | 3 | 123 | 25 | +> | 2024-01-01 06:27:54 | 2 | 4 | 123 | 25 | +> | 2024-01-01 12:27:39 | 4 | 5 | 123 | 25 | +> | 2024-01-01 17:12:59 | 1 | 3 | 123 | 30 | +> | 2024-01-02 01:27:57 | 3 | 5 | 123 | 25 | +> | 2024-01-02 05:16:08 | 3 | 4 | 123 | 30 | +> | 2024-01-02 05:40:53 | 2 | 3 | 123 | 20 | +> | 2024-01-02 07:29:53 | 1 | 4 | 123 | 30 | +> | 2024-01-02 09:22:53 | 3 | 5 | 123 | 20 | +> | 2024-01-02 18:28:39 | 4 | 5 | 123 | 60 | +> +> **Sales** +> +> | sale_datetime | store_id | product_id | quantity | +> |:--------------------|---------:|-----------:|---------:| +> | 2024-01-01 14:56:12 | 5 | 123 | 5 | +> | 2024-01-01 16:28:24 | 5 | 123 | 3 | +> | 2024-01-01 16:35:38 | 5 | 123 | 4 | +> | 2024-01-01 20:13:46 | 5 | 123 | 2 | +> | 2024-01-02 09:37:11 | 5 | 123 | 12 | +> | 2024-01-02 14:02:57 | 5 | 123 | 30 | +> | 2024-01-02 14:21:39 | 5 | 123 | 3 | +> | 2024-01-02 16:44:26 | 5 | 123 | 8 | +> | 2024-01-02 18:28:37 | 5 | 123 | 2 | +> +> **Network diagram** +> +> ```mermaid +> graph LR +> supplier_1 --> depot_3 +> supplier_1 --> depot_4 +> supplier_2 --> depot_3 +> supplier_2 --> depot_4 +> depot_3 --> depot_4 +> depot_3 --> store_5 +> depot_4 --> store_5 +> ``` +> +--8<-- "docs/challenging-sql-problems/problems/gold/supply-chain-network--sample-input.sql" + + +>? 
INFO: **Sample output** +> +>| stock_date | store_id | supplier_id | stock_volume | stock_proportion | +>|:-----------|---------:|------------:|-------------:|-----------------:| +>| 2024-01-01 | 5 | 1 | 0 | 0.00 | +>| 2024-01-01 | 5 | 2 | 11 | 100.00 | +>| 2024-01-02 | 5 | 1 | 30 | 49.18 | +>| 2024-01-02 | 5 | 2 | 31 | 50.82 | +> +--8<-- "docs/challenging-sql-problems/problems/gold/supply-chain-network--sample-output.sql" + + +>? TIP: **Hint 1** +> +> (to be added) + + +>? TIP: **Hint 2** +> +> (to be added) + +--- + +### Worked example + +To help illustrate the stock movement within the locations, consider the locations and deliveries in the **Sample input**. + +We'll walk through each of the deliveries and how they contribute to end-of-day stock levels. + +Since each delivery and sale correspond to the same product, we'll omit mentioning the product ID in the following walkthrough. + +#### 2024-01-01 + +First, consider the deliveries: + +- **Supplier 1** sends 25 units to **Depot 3**; **Depot 3** has 25 units from **Supplier 1** and 0 units from **Supplier 2**. +- **Supplier 2** sends 25 units to **Depot 4**; **Depot 4** has 0 units from **Supplier 1** and 25 units from **Supplier 2**. +- **Depot 4** sends 25 units to **Store 5**; all 25 units are originally from **Supplier 2** so: + - **Store 5** has 0 units from **Supplier 1** and 25 units from **Supplier 2**. + - **Depot 4** has 0 units from either supplier. +- **Supplier 1** sends 30 units to **Depot 3**; **Depot 3** has 55 units from **Supplier 1** and 0 units from **Supplier 2**. + +Then the sales, which we can roll up to the end of the day: + +- **Store 5** sells 14 units throughout the day; all units are from **Supplier 2** so **Store 5** has 0 units from **Supplier 1** and 11 units from **Supplier 2**. 
+ +Therefore, at the end of 2024-01-01, the proportion for **Store 5** is 100% from **Supplier 2**: + +| stock_date | store_id | supplier_id | stock_volume | stock_proportion | +| :--------- | -------: | ----------: | -----------: | ---------------: | +| 2024-01-01 | 5 | 1 | 0 | 0.00 | +| 2024-01-01 | 5 | 2 | 11 | 100.00 | + +#### 2024-01-02 + +First, consider the deliveries: + +- **Depot 3** sends 25 units to **Store 5**; all 25 units are from **Supplier 2** so: + - **Store 5** has 0 units from **Supplier 1** and 36 units from **Supplier 2**. + - **Depot 3** has 30 units from **Supplier 1** and 0 units from **Supplier 2**. +- **Depot 3** sends 30 units to **Depot 4**; all 30 units are from **Supplier 2** so: + - **Depot 4** has 0 units from **Supplier 1** and 30 units from **Supplier 2**. + - **Depot 3** has 0 units from either supplier. +- **Supplier 2** sends 20 units to **Depot 3**; **Depot 3** has 20 units from **Supplier 2** and 0 units from **Supplier 1**. +- **Supplier 1** sends 30 units to **Depot 4**; **Depot 4** has 30 units from **Supplier 1** and 30 units from **Supplier 2**. The 30 units from **Supplier 2** are first in the queue, followed by the 30 units from **Supplier 1**. +- **Depot 3** sends 20 units to **Store 5**; all 20 units are from **Supplier 2** so: + - **Store 5** has 0 units from **Supplier 1** and 56 units from **Supplier 2**. + - **Depot 3** has 0 units from either supplier. +- **Depot 4** sends 60 units to **Store 5**; 30 units are from **Supplier 1** and 30 units are from **Supplier 2** so: + - **Store 5** has 30 units from **Supplier 1** and 86 units from **Supplier 2**. The existing 56 units from **Supplier 2** are first in the queue, followed by the new 30 units from **Supplier 2**, followed by the 30 units from **Supplier 1**. + - **Depot 4** has 0 units from either supplier. 
+ +Then the sales, which we can roll up to the end of the day: + +- **Store 5** sells 55 units throughout the day; all 86 units from **Supplier 2** are first in the queue, so **Store 5** has 30 units from **Supplier 1** and 31 units from **Supplier 2**. + +Therefore, at the end of 2024-01-02, the proportion for **Store 5** is 49.18% from **Supplier 1** and 50.82% from **Supplier 2**: + +| stock_date | store_id | supplier_id | stock_volume | stock_proportion | +| :--------- | -------: | ----------: | -----------: | ---------------: | +| 2024-01-02 | 5 | 1 | 30 | 49.18 | +| 2024-01-02 | 5 | 2 | 31 | 50.82 | + +Combined with the output from 2024-01-01, the output is the same as the **Sample output**. diff --git a/docs/challenging-sql-problems/problems/gold/supply-chain-network.sql b/docs/challenging-sql-problems/problems/gold/supply-chain-network.sql new file mode 100644 index 0000000..0d69331 --- /dev/null +++ b/docs/challenging-sql-problems/problems/gold/supply-chain-network.sql @@ -0,0 +1,89 @@ +```sql +create table locations ( + location_id integer primary key, + location_type varchar check (location_type in ('supplier', 'depot', 'store')) +); +create table deliveries ( + delivery_datetime timestamp, + from_location_id integer references locations(location_id), + to_location_id integer references locations(location_id), + product_id integer, + quantity integer not null, + primary key (delivery_datetime, from_location_id, to_location_id, product_id) +); +create table sales ( + sale_datetime timestamp, + store_id integer references locations(location_id), + product_id integer, + quantity integer not null, + primary key (sale_datetime, store_id, product_id) +); + +insert into locations +values + (1, 'supplier'), + (2, 'supplier'), + (3, 'depot'), + (4, 'depot'), + (5, 'depot'), + (6, 'store'), + (7, 'store'), + (8, 'store') +; +insert into deliveries +values + ('2024-01-01 01:10:50', 1, 3, 1001, 25), + ('2024-01-01 01:23:53', 1, 4, 1001, 25), + ('2024-01-01 
04:54:05', 2, 4, 1001, 20), + ('2024-01-01 16:23:50', 2, 5, 1001, 20), + ('2024-01-01 20:49:37', 2, 6, 1001, 10), + ('2024-01-02 04:46:17', 3, 7, 1001, 10), + ('2024-01-02 05:10:39', 3, 8, 1001, 10), + ('2024-01-02 09:44:57', 4, 6, 1001, 35), + ('2024-01-02 11:08:09', 5, 6, 1001, 10), + ('2024-01-02 11:47:35', 5, 7, 1001, 5), + ('2024-01-02 13:06:56', 5, 8, 1001, 5), + ('2024-01-02 14:18:25', 3, 5, 1001, 5), + ('2024-01-02 15:58:54', 1, 3, 1001, 30), + ('2024-01-02 18:22:16', 2, 4, 1001, 25), + ('2024-01-02 23:16:51', 2, 5, 1001, 25), + ('2024-01-03 12:43:57', 3, 6, 1001, 25), + ('2024-01-03 14:55:35', 4, 7, 1001, 20), + ('2024-01-03 15:49:15', 4, 8, 1001, 15), + ('2024-01-03 18:07:21', 5, 8, 1001, 20), + ('2024-01-03 18:12:31', 5, 4, 1001, 5), + ('2024-01-03 19:44:16', 1, 3, 1001, 20), + ('2024-01-03 19:37:32', 1, 4, 1001, 30), + ('2024-01-03 22:33:48', 2, 6, 1001, 20), + ('2024-01-04 02:46:31', 3, 6, 1001, 15), + ('2024-01-04 05:58:24', 3, 8, 1001, 10), + ('2024-01-04 06:04:52', 4, 7, 1001, 25), + ('2024-01-04 13:32:47', 4, 8, 1001, 5), + ('2024-01-04 19:32:47', 4, 6, 1001, 5), + ('2024-01-04 20:38:40', 5, 6, 1001, 5) +; +insert into sales +values + ('2024-01-02 07:12:21'::timestamp, 6, 1001, 2), + ('2024-01-02 09:51:01'::timestamp, 7, 1001, 4), + ('2024-01-02 10:55:42'::timestamp, 8, 1001, 9), + ('2024-01-02 11:21:10'::timestamp, 7, 1001, 19), + ('2024-01-02 15:02:20'::timestamp, 7, 1001, 1), + ('2024-01-02 16:18:00'::timestamp, 7, 1001, 1), + ('2024-01-02 18:47:13'::timestamp, 7, 1001, 9), + ('2024-01-02 19:15:12'::timestamp, 8, 1001, 5), + ('2024-01-02 20:38:01'::timestamp, 6, 1001, 14), + ('2024-01-03 07:00:27'::timestamp, 7, 1001, 13), + ('2024-01-03 08:56:40'::timestamp, 6, 1001, 1), + ('2024-01-03 09:40:07'::timestamp, 6, 1001, 14), + ('2024-01-03 10:21:06'::timestamp, 7, 1001, 4), + ('2024-01-03 12:31:10'::timestamp, 6, 1001, 10), + ('2024-01-03 15:56:56'::timestamp, 8, 1001, 5), + ('2024-01-03 17:49:04'::timestamp, 7, 1001, 12), + ('2024-01-03 
18:02:34'::timestamp, 6, 1001, 1), + ('2024-01-03 20:19:42'::timestamp, 7, 1001, 7), + ('2024-01-03 20:28:00'::timestamp, 6, 1001, 8), + ('2024-01-04 13:07:02'::timestamp, 7, 1001, 24), + ('2024-01-04 14:03:39'::timestamp, 8, 1001, 16) +; +``` diff --git a/docs/challenging-sql-problems/solutions/gold/supply-chain-network.md b/docs/challenging-sql-problems/solutions/gold/supply-chain-network.md new file mode 100644 index 0000000..20eb924 --- /dev/null +++ b/docs/challenging-sql-problems/solutions/gold/supply-chain-network.md @@ -0,0 +1,27 @@ +# Supply chain network 🚛 + +> [!TIP] +> +> Solution to the following problem: +> +> - [supply-chain-network.md](../../problems/gold/supply-chain-network.md) + +## Result Set + +Regardless of the database, the result set should look like: + +(to be added) + +
+Expand for the DDL +--8<-- "docs/challenging-sql-problems/solutions/gold/supply-chain-network.sql" +
+ +## Solution + +Some SQL solutions per database are provided below. + + +> SUCCESS: **DuckDB** +> +--8<-- "docs/challenging-sql-problems/solutions/gold/supply-chain-network--duckdb.sql" diff --git a/mkdocs.yml b/mkdocs.yml index 77f0707..3eb6fc3 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -95,8 +95,9 @@ nav: - challenging-sql-problems/problems/silver/mandelbrot-set.md # recursive CTE - challenging-sql-problems/problems/silver/customer-sales-running-totals.md # window functions - 🟡 Gold Tier: - - challenging-sql-problems/problems/gold/encoding-datelist-ints.md # window functions (gaps and islands), correlated subquery, custom axis - challenging-sql-problems/problems/gold/loan-repayment-schedule.md # recursive CTE + - challenging-sql-problems/problems/gold/supply-chain-network.md + - challenging-sql-problems/problems/gold/encoding-datelist-ints.md # window functions (gaps and islands), correlated subquery, custom axis - challenging-sql-problems/problems/gold/travel-plans.md # recursive CTEs - Solutions: - 🟤 Bronze Tier: @@ -119,8 +120,9 @@ nav: - challenging-sql-problems/solutions/silver/mandelbrot-set.md - challenging-sql-problems/solutions/silver/customer-sales-running-totals.md - 🟡 Gold Tier: - - challenging-sql-problems/solutions/gold/encoding-datelist-ints.md - challenging-sql-problems/solutions/gold/loan-repayment-schedule.md + - challenging-sql-problems/solutions/gold/supply-chain-network.md + - challenging-sql-problems/solutions/gold/encoding-datelist-ints.md - challenging-sql-problems/solutions/gold/travel-plans.md markdown_extensions: