Skip to content

Commit

Permalink
Improve performance of analysis 117
Browse files Browse the repository at this point in the history
Since the HAVING clause already discards months with no matching observation periods (a count of 0), an INNER JOIN returns the same result as the LEFT JOIN here.

This addresses a problem in Databricks where the small table in a LEFT JOIN is not broadcast when it is on the left side.

Related issue #690
  • Loading branch information
Gennadiy Anisimov committed Mar 26, 2024
1 parent 5fdd05c commit cd0c03a
Showing 1 changed file with 10 additions and 10 deletions.
20 changes: 10 additions & 10 deletions inst/sql/sql_server/analyses/117.sql
Original file line number Diff line number Diff line change
Expand Up @@ -2,23 +2,23 @@

--HINT DISTRIBUTE_ON_KEY(stratum_1)
-- generating date key sequences in a cross-dialect compatible fashion
-- Analysis 117: for every calendar month (obs_month, an integer in YYYYMM form), count the
-- distinct persons who have an observation period covering that month, and write the result
-- to a scratch table.
-- NOTE(review): this listing appears to be a rendered diff, so some lines occur twice
-- (the pre-change line followed by the post-change line); confirm against the repository
-- version before executing it as-is.
-- NOTE(review): the @name tokens are presumably template parameters substituted before
-- execution - verify against the caller.
--
-- The CTEs below enumerate the digit strings used to build every YYYYMM key:
-- century ('19','20') + tens ('0'-'9') + ones ('0'-'9') form a four-digit year (1900-2099),
-- and months ('01'-'12') supplies the two-digit month.
with century as (select '19' num union select '20' num),
with century as (select '19' num union select '20' num),
tens as (select '0' num union select '1' num union select '2' num union select '3' num union select '4' num union select '5' num union select '6' num union select '7' num union select '8' num union select '9' num),
ones as (select '0' num union select '1' num union select '2' num union select '3' num union select '4' num union select '5' num union select '6' num union select '7' num union select '8' num union select '9' num),
months as (select '01' as num union select '02' num union select '03' num union select '04' num union select '05' num union select '06' num union select '07' num union select '08' num union select '09' num union select '10' num union select '11' num union select '12' num),
-- date_keys: cross join of all digit parts, concatenated and cast to int, giving one
-- obs_month row per year/month combination (190001 through 209912).
date_keys as (select cast(concat(century.num, tens.num, ones.num,months.num) as int) obs_month from century cross join tens cross join ones cross join months)
SELECT
117 as analysis_id,
117 as analysis_id,
-- stratum_1 carries the YYYYMM key as text; strata 2-5 are unused and emitted as typed NULLs.
CAST(t1.obs_month AS VARCHAR(255)) as stratum_1,
cast(null as varchar(255)) as stratum_2, cast(null as varchar(255)) as stratum_3, cast(null as varchar(255)) as stratum_4, cast(null as varchar(255)) as stratum_5,
-- Distinct persons per month; a person with several periods in the same month is counted once.
COALESCE(COUNT_BIG(distinct op1.PERSON_ID),0) as count_value
into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_117
-- NOTE(review): the next block appears to be the pre-change form - date_keys on the left,
-- left-joined to a subquery that pairs each observation period with every month it spans.
FROM date_keys t1
left join
(select t2.obs_month, op2.*
from @cdmDatabaseSchema.observation_period op2, date_keys t2
where year(op2.observation_period_start_date)*100 + month(op2.observation_period_start_date) <= t2.obs_month
and year(op2.observation_period_end_date)*100 + month(op2.observation_period_end_date) >= t2.obs_month
) op1 on op1.obs_month = t1.obs_month
-- NOTE(review): the next block appears to be the post-change form - observation_period is
-- inner-joined directly to date_keys.
into @scratchDatabaseSchema@schemaDelim@tempAchillesPrefix_117
from
@cdmDatabaseSchema.observation_period op1
inner join
date_keys t1 on
-- year*100 + month converts a date to the same YYYYMM integer form as obs_month, so a period
-- matches every month from its start month through its end month, inclusive.
year(op1.observation_period_start_date)*100 + month(op1.observation_period_start_date) <= t1.obs_month
and
year(op1.observation_period_end_date)*100 + month(op1.observation_period_end_date) >= t1.obs_month
group by t1.obs_month
-- Keep only months in which at least one person was counted.
having COALESCE(COUNT_BIG(distinct op1.PERSON_ID),0) > 0;

0 comments on commit cd0c03a

Please sign in to comment.