Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

new workflow #69

Merged
merged 15 commits into from
Jan 17, 2025
45 changes: 0 additions & 45 deletions .github/.dbt-ci.yml

This file was deleted.

108 changes: 108 additions & 0 deletions .github/workflows/.dbt-ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
# CI workflow for the dbt project.
#
# Opens an SSH tunnel to the Postgres host, writes a dbt profile that reads
# its connection settings from environment variables, then runs `dbt build`
# and lints the models with sqlfluff.
name: Our first dbt PR job

on:
  pull_request:
    types:
      - opened
      - reopened
      - synchronize
      - ready_for_review
  push:
    # A `branches` filter made up only of negated ('!') patterns is not
    # supported; `branches-ignore` is the way to run on everything but main.
    branches-ignore:
      - main

# The job only needs to read the repository contents.
permissions:
  contents: read

jobs:
  dbt_ci:
    runs-on: ubuntu-latest

    steps:
      # Step 1: Checkout the code
      - name: Checkout Code
        uses: actions/checkout@v4

      # Step 2: Set up Python
      - name: Set Up Python
        uses: actions/setup-python@v5
        with:
          # Quoted so the version is read as a string, not a float.
          python-version: '3.8'

      # Step 3: Add SSH private key and configure known_hosts
      # Secrets are handed to the shell through `env` rather than being
      # interpolated into the script text, so their contents can never be
      # parsed as shell syntax.
      - name: Add SSH private key
        env:
          SSH_PRIVATE_KEY: ${{ secrets.SSH_PRIVATE_KEY }}
        run: |
          mkdir -p ~/.ssh
          chmod 700 ~/.ssh
          printf '%s\n' "$SSH_PRIVATE_KEY" > ~/.ssh/production_ddp
          chmod 600 ~/.ssh/production_ddp

      - name: Add remote host to known_hosts
        env:
          SSH_HOST: ${{ secrets.SSH_HOST }}
        run: ssh-keyscan -H "$SSH_HOST" >> ~/.ssh/known_hosts

      # Step 4: Debug SSH connection
      # Host key checking stays enabled: the host key was recorded in
      # known_hosts by the previous step, and the tunnel step relies on it too.
      - name: Debug SSH connection
        env:
          DB_USER: ${{ secrets.DB_USER }}
          SSH_HOST: ${{ secrets.SSH_HOST }}
        run: |
          ssh -i ~/.ssh/production_ddp "${DB_USER}@${SSH_HOST}" echo "SSH connection successful"

      # Step 5: Establish SSH tunnel
      # Forwards local port 5432 to port 5432 on the database host and leaves
      # ssh running in the background (-f) without a remote command (-N).
      # ExitOnForwardFailure makes this step fail if the forward cannot be
      # set up, instead of failing later inside dbt.
      - name: Establish SSH tunnel
        env:
          DB_USER: ${{ secrets.DB_USER }}
          SSH_HOST: ${{ secrets.SSH_HOST }}
          REMOTE_POSTGRES_HOST: ${{ secrets.POSTGRES_HOST }}
        run: |
          ssh -fN \
            -o ExitOnForwardFailure=yes \
            -L "5432:${REMOTE_POSTGRES_HOST}:5432" \
            -i ~/.ssh/production_ddp \
            "${DB_USER}@${SSH_HOST}"

      # Step 6: Install Python dependencies
      - name: Install requirements
        run: pip install -r requirements.txt

      # Step 7: Create profiles.yml
      # The heredoc delimiter is quoted so the shell writes the template
      # verbatim; the {{ env_var(...) }} placeholders are resolved by dbt
      # from the `env` of the steps that invoke it.
      - name: Create profiles.yml
        run: |
          mkdir -p /home/runner/.dbt
          echo "Creating profiles.yml..."
          cat <<'EOF' > /home/runner/.dbt/profiles.yml
          default:
            outputs:
              dev:
                dbname: "{{ env_var('POSTGRES_DBNAME') }}"
                host: "{{ env_var('POSTGRES_HOST') }}"
                password: "{{ env_var('POSTGRES_PASSWORD') }}"
                port: 5432
                schema: dev
                threads: 8
                type: postgres
                user: "{{ env_var('POSTGRES_USER') }}"
            target: dev
          EOF
          echo "profiles.yml created successfully."
          ls -al /home/runner/.dbt

      # Step 8: Install dbt dependencies
      - name: Install dbt dependencies
        run: dbt deps

      # Step 9: Run dbt build
      - name: dbt build
        run: dbt build --full-refresh
        env:
          POSTGRES_DBNAME: ${{ secrets.POSTGRES_DBNAME }}
          POSTGRES_USER: ${{ secrets.POSTGRES_USER }}
          POSTGRES_PASSWORD: ${{ secrets.POSTGRES_PASSWORD }}
          # Use localhost since the SSH tunnel forwards the port
          POSTGRES_HOST: 127.0.0.1
          DBT_PROFILES_DIR: /home/runner/.dbt

      # Step 10: Benchmark models using sqlfluff
      - name: Benchmarking models
        run: sqlfluff lint models --bench
        env:
          POSTGRES_DBNAME: ${{ secrets.POSTGRES_DBNAME }}
          POSTGRES_USER: ${{ secrets.POSTGRES_USER }}
          POSTGRES_PASSWORD: ${{ secrets.POSTGRES_PASSWORD }}
          POSTGRES_HOST: 127.0.0.1

      # Step 11: Lint models using sqlfluff
      - name: Lint models
        run: sqlfluff lint models -f human
        env:
          POSTGRES_DBNAME: ${{ secrets.POSTGRES_DBNAME }}
          POSTGRES_USER: ${{ secrets.POSTGRES_USER }}
          POSTGRES_PASSWORD: ${{ secrets.POSTGRES_PASSWORD }}
          POSTGRES_HOST: 127.0.0.1
39 changes: 0 additions & 39 deletions .github/workflows/python-app.yml

This file was deleted.

1 change: 0 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ logs/
# legacy -- renamed to dbt_packages in v1
dbt_modules/
venv/
profiles.yml
.user.yml
package-lock.yml
models/wash_unit_tests.yml
Expand Down
2 changes: 1 addition & 1 deletion .sqlfluff
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,5 @@ dialect = postgres
templater = dbt

# Comma separated list of rules to exclude, or None
exclude_rules = L001,L029,RF05, LT05, ST06, RF02
exclude_rules = L001,L029,RF05, LT05, ST06, RF02, ST10
#max_line_length = 120
27 changes: 14 additions & 13 deletions models/marts/education/education_scholarship_categories_agg.sql
Original file line number Diff line number Diff line change
Expand Up @@ -4,23 +4,24 @@
) }}

SELECT
"year",
"cohort",
"county",
"subcounty",
"form",
"term",
"boarding_or_day",
year,
cohort,
county,
subcounty,
form,
term,
boarding_or_day,
COALESCE(
NULLIF(
CONCAT_WS(', ',
CASE WHEN "teen_mom" = 'Yes' THEN 'Teen Mom' END,
CASE WHEN "orphan" = 'Yes' THEN 'Orphan' END,
CASE WHEN "special_needs" = 'Yes' THEN 'Special Needs' END,
CASE WHEN "gbv_survivor" = 'Yes' THEN 'GBV Survivor' END
CONCAT_WS(
', ',
CASE WHEN teen_mom = 'Yes' THEN 'Teen Mom' END,
CASE WHEN orphan = 'Yes' THEN 'Orphan' END,
CASE WHEN special_needs = 'Yes' THEN 'Special Needs' END,
CASE WHEN gbv_survivor = 'Yes' THEN 'GBV Survivor' END
),
''
),
'None'
) AS categories
FROM {{ ref('staging_scholarships') }}
FROM {{ ref('staging_scholarships') }}
86 changes: 46 additions & 40 deletions models/marts/education/followup_attendance.sql
Original file line number Diff line number Diff line change
Expand Up @@ -3,63 +3,69 @@
tags="education_attendance"
) }}


WITH clean_data AS (
SELECT
"grade",
"stream",
"name_of_student",
"absence_causes",
"_airbyte_extracted_at",
"school_type",
TO_DATE("date", 'DD/MM/YYYY') AS "absence_date",
TO_DATE("estimated_reporting_date", 'DD/MM/YYYY') AS "reporting_date"
grade,
stream,
name_of_student,
absence_causes,
_airbyte_extracted_at,
school_type,
-- Directly convert "Date_" to a proper date
TO_DATE(date, 'DD/MM/YYYY') AS absence_date,
TO_DATE(estimated_reporting_date, 'DD/MM/YYYY') AS reporting_date
FROM {{ ref('staging_followup_attendance') }}
)

SELECT
"absence_date",
EXTRACT(YEAR FROM "absence_date") AS "year",
absence_date,
EXTRACT(YEAR FROM absence_date) AS year,

-- Calculate term based on the valid date
CASE
WHEN "absence_date" BETWEEN DATE_TRUNC('year', "absence_date")
AND DATE_TRUNC('year', "absence_date") + INTERVAL '3 months - 1 day' THEN 'Term 1'
WHEN "absence_date" BETWEEN DATE_TRUNC('year', "absence_date") + INTERVAL '3 months'
AND DATE_TRUNC('year', "absence_date") + INTERVAL '7 months - 1 day' THEN 'Term 2'
WHEN "absence_date" BETWEEN DATE_TRUNC('year', "absence_date") + INTERVAL '7 months'
AND DATE_TRUNC('year', "absence_date") + INTERVAL '12 months - 1 day' THEN 'Term 3'
ELSE NULL
END AS "term",
WHEN
absence_date BETWEEN DATE_TRUNC('year', absence_date)
AND DATE_TRUNC('year', absence_date) + INTERVAL '3 months - 1 day' THEN 'Term 1'
WHEN
absence_date BETWEEN DATE_TRUNC('year', absence_date) + INTERVAL '3 months'
AND DATE_TRUNC('year', absence_date) + INTERVAL '7 months - 1 day' THEN 'Term 2'
WHEN
absence_date BETWEEN DATE_TRUNC('year', absence_date) + INTERVAL '7 months'
AND DATE_TRUNC('year', absence_date) + INTERVAL '12 months - 1 day' THEN 'Term 3'
END AS term,

-- Transform Grade values
CASE
WHEN "grade" = 'K' THEN 'Kindergarten'
ELSE CONCAT('Grade ', "grade")
END AS "grade",
WHEN grade = 'K' THEN 'Kindergarten'
ELSE CONCAT('Grade ', grade)
END AS grade,

-- Retain other columns and transformations
"stream",
"absence_causes",
"reporting_date",
LOWER("school_type") AS "school_type",
COUNT(*) AS "number_of_absences"
stream,
absence_causes,
reporting_date,
LOWER(school_type) AS school_type,
COUNT(*) AS number_of_absences
FROM clean_data
GROUP BY
"absence_date",
absence_date,
CASE
WHEN "absence_date" BETWEEN DATE_TRUNC('year', "absence_date")
AND DATE_TRUNC('year', "absence_date") + INTERVAL '3 months - 1 day' THEN 'Term 1'
WHEN "absence_date" BETWEEN DATE_TRUNC('year', "absence_date") + INTERVAL '3 months'
AND DATE_TRUNC('year', "absence_date") + INTERVAL '7 months - 1 day' THEN 'Term 2'
WHEN "absence_date" BETWEEN DATE_TRUNC('year', "absence_date") + INTERVAL '7 months'
AND DATE_TRUNC('year', "absence_date") + INTERVAL '12 months - 1 day' THEN 'Term 3'
ELSE NULL
WHEN
absence_date BETWEEN DATE_TRUNC('year', absence_date)
AND DATE_TRUNC('year', absence_date) + INTERVAL '3 months - 1 day' THEN 'Term 1'
WHEN
absence_date BETWEEN DATE_TRUNC('year', absence_date) + INTERVAL '3 months'
AND DATE_TRUNC('year', absence_date) + INTERVAL '7 months - 1 day' THEN 'Term 2'
WHEN
absence_date BETWEEN DATE_TRUNC('year', absence_date) + INTERVAL '7 months'
AND DATE_TRUNC('year', absence_date) + INTERVAL '12 months - 1 day' THEN 'Term 3'
END,
CASE
WHEN "grade" = 'K' THEN 'Kindergarten'
ELSE CONCAT('Grade ', "grade")
WHEN grade = 'K' THEN 'Kindergarten'
ELSE CONCAT('Grade ', grade)
END,
"stream",
"absence_causes",
"reporting_date",
LOWER("school_type")
stream,
absence_causes,
reporting_date,
LOWER(school_type)
Loading
Loading