diff --git a/models/marts/education/education marts_tests.yml b/models/marts/education/education marts_tests.yml new file mode 100644 index 0000000..1a6dd0c --- /dev/null +++ b/models/marts/education/education marts_tests.yml @@ -0,0 +1,267 @@ +version: 2 + +models: + - name: parent_satisfaction + description: "Aggregates parent satisfaction data for analysis." + columns: + - name: term + tests: + - not_null + - name: grade + tests: + - not_null + - name: school_type + tests: + - accepted_values: + values: ["ksg", "msg"] + - name: education_satisfaction + tests: + - accepted_values: + values: ["very_satisfied_5", "satisfied_4", "neutral_3", "dissatisfied_2", "very_dissatisfied_1"] + - name: year + tests: + - not_null + + - name: student_satisfaction + description: "Analyzes student satisfaction with schools." + columns: + - name: term + tests: + - not_null + - name: grade + tests: + - not_null + - name: school_type + tests: + - not_null + - accepted_values: + values: ["ksg", "msg"] + - name: school_satisfaction + tests: + - not_null + - accepted_values: + values: ["very_satisfied", "neutral", "very_dissatisfied", "satisfied", "dissatisfied"] + - name: year + tests: + - not_null + + - name: teacher_satisfaction + description: "Summarizes teacher satisfaction survey data." + columns: + - name: teaching_level + tests: + - not_null + - name: education_satisfaction + tests: + - not_null + - accepted_values: + values: ["very_satisfied", "neutral", "very_dissatisfied", "satisfied", "dissatisfied"] + - name: school_type + tests: + - not_null + - accepted_values: + values: ["ksg", "msg"] + - name: year + tests: + - not_null + + - name: well_being_sessions + description: "Aggregates data on well-being sessions." 
+ columns: + - name: "date" + tests: + - dbt_expectations.expect_column_values_to_be_of_type: + column_type: date + - name: grade + tests: + - not_null + - name: school_type + tests: + - accepted_values: + values: ["ksg", "msg"] + - name: "number_of_students_trained" + tests: + - dbt_expectations.expect_column_values_to_be_of_type: + column_type: integer + - name: "session_type" + tests: + - not_null + - accepted_values: + values: ["Group", "Individual"] + + - name: followup_attendance + description: "Tracks follow-ups for student attendance to address absenteeism." + columns: + - name: absence_date + tests: + - not_null + - dbt_expectations.expect_column_values_to_be_of_type: + column_type: date + - name: grade + tests: + - not_null + - name: year + tests: + - not_null + - name: term + tests: + - not_null + - name: reporting_date + tests: + - dbt_expectations.expect_column_values_to_be_of_type: + column_type: date + - name: school_type + tests: + - not_null + - accepted_values: + values: ["ksg", "msg"] + - name: number_of_absences + tests: + - not_null + - dbt_expectations.expect_column_values_to_be_of_type: + column_type: bigint + + - name: parents_attendance + description: "Captures parent attendance data for school meetings and engagement levels." + columns: + - name: year + tests: + - not_null + - name: grade + tests: + - not_null + - name: date + tests: + - dbt_expectations.expect_column_values_to_be_of_type: + column_type: date + - name: "meeting_type" + tests: + - not_null + - name: "number_present" + tests: + - not_null + - name: "attendance_percentage" + tests: + - not_null + - name: school_type + tests: + - not_null + - accepted_values: + values: ["ksg", "msg"] + + - name: students_attendance + description: "Tracks student attendance statistics." 
+ columns: + - name: term + tests: + - not_null + - name: year + tests: + - not_null + - name: grade + tests: + - not_null + - name: "avg_days_attended" + tests: + - not_null + - name: "total_school_days" + tests: + - not_null + - name: "number_of_students" + tests: + - not_null + - name: "attendance_percentage" + tests: + - not_null + - name: "total_days_absent" + tests: + - not_null + - name: "total_days_present" + tests: + - not_null + - name: school_type + tests: + - not_null + - accepted_values: + values: ["ksg", "msg"] + + - name: education_scholarship_categories_agg + description: "Aggregates scholarship data to create categorized insights based on student demographics." + columns: + - name: year + tests: + - not_null + - accepted_values: + values: ["2021", "2022", "2023", "2024", "2025"] + - name: cohort + tests: + - not_null + - name: categories + tests: + - not_null + - name: county + tests: + - not_null + - name: subcounty + tests: + - not_null + - name: term + tests: + - not_null + - name: boarding_or_day + tests: + - accepted_values: + values: ["Boarding", "Day"] + + - name: nudges + description: "Analyzes distribution of education nudges based on demographics." + columns: + - name: term + tests: + - not_null + - name: year + tests: + - not_null + - name: county + tests: + - not_null + - name: grade + tests: + - not_null + - name: gender + tests: + - not_null + - accepted_values: + values: ["male", "female"] + - name: nudge_type + tests: + - not_null + - name: nudge_count + tests: + - not_null + - dbt_expectations.expect_column_values_to_be_of_type: + column_type: bigint + + - name: public_partnerships + description: "Summarizes public school partnership data, including KPCE scores and resource allocations." 
+ columns: + - name: year + tests: + - not_null + - name: county + tests: + - not_null + - name: subcounty + tests: + - not_null + - name: mean_kpce_score + tests: + - not_null + - name: students_enrolled + tests: + - not_null + - name: high_touch_low_touch + tests: + - not_null + - accepted_values: + values: ["High", "Low"] \ No newline at end of file diff --git a/models/marts/education/followup_attendance.sql b/models/marts/education/followup_attendance.sql index 6a35904..7992ab8 100644 --- a/models/marts/education/followup_attendance.sql +++ b/models/marts/education/followup_attendance.sql @@ -11,24 +11,23 @@ WITH clean_data AS ( "absence_causes", "_airbyte_extracted_at", "school_type", - -- Directly convert "Date_" to a proper date - TO_DATE("date", 'DD/MM/YYYY') AS "date", + TO_DATE("date", 'DD/MM/YYYY') AS "absence_date", TO_DATE("estimated_reporting_date", 'DD/MM/YYYY') AS "reporting_date" FROM {{ ref('staging_followup_attendance') }} ) SELECT - "date" AS "absence_date", - EXTRACT(YEAR FROM "date") AS "year", + "absence_date", + EXTRACT(YEAR FROM "absence_date") AS "year", -- Calculate term based on the valid date CASE - WHEN "date" BETWEEN DATE_TRUNC('year', "date") - AND DATE_TRUNC('year', "date") + INTERVAL '3 months - 1 day' THEN 'Term 1' - WHEN "date" BETWEEN DATE_TRUNC('year', "date") + INTERVAL '3 months' - AND DATE_TRUNC('year', "date") + INTERVAL '7 months - 1 day' THEN 'Term 2' - WHEN "date" BETWEEN DATE_TRUNC('year', "date") + INTERVAL '7 months' - AND DATE_TRUNC('year', "date") + INTERVAL '12 months - 1 day' THEN 'Term 3' + WHEN "absence_date" BETWEEN DATE_TRUNC('year', "absence_date") + AND DATE_TRUNC('year', "absence_date") + INTERVAL '3 months - 1 day' THEN 'Term 1' + WHEN "absence_date" BETWEEN DATE_TRUNC('year', "absence_date") + INTERVAL '3 months' + AND DATE_TRUNC('year', "absence_date") + INTERVAL '7 months - 1 day' THEN 'Term 2' + WHEN "absence_date" BETWEEN DATE_TRUNC('year', "absence_date") + INTERVAL '7 months' + AND 
DATE_TRUNC('year', "absence_date") + INTERVAL '12 months - 1 day' THEN 'Term 3' ELSE NULL END AS "term", @@ -43,18 +42,17 @@ SELECT "absence_causes", "reporting_date", LOWER("school_type") AS "school_type", - COUNT(*) AS "number_of_absences" FROM clean_data GROUP BY - "date", + "absence_date", CASE - WHEN "date" BETWEEN DATE_TRUNC('year', "date") - AND DATE_TRUNC('year', "date") + INTERVAL '3 months - 1 day' THEN 'Term 1' - WHEN "date" BETWEEN DATE_TRUNC('year', "date") + INTERVAL '3 months' - AND DATE_TRUNC('year', "date") + INTERVAL '7 months - 1 day' THEN 'Term 2' - WHEN "date" BETWEEN DATE_TRUNC('year', "date") + INTERVAL '7 months' - AND DATE_TRUNC('year', "date") + INTERVAL '12 months - 1 day' THEN 'Term 3' + WHEN "absence_date" BETWEEN DATE_TRUNC('year', "absence_date") + AND DATE_TRUNC('year', "absence_date") + INTERVAL '3 months - 1 day' THEN 'Term 1' + WHEN "absence_date" BETWEEN DATE_TRUNC('year', "absence_date") + INTERVAL '3 months' + AND DATE_TRUNC('year', "absence_date") + INTERVAL '7 months - 1 day' THEN 'Term 2' + WHEN "absence_date" BETWEEN DATE_TRUNC('year', "absence_date") + INTERVAL '7 months' + AND DATE_TRUNC('year', "absence_date") + INTERVAL '12 months - 1 day' THEN 'Term 3' ELSE NULL END, CASE diff --git a/models/marts/education/parent_satisfaction.sql b/models/marts/education/parent_satisfaction.sql index 8bf2b1c..5608d28 100644 --- a/models/marts/education/parent_satisfaction.sql +++ b/models/marts/education/parent_satisfaction.sql @@ -13,7 +13,7 @@ SELECT ELSE CONCAT('Grade ', REGEXP_REPLACE(LOWER("grade"), '.*grade', '')) END AS "grade", - "school" AS "school_type", + LOWER("school") AS "school_type", "education_satisfaction", "class_year_of_child" AS "year" FROM {{ ref("staging_parents_satisfaction_survey") }} \ No newline at end of file diff --git a/models/marts/education/parents_attendance.sql b/models/marts/education/parents_attendance.sql index 6ef3bb0..c83c854 100644 --- a/models/marts/education/parents_attendance.sql +++ 
b/models/marts/education/parents_attendance.sql @@ -7,11 +7,11 @@ SELECT "Term" as "term", "Year" as "year", "Grade" as "grade", - "Meeting_Date" as "date", - "Meeting_Type", - "Number_Present", - "Number_of_Parents", - "Share_of_Parents_Engaged", - "Attendance_Percentage_", - "school_type" + TO_DATE("Meeting_Date", 'DD/MM/YYYY') AS "date", + "Meeting_Type" as "meeting_type", + "Number_Present" as "number_present", + "Number_of_Parents" as "number_of_parents", + "Share_of_Parents_Engaged" as "share_of_parents_engaged", + "Attendance_Percentage_" as "attendance_percentage", + LOWER("school_type") as "school_type" FROM {{ ref("staging_parents_attendance") }} \ No newline at end of file diff --git a/models/marts/education/public_partnerships.sql b/models/marts/education/public_partnerships.sql index fe76dc4..22b8940 100644 --- a/models/marts/education/public_partnerships.sql +++ b/models/marts/education/public_partnerships.sql @@ -5,7 +5,7 @@ SELECT "year", - "school_name", + LOWER("school_name") AS "school_name", "county", "mean_kpce_score", "subcounty", diff --git a/models/marts/education/student_satisfaction.sql b/models/marts/education/student_satisfaction.sql index 5e05b76..a576466 100644 --- a/models/marts/education/student_satisfaction.sql +++ b/models/marts/education/student_satisfaction.sql @@ -13,7 +13,7 @@ SELECT ELSE CONCAT('Grade ', REGEXP_REPLACE(LOWER("grade"), '.*grade', '')) END AS "grade", - "school" AS "school_type", + LOWER("school") AS "school_type", "school_satisfaction", "class_year" AS "year" FROM {{ ref("staging_student_satisfaction_survey") }} \ No newline at end of file diff --git a/models/marts/education/students_attendance.sql b/models/marts/education/students_attendance.sql index b463df2..fd44cd1 100644 --- a/models/marts/education/students_attendance.sql +++ b/models/marts/education/students_attendance.sql @@ -7,11 +7,11 @@ SELECT "Term" as "term", "Grade" as "grade", "Year" as "year", - "Avg_Days_Attended", - "Total_School_Days", - "Number_of_Students", 
- "Attendance_Percentage", - "All_Students___Total_Days_Absent", - "All_Students___Total_Days_Present", + "Avg_Days_Attended" as "avg_days_attended", + "Total_School_Days" as "total_school_days", + "Number_of_Students" as "number_of_students", + "Attendance_Percentage" as "attendance_percentage", + "All_Students___Total_Days_Absent" as "total_days_absent", + "All_Students___Total_Days_Present" as "total_days_present", LOWER("School_Type") as "school_type" FROM {{ ref("staging_students_attendance") }} \ No newline at end of file diff --git a/models/marts/education/teacher_satisfaction.sql b/models/marts/education/teacher_satisfaction.sql index bd41da4..d68dc82 100644 --- a/models/marts/education/teacher_satisfaction.sql +++ b/models/marts/education/teacher_satisfaction.sql @@ -4,6 +4,8 @@ ) }} SELECT - "teaching_level", "school_name", "education_satisfaction", - "school_name" AS "school_type", "year" + "teaching_level", + "education_satisfaction", + LOWER("school_name") AS "school_type", + "year" FROM {{ ref("staging_teacher_satisfaction_survey") }} \ No newline at end of file diff --git a/models/marts/education/well_being_sessions.sql b/models/marts/education/well_being_sessions.sql index 63d74fa..0c17f22 100644 --- a/models/marts/education/well_being_sessions.sql +++ b/models/marts/education/well_being_sessions.sql @@ -4,11 +4,11 @@ ) }} SELECT - TO_DATE("Date", 'DD/MM/YYYY') AS "Date", + TO_DATE("Date", 'DD/MM/YYYY') AS "date", "Grade" as "grade", - "Topic", + "Topic" as "topic", LOWER("School") as "school_type", - "Stream", - CAST("Number_of_stdents_trained" AS INTEGER) AS "Number_of_stdents_trained", - "Session_Type" + "Stream" as "stream", + CAST("Number_of_stdents_trained" AS INTEGER) AS "number_of_students_trained", + "Session_Type" as "session_type" FROM {{ ref("staging_well_being_sessions") }} \ No newline at end of file diff --git a/models/marts/org_wide/county_footprint.sql b/models/marts/org_wide/county_footprint.sql index b8ff88d..700e851 100644 --- 
a/models/marts/org_wide/county_footprint.sql +++ b/models/marts/org_wide/county_footprint.sql @@ -9,13 +9,13 @@ SELECT "latitude", "longitude", -- Transform each program column to numeric - CASE WHEN "SUN" = 'Y' THEN 1 ELSE 0 END AS SUN, - CASE WHEN "Youth_Voice" = 'Y' THEN 1 ELSE 0 END AS Youth_Voice, - CASE WHEN "SACCO" = 'Y' THEN 1 ELSE 0 END AS SACCO, - CASE WHEN "SL" = 'Y' THEN 1 ELSE 0 END AS SL, - CASE WHEN "Education" = 'Y' THEN 1 ELSE 0 END AS Education, - CASE WHEN "Gender" = 'Y' THEN 1 ELSE 0 END AS Gender, - CASE WHEN "WASH" = 'Y' THEN 1 ELSE 0 END AS WASH, - CASE WHEN "Health" = 'Y' THEN 1 ELSE 0 END AS Health, - CASE WHEN "Libraries" = 'Y' THEN 1 ELSE 0 END AS Libraries + CASE WHEN "SUN" = 'Y' THEN 1 ELSE 0 END AS sun, + CASE WHEN "Youth_Voice" = 'Y' THEN 1 ELSE 0 END AS youth_voice, + CASE WHEN "SACCO" = 'Y' THEN 1 ELSE 0 END AS sacco, + CASE WHEN "SL" = 'Y' THEN 1 ELSE 0 END AS sl, + CASE WHEN "Education" = 'Y' THEN 1 ELSE 0 END AS education, + CASE WHEN "Gender" = 'Y' THEN 1 ELSE 0 END AS gender, + CASE WHEN "WASH" = 'Y' THEN 1 ELSE 0 END AS wash, + CASE WHEN "Health" = 'Y' THEN 1 ELSE 0 END AS health, + CASE WHEN "Libraries" = 'Y' THEN 1 ELSE 0 END AS libraries FROM {{ ref("staging_county_footprint") }} \ No newline at end of file diff --git a/models/marts/org_wide/mapping.sql b/models/marts/org_wide/mapping.sql index 7bd8973..a0a36f3 100644 --- a/models/marts/org_wide/mapping.sql +++ b/models/marts/org_wide/mapping.sql @@ -14,7 +14,7 @@ SELECT "county", "constituency", "office_gender_desk_location", - "received_on", + CAST("received_on" as DATE) as "received_on", -- Extract GPS components from gps_location field split_part("gps_location", ' ', 1)::float AS "latitude", diff --git a/models/marts/org_wide/org_wide_marts_tests.yml b/models/marts/org_wide/org_wide_marts_tests.yml new file mode 100644 index 0000000..e50cf34 --- /dev/null +++ b/models/marts/org_wide/org_wide_marts_tests.yml @@ -0,0 +1,121 @@ +version: 2 + +models: + - name: 
county_footprint + description: "Tracks the program's impact across counties with consolidated data." + columns: + - name: county + tests: + - not_null + - name: iso_3166_2_code + tests: + - not_null + - unique + - name: latitude + tests: + - not_null + - name: longitude + tests: + - not_null + - name: sl + tests: + - not_null + - accepted_values: + values: [1, 0] + - dbt_expectations.expect_column_values_to_be_of_type: + column_type: integer + - name: sun + tests: + - not_null + - accepted_values: + values: [1, 0] + - dbt_expectations.expect_column_values_to_be_of_type: + column_type: integer + - name: wash + tests: + - not_null + - accepted_values: + values: [1, 0] + - dbt_expectations.expect_column_values_to_be_of_type: + column_type: integer + - name: sacco + tests: + - not_null + - accepted_values: + values: [1, 0] + - dbt_expectations.expect_column_values_to_be_of_type: + column_type: integer + - name: gender + tests: + - not_null + - accepted_values: + values: [1, 0] + - dbt_expectations.expect_column_values_to_be_of_type: + column_type: integer + - name: health + tests: + - not_null + - accepted_values: + values: [1, 0] + - dbt_expectations.expect_column_values_to_be_of_type: + column_type: integer + - name: education + tests: + - not_null + - accepted_values: + values: [1, 0] + - dbt_expectations.expect_column_values_to_be_of_type: + column_type: integer + - name: libraries + tests: + - not_null + - accepted_values: + values: [1, 0] + - dbt_expectations.expect_column_values_to_be_of_type: + column_type: integer + - name: youth_voice + tests: + - not_null + - accepted_values: + values: [1, 0] + - dbt_expectations.expect_column_values_to_be_of_type: + column_type: integer + + - name: mapping + description: "Standardizes and prepares mapping data for program-wide reporting and analysis." 
+ columns: + - name: id + tests: + - not_null + - name: case_id + tests: + - not_null + - name: case_type + tests: + - not_null + - name: county + tests: + - not_null + - name: ward + tests: + - not_null + - name: constituency + tests: + - not_null + - name: latitude + tests: + - not_null + - name: longitude + tests: + - not_null + - name: altitude + tests: + - not_null + - name: accuracy + tests: + - not_null + - name: received_on + tests: + - not_null + - dbt_expectations.expect_column_values_to_be_of_type: + column_type: date \ No newline at end of file