Skip to content

Commit

Permalink
Merge branch 'main' into fix/dim_teachers
Browse files Browse the repository at this point in the history
  • Loading branch information
jordan-springer authored Jan 29, 2024
2 parents 753d308 + 0e70085 commit 8c981ff
Show file tree
Hide file tree
Showing 11 changed files with 201 additions and 84 deletions.
37 changes: 37 additions & 0 deletions dbt/macros/active_status_label.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
{# Notes:

This macro is an attempt to engage in some DRY practice - to remove repetitive hard-coded labels for
school, teacher, etc. activity.

We have a method for determining whether a school, teacher, or section is "active" (see: dim_school_status). The method produces a
3-digit binary code. The key is below. This method is used in several locations where a label, e.g. "active new",
is applied.

These 3 values can be combined into an ordered 3-char string representing the concatenated true/false combinations
for Active|Prev|Ever e.g. "101" means: ( Active = true AND Prev year = false AND Ever before = true )

- '000' (0) = 'market' -- Not active now + never been active
- '001' (1) = 'inactive churn' -- NOT active + NOT active prev year + active ever before
- '010' (2) = '<impossible status>' -- should not be possible, active in the prev year should imply active ever before
- '011' (3) = 'inactive this year' -- NOT active + active prev year + (active ever before implied)
- '100' (4) = 'active new' -- active this year + NOT active last year + NOT active ever before
- '101' (5) = 'active reacquired' -- Active this year + NOT active last year + active in the past
- '110' (6) = '<impossible status>' -- impossible for same reason as status (2)
- '111' (7) = 'active retained' -- active this year + active last year + (active ever before implied)
#}

{% macro active_status_label(status_code) %}
{#
    Renders a SQL case expression that maps a 3-character activity status code
    (concatenated Active|Prev|Ever flags, e.g. '101') to its human-readable label.

    Args:
        status_code: SQL expression (column name or literal) that evaluates to the
                     3-character status code string.

    Rendered expression yields:
        - the matching label for the eight known codes
        - NULL when the code is NULL (passed through rather than raising noise;
          the original author was undecided on whether this should be louder)
        - 'INVALID CODE: <code>' for any other non-null value
#}
case
    when {{ status_code }} = '000' then 'market'
    when {{ status_code }} = '001' then 'inactive churn'
    when {{ status_code }} = '010' then '<impossible status>'
    when {{ status_code }} = '011' then 'inactive this year'
    when {{ status_code }} = '100' then 'active new'
    when {{ status_code }} = '101' then 'active reacquired'
    when {{ status_code }} = '110' then '<impossible status>'
    when {{ status_code }} = '111' then 'active retained'
    when {{ status_code }} is null then null
    {# fixed: the fallback branch previously read "else then", which is not valid SQL #}
    else 'INVALID CODE: ' || {{ status_code }}
end
{% endmacro %}
21 changes: 21 additions & 0 deletions dbt/macros/pad_school_id.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
{#
background/description:
nces school ids should either be 12 characters (public) or 8 characters (private/charter)
sometimes the leading zeros are dropped in which case we left pad the result.

POTENTIAL FUTURE SAFE GUARD:
- it's theoretically possible for nces ids to have more than 1 leading zero.
- currently many of the 8-char school ids have as many as 5 leading zeros, for example.
- none of the 12-char ids to date have more than 1 leading zero, but I'm unsure whether that is a hard and fast rule
- We should EITHER add some kind of test to ensure that all school_ids are either 8 or 12 characters OR ensure that this macro handles other lengths.

1.22.24 - however, I'm not baking that safe-guard in here because I haven't researched the true rules of nces id lengths. So for now, just handling the one known case for length 11
#}

-- Left pad school_id with 0s
{% macro pad_school_id(school_id) %}
{#
    Renders a SQL case expression that restores a single dropped leading zero:
    an 11-character id is left-padded with '0' to 12 characters; any other
    value (including null) is returned unchanged.

    Args:
        school_id: SQL expression (column name or literal) holding the school id.
#}
case length({{ school_id }})
    when 11 then lpad({{ school_id }}, 12, '0')
    else {{ school_id }}
end
{% endmacro %}
2 changes: 1 addition & 1 deletion dbt/models/intermediate/int_active_sections.sql
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ final as (
,section_started_at
,num_students
from combined
where num_students >= 5
where num_students >= 5 --alignment check: rosetta does use >= 5 students as well
)

select *
Expand Down
16 changes: 8 additions & 8 deletions dbt/models/intermediate/int_section_mapping.sql
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,9 @@ followers as (
from {{ ref('stg_dashboard__followers') }}
),

teachers as (
select distinct
teacher_id,
school_id
from {{ ref('dim_teachers') }}
teacher_school_changes as (
select *
from {{ ref('int_teacher_schools_historical') }}
),

sections as (
Expand All @@ -44,7 +42,7 @@ combined as (
followers.student_id,
sections.user_id as teacher_id,
sections.section_id as section_id,
teachers.school_id,
tsc.school_id,
row_number() over(
partition by
followers.student_id,
Expand All @@ -56,11 +54,13 @@ combined as (
from followers
left join sections
on followers.section_id = sections.section_id
left join teachers
on sections.user_id = teachers.teacher_id
join school_years as sy
on followers.created_at
between sy.started_at and sy.ended_at
left join teacher_school_changes tsc
on sections.user_id = tsc.teacher_id
and sy.ended_at
between tsc.started_at and tsc.ended_at
),

final as (
Expand Down
12 changes: 9 additions & 3 deletions dbt/models/marts/metrics/_metrics__models.yml
Original file line number Diff line number Diff line change
@@ -1,8 +1,14 @@
version: 2

models:
models:
- name: fct_weekly_school_acquisition_metrics
description: high-level aggregations used for the monthly initiative review on school acquisitions
description: |
### description
* high-level aggregations used for the monthly initiative review on school acquisitions
* see definitions below
test of image: ![codeorg logo](https://studio.code.org/assets/logo-2acd4ebc69c447786b866b98034bb3c0777b5f67cd8bd7955e97bba0b16f2bd1.svg)
columns:
- name: school_level
description: elementary, middle, or high school- note that a given school may be counted in multiple school level aggregations (e.g. K-8)
Expand All @@ -19,4 +25,4 @@ models:
- name: num_schools_this_week
description: total number of schools within the school level and active status dimensions within that week
- name: num_schools_running_total
description: total number of schools within the school level and active status dimensions as of that week
description: total number of schools within the school level and active status dimensions as of that week
21 changes: 13 additions & 8 deletions dbt/models/marts/schools/dim_school_status.sql
Original file line number Diff line number Diff line change
Expand Up @@ -76,22 +76,27 @@ started_schools as (
listagg( distinct course_name, ', ') within group (order by course_name) active_courses
from teacher_active_courses_with_sy
group by 1, 2
),

active_status_simple as (
)
, active_status_simple as (
select
all_schools_sy.school_id,
all_schools_sy.school_year,
case when started_schools.school_id is null then 0 else 1 end as is_active,
--case when started_schools.school_id is null then 0 else 1 end as is_active,

case when -- for a school to be active it has to have started a course that is NOT hoc-only
started_schools.school_id is not null
and started_schools.active_courses <> 'hoc' then 1
else 0
end as is_active,

started_schools.school_started_at,
started_schools.active_courses
from all_schools_sy
left join started_schools
on started_schools.school_id = all_schools_sy.school_id
and started_schools.school_year = all_schools_sy.school_year
),

full_status as (
)
, full_status as (
-- Determine the active status for each school in each school year

select
Expand Down Expand Up @@ -139,4 +144,4 @@ final as (
)

select *
from final
from final
33 changes: 31 additions & 2 deletions dbt/models/marts/students/dim_student_courses.sql
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
/*
Updated 1/24/24: Filtered out certain courses from "counting" toward a student's being active
Updated 12/6/23:
1. Design:
Expand Down Expand Up @@ -46,7 +48,34 @@ combined as (
select
ul.user_id as student_id
,sy.school_year
,cs.course_name_true as course_name
--,cs.course_name_true as course_name

-- Case-when below limits courses that count for students to the ones explicitly listed.
--1. this should be replaced/fixed with a flag in course_structure that we can use to identify a true student course
--2. I've explicitly showing my work by listing all course_name_true values - as of 1.24.24 - and commented out ones we're not including
,case
when cs.course_name_true in (
'ai',
'csf',
'csp',
'csd',
'hoc',
'csc',
'csa'
-- 'other',
-- 'csa virtual pl',
-- 'csp virtual pl',
-- 'csd virtual pl',
-- 'csp self paced pl',
-- 'csc self paced pl',
-- 'csa self paced pl',
-- 'csf self paced pl',
-- 'csd self paced pl'
) then cs.course_name_true
--else null
end as course_name


,min(ul.created_at) as first_activity_at
,max(ul.created_at) as last_activity_at
from user_levels ul
Expand All @@ -60,4 +89,4 @@ combined as (
)

select *
from combined
from combined
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ with
source as (
select *
from {{ source('dashboard', 'sections') }}
where deleted_at is null
-- where deleted_at is null
),

renamed as (
Expand Down Expand Up @@ -32,7 +32,8 @@ renamed as (
-- timestamps
created_at,
updated_at,
first_activity_at
first_activity_at,
deleted_at
from source
)

Expand Down
23 changes: 21 additions & 2 deletions dbt/models/staging/dashboard/stg_dashboard__school_infos.sql
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,25 @@ school_infos as (
select *
from {{ ref('base_dashboard__school_infos')}}
)
, final as (
select

select *
from school_infos
school_info_id,
country,
school_type,
zip,
state,
school_district_id,
school_district_other,
school_district_name,
{{ pad_school_id('school_id') }} as school_id,
school_other,
school_name,
full_address,
created_at,
updated_at,
validation_type
from school_infos
)
select *
from final
Loading

0 comments on commit 8c981ff

Please sign in to comment.