From e58c1edd36fd408c356deeb0e894947721908545 Mon Sep 17 00:00:00 2001 From: Muhammad-Sharif Moustafa Date: Tue, 8 Aug 2023 20:18:13 -0400 Subject: [PATCH 1/5] added the query --- .../scheduled-data-exports_8.pgsql" | 92 +++++++++++++++++++ .../scheduled-data-exports.md" | 7 +- 2 files changed, 98 insertions(+), 1 deletion(-) create mode 100644 "code_blocks/\360\237\224\214 Integrations & Events/scheduled-data-exports_8.pgsql" diff --git "a/code_blocks/\360\237\224\214 Integrations & Events/scheduled-data-exports_8.pgsql" "b/code_blocks/\360\237\224\214 Integrations & Events/scheduled-data-exports_8.pgsql" new file mode 100644 index 00000000..eb979e4c --- /dev/null +++ "b/code_blocks/\360\237\224\214 Integrations & Events/scheduled-data-exports_8.pgsql" @@ -0,0 +1,92 @@ +-- Subscriber Retention +/* +This query generates a table using three sub-tables: + 1. subs - a table grouped by first subscription purchase date + 2. retention - a table that counts the number of active subscriptions in each + date cohort + 3. pending_retention - a table that counts the number of subscriptions that + are expected to renew +The three tables are combined in full_table. +*/ + +WITH + -- subs is an intermediate table that maps subscribers to their earliest + -- subscription start date. The first subscription start date will be used to + -- group billing cycles by cohort + subs AS ( + SELECT + -- app user ID will be used to group billing cycles by user + ETL.app_user_id AS id, + -- first purchase date will be used to group billing cycles by cohort. + DATE(MIN(start_time)) AS first_start_time + FROM ETL + WHERE start_time >= /* desired date range */ + AND end_time IS NOT NULL -- count subscriptions only + AND is_sandbox <> 'true' -- remove any sandbox transactions + AND is_trial_period = 'false' -- remove any trial billing cycles + AND is_in_intro_offer_period = 'false' -- remove any intro offer billing cycles + AND (ownership_type IS NULL OR ownership_type != 'FAMILY_SHARED') -- remove any subscriptions that were acquired through Family Sharing + AND product_identifier = /* targeted product(s) */ + GROUP BY ETL.app_user_id + ), + + -- retention is an intermediate table that counts the number of active subscriptions in each cohort and billing cycle + retention AS ( + SELECT + subs.first_start_time, -- cohorting subscriptions by their first start dates + CAST(ROUND(DATEDIFF(day, subs.first_start_time, start_time) / CAST(30 AS NUMERIC)) AS INTEGER) AS period_number, -- every billing cycle is labeled by its number. 30 is used here to filter for monthly subscriptions. Change this to filter for different product durations, e.g. 365 for yearly. So if a user subscribes in January, unsubscribes so that they don't have an active subscription in February, and resubscribes in March, they would be in the January cohort and counted as retained for Month 2 (skipping February, which is Month 1). + count(1) AS actives -- count the number of active subscriptions in each cohort and billing cycle + FROM ETL + INNER JOIN subs ON subs.id = ETL.app_user_id + WHERE start_time >= /* desired date range */ + AND end_time IS NOT NULL -- count subscriptions only + AND is_sandbox <> 'true' -- remove any sandbox transactions + AND is_trial_period = 'false' -- remove any trial billing cycles + AND is_in_intro_offer_period = 'false' -- remove any intro offer billing cycles + AND (ownership_type IS NULL OR ownership_type != 'FAMILY_SHARED') -- remove any subscriptions that were acquired through Family Sharing + AND product_identifier = /* targeted product(s) */ + GROUP BY 1, 2 + ), + + -- pending_retention is an intermediate table that is identical to retention, with the exception that it counts subscriptions that are expected to renew in the future. Pending retention can change over time as users change their auto-renewal status. It can be interpreted as a measure of future retention. + pending_retention AS ( + SELECT + subs.first_start_time, + CAST(ROUND(DATEDIFF(day, subs.first_start_time, start_time) / CAST(30 AS NUMERIC)) AS INTEGER) + 1 AS period_number, + count(1) AS actives + FROM ETL + INNER JOIN subs ON subs.id = ETL.app_user_id + WHERE + is_auto_renewing = 'true' + AND DATEADD(month, 1, start_time) > GETDATE() -- count only the latest billing cycle of a subscription + AND start_time >= /* desired date range */ + AND end_time IS NOT NULL + AND is_sandbox <> 'true' + AND is_trial_period = 'false' + AND is_in_intro_offer_period = 'false' + AND (ownership_type IS NULL OR ownership_type != 'FAMILY_SHARED') + AND product_identifier = /* targeted product(s) */ + GROUP BY 1, 2 + ), + + -- full_table combines retention and pending_retention into a single table + -- if you're not interested in pending subscriptions, you can remove pending_retention from this below query + full_table AS ( + SELECT + COALESCE(retention.first_start_time, pending_retention.first_start_time) AS first_start_time, + COALESCE(retention.period_number, pending_retention.period_number) AS period_number, + COALESCE(retention.actives, 0) + COALESCE(pending_retention.actives, 0) AS actives + FROM retention + FULL OUTER JOIN pending_retention ON + pending_retention.first_start_time = retention.first_start_time AND + pending_retention.period_number = retention.period_number + ) + + -- the final subscription retention table + SELECT + first_start_time as date, + period_number, + CONCAT('Month ', period_number) AS period_name, + actives + FROM full_table + ORDER BY date, period_number \ No newline at end of file diff --git "a/docs_source/\360\237\224\214 Integrations & Events/scheduled-data-exports.md" "b/docs_source/\360\237\224\214 Integrations & Events/scheduled-data-exports.md" index 04e54289..56a4b75d 100644 --- "a/docs_source/\360\237\224\214 Integrations & Events/scheduled-data-exports.md" +++ "b/docs_source/\360\237\224\214 Integrations & Events/scheduled-data-exports.md" @@ -287,7 +287,12 @@ You can use the following sample queries (written in Postgresql) as starting poi "language": "pgsql", "name": "Revenue", "file": "code_blocks/🔌 Integrations & Events/scheduled-data-exports_3.pgsql" - } + }, + { + "language": "pgsql", + "name": "Subscriber Retention", + "file": "code_blocks/🔌 Integrations & Events/scheduled-data-exports_8.pgsql" + }, ] [/block] From c1a18309dd07770d5f228cf348980ff85add6cc0 Mon Sep 17 00:00:00 2001 From: Muhammad-Sharif Moustafa Date: Mon, 28 Aug 2023 21:59:16 -0400 Subject: [PATCH 2/5] Added more comments and cleaned up the formatting --- .../scheduled-data-exports_8.pgsql" | 104 ++++++++++++++---- 1 file changed, 82 insertions(+), 22 deletions(-) diff --git "a/code_blocks/\360\237\224\214 Integrations & Events/scheduled-data-exports_8.pgsql" "b/code_blocks/\360\237\224\214 Integrations & Events/scheduled-data-exports_8.pgsql" index eb979e4c..2345a1a2 100644 --- "a/code_blocks/\360\237\224\214 Integrations & Events/scheduled-data-exports_8.pgsql" +++ "b/code_blocks/\360\237\224\214 Integrations & Events/scheduled-data-exports_8.pgsql" @@ -1,18 +1,42 @@ --- Subscriber Retention /* -This query generates a table using three sub-tables: +This query generates a table that mirrors the Subscriber Retention chart. It +uses three intermediate tables: + 1. subs - a table grouped by first subscription purchase date 2. retention - a table that counts the number of active subscriptions in each date cohort 3. pending_retention - a table that counts the number of subscriptions that are expected to renew -The three tables are combined in full_table. + +The three tables are combined in full_table to show a table that looks like +this: + ++-------------------------+---------------+-------------+---------+ +| date | period_number | period_name | actives | ++-------------------------+---------------+-------------+---------+ +| 2023-01-01 00:00:00.000 | 1 | Month 1 | 500 | ++-------------------------+---------------+-------------+---------+ +| 2023-01-01 00:00:00.000 | 2 | Month 2 | 600 | ++-------------------------+---------------+-------------+---------+ +| 2023-01-01 00:00:00.000 | 3 | Month 3 | 550 | ++-------------------------+---------------+-------------+---------+ +| 2023-02-01 00:00:00.000 | 1 | Month 1 | 400 | ++-------------------------+---------------+-------------+---------+ +| 2023-02-01 00:00:00.000 | 2 | Month 2 | 300 | ++-------------------------+---------------+-------------+---------+ + +In the query you will see place holders that look like this: + + start_time >= /* desired date range */ + +Fill in each placeholder by following the instructions in each one. + */ WITH -- subs is an intermediate table that maps subscribers to their earliest -- subscription start date. The first subscription start date will be used to - -- group billing cycles by cohort + -- group billing cycles by cohort. subs AS ( SELECT -- app user ID will be used to group billing cycles by user @@ -21,44 +45,74 @@ WITH DATE(MIN(start_time)) AS first_start_time FROM ETL WHERE start_time >= /* desired date range */ - AND end_time IS NOT NULL -- count subscriptions only - AND is_sandbox <> 'true' -- remove any sandbox transactions - AND is_trial_period = 'false' -- remove any trial billing cycles - AND is_in_intro_offer_period = 'false' -- remove any intro offer billing cycles - AND (ownership_type IS NULL OR ownership_type != 'FAMILY_SHARED') -- remove any subscriptions that were acquired through Family Sharing + -- count subscriptions only + AND end_time IS NOT NULL + -- remove any sandbox transactions + AND is_sandbox <> 'true' + -- remove any trial billing cycles + AND is_trial_period = 'false' + -- remove any intro offer billing cycles + AND is_in_intro_offer_period = 'false' + -- remove any subscriptions that were acquired through Family Sharing + AND (ownership_type IS NULL OR ownership_type != 'FAMILY_SHARED') AND product_identifier = /* targeted product(s) */ GROUP BY ETL.app_user_id ), - -- retention is an intermediate table that counts the number of active subscriptions in each cohort and billing cycle + -- retention is an intermediate table that counts the number of active + -- subscriptions in each cohort and billing cycle. retention AS ( SELECT - subs.first_start_time, -- cohorting subscriptions by their first start dates - CAST(ROUND(DATEDIFF(day, subs.first_start_time, start_time) / CAST(30 AS NUMERIC)) AS INTEGER) AS period_number, -- every billing cycle is labeled by its number. 30 is used here to filter for monthly subscriptions. Change this to filter for different product durations, e.g. 365 for yearly. So if a user subscribes in January, unsubscribes so that they don't have an active subscription in February, and resubscribes in March, they would be in the January cohort and counted as retained for Month 2 (skipping February, which is Month 1). - count(1) AS actives -- count the number of active subscriptions in each cohort and billing cycle + -- cohorting subscriptions by their first start dates + subs.first_start_time, + -- every billing cycle is labeled by its number. 30 is used here to filter + -- for monthly subscriptions. Change this to filter for different product + -- durations, e.g. 365 for yearly. So if a user subscribes in January, + -- unsubscribes so that they don't have an active subscription in + -- February, and resubscribes in March, they would be in the January + -- cohort and counted as retained for Month 2 (skipping February, which is + -- Month 1). + CAST(ROUND(DATEDIFF(day, subs.first_start_time, start_time) / CAST(30 AS NUMERIC)) AS INTEGER) AS period_number, + -- count the number of active subscriptions in each cohort and billing + -- cycle + count(1) AS actives FROM ETL INNER JOIN subs ON subs.id = ETL.app_user_id WHERE start_time >= /* desired date range */ - AND end_time IS NOT NULL -- count subscriptions only - AND is_sandbox <> 'true' -- remove any sandbox transactions - AND is_trial_period = 'false' -- remove any trial billing cycles - AND is_in_intro_offer_period = 'false' -- remove any intro offer billing cycles - AND (ownership_type IS NULL OR ownership_type != 'FAMILY_SHARED') -- remove any subscriptions that were acquired through Family Sharing + -- count subscriptions only + AND end_time IS NOT NULL + -- remove any sandbox transactions + AND is_sandbox <> 'true' + -- remove any trial billing cycles + AND is_trial_period = 'false' + -- remove any intro offer billing cycles + AND is_in_intro_offer_period = 'false' + -- remove any subscriptions that were acquired through Family Sharing + AND (ownership_type IS NULL OR ownership_type != 'FAMILY_SHARED') AND product_identifier = /* targeted product(s) */ GROUP BY 1, 2 ), - -- pending_retention is an intermediate table that is identical to retention, with the exception that it counts subscriptions that are expected to renew in the future. Pending retention can change over time as users change their auto-renewal status. It can be interpreted as a measure of future retention. + -- pending_retention is an intermediate table that is identical to retention, + -- with the exception that it counts subscriptions that are expected to renew + -- in the future. Pending retention can change over time as users change their + -- auto-renewal status. It can be interpreted as a measure of future + -- retention. pending_retention AS ( SELECT subs.first_start_time, + -- This billing cycle calculation is similar to the one made in the + -- retention table, with the only difference being it calculates the next + -- billing cycle in the future. CAST(ROUND(DATEDIFF(day, subs.first_start_time, start_time) / CAST(30 AS NUMERIC)) AS INTEGER) + 1 AS period_number, count(1) AS actives FROM ETL INNER JOIN subs ON subs.id = ETL.app_user_id WHERE + -- count only subscriptions that are expected to renew is_auto_renewing = 'true' - AND DATEADD(month, 1, start_time) > GETDATE() -- count only the latest billing cycle of a subscription + -- count only the latest billing cycle of a subscription + AND DATEADD(month, 1, start_time) > GETDATE() AND start_time >= /* desired date range */ AND end_time IS NOT NULL AND is_sandbox <> 'true' @@ -69,8 +123,14 @@ WITH GROUP BY 1, 2 ), - -- full_table combines retention and pending_retention into a single table - -- if you're not interested in pending subscriptions, you can remove pending_retention from this below query + -- full_table combines retention and pending_retention into the final + -- subscriber retention table. If you're not interested in pending + -- subscriptions, you can remove pending_retention from this below query. For + -- example: + -- COALESCE(retention.first_start_time, pending_retention.first_start_time) AS first_start_time, + -- should be changed into: + -- retention.first_start_time AS first_start_time, + -- if pending retention is not needed. full_table AS ( SELECT COALESCE(retention.first_start_time, pending_retention.first_start_time) AS first_start_time, From 69f91fd2c2fe8cc0ebc0018c20ae02c243adb8ce Mon Sep 17 00:00:00 2001 From: Muhammad-Sharif Moustafa Date: Mon, 28 Aug 2023 22:08:55 -0400 Subject: [PATCH 3/5] More formatting --- .../scheduled-data-exports_8.pgsql" | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git "a/code_blocks/\360\237\224\214 Integrations & Events/scheduled-data-exports_8.pgsql" "b/code_blocks/\360\237\224\214 Integrations & Events/scheduled-data-exports_8.pgsql" index 2345a1a2..44b15efb 100644 --- "a/code_blocks/\360\237\224\214 Integrations & Events/scheduled-data-exports_8.pgsql" +++ "b/code_blocks/\360\237\224\214 Integrations & Events/scheduled-data-exports_8.pgsql" @@ -127,9 +127,9 @@ WITH -- subscriber retention table. If you're not interested in pending -- subscriptions, you can remove pending_retention from this below query. For -- example: - -- COALESCE(retention.first_start_time, pending_retention.first_start_time) AS first_start_time, + -- COALESCE(retention.first_start_time, pending_retention.first_start_time) AS first_start_time, -- should be changed into: - -- retention.first_start_time AS first_start_time, + -- retention.first_start_time AS first_start_time, -- if pending retention is not needed. full_table AS ( SELECT From 25554dc5bbe7cdece60d2a82bb83c3c0ebd61b7c Mon Sep 17 00:00:00 2001 From: Muhammad-Sharif Moustafa Date: Tue, 19 Sep 2023 23:52:21 -0400 Subject: [PATCH 4/5] incorporate Dan's feedback --- .../scheduled-data-exports_8.pgsql" | 39 ++++++++----------- 1 file changed, 17 insertions(+), 22 deletions(-) diff --git "a/code_blocks/\360\237\224\214 Integrations & Events/scheduled-data-exports_8.pgsql" "b/code_blocks/\360\237\224\214 Integrations & Events/scheduled-data-exports_8.pgsql" index 44b15efb..2f946810 100644 --- "a/code_blocks/\360\237\224\214 Integrations & Events/scheduled-data-exports_8.pgsql" +++ "b/code_blocks/\360\237\224\214 Integrations & Events/scheduled-data-exports_8.pgsql" @@ -40,10 +40,10 @@ WITH subs AS ( SELECT -- app user ID will be used to group billing cycles by user - ETL.app_user_id AS id, + rc.app_user_id AS id, -- first purchase date will be used to group billing cycles by cohort. DATE(MIN(start_time)) AS first_start_time - FROM ETL + FROM [revenuecat_data_table] rc WHERE start_time >= /* desired date range */ -- count subscriptions only AND end_time IS NOT NULL @@ -54,9 +54,9 @@ WITH -- remove any intro offer billing cycles AND is_in_intro_offer_period = 'false' -- remove any subscriptions that were acquired through Family Sharing - AND (ownership_type IS NULL OR ownership_type != 'FAMILY_SHARED') + AND ownership_type != 'FAMILY_SHARED' AND product_identifier = /* targeted product(s) */ - GROUP BY ETL.app_user_id + GROUP BY rc.app_user_id ), -- retention is an intermediate table that counts the number of active @@ -65,19 +65,16 @@ WITH SELECT -- cohorting subscriptions by their first start dates subs.first_start_time, - -- every billing cycle is labeled by its number. 30 is used here to filter - -- for monthly subscriptions. Change this to filter for different product - -- durations, e.g. 365 for yearly. So if a user subscribes in January, - -- unsubscribes so that they don't have an active subscription in - -- February, and resubscribes in March, they would be in the January - -- cohort and counted as retained for Month 2 (skipping February, which is - -- Month 1). + -- Each period number represents the number of billing cycles the + -- subscriber was active for. Be sure to divide the date difference by the + -- number of days for the product duration being analyzed (e.g. 30 for + -- monthly, 365 for yearly, etc). CAST(ROUND(DATEDIFF(day, subs.first_start_time, start_time) / CAST(30 AS NUMERIC)) AS INTEGER) AS period_number, -- count the number of active subscriptions in each cohort and billing -- cycle count(1) AS actives - FROM ETL - INNER JOIN subs ON subs.id = ETL.app_user_id + FROM rc + INNER JOIN subs ON subs.id = rc.app_user_id WHERE start_time >= /* desired date range */ -- count subscriptions only AND end_time IS NOT NULL @@ -88,16 +85,14 @@ WITH -- remove any intro offer billing cycles AND is_in_intro_offer_period = 'false' -- remove any subscriptions that were acquired through Family Sharing - AND (ownership_type IS NULL OR ownership_type != 'FAMILY_SHARED') + AND ownership_type != 'FAMILY_SHARED' AND product_identifier = /* targeted product(s) */ GROUP BY 1, 2 ), - -- pending_retention is an intermediate table that is identical to retention, - -- with the exception that it counts subscriptions that are expected to renew - -- in the future. Pending retention can change over time as users change their - -- auto-renewal status. It can be interpreted as a measure of future - -- retention. + -- pending_retention is an intermediate table that counts subscriptions that + -- are set to automatically renew in the future, and can therefore be thought + -- of possible future retention. pending_retention AS ( SELECT subs.first_start_time, @@ -106,8 +101,8 @@ WITH -- billing cycle in the future. CAST(ROUND(DATEDIFF(day, subs.first_start_time, start_time) / CAST(30 AS NUMERIC)) AS INTEGER) + 1 AS period_number, count(1) AS actives - FROM ETL - INNER JOIN subs ON subs.id = ETL.app_user_id + FROM [revenuecat_data_table] rc + INNER JOIN subs ON subs.id = rc.app_user_id WHERE -- count only subscriptions that are expected to renew is_auto_renewing = 'true' @@ -118,7 +113,7 @@ WITH AND is_sandbox <> 'true' AND is_trial_period = 'false' AND is_in_intro_offer_period = 'false' - AND (ownership_type IS NULL OR ownership_type != 'FAMILY_SHARED') + AND ownership_type != 'FAMILY_SHARED' AND product_identifier = /* targeted product(s) */ GROUP BY 1, 2 ), From c265230ccd40c1eb077e82ca283dffb93a496c38 Mon Sep 17 00:00:00 2001 From: Cesar de la Vega Date: Wed, 29 Nov 2023 19:01:19 +0100 Subject: [PATCH 5/5] fix rendering --- docs_source/Integrations & Events/scheduled-data-exports.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs_source/Integrations & Events/scheduled-data-exports.md b/docs_source/Integrations & Events/scheduled-data-exports.md index 74426ad0..4fe8dd46 100644 --- a/docs_source/Integrations & Events/scheduled-data-exports.md +++ b/docs_source/Integrations & Events/scheduled-data-exports.md @@ -292,7 +292,7 @@ You can use the following sample queries (written in Postgresql) as starting poi "language": "pgsql", "name": "Subscriber Retention", "file": "code_blocks/🔌 Integrations & Events/scheduled-data-exports_9.pgsql" - }, + } ] [/block]