diff --git a/app/analytics/__tests__/collect.test.ts b/app/analytics/__tests__/collect.test.ts index 2bb47d9..b89170c 100644 --- a/app/analytics/__tests__/collect.test.ts +++ b/app/analytics/__tests__/collect.test.ts @@ -69,7 +69,8 @@ describe("collectRequestHandler", () => { ], doubles: [ 1, // new visitor - 1, // new session + 0, // DEAD COLUMN (was session) + 1, // new visit, so bounce ], indexes: [ "example", // site id is index @@ -94,7 +95,8 @@ describe("collectRequestHandler", () => { "doubles", [ 1, // new visitor - 1, // new session + 0, // DEAD COLUMN (was session) + 1, // new visit, so bounce ], ); }); @@ -122,7 +124,8 @@ describe("collectRequestHandler", () => { "doubles", [ 0, // NOT a new visitor - 0, // NOT a new session + 0, // DEAD COLUMN (was session) + 0, // NOT first or second visit ], ); }); @@ -155,8 +158,8 @@ describe("collectRequestHandler", () => { "doubles", [ 1, // new visitor because a new day began - 0, // NOT a new session because continuation of earlier session (< 30 mins) - // (session logic doesn't care if a new day began or not) + 0, // DEAD COLUMN (was session) + 1, // new visitor so bounce counted ], ); }); @@ -184,7 +187,8 @@ describe("collectRequestHandler", () => { "doubles", [ 1, // new visitor because > 30 days passed - 1, // new session because > 30 minutes passed + 0, // DEAD COLUMN (was session) + 1, // new visitor so bounce ], ); }); @@ -212,7 +216,84 @@ describe("collectRequestHandler", () => { "doubles", [ 1, // new visitor because > 24 hours passed - 1, // new session because > 30 minutes passed + 0, // DEAD COLUMN (was session) + 1, // new visitor so bounce + ], + ); + }); + + test("if-modified-since is one second after midnight", () => { + const env = { + WEB_COUNTER_AE: { + writeDataPoint: vi.fn(), + } as AnalyticsEngineDataset, + } as Env; + + const midnight = new Date(); + midnight.setHours(0, 0, 0, 0); + + vi.setSystemTime(midnight.getTime()); + + const midnightPlusOneSecond = new Date(midnight.getTime()); + 
midnightPlusOneSecond.setSeconds( + midnightPlusOneSecond.getSeconds() + 1, + ); + + const request = httpMocks.createRequest( + // @ts-expect-error - we're mocking the request object + generateRequestParams({ + "if-modified-since": midnightPlusOneSecond.toUTCString(), + }), + ); + + collectRequestHandler(request as any, env); + + const writeDataPoint = env.WEB_COUNTER_AE.writeDataPoint; + expect((writeDataPoint as Mock).mock.calls[0][0]).toHaveProperty( + "doubles", + [ + 0, // NOT a new visitor + 0, // DEAD COLUMN (was session) + -1, // First visit after the initial visit so decrement bounce + ], + ); + }); + + test("if-modified-since is two seconds after midnight", () => { + const env = { + WEB_COUNTER_AE: { + writeDataPoint: vi.fn(), + } as AnalyticsEngineDataset, + } as Env; + + const midnightPlusOneSecond = new Date(); + midnightPlusOneSecond.setHours(0, 0, 1, 0); + + vi.setSystemTime(midnightPlusOneSecond.getTime()); + + const midnightPlusTwoSeconds = new Date( + midnightPlusOneSecond.getTime(), + ); + midnightPlusTwoSeconds.setSeconds( + midnightPlusTwoSeconds.getSeconds() + 1, + ); + + const request = httpMocks.createRequest( + // @ts-expect-error - we're mocking the request object + generateRequestParams({ + "if-modified-since": midnightPlusTwoSeconds.toUTCString(), + }), + ); + + collectRequestHandler(request as any, env); + + const writeDataPoint = env.WEB_COUNTER_AE.writeDataPoint; + expect((writeDataPoint as Mock).mock.calls[0][0]).toHaveProperty( + "doubles", + [ + 0, // NOT a new visitor + 0, // DEAD COLUMN (was session) + 0, // After the second visit so no bounce ], ); }); diff --git a/app/analytics/__tests__/query.test.ts b/app/analytics/__tests__/query.test.ts index fc66eea..ade4cc5 100644 --- a/app/analytics/__tests__/query.test.ts +++ b/app/analytics/__tests__/query.test.ts @@ -187,24 +187,24 @@ describe("AnalyticsEngineAPI", () => { }); describe("getCounts", () => { - test("should return an object with view, visit, and visitor counts", async () 
=> { + test("should return an object with view, visitor, and bounce counts", async () => { fetch.mockResolvedValue( createFetchResponse({ data: [ { count: 3, - isVisit: 1, isVisitor: 0, + isBounce: 1, }, { count: 2, - isVisit: 0, isVisitor: 0, + isBounce: 0, }, { count: 1, - isVisit: 0, isVisitor: 1, + isBounce: -1, }, ], }), @@ -216,8 +216,8 @@ describe("AnalyticsEngineAPI", () => { expect(fetch).toHaveBeenCalled(); expect(await result).toEqual({ views: 6, - visits: 3, visitors: 1, + bounces: 2, }); }); }); @@ -324,21 +324,63 @@ describe("AnalyticsEngineAPI", () => { ).toEqual( "SELECT blob4, " + "double1 as isVisitor, " + - "double2 as isVisit, " + + "double3 as isBounce, " + "SUM(_sample_interval) as count " + "FROM metricsDataset WHERE timestamp >= NOW() - INTERVAL '7' DAY AND timestamp < NOW() AND blob8 = 'example.com' AND blob4 = 'CA' " + - "GROUP BY blob4, double1, double2 " + + "GROUP BY blob4, double1, double3 " + "ORDER BY count DESC LIMIT 10", ); expect(await result).toEqual({ CA: { views: 3, visitors: 0, - visits: 0, + bounces: 0, }, }); }); }); + + describe("getEarliestEvents", () => { + test("returns both earliest event and bounce dates when found", async () => { + const mockEventTimestamp = "2024-01-01T10:00:00Z"; + const mockBounceTimestamp = "2024-01-01T12:00:00Z"; + + // Mock responses for both queries + fetch.mockResolvedValueOnce( + createFetchResponse({ + ok: true, + data: [ + { earliestEvent: mockBounceTimestamp, isBounce: 1 }, + { earliestEvent: mockEventTimestamp, isBounce: 0 }, + ], + }), + ); + + const result = await api.getEarliestEvents("test-site"); + expect(result).toEqual({ + earliestEvent: new Date(mockEventTimestamp), + earliestBounce: new Date(mockBounceTimestamp), + }); + }); + + test("returns only earliest event when no bounces found", async () => { + const mockEventTimestamp = "2024-01-01T10:00:00Z"; + + // Mock responses for both queries + fetch.mockResolvedValueOnce( + createFetchResponse({ + ok: true, + data: [{ 
earliestEvent: mockEventTimestamp, isBounce: 0 }], + }), + ); + + const result = await api.getEarliestEvents("test-site"); + expect(result).toEqual({ + earliestEvent: new Date(mockEventTimestamp), + earliestBounce: null, + }); + }); + }); }); describe("intervalToSql", () => { diff --git a/app/analytics/collect.ts b/app/analytics/collect.ts index 7682997..82df4ca 100644 --- a/app/analytics/collect.ts +++ b/app/analytics/collect.ts @@ -5,14 +5,55 @@ import type { RequestInit } from "@cloudflare/workers-types"; // Cookieless visitor/session tracking // Uses the approach described here: https://notes.normally.com/cookieless-unique-visitor-counts/ +function getMidnightDate(): Date { + const midnight = new Date(); + midnight.setHours(0, 0, 0, 0); + return midnight; +} + +function getNextLastModifiedDate(current: Date | null): Date { + // in case date is an 'Invalid Date' + if (current && isNaN(current.getTime())) { + current = null; + } + + const midnight = getMidnightDate(); + + // check if new day, if it is then set to midnight + let next = current ? current : midnight; + next = midnight.getTime() - next.getTime() > 0 ? 
midnight : next; + + // increment counter + next.setSeconds(next.getSeconds() + 1); + return next; +} + +function getBounceValue(nextLastModifiedDate: Date | null): number { + if (!nextLastModifiedDate) { + return 0; + } + + const midnight = getMidnightDate(); + + // NOTE: minus one because this is the response last modified date + const visits = + (nextLastModifiedDate.getTime() - midnight.getTime()) / 1000 - 1; + + switch (visits) { + case 0: + return 1; + case 1: + return -1; + default: + return 0; + } +} + function checkVisitorSession(ifModifiedSince: string | null): { newVisitor: boolean; - newSession: boolean; } { let newVisitor = true; - let newSession = true; - const minutesUntilSessionResets = 30; if (ifModifiedSince) { // check today is a new day vs ifModifiedSince const today = new Date(); @@ -25,18 +66,9 @@ function checkVisitorSession(ifModifiedSince: string | null): { // if ifModifiedSince is today, this is not a new visitor newVisitor = false; } - - // check ifModifiedSince is less than 30 mins ago - if ( - Date.now() - new Date(ifModifiedSince).getTime() < - minutesUntilSessionResets * 60 * 1000 - ) { - // this is a continuation of the same session - newSession = false; - } } - return { newVisitor, newSession }; + return { newVisitor }; } function extractParamsFromQueryString(requestUrl: string): { @@ -62,8 +94,10 @@ export function collectRequestHandler(request: Request, env: Env) { parsedUserAgent.getBrowser().name; - const { newVisitor, newSession } = checkVisitorSession( - request.headers.get("if-modified-since"), + const ifModifiedSince = request.headers.get("if-modified-since"); + const { newVisitor } = checkVisitorSession(ifModifiedSince); + const nextLastModifiedDate = getNextLastModifiedDate( + ifModifiedSince ? new Date(ifModifiedSince) : null, ); const data: DataPoint = { @@ -72,7 +106,8 @@ export function collectRequestHandler(request: Request, env: Env) { path: params.p, referrer: params.r, newVisitor: newVisitor ? 
1 : 0, - newSession: newSession ? 1 : 0, + newSession: 0, // dead column + bounce: newVisitor ? 1 : getBounceValue(nextLastModifiedDate), // user agent stuff userAgent: userAgent, browserName: parsedUserAgent.getBrowser().name, @@ -104,7 +139,7 @@ export function collectRequestHandler(request: Request, env: Env) { Expires: "Mon, 01 Jan 1990 00:00:00 GMT", "Cache-Control": "no-cache", Pragma: "no-cache", - "Last-Modified": new Date().toUTCString(), + "Last-Modified": nextLastModifiedDate.toUTCString(), Tk: "N", // not tracking }, status: 200, @@ -127,6 +162,7 @@ interface DataPoint { // doubles newVisitor: number; newSession: number; + bounce: number; } // NOTE: Cloudflare Analytics Engine has limits on total number of bytes, number of fields, etc. @@ -148,7 +184,7 @@ export function writeDataPoint( data.deviceModel || "", // blob7 data.siteId || "", // blob8 ], - doubles: [data.newVisitor || 0, data.newSession || 0], + doubles: [data.newVisitor || 0, data.newSession || 0, data.bounce], }; if (!analyticsEngine) { diff --git a/app/analytics/query.ts b/app/analytics/query.ts index 9e8079b..51ecd4c 100644 --- a/app/analytics/query.ts +++ b/app/analytics/query.ts @@ -19,12 +19,12 @@ interface AnalyticsQueryResult< interface AnalyticsCountResult { views: number; - visits: number; visitors: number; + bounces: number; } /** Given an AnalyticsCountResult object, and an object representing a row returned from - * CF Analytics Engine w/ counts grouped by isVisitor and isVisit, accumulate view, + * CF Analytics Engine w/ counts grouped by isVisitor, accumulate view, * visit, and visitor counts. 
*/ function accumulateCountsFromRowResult( @@ -32,15 +32,16 @@ function accumulateCountsFromRowResult( row: { count: number; isVisitor: number; - isVisit: number; + isBounce: number; }, ) { - if (row.isVisit == 1) { - counts.visits += Number(row.count); - } if (row.isVisitor == 1) { counts.visitors += Number(row.count); } + if (row.isBounce && row.isBounce != 0) { + // bounce is either 1 or -1 + counts.bounces += Number(row.count) * row.isBounce; + } counts.views += Number(row.count); } @@ -208,9 +209,8 @@ export class AnalyticsEngineAPI { const filterStr = filtersToSql(filters); - // NOTE: when using toStartOfInterval, cannot group by other columns - // like double1 (isVisitor) or double2 (isSession/isVisit). This - // is just a limitation of Cloudflare Analytics Engine. + // NOTE: when using toStartOfInterval, cannot group by other columns like double1 (isVisitor). + // This is just a limitation of Cloudflare Analytics Engine. // -- but you can filter on them (using WHERE) // NOTE 2: Since CF AE doesn't support COALESCE, this query will not return @@ -230,7 +230,7 @@ export class AnalyticsEngineAPI { /* output as UTC */ toDateTime(_bucket, 'Etc/UTC') as bucket FROM metricsDataset - WHERE timestamp >= toDateTime('${localStartTime.format("YYYY-MM-DD HH:mm:ss")}') + WHERE timestamp >= toDateTime('${localStartTime.format("YYYY-MM-DD HH:mm:ss")}') AND timestamp < toDateTime('${localEndTime.format("YYYY-MM-DD HH:mm:ss")}') AND ${ColumnMappings.siteId} = '${siteId}' ${filterStr} @@ -303,18 +303,18 @@ export class AnalyticsEngineAPI { const query = ` SELECT SUM(_sample_interval) as count, ${ColumnMappings.newVisitor} as isVisitor, - ${ColumnMappings.newSession} as isVisit + ${ColumnMappings.bounce} as isBounce FROM metricsDataset WHERE timestamp >= ${startIntervalSql} AND timestamp < ${endIntervalSql} ${filterStr} AND ${siteIdColumn} = '${siteId}' - GROUP BY isVisitor, isVisit - ORDER BY isVisitor, isVisit ASC`; + GROUP BY isVisitor, isBounce + ORDER BY isVisitor, 
isBounce ASC`; type SelectionSet = { count: number; isVisitor: number; - isVisit: number; + isBounce: number; }; const queryResult = this.query(query); @@ -334,7 +334,7 @@ export class AnalyticsEngineAPI { const counts: AnalyticsCountResult = { views: 0, visitors: 0, - visits: 0, + bounces: 0, }; // NOTE: note it's possible to get no results, or half results (i.e. a row where isVisit=1 but @@ -436,20 +436,20 @@ export class AnalyticsEngineAPI { const query = ` SELECT ${_column}, ${ColumnMappings.newVisitor} as isVisitor, - ${ColumnMappings.newSession} as isVisit, + ${ColumnMappings.bounce} as isBounce, SUM(_sample_interval) as count FROM metricsDataset WHERE timestamp >= ${startIntervalSql} AND timestamp < ${endIntervalSql} AND ${ColumnMappings.siteId} = '${siteId}' ${filterStr} - GROUP BY ${_column}, ${ColumnMappings.newVisitor}, ${ColumnMappings.newSession} + GROUP BY ${_column}, ${ColumnMappings.newVisitor}, ${ColumnMappings.bounce} ORDER BY count DESC LIMIT ${limit * page}`; type SelectionSet = { readonly count: number; readonly isVisitor: number; - readonly isVisit: number; + readonly isBounce: number; } & Record< (typeof ColumnMappings)[T], ColumnMappingToType<(typeof ColumnMappings)[T]> @@ -482,7 +482,7 @@ export class AnalyticsEngineAPI { acc[key] = { views: 0, visitors: 0, - visits: 0, + bounces: 0, } as AnalyticsCountResult; } @@ -650,4 +650,61 @@ export class AnalyticsEngineAPI { ); return returnPromise; } + + async getEarliestEvents(siteId: string): Promise<{ + earliestEvent: Date | null; + earliestBounce: Date | null; + }> { + const query = ` + SELECT + MIN(timestamp) as earliestEvent, + ${ColumnMappings.bounce} as isBounce + FROM metricsDataset + WHERE ${ColumnMappings.siteId} = '${siteId}' + GROUP by isBounce + `; + + type SelectionSet = { + earliestEvent: string; + isBounce: number; + }; + const queryResult = this.query(query); + const returnPromise = new Promise<{ + earliestEvent: Date | null; + earliestBounce: Date | null; + }>((resolve, reject) 
=> { + (async () => { + const response = await queryResult; + + if (!response.ok) { + reject(response.statusText); + return; + } + + const responseData = + (await response.json()) as AnalyticsQueryResult; + + const data = responseData.data; + + const earliestEvent = data.find( + (row) => row["isBounce"] === 0, + )?.earliestEvent; + + const earliestBounce = data.find( + (row) => row["isBounce"] === 1, + )?.earliestEvent; + + resolve({ + earliestEvent: earliestEvent + ? new Date(earliestEvent) + : null, + earliestBounce: earliestBounce + ? new Date(earliestBounce) + : null, + }); + })(); + }); + + return returnPromise; + } } diff --git a/app/analytics/schema.ts b/app/analytics/schema.ts index 4beb097..a943d08 100644 --- a/app/analytics/schema.ts +++ b/app/analytics/schema.ts @@ -32,4 +32,7 @@ export const ColumnMappings = { // this record is a new session (resets after 30m inactivity) newSession: "double2", + + // this record is the bounce value + bounce: "double3", } as const; diff --git a/app/routes/__tests__/dashboard.test.tsx b/app/routes/__tests__/dashboard.test.tsx index 4ba95e8..238ffd1 100644 --- a/app/routes/__tests__/dashboard.test.tsx +++ b/app/routes/__tests__/dashboard.test.tsx @@ -217,7 +217,6 @@ describe("Dashboard route", () => { loader: () => { return json({ views: 0, - visits: 0, visitors: 0, }); }, @@ -270,7 +269,6 @@ describe("Dashboard route", () => { siteId: "example", sites: ["example"], views: 2133, - visits: 80, visitors: 33, viewsGroupedByInterval: [ ["2024-01-11 05:00:00", 0], @@ -304,7 +302,6 @@ describe("Dashboard route", () => { loader: () => { return json({ views: 2133, - visits: 80, visitors: 33, }); }, diff --git a/app/routes/__tests__/resources.stats.test.tsx b/app/routes/__tests__/resources.stats.test.tsx index 767ec63..f0629e3 100644 --- a/app/routes/__tests__/resources.stats.test.tsx +++ b/app/routes/__tests__/resources.stats.test.tsx @@ -1,17 +1,33 @@ -import { describe, test, expect, vi } from "vitest"; +import { describe, test, 
expect, beforeEach, afterEach, vi } from "vitest"; import { loader } from "../resources.stats"; describe("resources.stats loader", () => { - test("returns formatted stats from analytics engine", async () => { - const mockGetCounts = vi.fn().mockResolvedValue({ + let mockGetCounts: any; + beforeEach(() => { + mockGetCounts = vi.fn().mockResolvedValue({ views: 1000, - visits: 500, visitors: 250, + bounces: 125, + }); + }); + + afterEach(() => { + vi.resetAllMocks(); + }); + + test("returns formatted stats from analytics engine", async () => { + vi.setSystemTime(new Date("2023-01-01T06:00:00").getTime()); + + const mockGetEarliestEvents = vi.fn().mockResolvedValue({ + // earliest event and earliest bounce are the same + earliestEvent: new Date("2023-01-01T00:00:00Z"), + earliestBounce: new Date("2023-01-01T00:00:00Z"), }); const context = { analyticsEngine: { getCounts: mockGetCounts, + getEarliestEvents: mockGetEarliestEvents, }, }; @@ -31,8 +47,73 @@ describe("resources.stats loader", () => { expect(data).toEqual({ views: 1000, - visits: 500, visitors: 250, + bounceRate: 0.5, + hasSufficientBounceData: true, + }); + }); + + test("if bounce data isn't complete for the given interval, hasSufficientBounceData is false", async () => { + // set system time as jan 8th + vi.setSystemTime(new Date("2023-01-08T00:00:00").getTime()); + + const mockGetEarliestEvents = vi.fn().mockResolvedValue({ + earliestEvent: new Date("2023-01-01T00:00:00Z"), + earliestBounce: new Date("2023-01-04T00:00:00Z"), // Jan 4 + }); + + const context = { + analyticsEngine: { + getCounts: mockGetCounts, + getEarliestEvents: mockGetEarliestEvents, + }, + }; + + const request = new Request( + // 7 day interval (specified in query string) + "https://example.com/resources/stats?site=test-site&interval=7d&timezone=UTC", + ); + + const response = await loader({ context, request } as any); + const data = await response.json(); + + expect(data).toEqual({ + views: 1000, + visitors: 250, + bounceRate: 0.5, + 
hasSufficientBounceData: false, + }); + }); + + test("if bounce data *IS* complete for the given interval, show it", async () => { + // set system time as jan 8th + vi.setSystemTime(new Date("2023-01-08T00:00:00").getTime()); + + const mockGetEarliestEvents = vi.fn().mockResolvedValue({ + earliestEvent: new Date("2023-01-01T00:00:00Z"), + earliestBounce: new Date("2023-01-04T00:00:00Z"), // Jan 4 -- well before Jan 8th minus 1 day interval + }); + + const context = { + analyticsEngine: { + getCounts: mockGetCounts, + getEarliestEvents: mockGetEarliestEvents, + }, + }; + + const request = new Request( + // 1 day interval (specified in query string) + "https://example.com/resources/stats?site=test-site&interval=1d&timezone=UTC", + ); + + const response = await loader({ context, request } as any); + const data = await response.json(); + + expect(data).toEqual({ + views: 1000, + visitors: 250, + bounceRate: 0.5, + hasSufficientBounceData: true, }); }); }); diff --git a/app/routes/resources.stats.tsx b/app/routes/resources.stats.tsx index 39492ba..2891260 100644 --- a/app/routes/resources.stats.tsx +++ b/app/routes/resources.stats.tsx @@ -1,6 +1,10 @@ import type { LoaderFunctionArgs } from "@remix-run/cloudflare"; import { json } from "@remix-run/cloudflare"; -import { getFiltersFromSearchParams, paramsFromUrl } from "~/lib/utils"; +import { + getDateTimeRange, + getFiltersFromSearchParams, + paramsFromUrl, +} from "~/lib/utils"; import { useEffect } from "react"; import { useFetcher } from "@remix-run/react"; import { Card } from "~/components/ui/card"; @@ -13,12 +17,39 @@ export async function loader({ context, request }: LoaderFunctionArgs) { const tz = url.searchParams.get("timezone") || "UTC"; const filters = getFiltersFromSearchParams(url.searchParams); + // intentionally parallelize queries by deferring await + const earliestEvents = analyticsEngine.getEarliestEvents(site); const counts = await analyticsEngine.getCounts(site, interval, tz, filters); + const { 
earliestEvent, earliestBounce } = await earliestEvents; + const { startDate } = getDateTimeRange(interval, tz); + + // FOR BACKWARDS COMPAT, ONLY SHOW BOUNCE RATE IF WE HAVE DATA FOR THE ENTIRE QUERY PERIOD + // ----------------------------------------------------------------------------- + // Bounce rate is a later-introduced metric that may not have been recorded for + // the full duration of the queried Counterscale dataset (not possible to backfill + // data we don't have!) + + // So, cannot reliably show "bounce rate" if bounce data was unavailable for a portion + // of the query period. + + // To figure out if we can give an answer or not, we inspect the earliest bounce/earliest event + // data recorded, and determine if our dataset is "complete" for the given query interval. + + const hasSufficientBounceData = + earliestBounce !== null && + earliestEvent !== null && + (earliestEvent.getTime() == earliestBounce.getTime() || // earliest event recorded a bounce -- any query is fine + earliestBounce < startDate); // earliest bounce occurred before start of query period -- this query is fine + + const bounceRate = + counts.visitors > 0 ? counts.bounces / counts.visitors : undefined; + return json({ views: counts.views, - visits: counts.visits, visitors: counts.visitors, + bounceRate: bounceRate, + hasSufficientBounceData, }); } @@ -35,7 +66,8 @@ export const StatsCard = ({ }) => { const dataFetcher = useFetcher(); - const { views, visits, visitors } = dataFetcher.data || {}; + const { views, visitors, bounceRate, hasSufficientBounceData } = + dataFetcher.data || {}; const countFormatter = Intl.NumberFormat("en", { notation: "compact" }); useEffect(() => { @@ -64,18 +96,24 @@ export const StatsCard = ({ {views ? countFormatter.format(views) : "-"} -
-
Visits
-
- {visits ? countFormatter.format(visits) : "-"} -
-
Visitors
{visitors ? countFormatter.format(visitors) : "-"}
+
+
Bounce Rate
+ {hasSufficientBounceData ? ( +
+ {bounceRate !== undefined + ? `${Math.round(bounceRate * 100)}%` + : "-"} +
+ ) : ( +
n/a
+ )} +