Skip to content

Commit

Permalink
fix after migration
Browse files (browse the repository at this point in the history)
  • Loading branch information
max-ostapenko committed Nov 21, 2024
1 parent 85eb576 commit a74d79f
Show file tree
Hide file tree
Showing 4 changed files with 118 additions and 119 deletions.
4 changes: 2 additions & 2 deletions definitions/output/all/parsed_css.js
Original file line number | Diff line number | Diff line change
Expand Up @@ -8,9 +8,9 @@ publish('parsed_css', {
requirePartitionFilter: true
},
tags: ['crawl_results_legacy']
}).preOps(ctx => `
}).query(ctx => `
DROP SNAPSHOT TABLE IF EXISTS ${ctx.self()};
CREATE SNAPSHOT TABLE ${ctx.self()}
CLONE ${ctx.ref('crawl', 'parsed_css')};
CLONE ${ctx.ref('crawl', 'parsed_css')}
`)
94 changes: 40 additions & 54 deletions definitions/output/crawl/pages.js
Original file line number | Diff line number | Diff line change
Expand Up @@ -7,62 +7,48 @@ publish('pages', {
clusterBy: ['client', 'is_root_page', 'rank', 'page'],
requirePartitionFilter: true
},
columns: {
date: 'YYYY-MM-DD format of the HTTP Archive monthly crawl',
client: 'Test environment: desktop or mobile',
page: 'The URL of the page being tested',
is_root_page: 'Whether the page is the root of the origin',
root_page: 'The URL of the root page being tested, the origin followed by /',
rank: 'Site popularity rank, from CrUX',
wptid: 'ID of the WebPageTest results',
payload: 'JSON-encoded WebPageTest results for the page',
summary: 'JSON-encoded summarization of the page-level data',
custom_metrics: {
description: 'Custom metrics from WebPageTest',
columns: {
a11y: 'JSON-encoded A11Y metrics',
cms: 'JSON-encoded CMS detection',
cookies: 'JSON-encoded cookie metrics',
css_variables: 'JSON-encoded CSS variable metrics',
ecommerce: 'JSON-encoded ecommerce metrics',
element_count: 'JSON-encoded element count metrics',
javascript: 'JSON-encoded JavaScript metrics',
markup: 'JSON-encoded markup metrics',
media: 'JSON-encoded media metrics',
origin_trials: 'JSON-encoded origin trial metrics',
performance: 'JSON-encoded performance metrics',
privacy: 'JSON-encoded privacy metrics',
responsive_images: 'JSON-encoded responsive image metrics',
robots_txt: 'JSON-encoded robots.txt metrics',
security: 'JSON-encoded security metrics',
structured_data: 'JSON-encoded structured data metrics',
third_parties: 'JSON-encoded third-party metrics',
well_known: 'JSON-encoded well-known metrics',
wpt_bodies: 'JSON-encoded WebPageTest bodies',
other: 'JSON-encoded other custom metrics'
}
},
lighthouse: 'JSON-encoded Lighthouse report',
features: 'Blink features detected at runtime (see https://chromestatus.com/features)',
technologies: 'Technologies detected at runtime (see https://www.wappalyzer.com/)',
metadata: 'Additional metadata about the test'
},
tags: ['crawl_complete']
}).preOps(ctx => `
CREATE SCHEMA IF NOT EXISTS crawl;
CREATE TABLE IF NOT EXISTS ${ctx.self()}
(
date DATE NOT NULL OPTIONS(description='YYYY-MM-DD format of the HTTP Archive monthly crawl'),
client STRING NOT NULL OPTIONS(description='Test environment: desktop or mobile'),
page STRING NOT NULL OPTIONS(description='The URL of the page being tested'),
is_root_page BOOL NOT NULL OPTIONS(description='Whether the page is the root of the origin'),
root_page STRING NOT NULL OPTIONS(description='The URL of the root page being tested, the origin followed by /'),
rank INT64 OPTIONS(description='Site popularity rank, from CrUX'),
wptid STRING OPTIONS(description='ID of the WebPageTest results'),
payload JSON OPTIONS(description='JSON-encoded WebPageTest results for the page'),
summary JSON OPTIONS(description='JSON-encoded summarization of the page-level data'),
custom_metrics STRUCT<
a11y JSON,
cms JSON,
cookies JSON,
css_variables JSON,
ecommerce JSON,
element_count JSON,
javascript JSON,
markup JSON,
media JSON,
origin_trials JSON,
performance JSON,
privacy JSON,
responsive_images JSON,
robots_txt JSON,
security JSON,
structured_data JSON,
third_parties JSON,
well_known JSON,
wpt_bodies JSON,
other JSON
> OPTIONS(description='Custom metrics from WebPageTest'),
lighthouse JSON OPTIONS(description='JSON-encoded Lighthouse report'),
features ARRAY<STRUCT<
feature STRING OPTIONS(description='Blink feature name'),
id STRING OPTIONS(description='Blink feature ID'),
type STRING OPTIONS(description='Blink feature type (css, default)')
>> OPTIONS(description='Blink features detected at runtime (see https://chromestatus.com/features)'),
technologies ARRAY<STRUCT<
technology STRING OPTIONS(description='Name of the detected technology'),
categories ARRAY<STRING> OPTIONS(description='List of categories to which this technology belongs'),
info ARRAY<STRING> OPTIONS(description='Additional metadata about the detected technology, ie version number')
>> OPTIONS(description='Technologies detected at runtime (see https://www.wappalyzer.com/)'),
metadata JSON OPTIONS(description='Additional metadata about the test')
)
PARTITION BY date
CLUSTER BY client, is_root_page, rank, page
OPTIONS(
require_partition_filter=true
);
DELETE FROM ${ctx.self()}
WHERE date = '${constants.currentMonth}' AND
client = 'desktop';
Expand Down
85 changes: 49 additions & 36 deletions definitions/output/crawl/requests.js
Original file line number | Diff line number | Diff line change
Expand Up @@ -7,40 +7,37 @@ publish('requests', {
clusterBy: ['client', 'is_root_page', 'type', 'rank'],
requirePartitionFilter: true
},
columns: {
date: 'YYYY-MM-DD format of the HTTP Archive monthly crawl',
client: 'Test environment: desktop or mobile',
page: 'The URL of the page being tested',
is_root_page: 'Whether the page is the root of the origin.',
root_page: 'The URL of the root page being tested',
rank: 'Site popularity rank, from CrUX',
url: 'The URL of the request',
is_main_document: 'Whether this request corresponds with the main HTML document of the page, which is the first HTML request after redirects',
type: 'Simplified description of the type of resource (script, html, css, text, other, etc)',
index: 'The sequential 0-based index of the request',
payload: 'JSON-encoded WebPageTest result data for this request',
summary: 'JSON-encoded summarization of request data',
request_headers: {
description: 'Request headers',
columns: {
name: 'Request header name',
value: 'Request header value'
}
},
response_headers: {
description: 'Response headers',
columns: {
name: 'Response header name',
value: 'Response header value'
}
},
response_body: 'Text-based response body'
},
tags: ['crawl_complete']
}).preOps(ctx => `
CREATE SCHEMA IF NOT EXISTS crawl;
CREATE TABLE IF NOT EXISTS ${ctx.self()}
(
date DATE NOT NULL OPTIONS(description='YYYY-MM-DD format of the HTTP Archive monthly crawl'),
client STRING NOT NULL OPTIONS(description='Test environment: desktop or mobile'),
page STRING NOT NULL OPTIONS(description='The URL of the page being tested'),
is_root_page BOOL OPTIONS(description='Whether the page is the root of the origin.'),
root_page STRING NOT NULL OPTIONS(description='The URL of the root page being tested'),
rank INT64 OPTIONS(description='Site popularity rank, from CrUX'),
url STRING NOT NULL OPTIONS(description='The URL of the request'),
is_main_document BOOL NOT NULL OPTIONS(description='Whether this request corresponds with the main HTML document of the page, which is the first HTML request after redirects'),
type STRING OPTIONS(description='Simplified description of the type of resource (script, html, css, text, other, etc)'),
index INT64 OPTIONS(description='The sequential 0-based index of the request'),
payload JSON OPTIONS(description='JSON-encoded WebPageTest result data for this request'),
summary JSON OPTIONS(description='JSON-encoded summarization of request data'),
request_headers ARRAY<STRUCT<
name STRING OPTIONS(description='Request header name'),
value STRING OPTIONS(description='Request header value')
>> OPTIONS(description='Request headers'),
response_headers ARRAY<STRUCT<
name STRING OPTIONS(description='Response header name'),
value STRING OPTIONS(description='Response header value')
>> OPTIONS(description='Response headers'),
response_body STRING OPTIONS(description='Text-based response body')
)
PARTITION BY date
CLUSTER BY client, is_root_page, type, rank
OPTIONS(
require_partition_filter=true
);
CREATE TEMP FUNCTION pruneHeaders(
jsonObject JSON
) RETURNS JSON
Expand Down Expand Up @@ -105,8 +102,16 @@ FROM (
FROM ${ctx.ref('crawl_staging', 'requests')}
WHERE date = '${constants.currentMonth}'
AND client = 'desktop'
${constants.devTABLESAMPLE}
)
${constants.devRankFilter}
) AS requests
LEFT JOIN (
SELECT DISTINCT
CONCAT(origin, '/') AS page,
experimental.popularity.rank AS rank
FROM ${ctx.resolve('chrome-ux-report', 'experimental', 'global')}
WHERE yyyymm = ${constants.fnPastMonth(constants.currentMonth).substring(0, 7).replace('-', '')}
) AS crux
ON requests.root_page = crux.page
`).postOps(ctx => `
DELETE FROM ${ctx.self()}
WHERE date = '${constants.currentMonth}' AND
Expand Down Expand Up @@ -157,6 +162,14 @@ FROM (
FROM ${ctx.ref('crawl_staging', 'requests')}
WHERE date = '${constants.currentMonth}'
AND client = 'mobile'
${constants.devTABLESAMPLE}
)
${constants.devRankFilter}
) AS requests
LEFT JOIN (
SELECT DISTINCT
CONCAT(origin, '/') AS page,
experimental.popularity.rank AS rank
FROM ${ctx.resolve('chrome-ux-report', 'experimental', 'global')}
WHERE yyyymm = ${constants.fnPastMonth(constants.currentMonth).substring(0, 7).replace('-', '')}
) AS crux
ON requests.root_page = crux.page;
`)
54 changes: 27 additions & 27 deletions infra/tf/.terraform.lock.hcl

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit a74d79f

Please sign in to comment.