From db39812f100e87aaf5cad8875bc4669353fbf245 Mon Sep 17 00:00:00 2001 From: Ian Clelland Date: Thu, 4 Feb 2021 08:51:12 -0800 Subject: [PATCH] Expand functionality of reporting API endpoint This adds some new capabilities to the WPT reporting endpoint, which brings it more in line with the other existing endpoints: - Support for CSP L2 report-uri format - Storing and retrieving credentials sent with reports - Can retrieve the number of calls made to the endpoint - Support for hash-based endpoint ids (rather than hard-coded UUIDs) - Callers can specify the wait time before the endpoint responds - Callers can specify a minimum number of reports to return - CORS is configured to reject one calling domain for testing The existing reporting test is updated to use this new collector. Tests in other directories will be updated in follow-up CLs. Bug: 704259 Change-Id: Iec050bbc2db809c8569041d0055dc109a58d729e Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2659377 Reviewed-by: Stephen McGruer Commit-Queue: Ian Clelland Cr-Commit-Position: refs/heads/master@{#850603} --- lint.ignore | 1 + ...solute-endpoint.https.sub.html.sub.headers | 2 +- reporting/resources/README.md | 42 ++++++ reporting/resources/report-helper.js | 2 +- reporting/resources/report.py | 134 ++++++++++++++++-- 5 files changed, 167 insertions(+), 14 deletions(-) create mode 100644 reporting/resources/README.md diff --git a/lint.ignore b/lint.ignore index 72e26ff785294c..c735897fe388c4 100644 --- a/lint.ignore +++ b/lint.ignore @@ -56,6 +56,7 @@ W3C-TEST.ORG: .gitignore W3C-TEST.ORG: README.md W3C-TEST.ORG: */README.md W3C-TEST.ORG: docs/* +WEB-PLATFORM.TEST:*/README.md WEB-PLATFORM.TEST:docs/* CR AT EOL, INDENT TABS:docs/make.bat INDENT TABS:docs/Makefile diff --git a/reporting/path-absolute-endpoint.https.sub.html.sub.headers b/reporting/path-absolute-endpoint.https.sub.html.sub.headers index ec25b289449568..5bd5ae7f0f53a6 100644 --- 
a/reporting/path-absolute-endpoint.https.sub.html.sub.headers +++ b/reporting/path-absolute-endpoint.https.sub.html.sub.headers @@ -1,2 +1,2 @@ -Report-To: { "group": "csp-group", "max_age": 10886400, "endpoints": [{ "url": "/reporting/resources/report.py?id=d0d517bf-891b-457a-b970-8b2b2c81a0bf" }] } +Report-To: { "group": "csp-group", "max_age": 10886400, "endpoints": [{ "url": "/reporting/resources/report.py?reportID=d0d517bf-891b-457a-b970-8b2b2c81a0bf" }] } Content-Security-Policy: script-src 'self' 'unsafe-inline'; img-src 'none'; report-to csp-group diff --git a/reporting/resources/README.md b/reporting/resources/README.md new file mode 100644 index 00000000000000..d816ed0899767e --- /dev/null +++ b/reporting/resources/README.md @@ -0,0 +1,42 @@ +# Using the common report collector + +To send reports to the collector, configure the reporting API to POST reports +to the collector's URL. This can be same- or cross-origin with the reporting +document, as the collector will follow the CORS protocol. + +The collector supports both CSP Level 2 (report-uri) reports as well as +Reporting API reports. + +A GET request can be used to retrieve stored reports for analysis. + +Sent credentials are stored with the reports, and can be retrieved separately. + +CORS Notes: +* Preflight requests originating from www2.web-platform.test will be rejected. + This allows tests to ensure that cross-origin report uploads are not sent when + the endpoint does not support CORS. + +Supported GET parameters: + `op`: For GET requests, a string indicating the operation to perform (see + below for description of supported operations). Defaults to + `retrieve_report`. + `reportID`: A UUID to associate with the reports sent from this document. This + can be used to distinguish between reports from multiple documents, and to + provide multiple distinct endpoints for a single document. Either `reportID` + or `endpoint` must be provided. 
+ `endpoint`: A string which will be used to generate a UUID to be used as the + reportID. Either `reportID` or `endpoint` must be provided. + `timeout`: The amount of time to wait, in seconds, before responding. Defaults + to 0.5s. + `min_count`: The minimum number of reports to return with the `retrieve_report` + operation. If there have been fewer than this many reports received, then an + empty report list will be returned instead. + +Operations: + `retrieve_report`: Returns all reports received so far for this reportID, as a + JSON-formatted list. If no reports have been received, an empty list will be + returned. + `retrieve_cookies`: Returns the cookies sent with the most recent reports for + this reportID, as a JSON-formatted object. + `retrieve_count`: Returns the number of POST requests for reports with this + reportID so far. diff --git a/reporting/resources/report-helper.js b/reporting/resources/report-helper.js index a20a9cd3811599..181d1970b182c8 100644 --- a/reporting/resources/report-helper.js +++ b/reporting/resources/report-helper.js @@ -3,7 +3,7 @@ function wait(ms) { } async function pollReports(endpoint, id) { - const res = await fetch(`${endpoint}?id=${id}`, {cache: 'no-store'}); + const res = await fetch(`${endpoint}?reportID=${id}`, {cache: 'no-store'}); const reports = []; if (res.status === 200) { for (const report of await res.json()) { diff --git a/reporting/resources/report.py b/reporting/resources/report.py index a3a0ee5ddba9de..796a1cb885816f 100644 --- a/reporting/resources/report.py +++ b/reporting/resources/report.py @@ -1,17 +1,127 @@ +import time import json +import re +import uuid + +from wptserve.utils import isomorphic_decode + +def retrieve_from_stash(request, key, timeout, default_value, min_count=None): + """Retrieve the set of reports for a given report ID. + + This will extract either the set of reports, credentials, or request count + from the stash (depending on the key passed in) and return it encoded as JSON. 
+ + When retrieving reports, this will not return any reports until min_count + reports have been received. + + If timeout seconds elapse before the requested data can be found in the stash, + or before at least min_count reports are received, default_value will be + returned instead.""" + t0 = time.time() + while time.time() - t0 < timeout: + time.sleep(0.5) + with request.server.stash.lock: + value = request.server.stash.take(key=key) + if value is not None and (min_count is None or len(value) >= min_count): + request.server.stash.put(key=key, value=value) + # If the last report received looks like a CSP report-uri report, then + # extract it from the list and return it alone. (This is until the CSP + # tests are modified to expect a list of reports returned in all cases.) + if isinstance(value,list) and 'csp-report' in value[-1]: + value = value[-1] + return json.dumps(value) + + return default_value def main(request, response): - key = request.GET.first(b'id') - - # No CORS support for cross-origin reporting endpoints - if request.method == u'POST': - reports = request.server.stash.take(key) or [] - for report in json.loads(request.body): - reports.append(report) - request.server.stash.put(key, reports) - return b'done' + # Handle CORS preflight requests + if request.method == u'OPTIONS': + # Always reject preflights for one subdomain + if b"www2" in request.headers[b"Origin"]: + return (400, [], u"CORS preflight rejected for www2") + return [ + (b"Content-Type", b"text/plain"), + (b"Access-Control-Allow-Origin", b"*"), + (b"Access-Control-Allow-Methods", b"post"), + (b"Access-Control-Allow-Headers", b"Content-Type"), + ], u"CORS allowed" + + if b"reportID" in request.GET: + key = request.GET.first(b"reportID") + elif b"endpoint" in request.GET: + key = uuid.uuid5(uuid.NAMESPACE_OID, isomorphic_decode(request.GET[b'endpoint'])).urn.encode('ascii')[9:] + else: + response.status = 400 + return "Either reportID or endpoint parameter is required." 
+ + # Cookie and count keys are derived from the report ID. + cookie_key = re.sub(b'^....', b'cccc', key) + count_key = re.sub(b'^....', b'dddd', key) + if request.method == u'GET': - return json.dumps(request.server.stash.take(key) or []) + try: + timeout = float(request.GET.first(b"timeout")) + except: + timeout = 0.5 + try: + min_count = int(request.GET.first(b"min_count")) + except: + min_count = 1 + + op = request.GET.first(b"op", b"") + if op in (b"retrieve_report", b""): + return [(b"Content-Type", b"application/json")], retrieve_from_stash(request, key, timeout, u'[]', min_count) + + if op == b"retrieve_cookies": + return [(b"Content-Type", b"application/json")], u"{ \"reportCookies\" : " + str(retrieve_from_stash(request, cookie_key, timeout, u"\"None\"")) + u"}" + + if op == b"retrieve_count": + return [(b"Content-Type", b"application/json")], json.dumps({u'report_count': str(retrieve_from_stash(request, count_key, timeout, 0))}) + + response.status = 400 + return "op parameter value not recognized." + + # Save cookies. + if len(request.cookies.keys()) > 0: + # Convert everything into strings and dump it into a dict. + temp_cookies_dict = {} + for dict_key in request.cookies.keys(): + temp_cookies_dict[isomorphic_decode(dict_key)] = str(request.cookies.get_list(dict_key)) + with request.server.stash.lock: + # Clear any existing cookie data for this request before storing new data. + request.server.stash.take(key=cookie_key) + request.server.stash.put(key=cookie_key, value=temp_cookies_dict) + + # Append new report(s). + new_reports = json.loads(request.body) + + # If the incoming report is a CSP report-uri report, then it will be a single + # dictionary rather than a list of reports. To handle this case, ensure that + # any non-list request bodies are wrapped in a list. 
+ if not isinstance(new_reports, list): + new_reports = [new_reports] + + for report in new_reports: + report[u"metadata"] = { + u"content_type": isomorphic_decode(request.headers[b"Content-Type"]), + } + + with request.server.stash.lock: + reports = request.server.stash.take(key=key) + if reports is None: + reports = [] + reports.extend(new_reports) + request.server.stash.put(key=key, value=reports) + + # Increment report submission count. This tracks the number of times this + # reporting endpoint was contacted, rather than the total number of reports + # submitted, which can be seen from the length of the report list. + with request.server.stash.lock: + count = request.server.stash.take(key=count_key) + if count is None: + count = 0 + count += 1 + request.server.stash.put(key=count_key, value=count) - response.status = 400 - return b'invalid method' + # Return acknowledgement report. + return [(b"Content-Type", b"text/plain")], b"Recorded report " + request.body