diff --git a/ReadMe.md b/ReadMe.md index d7834b0..45cfe26 100644 --- a/ReadMe.md +++ b/ReadMe.md @@ -34,6 +34,55 @@ Run the server with `uvicorn web:app --reload` }; ``` +### Reporting + +At the time of writing, only reports on run-time closures are supported. +Reporting is experimental and still expected to evolve, change, and +grow support for build-time closures as well. + +#### Defining a report + +You define a report by uploading a JSON CycloneDX SBOM as produced by +[nix-runtime-tree-to-sbom](https://codeberg.org/raboof/nix-runtime-tree-to-sbom): + +``` +$ nix-store -q --tree $(nix-build '<nixpkgs/nixos/release-combined.nix>' -A nixos.iso_gnome.x86_64-linux) > tree.txt +$ cat tree.txt | ~/dev/nix-runtime-tree-to-sbom/tree-to-cyclonedx.py > sbom.cdx.json +$ export HASH_COLLECTION_TOKEN=XYX # your token +$ curl -X PUT --data @sbom.cdx.json "http://localhost:8000/reports/gnome-iso-runtime" -H "Content-Type: application/json" -H "Authorization: Bearer $HASH_COLLECTION_TOKEN" +``` + +#### Populating the report + +If you want to populate the report with hashes from different builders (e.g. from +cache.nixos.org and from your own rebuilds), use separate tokens for the different +sources. + +##### With hashes from cache.nixos.org + +``` +$ nix shell .#utils +$ export HASH_COLLECTION_TOKEN=XYX # your token for the cache.nixos.org import +$ ./fetch-from-cache.sh gnome-iso-runtime +``` + +This script is still very much WIP, and will enter an infinite loop retrying failed fetches. + +##### By rebuilding + +Make sure you have the post-build hook and diff hook configured as documented above. + +TODO: you have to make sure all derivations are available for building on your system - +is there a smart way to do that? + +``` +$ export HASH_COLLECTION_TOKEN=XYX # your token for your own rebuilds +$ ./rebuilder.sh gnome-iso-runtime +``` + +This script is still very much WIP, and will enter an infinite loop retrying failed builds. +You can run multiple rebuilders in parallel. 
+ ## Related projects * [nix-reproducible-builds-report](https://codeberg.org/raboof/nix-reproducible-builds-report/) aka `r13y`, which generates the reports at [https://reproducible.nixos.org](https://reproducible.nixos.org). Ideally the [reporting](https://github.com/JulienMalka/nix-hash-collection/issues/9) feature can eventually replace the reports there. diff --git a/fetch-from-cache.sh b/fetch-from-cache.sh new file mode 100755 index 0000000..e9395d6 --- /dev/null +++ b/fetch-from-cache.sh @@ -0,0 +1,19 @@ +#!/usr/bin/env bash + +REPORT=$1 +export HASH_COLLECTION_SERVER=http://localhost:8000 + +if [ "x" == "x$REPORT" ]; then + echo "Usage: $0 " + exit 1 +fi + +while true; do + curl -H "Authorization: Bearer $HASH_COLLECTION_TOKEN" $HASH_COLLECTION_SERVER/reports/$REPORT/suggested | jq .[] | head -50 | tr -d \" | while read out + do + echo $out + # TODO some/most of these can probably also be found in the + # local cache (with a cache.nixos.org signature), so perhaps take them from there? 
+ copy-from-cache $out + done +done diff --git a/flake.lock b/flake.lock index b04f8f8..757f7fc 100644 --- a/flake.lock +++ b/flake.lock @@ -149,11 +149,11 @@ }, "nixpkgs": { "locked": { - "lastModified": 1708793639, - "narHash": "sha256-9wfI2UtdXZkBmy0ZET83ZOaea+ioSVB49m9ox46OYUw=", + "lastModified": 1710576986, + "narHash": "sha256-tcqTnbaMxQAKTAwY2V+1K+fOCucvro5t4P6b+dJBC+w=", "owner": "nixos", "repo": "nixpkgs", - "rev": "8e9536d9642e07a7706d3343ad367406b1a9d7dd", + "rev": "569825c56a98929a81b82e45fe2d391ba4d42634", "type": "github" }, "original": { diff --git a/rebuilder.sh b/rebuilder.sh new file mode 100755 index 0000000..9dd4e39 --- /dev/null +++ b/rebuilder.sh @@ -0,0 +1,19 @@ +#!/usr/bin/env bash + +REPORT=$1 + +if [ "x" == "x$REPORT" ]; then + echo "Usage: $0 " + exit 1 +fi + +while true; do + curl -H "Authorization: Bearer $HASH_COLLECTION_TOKEN" http://localhost:8000/reports/$REPORT/suggested | jq .[] | head | tr -d \" | while read out + do + (nix derivation show $out || exit 1) | jq keys.[] | tr -d \" | while read drv + do + # TODO select the right output to rebuild? + nix-build $drv --check + done + done +done diff --git a/utils/src/bin/copy-from-cache.rs b/utils/src/bin/copy-from-cache.rs index c3c738d..b5ab92c 100644 --- a/utils/src/bin/copy-from-cache.rs +++ b/utils/src/bin/copy-from-cache.rs @@ -17,6 +17,11 @@ async fn fetch<'a>(out_path: &'a str) -> (String, OutputAttestation<'a>) { if response == "404" { panic!("Metadata for [{0}] not found on cache.nixos.org", out_path); } + + // TODO Deriver is not populated for static inputs, and may be super useful: + // the same output may have multiple derivers even for non-FOD derivations. + // Should we make it optional in the data model / API as well? 
+ // https://github.com/JulienMalka/nix-hash-collection/issues/25 let deriver = Regex::new(r"(?m)Deriver: (.*).drv").unwrap() .captures(&response) .expect(format!("Deriver not found in metadata for [{0}]", out_path).as_str()) diff --git a/web/__init__.py b/web/__init__.py index 767fdc3..853b596 100644 --- a/web/__init__.py +++ b/web/__init__.py @@ -1,5 +1,7 @@ +import json +import random import typing as t -from fastapi import Depends, FastAPI, HTTPException +from fastapi import Depends, FastAPI, Header, HTTPException, Response from fastapi.security.http import HTTPAuthorizationCredentials, HTTPBearer from fastapi.middleware.cors import CORSMiddleware from sqlalchemy.orm import Session @@ -77,6 +79,23 @@ def get_drv(drv_hash: str, def get_drv_recap(drv_hash: str, db: Session = Depends(get_db)) -> schemas.DerivationAttestation: return get_drv_recap_or_404(db, drv_hash) +# Suggested rebuilds +@app.get("/reports/{name}/suggested") +def derivations_suggested_for_rebuilding( + name: str, + token: str = Depends(get_token), + db: Session = Depends(get_db), +): + report = crud.report(db, name) + if report == None: + raise HTTPException(status_code=404, detail="Report not found") + paths = report_out_paths(report) + + user = crud.get_user_with_token(db, token) + suggestions = crud.suggest(db, paths, user) + random.shuffle(suggestions) + return suggestions[:50] + @app.post("/attestation/{drv_hash}") def record_attestation( drv_hash: str, @@ -93,4 +112,204 @@ def record_attestation( "Attestation accepted" } +@app.get("/attestations/by-output/{output_path}") +def attestations_by_out(output_path: str, db: Session = Depends(get_db)): + return db.query(models.Attestation).filter_by(output_path="/nix/store/"+output_path).all() + +def report_out_paths(report): + paths = [] + for component in report['components']: + for prop in component['properties']: + if prop['name'] == "nix:out_path": + paths.append(prop['value']) + return paths + +@app.get("/reports") +def reports(db: Session = 
Depends(get_db)): + reports = db.query(models.Report).all() + names = [] + for report in reports: + names.append(report.name) + return names + +def printtree(root, deps, results, cur_indent=0, seen=None): + if seen is None: + seen = {} + if root in seen: + return " " * cur_indent + "...\n" + seen[root] = True; + + result = " " * cur_indent + root[11:]; + if root in results: + result = result + " " + results[root] + "\n" + else: + result = result + "\n" + for dep in deps: + if dep['ref'] == root and 'dependsOn' in dep: + for d in dep['dependsOn']: + result += printtree(d, deps, results, cur_indent+2, seen) + #result = result + "\n " + d + return result + +def htmltree(root, deps, results): + def icon(result): + if result == "No builds": + return "❔ " + elif result == "One build": + return "❎ " + elif result == "Partially reproduced": + return "❕ " + elif result == "Successfully reproduced": + return "✅ " + elif result == "Consistently nondeterministic": + return "❌ " + else: + return "" + def generatetree(root, seen): + if root in seen: + return f'...' + seen[root] = True; + + result = f'' + if root in results: + result = result + f'' + icon(results[root]) + "" + root[44:] + " " + else: + result = result + root[44:] + result = result + "\n" + result = result + "
    " + for dep in deps: + if dep['ref'] == root and 'dependsOn' in dep: + for d in dep['dependsOn']: + result += f'
  • ' + result += generatetree(d, seen) + result += "
  • " + result = result + "
" + return result + tree = generatetree(root, {}) + return ''' + + + + + ''' + f''' + +
    +
  • + {tree} +
  • +
+ + +''' + +@app.get("/reports/{name}") +def report( + name: str, + accept: t.Optional[str] = Header(default="*/*"), + db: Session = Depends(get_db), +): + report = crud.report(db, name) + if report == None: + raise HTTPException(status_code=404, detail="Report not found") + + if 'application/vnd.cyclonedx+json' in accept: + return Response( + content=json.dumps(report), + media_type='application/vnd.cyclonedx+json') + + paths = report_out_paths(report) + root = report['metadata']['component']['bom-ref'] + results = crud.path_summaries(db, paths) + + if 'text/html' in accept: + return Response( + content=htmltree(root, report['dependencies'], results), + media_type='text/html') + else: + return Response( + content=printtree(root, report['dependencies'], results), + media_type='text/plain') + +@app.put("/reports/{name}") +def define_report( + name: str, + definition: schemas.ReportDefinition, + token: str = Depends(get_token), + db: Session = Depends(get_db), +): + user = crud.get_user_with_token(db, token) + if user == None: + raise HTTPException(status_code=401, detail="User not found") + crud.define_report(db, name, definition.root) + return { + "Report defined" + } diff --git a/web/crud.py b/web/crud.py index ae67822..12ebb88 100644 --- a/web/crud.py +++ b/web/crud.py @@ -1,4 +1,6 @@ -from sqlalchemy import values +import json + +from sqlalchemy import distinct, func, select, values from sqlalchemy.dialects.sqlite import insert from sqlalchemy.orm import Session from sqlalchemy.sql.functions import user @@ -25,8 +27,66 @@ def create_attestation(db: Session, drv_hash: str, output_hash_map: list[schemas )) db.commit() +def report(db: Session, name: str): + r = db.query(models.Report).filter_by(name=name).one_or_none() + if r == None: + return None + return json.loads(r.definition) + +def suggest(db: Session, paths, user_id): + # Derivations in the database might not match derivations on the rebuilder system. 
+ # TODO: can this happen only for FODs or also for other derivations? + # TODO: Add enough metadata to the report so you know what to nix-instantiate to get all relevant drvs + # TODO: don't suggest nodes that have already been rebuilt by the current user + #stmt = select(models.Derivation.drv_hash, models.Attestation.output_path).join(models.Attestation).where(models.Attestation.output_path.in_(paths)).group_by(models.Attestation.output_path).having(func.count(models.Attestation.id) < 2) + #suggestions = [] + #for row in db.execute(stmt): + # suggestions.append(row._mapping['drv_hash']) + candidates = paths + if user: + for attestation in db.query(models.Attestation).filter(models.Attestation.output_path.in_(candidates)).filter_by(user_id=user_id).all(): + if attestation.output_path in candidates: + candidates.remove(attestation.output_path) + # TODO don't consider attestations that have been built twice by the same user + # as 'rebuilt' + stmt = select(models.Attestation.output_path).where(models.Attestation.output_path.in_(candidates)).group_by(models.Attestation.output_path).having(func.count(models.Attestation.id) > 1) + for row in db.execute(stmt): + candidates.remove(row._mapping['output_path']) + return candidates +# TODO ideally this should take into account derivation paths as well as +# output paths, as for example for a fixed-output derivation we'd want +# to rebuild it with each different collection of inputs, not just once. +# OTOH, it seems caches may also have different derivers for non-FODs? 
+# To look into further: https://github.com/NixOS/nix/issues/7562 +def path_summaries(db: Session, paths): + # TODO make sure multiple identical results from the same submitter + # don't get counted as 'successfully reproduced' + stmt = select(models.Attestation.output_path, func.count(models.Attestation.id), func.count(distinct(models.Attestation.output_hash))).where(models.Attestation.output_path.in_(paths)).group_by(models.Attestation.output_path) + results = {} + for output_path in paths: + results[output_path] = "No builds" + for result in db.execute(stmt): + output_path = result._mapping['output_path'] + n_results = result._mapping['count'] + distinct_results = result._mapping['count_1'] + if n_results == 1: + results[output_path] = "One build" + elif distinct_results == 1: + results[output_path] = "Successfully reproduced" + elif distinct_results < n_results: + results[output_path] = "Partially reproduced" + elif distinct_results == n_results: + results[output_path] = "Consistently nondeterministic" + return results +def define_report(db: Session, name: str, definition: dict): + db.execute( + insert(models.Report).values({ + "name": name, + "definition": json.dumps(definition), + })) + db.commit() def get_user_with_token(db: Session, token_val: str): token = db.query(models.Token).filter_by(value=token_val).one_or_none() diff --git a/web/models.py b/web/models.py index 8b17d86..5cb0d49 100644 --- a/web/models.py +++ b/web/models.py @@ -73,3 +73,11 @@ class Attestation(Base): derivation: Mapped["Derivation"] = relationship(back_populates="attestations") output_hash: Mapped[str] = mapped_column() +class Report(Base): + __tablename__ = "reports" + id: Mapped[int] = mapped_column(primary_key=True) + name: Mapped[str] = mapped_column() + # For now we store the definition in a CycloneDX JSON blob, + # later we might want to normalize it into its own database + # structure. 
+ definition: Mapped[str] = mapped_column() diff --git a/web/schemas.py b/web/schemas.py index ba83c22..d1cf374 100644 --- a/web/schemas.py +++ b/web/schemas.py @@ -57,5 +57,6 @@ class DerivationAttestation(RootModel): } } - +class ReportDefinition(RootModel): + root: dict