From d4412cbedc89a98f056c4c3fc41642a017e083fa Mon Sep 17 00:00:00 2001
From: Arnout Engelen
Date: Sat, 2 Mar 2024 10:17:16 +0100
Subject: [PATCH] WiP: reporting

Towards #9
---
Review note: the added lines below were revised during review (idempotent
report definition, tolerant parsing of report documents, docstrings).  The
blob ids on the `index` lines still name the originally committed contents.

 web/__init__.py | 77 +++++++++++++++++++++++++++++++++++++++++++++++++
 web/crud.py     | 72 +++++++++++++++++++++++++++++++++++++++++++++-
 web/models.py   | 10 ++++++
 web/schemas.py  |  4 ++-
 4 files changed, 161 insertions(+), 2 deletions(-)

diff --git a/web/__init__.py b/web/__init__.py
index db730a3..e9e4634 100644
--- a/web/__init__.py
+++ b/web/__init__.py
@@ -1,3 +1,5 @@
+import json
+import random
 import typing as t
 from fastapi import Depends, FastAPI, HTTPException
 from fastapi.security.http import HTTPAuthorizationCredentials, HTTPBearer
@@ -77,6 +79,21 @@ def get_drv(drv_hash: str,
 def get_drv_recap(drv_hash: str, db: Session = Depends(get_db)) -> schemas.DerivationAttestation:
     return get_drv_recap_or_404(db, drv_hash)
 
+# Suggested rebuilds
+@app.get("/reports/{name}/suggested")
+def derivations_suggested_for_rebuilding(name: str, db: Session = Depends(get_db)):
+    """Return up to 10 randomly chosen output paths of report `name` to rebuild.
+
+    Responds with 404 when no report called `name` has been defined.
+    """
+    definition = crud.report(db, name)
+    if definition is None:
+        raise HTTPException(status_code=404, detail="Report not found")
+    paths = report_out_paths(definition)
+    suggestions = crud.suggest(db, paths)
+    # Draw a random subset instead of always returning the first 10.
+    return random.sample(suggestions, min(len(suggestions), 10))
+
 @app.post("/attestation/{drv_hash}")
 def record_attestation(
     drv_hash: str,
@@ -93,4 +110,64 @@ def record_attestation(
         "Attestation accepted"
     }
 
+def report_out_paths(report):
+    """Collect the `nix:out_path` property values of all components of a report.
+
+    `report` is a parsed report definition as returned by `crud.report`.
+    Components without properties, and properties without a value, are
+    skipped rather than failing the request with a KeyError.
+    """
+    return [
+        prop['value']
+        for component in report.get('components', [])
+        for prop in component.get('properties', [])
+        if prop.get('name') == "nix:out_path" and 'value' in prop
+    ]
+
+@app.get("/reports")
+def reports(db: Session = Depends(get_db)):
+    """List the names of all defined reports."""
+    return [r.name for r in db.query(models.Report).all()]
+
+@app.get("/reports/{name}")
+def report(
+    name: str,
+    db: Session = Depends(get_db),
+):
+    """Summarize how far the output paths of report `name` have been reproduced.
+
+    Responds with 404 when no report called `name` has been defined.
+    """
+    definition = crud.report(db, name)
+    if definition is None:
+        raise HTTPException(status_code=404, detail="Report not found")
+    paths = report_out_paths(definition)
+    # A stored definition may lack the root component metadata; report
+    # `None` as root in that case instead of failing with a KeyError.
+    root = definition.get('metadata', {}).get('component', {}).get('name')
+    # TODO return as tree
+    return {
+        'name': name,
+        'root': root,
+        'results': crud.path_summaries(db, paths),
+    }
+
+@app.put("/reports/{name}")
+def define_report(
+    name: str,
+    definition: schemas.ReportDefinition,
+    token: str = Depends(get_token),
+    db: Session = Depends(get_db),
+):
+    """Store `definition` as the report called `name`.
+
+    Responds with 401 when no user is found for `token`.
+    """
+    user = crud.get_user_with_token(db, token)
+    if user is None:
+        raise HTTPException(status_code=401, detail="User not found")
+    crud.define_report(db, name, definition.root)
+    return {
+        "Report defined"
+    }
 
diff --git a/web/crud.py b/web/crud.py
index a41f7a9..32ea5b9 100644
--- a/web/crud.py
+++ b/web/crud.py
@@ -1,4 +1,6 @@
-from sqlalchemy import values
+import json
+
+from sqlalchemy import distinct, func, select, values
 from sqlalchemy.dialects.sqlite import insert
 from sqlalchemy.orm import Session
 from sqlalchemy.sql.functions import user
@@ -25,8 +27,76 @@ def create_attestation(db: Session, drv_hash: str, output_hash_map: list[schemas
     ))
     db.commit()
 
+def report(db: Session, name: str):
+    """Return the parsed definition of report `name`, or None if it is not defined."""
+    row = db.query(models.Report).filter_by(name=name).one_or_none()
+    if row is None:
+        return None
+    return json.loads(row.definition)
+
+def suggest(db: Session, paths):
+    """Return the entries of `paths` that have fewer than two attestations.
+
+    Only paths with at least one attestation can match, because the
+    query groups existing attestation rows.
+    """
+    # Derivations in the database might not match derivations on the rebuilder system.
+    # TODO: can this happen only for FODs or also for other derivations?
+    # TODO: Add enough metadata to the report so you know what to nix-instantiate to get all relevant drvs
+    # TODO: don't suggest nodes that have already been rebuilt by the current user
+    # TODO: suggest drv hashes (join Derivation with Attestation) instead of output paths
+    stmt = (
+        select(models.Attestation.output_path)
+        .where(models.Attestation.output_path.in_(paths))
+        .group_by(models.Attestation.output_path)
+        .having(func.count(models.Attestation.id) < 2)
+    )
+    return list(db.execute(stmt).scalars())
 
+# TODO ideally this should take into account derivation paths as well as
+# output paths, as for example for a fixed-output derivation we'd want
+# to rebuild it with each different collection of inputs, not just once.
+def path_summaries(db: Session, paths):
+    """Map each path in `paths` that has attestations to a reproduction verdict.
+
+    Paths without any attestation do not appear in the result.
+    """
+    # TODO make sure multiple identical results from the same submitter
+    # don't get counted as 'successfully reproduced'
+    stmt = (
+        select(
+            models.Attestation.output_path,
+            func.count(models.Attestation.id),
+            func.count(distinct(models.Attestation.output_hash)),
+        )
+        .where(models.Attestation.output_path.in_(paths))
+        .group_by(models.Attestation.output_path)
+    )
+    results = {}
+    # Unpack rows by position; the generated labels ('count', 'count_1')
+    # of the two aggregates are not something to depend on.
+    for output_path, n_results, distinct_results in db.execute(stmt):
+        if n_results < 2:
+            results[output_path] = "Not yet reproduced"
+        elif distinct_results == 1:
+            results[output_path] = "Successfully reproduced"
+        elif distinct_results < n_results:
+            results[output_path] = "Partially reproduced"
+        else:
+            results[output_path] = "Consistently nondeterministic"
+    return results
 
+def define_report(db: Session, name: str, definition: dict):
+    """Create the report called `name`, or replace its definition if it exists.
+
+    Repeating the same PUT must not add a second row: `report` looks
+    the name up with `one_or_none()`, which raises once two rows match.
+    """
+    blob = json.dumps(definition)
+    matched = db.query(models.Report).filter_by(name=name).update({"definition": blob})
+    if not matched:
+        db.execute(insert(models.Report).values({"name": name, "definition": blob}))
+    db.commit()
 
 def get_user_with_token(db: Session, token_val: str):
     token = db.query(models.Token).filter_by(value=token_val).one_or_none()
diff --git a/web/models.py b/web/models.py
index 8b17d86..5cb0d49 100644
--- a/web/models.py
+++ b/web/models.py
@@ -73,3 +73,13 @@ class Attestation(Base):
     derivation: Mapped["Derivation"] = relationship(back_populates="attestations")
     output_hash: Mapped[str] = mapped_column()
 
+class Report(Base):
+    """A named report whose definition is kept as serialized JSON."""
+    __tablename__ = "reports"
+    id: Mapped[int] = mapped_column(primary_key=True)
+    # Reports are looked up by name, so names must not repeat.
+    name: Mapped[str] = mapped_column(unique=True)
+    # For now we store the definition in a CycloneDX JSON blob,
+    # later we might want to normalize it into its own database
+    # structure.
+    definition: Mapped[str] = mapped_column()
diff --git a/web/schemas.py b/web/schemas.py
index b118a93..fb1f228 100644
--- a/web/schemas.py
+++ b/web/schemas.py
@@ -57,5 +57,7 @@ class DerivationAttestation(RootModel):
         }
     }
 
-
+class ReportDefinition(RootModel):
+    """Request body of a report definition: an arbitrary JSON object."""
+    root: dict
 