From a773f5c43f8993f00f242d710b0673e6c360daaf Mon Sep 17 00:00:00 2001 From: Steven McCanne Date: Mon, 13 Feb 2023 16:00:37 -0800 Subject: [PATCH] improve UX for metadata queries This commit improves the UX for metadata queries by using the metadata type name to distinguish between commit-level queries and pool-level queries rather than requiring the presence of an @branch commit tag for commit-level queries. This way you can query the objects on the main branch of a pool without specifying @main. We also fixed a bug in the optimization logic where the layout of metadata queries was presumed to be the same as the layout of the pool. The fix is to presume the layout of metadata queries is unknown. --- compiler/ast/dag/operator.go | 19 ++++++++++++++++++ compiler/optimizer/optimizer.go | 4 +++- compiler/semantic/op.go | 35 ++++++++++++++++++++++----------- lake/ztests/meta.yaml | 2 +- 4 files changed, 46 insertions(+), 14 deletions(-) diff --git a/compiler/ast/dag/operator.go b/compiler/ast/dag/operator.go index bd0223ea93..10936c602c 100644 --- a/compiler/ast/dag/operator.go +++ b/compiler/ast/dag/operator.go @@ -200,6 +200,25 @@ type ( } ) +var LakeMetas = map[string]struct{}{ + "branches": {}, + "index_rules": {}, + "pools": {}, +} + +var PoolMetas = map[string]struct{}{ + "branches": {}, +} + +var CommitMetas = map[string]struct{}{ + "indexes": {}, + "log": {}, + "objects": {}, + "partitions": {}, + "rawlog": {}, + "vectors": {}, +} + type Source interface { Source() } diff --git a/compiler/optimizer/optimizer.go b/compiler/optimizer/optimizer.go index 283bf889bf..40eb800ecb 100644 --- a/compiler/optimizer/optimizer.go +++ b/compiler/optimizer/optimizer.go @@ -173,8 +173,10 @@ func (o *Optimizer) getLayout(s dag.Source, parent order.Layout) (order.Layout, return s.Layout, nil case *dag.HTTP: return s.Layout, nil - case *dag.Pool, *dag.LakeMeta, *dag.PoolMeta, *dag.CommitMeta: + case *dag.Pool: return o.source.Layout(o.ctx, s), nil + case *dag.LakeMeta, *dag.PoolMeta, 
*dag.CommitMeta: + return order.Nil, nil case *dag.Pass: return parent, nil case *kernel.Reader: diff --git a/compiler/semantic/op.go b/compiler/semantic/op.go index 4d5cdd04ab..cb8d66f9c9 100644 --- a/compiler/semantic/op.go +++ b/compiler/semantic/op.go @@ -165,9 +165,13 @@ func semPoolWithName(ctx context.Context, scope *Scope, p *ast.Pool, poolName st commit = head.Branch } if poolName == "" { - if p.Spec.Meta == "" { + meta := p.Spec.Meta + if meta == "" { return nil, errors.New("pool name missing") } + if _, ok := dag.LakeMetas[meta]; !ok { + return nil, fmt.Errorf("unknown lake metadata type %q in from operator", meta) + } return &dag.LakeMeta{ Kind: "LakeMeta", Meta: p.Spec.Meta, @@ -176,9 +180,7 @@ func semPoolWithName(ctx context.Context, scope *Scope, p *ast.Pool, poolName st // If a name appears as an 0x bytes ksuid, convert it to the // ksuid string form since the backend doesn't parse the 0x format. poolID, err := lakeparse.ParseID(poolName) - if err == nil { - poolName = poolID.String() - } else { + if err != nil { poolID, err = ds.PoolID(ctx, poolName) if err != nil { return nil, err @@ -203,20 +205,29 @@ func semPoolWithName(ctx context.Context, scope *Scope, p *ast.Pool, poolName st } } } - if p.Spec.Meta != "" { - if commit != "" { + if meta := p.Spec.Meta; meta != "" { + if _, ok := dag.CommitMetas[meta]; ok { + if commitID == ksuid.Nil { + commitID, err = ds.CommitObject(ctx, poolID, "main") + if err != nil { + return nil, err + } + } return &dag.CommitMeta{ Kind: "CommitMeta", - Meta: p.Spec.Meta, + Meta: meta, Pool: poolID, Commit: commitID, }, nil } - return &dag.PoolMeta{ - Kind: "PoolMeta", - Meta: p.Spec.Meta, - ID: poolID, - }, nil + if _, ok := dag.PoolMetas[meta]; ok { + return &dag.PoolMeta{ + Kind: "PoolMeta", + Meta: meta, + ID: poolID, + }, nil + } + return nil, fmt.Errorf("unknown metadata type %q in from operator", meta) } if commitID == ksuid.Nil { // This trick here allows us to default to the main branch when diff --git 
a/lake/ztests/meta.yaml b/lake/ztests/meta.yaml index 915e48295d..aca78a085d 100644 --- a/lake/ztests/meta.yaml +++ b/lake/ztests/meta.yaml @@ -8,7 +8,7 @@ script: | zed query -Z 'from :pools | drop id | sort name | drop ts' echo === zed query -Z 'from poolA@main:objects | cut nameof(this),meta' - zed query -Z 'from poolA@main:log | cut nameof(this) | drop ts' + zed query -Z 'from poolA:log | cut nameof(this) | drop ts' echo === zed index create -q Rule field a zed query -Z 'from :index_rules | nameof:=nameof(this) | drop ts,id'