Skip to content

Commit

Permalink
Merge branch 'main' into hugo-docs
Browse files Browse the repository at this point in the history
  • Loading branch information
philrz committed Dec 13, 2024
2 parents b5f9349 + a32651f commit 7dbf19f
Show file tree
Hide file tree
Showing 47 changed files with 653 additions and 159 deletions.
18 changes: 8 additions & 10 deletions cmd/super/dev/vector/query/command.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package query
import (
"errors"
"flag"
"os"

"github.com/brimdata/super"
"github.com/brimdata/super/cli/outputflags"
Expand All @@ -11,10 +12,9 @@ import (
"github.com/brimdata/super/pkg/charm"
"github.com/brimdata/super/pkg/storage"
"github.com/brimdata/super/runtime"
"github.com/brimdata/super/runtime/vcache"
"github.com/brimdata/super/zbuf"
"github.com/brimdata/super/zio"
"github.com/segmentio/ksuid"
"github.com/brimdata/super/zio/vngio"
)

var spec = &charm.Spec{
Expand Down Expand Up @@ -57,23 +57,21 @@ func (c *Command) Run(args []string) error {
return errors.New("usage: query followed by a single path argument of VNG data")
}
text := args[0]
uri, err := storage.ParseURI(args[1])
f, err := os.Open(args[1])
if err != nil {
return err
}
local := storage.NewLocalEngine()
cache := vcache.NewCache(local)
object, err := cache.Fetch(ctx, uri, ksuid.Nil)
rctx := runtime.NewContext(ctx, super.NewContext())
r, err := vngio.NewVectorReader(ctx, rctx.Zctx, f, nil)
if err != nil {
return err
}
defer object.Close()
rctx := runtime.NewContext(ctx, super.NewContext())
puller, err := compiler.VectorCompile(rctx, text, object)
defer r.Pull(true)
puller, err := compiler.VectorCompile(rctx, text, r)
if err != nil {
return err
}
writer, err := c.outputFlags.Open(ctx, local)
writer, err := c.outputFlags.Open(ctx, storage.NewLocalEngine())
if err != nil {
return err
}
Expand Down
45 changes: 39 additions & 6 deletions compiler/kernel/vexpr.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,15 @@ import (
"errors"
"fmt"

"github.com/brimdata/super"
"github.com/brimdata/super/compiler/dag"
"github.com/brimdata/super/pkg/field"
"github.com/brimdata/super/runtime/sam/expr"
"github.com/brimdata/super/runtime/sam/expr/function"
vamexpr "github.com/brimdata/super/runtime/vam/expr"
vamfunction "github.com/brimdata/super/runtime/vam/expr/function"
"github.com/brimdata/super/zson"
"golang.org/x/text/unicode/norm"
)

func (b *Builder) compileVamExpr(e dag.Expr) (vamexpr.Evaluator, error) {
Expand All @@ -27,8 +30,8 @@ func (b *Builder) compileVamExpr(e dag.Expr) (vamexpr.Evaluator, error) {
return vamexpr.NewLiteral(val), nil
//case *dag.Var:
// return vamexpr.NewVar(e.Slot), nil
//case *dag.Search:
// return b.compileSearch(e)
case *dag.Search:
return b.compileVamSearch(e)
case *dag.This:
return vamexpr.NewDottedExpr(b.zctx(), field.Path(e.Path)), nil
case *dag.Dot:
Expand All @@ -47,8 +50,8 @@ func (b *Builder) compileVamExpr(e dag.Expr) (vamexpr.Evaluator, error) {
return b.compileVamCall(e)
//case *dag.RegexpMatch:
// return b.compileVamRegexpMatch(e)
//case *dag.RegexpSearch:
// return b.compileVamRegexpSearch(e)
case *dag.RegexpSearch:
return b.compileVamRegexpSearch(e)
case *dag.RecordExpr:
return b.compileVamRecordExpr(e)
//case *dag.SetExpr:
Expand Down Expand Up @@ -100,8 +103,8 @@ func (b *Builder) compileVamBinary(e *dag.BinaryExpr) (vamexpr.Evaluator, error)
return vamexpr.NewLogicalAnd(b.zctx(), lhs, rhs), nil
case "or":
return vamexpr.NewLogicalOr(b.zctx(), lhs, rhs), nil
//case "in": XXX TBD
// return vamexpr.NewIn(b.zctx(), lhs, rhs), nil
case "in":
return vamexpr.NewIn(b.zctx(), lhs, rhs), nil
case "==", "!=", "<", "<=", ">", ">=":
return vamexpr.NewCompare(b.zctx(), lhs, rhs, op), nil
case "+", "-", "*", "/", "%":
Expand Down Expand Up @@ -244,6 +247,36 @@ func (b *Builder) compileVamRecordExpr(e *dag.RecordExpr) (vamexpr.Evaluator, er
return vamexpr.NewRecordExpr(b.zctx(), elems), nil
}

func (b *Builder) compileVamRegexpSearch(search *dag.RegexpSearch) (vamexpr.Evaluator, error) {
e, err := b.compileVamExpr(search.Expr)
if err != nil {
return nil, err
}
re, err := expr.CompileRegexp(search.Pattern)
if err != nil {
return nil, err
}
return vamexpr.NewSearchRegexp(re, e), nil
}

func (b *Builder) compileVamSearch(search *dag.Search) (vamexpr.Evaluator, error) {
val, err := zson.ParseValue(b.zctx(), search.Value)
if err != nil {
return nil, err
}
e, err := b.compileVamExpr(search.Expr)
if err != nil {
return nil, err
}
if super.TypeUnder(val.Type()) == super.TypeString {
// Do a grep-style substring search instead of an
// exact match on each value.
term := norm.NFC.Bytes(val.Bytes())
return vamexpr.NewSearchString(string(term), e), nil
}
return vamexpr.NewSearch(search.Text, val, e), nil
}

func (b *Builder) compileVamArrayExpr(e *dag.ArrayExpr) (vamexpr.Evaluator, error) {
elems, err := b.compileVamListElems(e.Elems)
if err != nil {
Expand Down
5 changes: 2 additions & 3 deletions compiler/package.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ import (
"github.com/brimdata/super/runtime/sam/op"
"github.com/brimdata/super/runtime/vam"
vamop "github.com/brimdata/super/runtime/vam/op"
"github.com/brimdata/super/runtime/vcache"
"github.com/brimdata/super/vector"
"github.com/brimdata/super/zbuf"
"github.com/brimdata/super/zio"
)
Expand Down Expand Up @@ -118,7 +118,7 @@ func bundleOutputs(rctx *runtime.Context, outputs map[string]zbuf.Puller) zbuf.P
// where the entire query is vectorizable. It does not call optimize
// nor does it compute the demand of the query to prune the projection
// from the vcache.
func VectorCompile(rctx *runtime.Context, query string, object *vcache.Object) (zbuf.Puller, error) {
func VectorCompile(rctx *runtime.Context, query string, puller vector.Puller) (zbuf.Puller, error) {
ast, err := parser.ParseQuery(query)
if err != nil {
return nil, err
Expand All @@ -132,7 +132,6 @@ func VectorCompile(rctx *runtime.Context, query string, object *vcache.Object) (
panic("DAG assumptions violated")
}
entry = entry[1:]
puller := vam.NewVectorProjection(rctx.Zctx, object, nil) //XXX project all
builder := kernel.NewBuilder(rctx, env)
outputs, err := builder.BuildWithPuller(entry, puller)
if err != nil {
Expand Down
2 changes: 2 additions & 0 deletions compiler/parser/ztests/bytes.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
zed: '? 0x or 0x0e00924c694c98b6c11ef56f025f3255904f4e45'

vector: true

input: &input |
{a:0x}
{a:0x0e00924c694c98b6c11ef56f025f3255904f4e45}
Expand Down
2 changes: 2 additions & 0 deletions compiler/parser/ztests/esc-quote.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
zed: '? "foo\"bar"'

vector: true

input: |
{s:"foo\"bar"}
{s:"foobar"}
Expand Down
2 changes: 2 additions & 0 deletions compiler/parser/ztests/glob-numeric.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
zed: '? *1'

vector: true

input: |
"1"
"a1"
Expand Down
2 changes: 2 additions & 0 deletions compiler/parser/ztests/in-requires-space.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
# longer doing this
zed: '? x==1 inaction'

vector: true

input: |
{x:1,text:"inaction"}
Expand Down
2 changes: 2 additions & 0 deletions compiler/parser/ztests/ipv6.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
zed: '? fe80::58d2:2d09:e8cb:a8ad OR ::58d2:2d09:e8cb:a8ad OR 2d09:e8cb:a8ad:: OR ::'

vector: true

input: &input |
{a:fe80::58d2:2d09:e8cb:a8ad}
{a:::58d2:2d09:e8cb:a8ad}
Expand Down
2 changes: 2 additions & 0 deletions compiler/parser/ztests/leading-quotation-mark.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
zed: '? \"foo'

vector: true

input: |
{s:"foo"}
{s:"\"foo"}
Expand Down
2 changes: 2 additions & 0 deletions compiler/parser/ztests/match-double.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
zed: '? grep("a") grep("b")'

vector: true

input: |
{s1:"a",s2:"b"}
{s1:"b",s2:"a"}
Expand Down
2 changes: 2 additions & 0 deletions compiler/parser/ztests/match-parentheses.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
zed: '? (10.0.0.0/8 or 172.16.0.0/12 or 192.168.0.0/16)'

vector: true

input: |
{a:1.1.1.1,b:172.16.0.1}
{a:192.168.0.1,b:2.2.2.2}
Expand Down
2 changes: 2 additions & 0 deletions compiler/parser/ztests/match.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
zed: grep("a")

vector: true

input: |
{s1:"a",s2:"b"}
{s1:"b",s2:"a"}
Expand Down
2 changes: 2 additions & 0 deletions compiler/parser/ztests/search.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
zed: '?bar'

vector: true

input: |
{s1:"foo",s2:"bar"}
{s1:"foo",s2:null(string)}
Expand Down
2 changes: 2 additions & 0 deletions compiler/parser/ztests/unicode-keyword.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
zed: '? bjørndal'

vector: true

input: '"bjørndal"'

output: |
Expand Down
2 changes: 2 additions & 0 deletions compiler/ztests/regexp-search.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
zed: yield grep(*foo*)

vector: true

input: |
"foo"
1
Expand Down
2 changes: 2 additions & 0 deletions compiler/ztests/search-regexp-not-glob.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
zed: |
? /.*/
vector: true

input: &input |
"a"

Expand Down
2 changes: 2 additions & 0 deletions compiler/ztests/search-type-value.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
zed: |
? <{x:int64}>
vector: true

input: <int64> <string> <{x:int64}>

output: |
Expand Down
2 changes: 1 addition & 1 deletion docs/_index.md
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ or connections to
[schema registries](https://docs.confluent.io/platform/current/schema-registry/index.html).

A SuperDB data lake is completely self-contained, requiring no auxiliary databases
(like the [Hive metastore](https://cwiki.apache.org/confluence/display/hive/design))
(like the [Hive metastore](https://hive.apache.org/development/gettingstarted))
or other third-party services to interpret the lake data.
Once copied, a new service can be instantiated by pointing a `super db serve`
at the copy of the lake.
Expand Down
2 changes: 2 additions & 0 deletions runtime/sam/expr/eval.go
Original file line number Diff line number Diff line change
Expand Up @@ -319,6 +319,8 @@ func (c *Compare) Eval(ectx Context, this super.Value) super.Value {
return c.result(bytes.Compare(super.DecodeBytes(lhs.Bytes()), super.DecodeBytes(rhs.Bytes())))
case lid == super.IDString:
return c.result(cmp.Compare(super.DecodeString(lhs.Bytes()), super.DecodeString(rhs.Bytes())))
case lid == super.IDIP:
return c.result(super.DecodeIP(lhs.Bytes()).Compare(super.DecodeIP(rhs.Bytes())))
default:
if bytes.Equal(lhs.Bytes(), rhs.Bytes()) {
return c.result(0)
Expand Down
72 changes: 61 additions & 11 deletions runtime/sam/expr/fieldnamefinder.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,20 +11,24 @@ import (
)

type FieldNameFinder struct {
checkedIDs big.Int
fieldNameIter FieldNameIter
caseFinder *stringsearch.CaseFinder
checkedIDs big.Int
fnm *FieldNameMatcher
}

func NewFieldNameFinder(pattern string) *FieldNameFinder {
return &FieldNameFinder{caseFinder: stringsearch.NewCaseFinder(pattern)}
caseFinder := stringsearch.NewCaseFinder(pattern)
pred := func(b []byte) bool {
return caseFinder.Next(byteconv.UnsafeString(b)) != -1
}
return &FieldNameFinder{fnm: NewFieldNameMatcher(pred)}
}

// Find returns true if buf, which holds a sequence of ZNG value messages, might
// contain a record with a field whose fully-qualified name (e.g., a.b.c)
// matches the pattern. Find also returns true if it encounters an error.
func (f *FieldNameFinder) Find(zctx *super.Context, buf []byte) bool {
f.checkedIDs.SetInt64(0)
clear(f.fnm.checkedIDs)
for len(buf) > 0 {
id, idLen := binary.Uvarint(buf)
if idLen <= 0 {
Expand All @@ -40,18 +44,64 @@ func (f *FieldNameFinder) Find(zctx *super.Context, buf []byte) bool {
if err != nil {
return true
}
tr, ok := super.TypeUnder(t).(*super.TypeRecord)
if !ok {
if f.fnm.Match(t) {
return true
}
for f.fieldNameIter.Init(tr); !f.fieldNameIter.Done(); {
name := f.fieldNameIter.Next()
if f.caseFinder.Next(byteconv.UnsafeString(name)) != -1 {
return true
}
return false
}

type FieldNameMatcher struct {
pred func([]byte) bool
checkedIDs map[int]bool
fni FieldNameIter
}

func NewFieldNameMatcher(pred func([]byte) bool) *FieldNameMatcher {
return &FieldNameMatcher{pred: pred, checkedIDs: map[int]bool{}}
}

func (f *FieldNameMatcher) Match(typ super.Type) bool {
id := typ.ID()
match, ok := f.checkedIDs[id]
if ok {
return match
}
switch typ := super.TypeUnder(typ).(type) {
case *super.TypeRecord:
for f.fni.Init(typ); !f.fni.Done() && !match; {
match = f.pred(f.fni.Next())
}
if match {
break
}
for _, field := range typ.Fields {
match = f.Match(field.Type)
if match {
break
}
}
case *super.TypeArray:
match = f.Match(typ.Type)
case *super.TypeSet:
match = f.Match(typ.Type)
case *super.TypeMap:
match = f.Match(typ.KeyType)
if !match {
match = f.Match(typ.ValType)
}
case *super.TypeUnion:
for _, t := range typ.Types {
match = f.Match(t)
if match {
break
}
}
case *super.TypeError:
match = f.Match(typ.Type)
}
return false
f.checkedIDs[id] = match
return match
}

type FieldNameIter struct {
Expand Down
Loading

0 comments on commit 7dbf19f

Please sign in to comment.