Skip to content

Commit

Permalink
prototype of index sections
Browse files Browse the repository at this point in the history
work in progress
  • Loading branch information
mschoch committed Aug 31, 2021
1 parent fc8a89f commit 674c535
Show file tree
Hide file tree
Showing 9 changed files with 595 additions and 23 deletions.
86 changes: 86 additions & 0 deletions document/field_numeric_range.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
// Copyright (c) 2021 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package document

import (
	"encoding/binary"
	"fmt"
	"math"

	"github.com/blevesearch/bleve/v2/size"
	index "github.com/blevesearch/bleve_index_api"
)

// NumericRangeField is a document field holding a single float64, stored as
// the 8 big-endian bytes of its IEEE-754 bit pattern (see
// NewNumericRangeField and Number).
type NumericRangeField struct {
// name is the field name returned by Name.
name string
// arrayPositions is returned unchanged by ArrayPositions.
arrayPositions []uint64
// options is returned by Options; NewNumericRangeField leaves it at its
// zero value.
options index.FieldIndexingOptions
// value is the encoded number returned by Value and decoded by Number.
value []byte
}

// Size returns an estimate, in bytes, of the memory held by this field: a
// static struct size, one pointer, the bytes of the field name, the array
// positions (uint64 values, so sized as such rather than as pointers) and
// the encoded value bytes, which were previously left out of the estimate.
//
// NOTE(review): reflectStaticSizeNumericField is declared elsewhere in this
// package; presumably it is the static size of NumericField, reused here as
// an approximation — confirm.
func (n *NumericRangeField) Size() int {
	return reflectStaticSizeNumericField + size.SizeOfPtr +
		len(n.name) +
		len(n.arrayPositions)*size.SizeOfUint64 +
		len(n.value)
}

// Name reports the name this field was created with.
func (n *NumericRangeField) Name() string {
	return n.name
}

// ArrayPositions returns the array positions slice stored on the field. The
// slice is returned as-is, not copied.
func (n *NumericRangeField) ArrayPositions() []uint64 {
	return n.arrayPositions
}

// Options returns the indexing options stored on the field.
func (n *NumericRangeField) Options() index.FieldIndexingOptions {
	return n.options
}

// EncodedFieldType returns the single-byte tag for this field type, 'r'.
func (n *NumericRangeField) EncodedFieldType() byte {
	const rangeFieldTag byte = 'r'
	return rangeFieldTag
}

// AnalyzedLength always reports 0 for this field type.
func (n *NumericRangeField) AnalyzedLength() int {
	return 0
}

// AnalyzedTokenFrequencies always returns nil for this field type.
func (n *NumericRangeField) AnalyzedTokenFrequencies() index.TokenFrequencies {
	return nil
}

// Analyze is a no-op for this field type.
func (n *NumericRangeField) Analyze() {
	// intentionally empty
}

// Value returns the encoded bytes stored on the field. The slice is returned
// as-is, not copied.
func (n *NumericRangeField) Value() []byte {
	return n.value
}

// NumPlainTextBytes always reports 0 for this field type.
func (n *NumericRangeField) NumPlainTextBytes() uint64 {
	return 0
}

// Number decodes the stored value back into a float64 by reading its first
// 8 bytes as a big-endian IEEE-754 bit pattern.
//
// It returns an error, rather than panicking, when the stored value is
// shorter than 8 bytes (for example on a zero-value NumericRangeField that
// was not built by NewNumericRangeField).
func (n *NumericRangeField) Number() (float64, error) {
	if len(n.value) < 8 {
		return 0, fmt.Errorf("numeric range field %q: encoded value has %d bytes, need 8",
			n.name, len(n.value))
	}
	return math.Float64frombits(binary.BigEndian.Uint64(n.value)), nil
}

// NewNumericRangeField builds a NumericRangeField with the given name and
// array positions, storing number as the 8 big-endian bytes of its IEEE-754
// bit pattern. The field's indexing options are left at their zero value.
func NewNumericRangeField(name string, arrayPositions []uint64, number float64) *NumericRangeField {
	var encoded [8]byte
	binary.BigEndian.PutUint64(encoded[:], math.Float64bits(number))

	field := new(NumericRangeField)
	field.name = name
	field.arrayPositions = arrayPositions
	field.value = encoded[:]
	return field
}
4 changes: 0 additions & 4 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,6 @@ require (
github.com/blevesearch/snowballstem v0.9.0
github.com/blevesearch/upsidedown_store_api v1.0.1
github.com/blevesearch/vellum v1.0.5
github.com/blevesearch/zapx/v11 v11.2.2
github.com/blevesearch/zapx/v12 v12.2.2
github.com/blevesearch/zapx/v13 v13.2.2
github.com/blevesearch/zapx/v14 v14.2.2
github.com/blevesearch/zapx/v15 v15.2.2
github.com/couchbase/moss v0.1.0
github.com/golang/protobuf v1.3.2
Expand Down
11 changes: 0 additions & 11 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,6 @@ github.com/RoaringBitmap/roaring v0.7.3/go.mod h1:jdT9ykXwHFNdJbEtxePexlFYH9LXuc
github.com/armon/consul-api v0.0.0-20180202201655-eb2c6b5be1b6/go.mod h1:grANhF5doyWs3UAsr3K4I6qtAmlQcZDesFNEHPZAzj8=
github.com/bits-and-blooms/bitset v1.2.0 h1:Kn4yilvwNtMACtf1eYDlG8H77R07mZSPbMjLyS07ChA=
github.com/bits-and-blooms/bitset v1.2.0/go.mod h1:gIdJ4wp64HaoK2YrL1Q5/N7Y16edYb8uY+O0FJTyyDA=
github.com/blevesearch/bleve_index_api v1.0.0/go.mod h1:fiwKS0xLEm+gBRgv5mumf0dhgFr2mDgZah1pqv1c1M4=
github.com/blevesearch/bleve_index_api v1.0.1 h1:nx9++0hnyiGOHJwQQYfsUGzpRdEVE5LsylmmngQvaFk=
github.com/blevesearch/bleve_index_api v1.0.1/go.mod h1:fiwKS0xLEm+gBRgv5mumf0dhgFr2mDgZah1pqv1c1M4=
github.com/blevesearch/go-porterstemmer v1.0.3 h1:GtmsqID0aZdCSNiY8SkuPJ12pD4jI+DdXTAn4YRcHCo=
github.com/blevesearch/go-porterstemmer v1.0.3/go.mod h1:angGc5Ht+k2xhJdZi511LtmxuEf0OVpvUUNrwmM1P7M=
github.com/blevesearch/mmap-go v1.0.2 h1:JtMHb+FgQCTTYIhtMvimw15dJwu1Y5lrZDMOFXVWPk0=
Expand All @@ -23,14 +20,6 @@ github.com/blevesearch/upsidedown_store_api v1.0.1 h1:1SYRwyoFLwG3sj0ed89RLtM15a
github.com/blevesearch/upsidedown_store_api v1.0.1/go.mod h1:MQDVGpHZrpe3Uy26zJBf/a8h0FZY6xJbthIMm8myH2Q=
github.com/blevesearch/vellum v1.0.5 h1:L5dJ7hKauRVbuH7I8uqLeSK92CPPY6FfrbAmLhAug8A=
github.com/blevesearch/vellum v1.0.5/go.mod h1:atE0EH3fvk43zzS7t1YNdNC7DbmcC3uz+eMD5xZ2OyQ=
github.com/blevesearch/zapx/v11 v11.2.2 h1:yeHRnGA4UPxVm1roONbp7VRm+SUx95AleE4rU8w4pc4=
github.com/blevesearch/zapx/v11 v11.2.2/go.mod h1:qunXXAB8awrvPgnHdqbPFMW01N93bhKFxkEWxYQgp8w=
github.com/blevesearch/zapx/v12 v12.2.2 h1:aK6r0DbMMI8+MnrqkmFwWxUr8ZwVwQf8owbdVv7uhKs=
github.com/blevesearch/zapx/v12 v12.2.2/go.mod h1:6reJkgolYR1r7GC6SwbuRGhvMWin+Ou/n2Cd7DdvYzY=
github.com/blevesearch/zapx/v13 v13.2.2 h1:6oa7kZhywrRT3CeuN0bIZC6EviQkbPOQpvdR5pnxCks=
github.com/blevesearch/zapx/v13 v13.2.2/go.mod h1:EeLDRSUIMxBFwD4hZxVioHLR7gpY4VF7kdUoImp4Vy8=
github.com/blevesearch/zapx/v14 v14.2.2 h1:lRZCBvQIByW8F+mCoY4uA6Uen2DCj7K+wCh5nVKNxew=
github.com/blevesearch/zapx/v14 v14.2.2/go.mod h1:zb01unR63/DfV2nvyAvgj64K8AKtj7WP6w23jtuHntQ=
github.com/blevesearch/zapx/v15 v15.2.2 h1:5+oWWAQTV3M0UNor05qrZujzxIXVuwtYU3ppZ7Y1aNI=
github.com/blevesearch/zapx/v15 v15.2.2/go.mod h1:I4QVJ432LKkZyNK1kZkh3OweKa+NSblZzIF0YSSExak=
github.com/coreos/etcd v3.3.10+incompatible/go.mod h1:uF7uidLiAD3TWHmW31ZFd/JWoc32PjwdhPthX9715RE=
Expand Down
8 changes: 0 additions & 8 deletions index/scorch/segment_plugin.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,6 @@ import (

segment "github.com/blevesearch/scorch_segment_api/v2"

zapv11 "github.com/blevesearch/zapx/v11"
zapv12 "github.com/blevesearch/zapx/v12"
zapv13 "github.com/blevesearch/zapx/v13"
zapv14 "github.com/blevesearch/zapx/v14"
zapv15 "github.com/blevesearch/zapx/v15"
)

Expand Down Expand Up @@ -72,10 +68,6 @@ var defaultSegmentPlugin SegmentPlugin
// init calls ResetSegmentPlugins and then registers the zap plugin versions
// v15 down to v11 via RegisterSegmentPlugin.
// NOTE(review): only v15 is registered with `true`; presumably that flag
// marks the default plugin — confirm against RegisterSegmentPlugin.
func init() {
ResetSegmentPlugins()
RegisterSegmentPlugin(&zapv15.ZapPlugin{}, true)
RegisterSegmentPlugin(&zapv14.ZapPlugin{}, false)
RegisterSegmentPlugin(&zapv13.ZapPlugin{}, false)
RegisterSegmentPlugin(&zapv12.ZapPlugin{}, false)
RegisterSegmentPlugin(&zapv11.ZapPlugin{}, false)
}

func ResetSegmentPlugins() {
Expand Down
36 changes: 36 additions & 0 deletions index/scorch/snapshot_index.go
Original file line number Diff line number Diff line change
Expand Up @@ -494,6 +494,42 @@ func (i *IndexSnapshot) InternalID(id string) (rv index.IndexInternalID, err err
return next.ID, nil
}

// NumericRangeReader builds a reader over the postings that each segment of
// the snapshot reports for field via InRange(field, min, max, inclusiveMin,
// inclusiveMax, deleted). The bounds and inclusivity flags are passed to the
// segments unchanged and are also kept on the reader.
//
// One postings list and one iterator are created per segment. Every segment
// must implement segment.SegmentNumeric; if one does not, an error is
// returned instead of printing to stdout and leaving that segment's
// iterator nil, which would make the returned reader fail when it is
// iterated or counted.
func (i *IndexSnapshot) NumericRangeReader(field string, min, max float64, inclusiveMin, inclusiveMax bool) (
	index.TermFieldReader, error) {

	rv := &IndexSnapshotNumericRangeReader{
		field:        field,
		min:          min,
		max:          max,
		inclusiveMin: inclusiveMin,
		inclusiveMax: inclusiveMax,
		postings:     make([]segment.PostingsList, len(i.segment)),
		iterators:    make([]segment.PostingsIterator, len(i.segment)),
		snapshot:     i,
		// FIXME if we add recycling we have to reset all fields
	}

	// The loop index is named segIdx so it does not shadow the receiver i.
	for segIdx, seg := range i.segment {
		sn, ok := seg.segment.(segment.SegmentNumeric)
		if !ok {
			return nil, fmt.Errorf("segment %d does not support numeric range queries", segIdx)
		}

		pl, err := sn.InRange(field, min, max, inclusiveMin, inclusiveMax, seg.deleted)
		if err != nil {
			return nil, err
		}
		rv.postings[segIdx] = pl
		rv.iterators[segIdx] = pl.Iterator(false, false, false, rv.iterators[segIdx])
	}

	// TODO: add stat tracking here

	return rv, nil
}

func (i *IndexSnapshot) TermFieldReader(term []byte, field string, includeFreq,
includeNorm, includeTermVectors bool) (index.TermFieldReader, error) {
rv := i.allocTermFieldReaderDicts(field)
Expand Down
134 changes: 134 additions & 0 deletions index/scorch/snapshot_index_nr.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
// Copyright (c) 2021 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package scorch

import (
"bytes"
"fmt"
"sync/atomic"

index "github.com/blevesearch/bleve_index_api"
segment "github.com/blevesearch/scorch_segment_api/v2"
)

// IndexSnapshotNumericRangeReader iterates, segment by segment, over the
// postings produced by a numeric range lookup on an IndexSnapshot.
type IndexSnapshotNumericRangeReader struct {
// min, max and the inclusive flags are the range parameters the reader was
// created with; Advance reuses them to rebuild the reader when it has to
// seek backwards.
min, max float64
inclusiveMin, inclusiveMax bool


// term is not referenced by the methods in this file.
term []byte
// field is the field name the reader was created for.
field string
// snapshot is the owning snapshot; it supplies per-segment offsets, the
// global-to-local doc number mapping and the stats updated by Close.
snapshot *IndexSnapshot
// dicts is not referenced by the methods in this file.
dicts []segment.TermDictionary
// postings holds one postings list per segment; Count sums them.
postings []segment.PostingsList
// iterators holds one iterator per segment; Next and Advance read from
// the one at segmentOffset.
iterators []segment.PostingsIterator
// segmentOffset is the index of the segment currently being iterated.
segmentOffset int

// currPosting and currID record the posting most recently returned and
// its global document ID.
currPosting segment.Posting
currID index.IndexInternalID
// recycle is not referenced by the methods in this file.
recycle bool
}

// Size always reports 0; no memory accounting is done for this reader.
func (i *IndexSnapshotNumericRangeReader) Size() int {
	return 0
}

// Next returns the next hit, walking the per-segment iterators in order
// starting at the current segment offset. The hit's ID is the segment-local
// document number plus that segment's offset in the snapshot, encoded with
// docNumberToBytes. preAlloced is reused for the result when non-nil.
//
// It returns (nil, nil) once every segment is exhausted. A segment whose
// iterator slot is nil is treated as having no hits and is skipped, rather
// than causing a nil method call.
func (i *IndexSnapshotNumericRangeReader) Next(preAlloced *index.TermFieldDoc) (*index.TermFieldDoc, error) {
	rv := preAlloced
	if rv == nil {
		rv = &index.TermFieldDoc{}
	}
	// find the next hit; segmentOffset only moves on when the current
	// segment has nothing (more) to offer
	for ; i.segmentOffset < len(i.iterators); i.segmentOffset++ {
		it := i.iterators[i.segmentOffset]
		if it == nil {
			continue
		}
		next, err := it.Next()
		if err != nil {
			return nil, err
		}
		if next == nil {
			continue
		}
		// make segment number into global number by adding offset
		globalOffset := i.snapshot.offsets[i.segmentOffset]
		rv.ID = docNumberToBytes(rv.ID, next.Number()+globalOffset)

		i.currID = rv.ID
		i.currPosting = next
		return rv, nil
	}
	return nil, nil
}

// Advance moves the reader to the first hit at or after ID and returns it,
// or (nil, nil) when there is none. preAlloced is reused for the result
// when non-nil.
//
// If the reader has already returned a hit at or past ID, it is rebuilt from
// the snapshot with the same range parameters before seeking, because the
// underlying iterators are only moved forward here. The target segment is
// located from the global document number; when that segment has no match
// (or has no iterator at all) the search continues through Next.
func (i *IndexSnapshotNumericRangeReader) Advance(ID index.IndexInternalID, preAlloced *index.TermFieldDoc) (*index.TermFieldDoc, error) {
	// FIXME do something better
	// for now, if we need to seek backwards, then restart from the beginning
	if i.currPosting != nil && bytes.Compare(i.currID, ID) >= 0 {
		i2, err := i.snapshot.NumericRangeReader(i.field, i.min, i.max, i.inclusiveMin, i.inclusiveMax)
		if err != nil {
			return nil, err
		}
		// close the current reader before replacing it with a new one;
		// Close only updates a counter and always returns nil
		_ = i.Close()
		*i = *(i2.(*IndexSnapshotNumericRangeReader))
	}
	num, err := docInternalToNumber(ID)
	if err != nil {
		// %w keeps the message unchanged while letting callers unwrap err
		return nil, fmt.Errorf("error converting to doc number % x - %w", ID, err)
	}
	segIndex, ldocNum := i.snapshot.segmentIndexAndLocalDocNumFromGlobal(num)
	if segIndex >= len(i.snapshot.segment) {
		return nil, fmt.Errorf("computed segment index %d out of bounds %d",
			segIndex, len(i.snapshot.segment))
	}
	// skip directly to the target segment
	i.segmentOffset = segIndex
	it := i.iterators[segIndex]
	if it == nil {
		// no iterator was built for the target segment, so it cannot
		// contain a hit; continue with the segments after it
		i.segmentOffset++
		return i.Next(preAlloced)
	}
	next, err := it.Advance(ldocNum)
	if err != nil {
		return nil, err
	}
	if next == nil {
		// we jumped directly to the segment that should have contained it
		// but it wasn't there, so reuse Next() which should correctly
		// get the next hit after it (we moved i.segmentOffset)
		return i.Next(preAlloced)
	}

	if preAlloced == nil {
		preAlloced = &index.TermFieldDoc{}
	}
	preAlloced.ID = docNumberToBytes(preAlloced.ID, next.Number()+
		i.snapshot.offsets[segIndex])
	i.currID = preAlloced.ID
	i.currPosting = next
	return preAlloced, nil
}

// Count returns the sum of Count() over the reader's per-segment postings
// lists. Segments whose postings slot is nil contribute nothing instead of
// causing a nil method call.
func (i *IndexSnapshotNumericRangeReader) Count() uint64 {
	var rv uint64
	for _, posting := range i.postings {
		if posting == nil {
			continue
		}
		rv += posting.Count()
	}
	return rv
}

// Close marks the reader as finished by atomically incrementing the parent
// index's TotTermSearchersFinished counter. It does nothing when the reader
// has no snapshot, and it always returns nil.
func (i *IndexSnapshotNumericRangeReader) Close() error {
	if i.snapshot == nil {
		return nil
	}

	// FIXME disabling any recycling for POC
	//i.snapshot.recycleTermFieldReader(i)
	atomic.AddUint64(&i.snapshot.parent.stats.TotTermSearchersFinished, 1)
	return nil
}
Loading

0 comments on commit 674c535

Please sign in to comment.