Skip to content

Commit

Permalink
Corrected language filtering in functions
Browse files Browse the repository at this point in the history
This fixes #1295.
  • Loading branch information
Tomasz Zdybał committed Aug 7, 2017
1 parent 89dc3dd commit 2accea5
Show file tree
Hide file tree
Showing 6 changed files with 136 additions and 25 deletions.
60 changes: 60 additions & 0 deletions query/query_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -301,6 +301,10 @@ func populateGraph(t *testing.T) {
addEdgeToLangValue(t, "lossy", 0x1002, "Honey badger", "en", nil)
addEdgeToLangValue(t, "lossy", 0x1003, "Honey bee", "en", nil)

// full_name has hash index, we need following data for bug with eq (#1295)
addEdgeToLangValue(t, "royal_title", 0x10000, "Her Majesty Elizabeth the Second, by the Grace of God of the United Kingdom of Great Britain and Northern Ireland and of Her other Realms and Territories Queen, Head of the Commonwealth, Defender of the Faith", "en", nil)
addEdgeToLangValue(t, "royal_title", 0x10000, "Sa Majesté Elizabeth Deux, par la grâce de Dieu Reine du Royaume-Uni, du Canada et de ses autres royaumes et territoires, Chef du Commonwealth, Défenseur de la Foi", "fr", nil)

// regex test data
// 0x1234 is uid of interest for regex testing
addEdgeToValue(t, "name", 0x1234, "Regex Master", nil)
Expand Down Expand Up @@ -2213,6 +2217,7 @@ func TestDebug3(t *testing.T) {
require.NoError(t, json.Unmarshal([]byte(buf.Bytes()), &mp))

resp := mp["data"].(map[string]interface{})["me"]
require.NotNil(t, resp)
require.EqualValues(t, 1, len(resp.([]interface{})))
uid := resp.([]interface{})[0].(map[string]interface{})["_uid_"].(string)
require.EqualValues(t, "0x1", uid)
Expand Down Expand Up @@ -6671,6 +6676,45 @@ func TestLangLossyIndex4(t *testing.T) {
require.Error(t, err)
}

// Test for bug #1295
func TestLangBug1295(t *testing.T) {
populateGraph(t)
// query for Canadian (French) version of the royal_title, then show English one
// this case is not trivial, because farmhash of "en" is less than farmhash of "fr"
// so we need to iterate over values in all languages to find a match
// for alloftext, this won't work - we use default/English tokenizer for function parameters
// when no language is specified, while index contains tokens generated with French tokenizer

functions := []string{"eq", "allofterms" /*, "alloftext" */}
langs := []string{"", "@."}

for _, l := range langs {
for _, f := range functions {
t.Run(f+l, func(t *testing.T) {
query := `
{
q(func:` + f + "(royal_title" + l + `, "Sa Majesté Elizabeth Deux, par la grâce de Dieu Reine du Royaume-Uni, du Canada et de ses autres royaumes et territoires, Chef du Commonwealth, Défenseur de la Foi")) {
royal_title@en
}
}`

json, err := processToFastJsonReq(t, query)
require.NoError(t, err)
if l == "" {
require.JSONEq(t,
`{"data": {}}`,
json)
} else {
require.JSONEq(t,
`{"data": {"q":[{"royal_title@en":"Her Majesty Elizabeth the Second, by the Grace of God of the United Kingdom of Great Britain and Northern Ireland and of Her other Realms and Territories Queen, Head of the Commonwealth, Defender of the Faith"}]}}`,
json)
}
})
}
}

}

func checkSchemaNodes(t *testing.T, expected []*protos.SchemaNode, actual []*protos.SchemaNode) {
sort.Slice(expected, func(i, j int) bool {
return expected[i].Predicate >= expected[j].Predicate
Expand Down Expand Up @@ -6701,6 +6745,7 @@ func TestSchemaBlock1(t *testing.T) {
{Predicate: "geometry", Type: "geo"}, {Predicate: "alias", Type: "string"},
{Predicate: "dob", Type: "datetime"}, {Predicate: "survival_rate", Type: "float"},
{Predicate: "value", Type: "string"}, {Predicate: "full_name", Type: "string"},
{Predicate: "royal_title", Type: "string"},
{Predicate: "noindex_name", Type: "string"},
{Predicate: "lossy", Type: "string"},
{Predicate: "school", Type: "uid"},
Expand Down Expand Up @@ -6799,6 +6844,7 @@ friend : uid @reverse @count .
geometry : geo @index(geo) .
value : string @index(trigram) .
full_name : string @index(hash) .
royal_title : string @index(hash, term, fulltext) .
noindex_name : string .
school : uid @count .
lossy : string @index(term) .
Expand Down Expand Up @@ -8112,6 +8158,20 @@ func TestMultipleEquality4(t *testing.T) {
require.JSONEq(t, `{"data": {"me":[{"friend":[{"name":"Rick Grimes"},{"name":"Andrea"}],"name":"Michonne"},{"name":"Glenn Rhee"}]}}`, js)
}

func TestMultipleEquality5(t *testing.T) {
populateGraph(t)
query := `
{
me(func: eq(name@en, ["Honey badger", "Honey bee"])) {
name@en
}
}
`
js := processToFastJSON(t, query)
require.JSONEq(t, `{"data": {"me":[{"name@en":"Honey badger"},{"name@en":"Honey bee"}]}}`, js)
}

func TestMultipleGtError(t *testing.T) {
populateGraph(t)
query := `
Expand Down
4 changes: 3 additions & 1 deletion wiki/content/query-language/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,9 @@ For example:
- `name@en:pl:.` => Look for `en`, then `pl`, then untagged, then any language.


{{% notice "note" %}}In functions, language lists and `.` notation are not allowed. In functions, a string edge without a language tag means apply function to all languages, while a single language tag means apply to only the given language.{{% /notice %}}
{{% notice "note" %}}In functions, language lists are not allowed. Single language, `.` notation and attribute name without language tag works as described above.{{% /notice %}}

{{% notice "note" %}}In case of full text search functions (`alloftext`, `anyoftext`), when no language is specified, default (English) Full Text Search tokenizer is used.{{% /notice %}}


Query Example: Some of Bollywood director and actor Farhan Akhtar's movies have a name stored in Russian as well as Hindi and English, others do not.
Expand Down
1 change: 1 addition & 0 deletions worker/sort.go
Original file line number Diff line number Diff line change
Expand Up @@ -438,6 +438,7 @@ func fetchValue(uid uint64, attr string, langs []string, scalar types.TypeID) (t
pl := posting.Get(x.DataKey(attr, uid))

src, err := pl.ValueFor(langs)

if err != nil {
return types.Val{}, err
}
Expand Down
12 changes: 11 additions & 1 deletion worker/stringfilter.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ type stringFilter struct {
tokens []string
match matchFn
ineqValue types.Val
eqVals []types.Val
}

func matchStrings(uids *protos.List, values []types.Val, filter stringFilter) *protos.List {
Expand Down Expand Up @@ -80,7 +81,16 @@ func defaultMatch(value types.Val, filter stringFilter) bool {
}

func ineqMatch(value types.Val, filter stringFilter) bool {
return types.CompareVals(filter.funcName, value, filter.ineqValue)
if len(filter.eqVals) == 0 {
return types.CompareVals(filter.funcName, value, filter.ineqValue)
}

for _, v := range filter.eqVals {
if types.CompareVals(filter.funcName, value, v) {
return true
}
}
return false
}

func tokenizeValue(value types.Val, filter stringFilter) []string {
Expand Down
60 changes: 49 additions & 11 deletions worker/task.go
Original file line number Diff line number Diff line change
Expand Up @@ -499,16 +499,20 @@ func processTask(ctx context.Context, q *protos.Query, gid uint32) (*protos.Resu
}

// For string matching functions, check the language.
if (srcFn.fnType == StandardFn || srcFn.fnType == HasFn ||
srcFn.fnType == FullTextSearchFn || srcFn.fnType == CompareAttrFn) &&
len(srcFn.lang) > 0 {
if needsStringFiltering(srcFn) {
filterStringFunction(funcArgs{q, gid, srcFn, out})
}

out.IntersectDest = srcFn.intersectDest
return out, nil
}

func needsStringFiltering(srcFn *functionContext) bool {
return srcFn.isStringFn && srcFn.lang != "." &&
(srcFn.fnType == StandardFn || srcFn.fnType == HasFn ||
srcFn.fnType == FullTextSearchFn || srcFn.fnType == CompareAttrFn)
}

func handleHasFunction(arg funcArgs) error {
attr := arg.q.Attr
if ok := schema.State().HasCount(attr); !ok {
Expand Down Expand Up @@ -639,15 +643,35 @@ func handleCompareFunction(ctx context.Context, arg funcArgs) error {
default:
}
algo.ApplyFilter(arg.out.UidMatrix[row], func(uid uint64, i int) bool {
var langs []string
if len(arg.srcFn.lang) > 0 {
langs = append(langs, arg.srcFn.lang)
}
sv, err := fetchValue(uid, attr, langs, typ)
if sv.Value == nil || err != nil {
switch arg.srcFn.lang {
case "":
pl := posting.Get(x.DataKey(attr, uid))
sv, err := pl.Value()
if err == nil {
dst, err := types.Convert(sv, typ)
return err == nil &&
types.CompareVals(arg.q.SrcFunc[0], dst, arg.srcFn.eqTokens[row])
}
return false
case ".":
pl := posting.Get(x.DataKey(attr, uid))
values, _ := pl.AllValues()
for _, sv := range values {
dst, err := types.Convert(sv, typ)
if err == nil &&
types.CompareVals(arg.q.SrcFunc[0], dst, arg.srcFn.eqTokens[row]) {
return true
}
}
return false
default:
langs := []string{arg.srcFn.lang}
sv, err := fetchValue(uid, attr, langs, typ)
if sv.Value == nil || err != nil {
return false
}
return types.CompareVals(arg.q.SrcFunc[0], sv, arg.srcFn.eqTokens[row])
}
return types.CompareVals(arg.q.SrcFunc[0], sv, arg.srcFn.eqTokens[row])
})
}
}
Expand Down Expand Up @@ -687,7 +711,13 @@ func filterStringFunction(arg funcArgs) {
key := x.DataKey(attr, uid)
pl := posting.GetOrCreate(key, arg.gid)

val, err := pl.ValueForTag(arg.srcFn.lang)
var val types.Val
var err error
if arg.srcFn.lang == "" {
val, err = pl.Value()
} else {
val, err = pl.ValueForTag(arg.srcFn.lang)
}
if err != nil {
continue
}
Expand Down Expand Up @@ -716,6 +746,7 @@ func filterStringFunction(arg funcArgs) {
filtered = matchStrings(filtered, values, filter)
case CompareAttrFn:
filter.ineqValue = arg.srcFn.ineqValue
filter.eqVals = arg.srcFn.eqTokens
filter.match = ineqMatch
filtered = matchStrings(uids, values, filter)
}
Expand Down Expand Up @@ -755,6 +786,7 @@ type functionContext struct {
fnType FuncType
regex *cregexp.Regexp
isFuncAtRoot bool
isStringFn bool
}

const (
Expand Down Expand Up @@ -786,6 +818,11 @@ func parseSrcFn(q *protos.Query) (*functionContext, error) {
fc := &functionContext{fnType: fnType, fname: f}
var err error

t, err := schema.State().TypeOf(attr)
if err == nil && fnType != NotAFunction && t.Name() == types.StringID.Name() {
fc.isStringFn = true
}

switch fnType {
case NotAFunction:
fc.n = len(q.UidList.Uids)
Expand Down Expand Up @@ -831,6 +868,7 @@ func parseSrcFn(q *protos.Query) (*functionContext, error) {
}
fc.tokens = append(fc.tokens, tokens...)
fc.eqTokens = append(fc.eqTokens, fc.ineqValue)

}

// Number of index keys is more than no. of uids to filter, so its better to fetch data keys
Expand Down
24 changes: 12 additions & 12 deletions worker/worker_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ func TestProcessTaskIndexMLayer(t *testing.T) {
require.NoError(t, err)

require.EqualValues(t, [][]uint64{
nil,
{},
{10, 12},
}, algo.ToUintsListForTest(r.UidMatrix))

Expand All @@ -184,9 +184,9 @@ func TestProcessTaskIndexMLayer(t *testing.T) {
require.NoError(t, err)

require.EqualValues(t, [][]uint64{
nil,
{},
{12},
nil,
{},
{10},
}, algo.ToUintsListForTest(r.UidMatrix))

Expand Down Expand Up @@ -215,8 +215,8 @@ func TestProcessTaskIndexMLayer(t *testing.T) {

require.EqualValues(t, [][]uint64{
{12},
nil,
nil,
{},
{},
}, algo.ToUintsListForTest(r.UidMatrix))

query = newQuery("friend", nil, []string{"anyofterms", "", "photon notphoton ignored"})
Expand All @@ -225,8 +225,8 @@ func TestProcessTaskIndexMLayer(t *testing.T) {

require.EqualValues(t, [][]uint64{
{12},
nil,
nil,
{},
{},
}, algo.ToUintsListForTest(r.UidMatrix))
}

Expand All @@ -242,7 +242,7 @@ func TestProcessTaskIndex(t *testing.T) {
require.NoError(t, err)

require.EqualValues(t, [][]uint64{
nil,
{},
{10, 12},
}, algo.ToUintsListForTest(r.UidMatrix))

Expand All @@ -264,9 +264,9 @@ func TestProcessTaskIndex(t *testing.T) {
require.NoError(t, err)

require.EqualValues(t, [][]uint64{
nil,
{},
{12},
nil,
{},
{10},
}, algo.ToUintsListForTest(r.UidMatrix))

Expand Down Expand Up @@ -295,8 +295,8 @@ func TestProcessTaskIndex(t *testing.T) {

require.EqualValues(t, [][]uint64{
{12},
nil,
nil,
{},
{},
}, algo.ToUintsListForTest(r.UidMatrix))
}

Expand Down

0 comments on commit 2accea5

Please sign in to comment.