Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added support for Phonetic Matching #92

Merged
merged 1 commit into from
Nov 30, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion redisearch/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ func (i *Client) CreateIndexWithIndexDefinition(schema *Schema, definition *Inde

// internal method
func (i *Client) indexWithDefinition(indexName string, schema *Schema, definition *IndexDefinition) (err error) {
args := redis.Args{i.name}
args := redis.Args{indexName}
if definition != nil {
args = definition.Serialize(args)
}
Expand Down
44 changes: 44 additions & 0 deletions redisearch/client_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"fmt"
"reflect"
"testing"
"time"

"github.com/gomodule/redigo/redis"
"github.com/stretchr/testify/assert"
Expand Down Expand Up @@ -850,3 +851,46 @@ func TestClient_CreateIndexWithIndexDefinition1(t *testing.T) {

}
}

// TestClient_CreateIndex checks that a text field declared with a phonetic
// matcher lets a query for "Jon" also return the document named "John".
// It needs a reachable RediSearch server and returns early on versions
// that do not support IndexDefinition.
func TestClient_CreateIndex(t *testing.T) {
	c := createClient("create-index-phonetic")
	version, err := c.getRediSearchVersion()
	assert.Nil(t, err)
	if version <= 10699 {
		// IndexDefinition is available for RediSearch 2.0+
		return
	}

	// Create a schema whose "name" field uses English double-metaphone matching.
	schema := NewSchema(DefaultOptions).
		AddField(NewTextFieldOptions("name", TextFieldOptions{Sortable: true, PhoneticMatcher: PhoneticDoubleMetaphoneEnglish})).
		AddField(NewNumericField("age"))

	// IndexDefinition is available for RediSearch 2.0+
	// Only keys starting with "create-index-phonetic:" are indexed.
	indexDefinition := NewIndexDefinition().AddPrefix("create-index-phonetic:")

	// Create the index; capture the error so the assertion below checks this
	// call rather than the stale result of the version lookup.
	err = c.CreateIndexWithIndexDefinition(schema, indexDefinition)
	assert.Nil(t, err)

	// Create docs whose names share the same phonetic encoding.
	vanillaConnection := c.pool.Get()
	// Return the connection to the pool when the test finishes.
	defer vanillaConnection.Close()
	_, err = vanillaConnection.Do("HSET", "create-index-phonetic:doc1", "name", "Jon", "age", 25)
	assert.Nil(t, err)
	_, err = vanillaConnection.Do("HSET", "create-index-phonetic:doc2", "name", "John", "age", 20)
	assert.Nil(t, err)

	// Wait for all documents to be indexed. Abort on error instead of
	// reading IsIndexing from a result that may not be valid.
	info, err := c.Info()
	if err != nil {
		t.Fatalf("Info() failed: %v", err)
	}
	for info.IsIndexing {
		time.Sleep(time.Second)
		info, err = c.Info()
		if err != nil {
			t.Fatalf("Info() failed: %v", err)
		}
	}

	docs, total, err := c.Search(NewQuery("Jon").
		SetReturnFields("name"))
	assert.Nil(t, err)
	// Verify that we've received 2 documents (Jon and John).
	assert.Equal(t, 2, total)
	// Guard the indexing below so a short result fails the test cleanly
	// instead of panicking.
	if len(docs) < 2 {
		t.Fatalf("expected at least 2 documents, got %d", len(docs))
	}
	assert.Equal(t, "Jon", docs[0].Properties["name"])
	assert.Equal(t, "John", docs[1].Properties["name"])
}
48 changes: 48 additions & 0 deletions redisearch/example_schema_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package redisearch_test

import (
"fmt"
"github.com/gomodule/redigo/redis"
"log"
"time"

Expand Down Expand Up @@ -50,3 +51,50 @@ func ExampleCreateIndex_temporary() {
// Output: ExampleCreateIndex_temporary:doc1 Hello world 1 <nil>
// Unknown Index name
}

// exemplifies the CreateIndexWithIndexDefinition function with a phonetic matcher on a text field, so that searches match phonetically similar terms
func ExampleClient_CreateIndexWithIndexDefinition_phonetic() {
// Create a client
host := "localhost:6379"
password := ""
pool := &redis.Pool{Dial: func() (redis.Conn, error) {
return redis.Dial("tcp", host, redis.DialPassword(password))
}}
c := redisearch.NewClientFromPool(pool, "myPhoneticIndex")

// Create a schema
schema := redisearch.NewSchema(redisearch.DefaultOptions).
AddField(redisearch.NewTextFieldOptions("name", redisearch.TextFieldOptions{Sortable: true, PhoneticMatcher: redisearch.PhoneticDoubleMetaphoneEnglish})).
AddField(redisearch.NewNumericField("age"))

// IndexDefinition is available for RediSearch 2.0+
// Create a index definition for automatic indexing on Hash updates.
// In this example we will only index keys started by product:
indexDefinition := redisearch.NewIndexDefinition().AddPrefix("myPhoneticIndex:")

// Add the Index Definition
c.CreateIndexWithIndexDefinition(schema, indexDefinition)

// Create docs with a name that has the same phonetic matcher
vanillaConnection := pool.Get()
vanillaConnection.Do("HSET", "myPhoneticIndex:doc1", "name", "Jon", "age", 25)
// Create a second document with a name that has the same phonetic matcher
vanillaConnection.Do("HSET", "myPhoneticIndex:doc2", "name", "John", "age", 20)
// Create a third document with a name that does not have the same phonetic matcher
vanillaConnection.Do("HSET", "myPhoneticIndex:doc3", "name", "Pieter", "age", 30)

// Wait for all documents to be indexed
info, _ := c.Info()
for info.IsIndexing {
time.Sleep(time.Second)
info, _ = c.Info()
}

_, total, _ := c.Search(redisearch.NewQuery("Jon").
SetReturnFields("name"))

// Verify that the we've received 2 documents ( Jon and John )
fmt.Printf("Total docs replied %d\n", total)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why print and not assert?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@MeirShpilraien this is an example. We're supposed to print and check that the output matches. This is meant for godoc:

        // Verify that the we've received 2 documents ( Jon and John )
	fmt.Printf("Total docs replied %d\n", total)

	// Output: Total docs replied 2

Expected godoc ( check output field):
image


// Output: Total docs replied 2
}
24 changes: 20 additions & 4 deletions redisearch/schema.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@ import (
// FieldType is an enumeration of field/property types
type FieldType int

// PhoneticMatcherType is an enumeration of the phonetic algorithm and language used.
// When serializing a text field, a non-empty value is appended to the field
// arguments as the string following "PHONETIC"; the empty string adds nothing.
type PhoneticMatcherType string

// Options are flags passed to the the abstract Index call, which receives them as interface{}, allowing
// for implementation specific options
type Options struct {
Expand Down Expand Up @@ -98,6 +101,7 @@ var DefaultOptions = Options{
MaxTextFieldsFlag: false,
}

// Field Types
const (
// TextField full-text field
TextField FieldType = iota
Expand All @@ -112,6 +116,14 @@ const (
TagField
)

// Phonetic Matchers
// Each value is the matcher string passed after the PHONETIC argument of a
// text field; judging by the names, "dm" stands for Double Metaphone and the
// suffix is the language code.
const (
// PhoneticDoubleMetaphoneEnglish selects the "dm:en" matcher (English).
PhoneticDoubleMetaphoneEnglish PhoneticMatcherType = "dm:en"
// PhoneticDoubleMetaphoneFrench selects the "dm:fr" matcher (French).
PhoneticDoubleMetaphoneFrench PhoneticMatcherType = "dm:fr"
// PhoneticDoubleMetaphonePortuguese selects the "dm:pt" matcher (Portuguese).
PhoneticDoubleMetaphonePortuguese PhoneticMatcherType = "dm:pt"
// PhoneticDoubleMetaphoneSpanish selects the "dm:es" matcher (Spanish).
PhoneticDoubleMetaphoneSpanish PhoneticMatcherType = "dm:es"
)

// Field represents a single field's Schema
type Field struct {
Name string
Expand All @@ -122,10 +134,11 @@ type Field struct {

// TextFieldOptions Options for text fields - weight and stemming enabled/disabled.
type TextFieldOptions struct {
Weight float32
Sortable bool
NoStem bool
NoIndex bool
Weight float32
Sortable bool
NoStem bool
NoIndex bool
PhoneticMatcher PhoneticMatcherType
}

// TagFieldOptions options for indexing tag fields
Expand Down Expand Up @@ -306,6 +319,9 @@ func serializeField(f Field, args redis.Args) (argsOut redis.Args, err error) {
if opts.NoStem {
argsOut = append(argsOut, "NOSTEM")
}
if opts.PhoneticMatcher != "" {
argsOut = append(argsOut, "PHONETIC", string(opts.PhoneticMatcher))
}
if opts.Sortable {
argsOut = append(argsOut, "SORTABLE")
}
Expand Down
2 changes: 2 additions & 0 deletions redisearch/schema_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,8 @@ func TestSerializeSchema(t *testing.T) {
{"default-and-text", args{NewSchema(DefaultOptions).AddField(NewTextField("text-field")), redis.Args{}}, redis.Args{"SCHEMA", "text-field", "TEXT"}, false},
{"default-and-sortable-text-field", args{NewSchema(DefaultOptions).AddField(NewSortableTextField("text-field", 10)), redis.Args{}}, redis.Args{"SCHEMA", "text-field", "TEXT", "WEIGHT", float32(10.0), "SORTABLE"}, false},
{"default-and-text-with-options", args{NewSchema(DefaultOptions).AddField(NewTextFieldOptions("text-field", TextFieldOptions{Weight: 5.0, Sortable: true, NoStem: false, NoIndex: false})), redis.Args{}}, redis.Args{"SCHEMA", "text-field", "TEXT", "WEIGHT", float32(5.0), "SORTABLE"}, false},
{"default-and-text-with-phonetic-en", args{NewSchema(DefaultOptions).AddField(NewTextFieldOptions("text-field", TextFieldOptions{PhoneticMatcher: PhoneticDoubleMetaphoneEnglish})), redis.Args{}}, redis.Args{"SCHEMA", "text-field", "TEXT", "PHONETIC", "dm:en"}, false},
{"default-and-text-with-phonetic-pt", args{NewSchema(DefaultOptions).AddField(NewTextFieldOptions("text-field", TextFieldOptions{PhoneticMatcher: PhoneticDoubleMetaphonePortuguese})), redis.Args{}}, redis.Args{"SCHEMA", "text-field", "TEXT", "PHONETIC", "dm:pt"}, false},
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What are those used for?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

to test the internal schema serialization.

{"default-and-tag", args{NewSchema(DefaultOptions).AddField(NewTagField("tag-field")), redis.Args{}}, redis.Args{"SCHEMA", "tag-field", "TAG", "SEPARATOR", ","}, false},
{"default-and-tag-with-options", args{NewSchema(DefaultOptions).AddField(NewTagFieldOptions("tag-field", TagFieldOptions{Sortable: true, NoIndex: false, Separator: byte(',')})), redis.Args{}}, redis.Args{"SCHEMA", "tag-field", "TAG", "SEPARATOR", ",", "SORTABLE"}, false},
{"error-unsupported", args{NewSchema(DefaultOptions).AddField(Field{Type: 10}), redis.Args{}}, nil, true},
Expand Down