From 9d21e1c13e31cc7509b030dd2a6917978cca0dd9 Mon Sep 17 00:00:00 2001 From: zwwhdls Date: Thu, 26 Dec 2024 21:31:04 +0800 Subject: [PATCH] add search context in search result & remove img in purecontent Signed-off-by: zwwhdls --- cmd/main.go | 4 +- pkg/dispatch/plugin/doc_process.go | 4 ++ pkg/dispatch/plugin/doc_process_test.go | 60 +++++++++++++++++++++++++ pkg/models/doc/document.go | 1 + pkg/service/chain.go | 26 ++++++++++- pkg/service/chain_test.go | 30 +++++++++++++ pkg/store/db/model.go | 4 ++ pkg/store/postgres/postgres.go | 3 +- 8 files changed, 128 insertions(+), 4 deletions(-) create mode 100644 pkg/dispatch/plugin/doc_process_test.go diff --git a/cmd/main.go b/cmd/main.go index 01315ff..84a4fea 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -35,14 +35,14 @@ var RootCmd = &cobra.Command{ } func init() { - logger.InitLog() - RootCmd.AddCommand(apps.ServeCmd) RootCmd.AddCommand(apps.AgentCmd) RootCmd.PersistentFlags().StringVar(&config.FilePath, "config", path.Join(config.LocalUserPath(), config.DefaultConfigBase), "friday config file") } func main() { + logger.InitLog() + defer logger.Sync() if err := RootCmd.Execute(); err != nil { panic(err) } diff --git a/pkg/dispatch/plugin/doc_process.go b/pkg/dispatch/plugin/doc_process.go index d8bcb42..1caefdf 100644 --- a/pkg/dispatch/plugin/doc_process.go +++ b/pkg/dispatch/plugin/doc_process.go @@ -68,6 +68,10 @@ func trimContent(content string) (string, error) { return "", err } + query.Find("body img").Each(func(i int, s *goquery.Selection) { + s.Remove() + }) + query.Find("body").EachWithBreak(func(i int, selection *goquery.Selection) bool { t := strings.TrimSpace(selection.Text()) if t != "" { diff --git a/pkg/dispatch/plugin/doc_process_test.go b/pkg/dispatch/plugin/doc_process_test.go new file mode 100644 index 0000000..21b2523 --- /dev/null +++ b/pkg/dispatch/plugin/doc_process_test.go @@ -0,0 +1,60 @@ +/* + Copyright 2024 Friday Author. 
+ + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package plugin + +import "testing" + +func Test_trimContent(t *testing.T) { + type args struct { + content string + } + tests := []struct { + name string + args args + want string + wantErr bool + }{ + { + name: "test", + args: args{ + content: ` + Example + +

This is a paragraph.

+ Image 1 + Image 2 +

This is another paragraph.

+
+ `,
+			},
+			want:    "This is a paragraph. This is another paragraph.",
+			wantErr: false,
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			got, err := trimContent(tt.args.content)
+			if (err != nil) != tt.wantErr {
+				t.Errorf("trimContent() error = %v, wantErr %v", err, tt.wantErr)
+				return
+			}
+			if got != tt.want {
+				t.Errorf("trimContent() got = %v, want %v", got, tt.want)
+			}
+		})
+	}
+}
diff --git a/pkg/models/doc/document.go b/pkg/models/doc/document.go
index ab07a19..2451f42 100644
--- a/pkg/models/doc/document.go
+++ b/pkg/models/doc/document.go
@@ -38,6 +38,7 @@ type Document struct {
 	PureContent   string    `json:"pure_content,omitempty"`
 	TitleTokens   []string  `json:"title_tokens,omitempty"`
 	ContentTokens []string  `json:"content_tokens,omitempty"`
+	SearchContext []string  `json:"search_context,omitempty"`
 	Marked        *bool     `json:"marked,omitempty"`
 	Unread        *bool     `json:"unread,omitempty"`
 	CreatedAt     time.Time `json:"created_at"`
diff --git a/pkg/service/chain.go b/pkg/service/chain.go
index 12838cf..64c79d7 100644
--- a/pkg/service/chain.go
+++ b/pkg/service/chain.go
@@ -19,6 +19,7 @@ package service
 import (
 	"context"
 	"fmt"
+	"regexp"
 
 	"go.uber.org/zap"
 
@@ -154,7 +155,36 @@ func (c *Chain) GetDocument(ctx context.Context, namespace string, entryId int64
 func (c *Chain) Search(ctx context.Context, filter *doc.DocumentFilter) ([]*doc.Document, error) {
 	ctx = c.WithNamespace(ctx, filter.Namespace)
 	c.Log.Debugf("search document: %+v", filter.String())
-	return c.DocClient.FilterDocuments(ctx, filter)
+	docs, err := c.DocClient.FilterDocuments(ctx, filter)
+	if err != nil {
+		c.Log.Errorf("search document error: %s", err)
+		return nil, err
+	}
+	for _, d := range docs {
+		c.GenContext(filter.Search, d)
+	}
+	return docs, nil
+}
+
+// GenContext appends to document.SearchContext one snippet for every
+// non-overlapping occurrence of search in document.PureContent. A snippet is
+// the match plus up to 100 characters of context on each side (context never
+// crosses a line break, because "." does not match "\n"), wrapped in "...".
+//
+// An empty search term is ignored: it would otherwise match at every position
+// and turn the whole content into snippets.
+func (c *Chain) GenContext(search string, document *doc.Document) {
+	if search == "" || document == nil {
+		return
+	}
+
+	// QuoteMeta turns the term into a literal, so the pattern is always valid
+	// and MustCompile cannot panic on user input.
+	re := regexp.MustCompile(`(.{0,100})(` + regexp.QuoteMeta(search) + `)(.{0,100})`)
+	for _, m := range re.FindAllStringSubmatch(document.PureContent, -1) {
+		// m[1] and m[3] are the leading and trailing context, m[2] is the match.
+		document.SearchContext = append(document.SearchContext, "..."+m[1]+m[2]+m[3]+"...")
+	}
 }
 
 func (c *Chain) Delete(ctx context.Context, namespace string, entryId int64) error {
diff --git a/pkg/service/chain_test.go b/pkg/service/chain_test.go
index 677cdbf..0be76b1 100644
--- a/pkg/service/chain_test.go
+++ b/pkg/service/chain_test.go
@@ -18,6 +18,7 @@ package service_test
 
 import (
 	"context"
+	"strings"
 	"time"
 
 	. "github.com/onsi/ginkgo/v2"
@@ -156,3 +157,40 @@ var _ = Describe("Chain", func() {
 		})
 	})
 })
+
+var _ = Describe("Search Context", func() {
+	var (
+		Chain *service.Chain
+	)
+	BeforeEach(func() {
+		service.ChainPool = dispatch.NewPool(10)
+		logger.InitLog()
+		Chain = &service.Chain{
+			Log: logger.NewLog("test"),
+		}
+	})
+
+	Describe("GenContext", func() {
+		Context("GenContext", func() {
+			It("GenContext should be successful", func() {
+				document := &doc.Document{
+					EntryId:     1,
+					PureContent: strings.Repeat("friday", 100) + "nanafs" + strings.Repeat("basenana", 100) + "nanafs" + strings.Repeat("friday", 100),
+				}
+				Chain.GenContext("nanafs", document)
+				Expect(document.SearchContext).Should(Equal([]string{
+					"...iday" + strings.Repeat("friday", 16) + "nanafs" + strings.Repeat("basenana", 12) + "base...",
+					"...nana" + strings.Repeat("basenana", 12) + "nanafs" + strings.Repeat("friday", 16) + "frid...",
+				}))
+			})
+			It("GenContext should skip an empty search term", func() {
+				document := &doc.Document{
+					EntryId:     2,
+					PureContent: strings.Repeat("friday", 100),
+				}
+				Chain.GenContext("", document)
+				Expect(document.SearchContext).Should(BeEmpty())
+			})
+		})
+	})
+})
diff --git a/pkg/store/db/model.go b/pkg/store/db/model.go
index ee97461..1bd99fd 100644
--- a/pkg/store/db/model.go
+++ b/pkg/store/db/model.go
@@ -206,6 +206,9 @@ func (d *Document) UpdateFrom(document *doc.Document) *Document {
 	if document.ParentEntryID != nil {
 		d.ParentEntryID = document.ParentEntryID
 	}
+	if document.PureContent != "" {
+		d.PureContent = document.PureContent
+	}
 	return d
 }
 
@@ -220,6 +223,7 @@ func (d *Document)
To() *doc.Document { Summary: d.Summary, SubContent: d.SubContent, HeaderImage: d.HeaderImage, + PureContent: d.PureContent, Marked: &d.Marked, Unread: &d.Unread, CreatedAt: d.CreatedAt, diff --git a/pkg/store/postgres/postgres.go b/pkg/store/postgres/postgres.go index ae5ef08..2d8a1a9 100644 --- a/pkg/store/postgres/postgres.go +++ b/pkg/store/postgres/postgres.go @@ -17,6 +17,7 @@ package postgres import ( + "fmt" "time" "go.uber.org/zap" @@ -39,7 +40,7 @@ func NewPostgresClient(postgresUrl string) (*PostgresClient, error) { log := logger.NewLog("postgres") dbObj, err := gorm.Open(postgres.Open(postgresUrl), &gorm.Config{Logger: utils.NewDbLogger()}) if err != nil { - panic(err) + return nil, fmt.Errorf("failed to connect to postgres: %w", err) } dbConn, err := dbObj.DB()