From ed4935d40dd2ffe84812c09e1ab229b68bba71ea Mon Sep 17 00:00:00 2001 From: "@k33g" Date: Sun, 19 May 2024 10:06:04 +0200 Subject: [PATCH] =?UTF-8?q?=F0=9F=93=A6=20=F0=9F=A6=9C=20Parakeet=20v0.0.4?= =?UTF-8?q?=20=F0=9F=93=9D=20[memo]?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitignore | 1 + .vscode/settings.json | 4 +- README.md | 116 ++++++++++++++++++ {demo => ama}/.gitignore | 0 {demo => ama}/README.md | 3 + {demo => ama}/embeddings-from-code/go.mod | 0 {demo => ama}/embeddings-from-code/go.sum | 0 ama/embeddings-from-code/main.go | 63 ++++++++++ ama/search/demo.md | 4 + {demo => ama}/search/go.mod | 0 {demo => ama}/search/go.sum | 0 {demo => ama}/search/main.go | 22 ++-- completion/completion.go | 1 + content/content_test.go | 79 ++++++++++++ content/contents-for-test/01/hello.txt | 1 + content/contents-for-test/01/hey.txt | 1 + content/contents-for-test/02/hello.txt | 1 + content/contents-for-test/02/hey.txt | 1 + content/context.go | 29 +++++ content/files.go | 81 ++++++++++++ content/text.go | 81 ++++++++++++ .../data/James-T-Kirk.txt | 5 - .../data/Jean-Luc-Picard.txt | 6 - .../data/Michael-Burnham.txt | 5 - .../data/Philippe-Charriere.txt | 5 - demo/embeddings-from-code/main.go | 114 ----------------- embeddings/context.go | 25 ++++ examples/01-generate/main.go | 4 +- examples/02-generate-stream/main.go | 4 +- examples/03-chat/main.go | 4 +- examples/04-chat-stream/main.go | 4 +- examples/05-context/main.go | 4 +- examples/06-summary/main.go | 4 +- examples/07-explain/main.go | 4 +- .../10-chat-conversational-memory/main.go | 4 +- .../begin/main.go | 4 +- .../resume/main.go | 4 +- feature.agent.todo | 13 ++ feature.options.todo | 1 + feature.wasm.plugins.todo | 2 + feature.yaml.retriever.for.context.todo | 1 + feature.yaml.retriever.for.rag.todo | 6 + fix.raise.error.if.llm.not.loded.todo | 0 go.mod | 5 +- go.sum | 3 + go.work | 5 +- go.work.sum | 11 +- parakeet.go | 4 +- release.env | 4 +- 49 files 
changed, 556 insertions(+), 187 deletions(-) rename {demo => ama}/.gitignore (100%) rename {demo => ama}/README.md (52%) rename {demo => ama}/embeddings-from-code/go.mod (100%) rename {demo => ama}/embeddings-from-code/go.sum (100%) create mode 100644 ama/embeddings-from-code/main.go create mode 100644 ama/search/demo.md rename {demo => ama}/search/go.mod (100%) rename {demo => ama}/search/go.sum (100%) rename {demo => ama}/search/main.go (76%) create mode 100644 content/content_test.go create mode 100644 content/contents-for-test/01/hello.txt create mode 100644 content/contents-for-test/01/hey.txt create mode 100644 content/contents-for-test/02/hello.txt create mode 100644 content/contents-for-test/02/hey.txt create mode 100644 content/context.go create mode 100644 content/files.go create mode 100644 content/text.go delete mode 100644 demo/embeddings-from-code/data/James-T-Kirk.txt delete mode 100644 demo/embeddings-from-code/data/Jean-Luc-Picard.txt delete mode 100644 demo/embeddings-from-code/data/Michael-Burnham.txt delete mode 100644 demo/embeddings-from-code/data/Philippe-Charriere.txt delete mode 100644 demo/embeddings-from-code/main.go create mode 100644 embeddings/context.go create mode 100644 feature.agent.todo create mode 100644 feature.options.todo create mode 100644 feature.wasm.plugins.todo create mode 100644 feature.yaml.retriever.for.context.todo create mode 100644 feature.yaml.retriever.for.rag.todo create mode 100644 fix.raise.error.if.llm.not.loded.todo diff --git a/.gitignore b/.gitignore index e120bef..1cec568 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ ollama-from-scratch +prompting diff --git a/.vscode/settings.json b/.vscode/settings.json index 6a92518..d23e9e9 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,8 +1,8 @@ { "workbench.iconTheme": "material-icon-theme", "workbench.colorTheme": "Catppuccin FrappΓ©", - "editor.fontSize": 18, - "terminal.integrated.fontSize": 18, + "editor.fontSize": 15, + 
"terminal.integrated.fontSize": 15, "editor.insertSpaces": true, "editor.tabSize": 4, "editor.detectIndentation": true, diff --git a/README.md b/README.md index 54ffc09..2a1e0c7 100644 --- a/README.md +++ b/README.md @@ -434,6 +434,122 @@ store.Initialize("../embeddings.db") > - `examples/09-embeddings-bbolt/create-embeddings`: create and populate the vector store > - `examples/09-embeddings-bbolt/use-embeddings`: search similarities in the vector store + +## Create embeddings from text files and Similarity search + +### Create embeddings +```golang +ollamaUrl := "http://localhost:11434" +embeddingsModel := "all-minilm" + +store := embeddings.BboltVectorStore{} +store.Initialize("../embeddings.db") + +// Parse all golang source code of the examples +// Create embeddings from documents and save them in the store +counter := 0 +_, err := content.ForEachFile("../../examples", ".go", func(path string) error { + data, err := os.ReadFile(path) + if err != nil { + return err + } + + fmt.Println("πŸ“ Creating embedding from:", path) + counter++ + embedding, err := embeddings.CreateEmbedding( + ollamaUrl, + llm.Query4Embedding{ + Model: embeddingsModel, + Prompt: string(data), + }, + strconv.Itoa(counter), // don't forget the id (unique identifier) + ) + fmt.Println("πŸ“¦ Created: ", len(embedding.Embedding)) + + if err != nil { + fmt.Println("😑:", err) + } else { + _, err := store.Save(embedding) + if err != nil { + fmt.Println("😑:", err) + } + } + return nil +}) +if err != nil { + log.Fatalln("😑:", err) +} +``` + +### Similarity search + +```golang +ollamaUrl := "http://localhost:11434" +embeddingsModel := "all-minilm" +chatModel := "magicoder:latest" + +store := embeddings.BboltVectorStore{} +store.Initialize("../embeddings.db") + +systemContent := `You are a Golang developer and an expert in computer programming. +Please make friendly answer for the noobs. Use the provided context and doc to answer. 
+Add source code examples if you can.` + +// Question for the Chat system +userContent := `How to create a stream chat completion with Parakeet?` + +// Create an embedding from the user question +embeddingFromQuestion, err := embeddings.CreateEmbedding( + ollamaUrl, + llm.Query4Embedding{ + Model: embeddingsModel, + Prompt: userContent, + }, + "question", +) +if err != nil { + log.Fatalln("😑:", err) +} +fmt.Println("πŸ”Ž searching for similarity...") + +similarities, _ := store.SearchSimilarities(embeddingFromQuestion, 0.3) + +// Generate the context from the similarities +// This will generate a string with a content like this one: +// `......` +documentsContent := embeddings.GenerateContextFromSimilarities(similarities) + +fmt.Println("πŸŽ‰ similarities", len(similarities)) + +query := llm.Query{ + Model: chatModel, + Messages: []llm.Message{ + {Role: "system", Content: systemContent}, + {Role: "system", Content: documentsContent}, + {Role: "user", Content: userContent}, + }, + Options: llm.Options{ + Temperature: 0.4, + RepeatLastN: 2, + }, + Stream: false, +} + +fmt.Println("") +fmt.Println("πŸ€– answer:") + +// Answer the question +_, err = completion.ChatStream(ollamaUrl, query, + func(answer llm.Answer) error { + fmt.Print(answer.Message.Content) + return nil + }) + +if err != nil { + log.Fatal("😑:", err) +} +``` + ## Demos - https://github.com/parakeet-nest/parakeet-demo diff --git a/demo/.gitignore b/ama/.gitignore similarity index 100% rename from demo/.gitignore rename to ama/.gitignore diff --git a/demo/README.md b/ama/README.md similarity index 52% rename from demo/README.md rename to ama/README.md index c9c1746..5a40ded 100644 --- a/demo/README.md +++ b/ama/README.md @@ -1 +1,4 @@ # Demo: use RAG to add Parakeet knowledge to LLMs +> ask me anything about Parakeet + +> 🚧 wip diff --git a/demo/embeddings-from-code/go.mod b/ama/embeddings-from-code/go.mod similarity index 100% rename from demo/embeddings-from-code/go.mod rename to 
ama/embeddings-from-code/go.mod diff --git a/demo/embeddings-from-code/go.sum b/ama/embeddings-from-code/go.sum similarity index 100% rename from demo/embeddings-from-code/go.sum rename to ama/embeddings-from-code/go.sum diff --git a/ama/embeddings-from-code/main.go b/ama/embeddings-from-code/main.go new file mode 100644 index 0000000..c61f4fa --- /dev/null +++ b/ama/embeddings-from-code/main.go @@ -0,0 +1,63 @@ +package main + +import ( + "fmt" + "log" + "os" + "strconv" + + "github.com/parakeet-nest/parakeet/content" + "github.com/parakeet-nest/parakeet/embeddings" + "github.com/parakeet-nest/parakeet/llm" +) + +func main() { + ollamaUrl := "http://localhost:11434" + // if working from a container + //ollamaUrl := "http://host.docker.internal:11434" + //var embeddingsModel = "magicoder:latest" + + embeddingsModel := "all-minilm" + + store := embeddings.BboltVectorStore{} + store.Initialize("../embeddings.db") + + // Parse all source code of the examples + // Create embeddings from documents and save them in the store + + counter := 0 + _, err := content.ForEachFile("../../examples", ".go", func(path string) error { + data, err := os.ReadFile(path) + if err != nil { + return err + } + + fmt.Println("πŸ“ Creating embedding from:", path) + counter++ + embedding, err := embeddings.CreateEmbedding( + ollamaUrl, + llm.Query4Embedding{ + Model: embeddingsModel, + Prompt: string(data), + }, + strconv.Itoa(counter), // don't forget the id (unique identifier) + ) + fmt.Println("πŸ“¦ Created: ", len(embedding.Embedding)) + + if err != nil { + fmt.Println("😑:", err) + } else { + _, err := store.Save(embedding) + if err != nil { + fmt.Println("😑:", err) + } + } + + return nil + }) + if err != nil { + log.Fatalln("😑:", err) + } + + +} diff --git a/ama/search/demo.md b/ama/search/demo.md new file mode 100644 index 0000000..633ed7a --- /dev/null +++ b/ama/search/demo.md @@ -0,0 +1,4 @@ +πŸ”Ž searching for similarity... 
+πŸŽ‰ similarities 15 + +πŸ€– answer: diff --git a/demo/search/go.mod b/ama/search/go.mod similarity index 100% rename from demo/search/go.mod rename to ama/search/go.mod diff --git a/demo/search/go.sum b/ama/search/go.sum similarity index 100% rename from demo/search/go.sum rename to ama/search/go.sum diff --git a/demo/search/main.go b/ama/search/main.go similarity index 76% rename from demo/search/main.go rename to ama/search/main.go index e82820e..175dce6 100644 --- a/demo/search/main.go +++ b/ama/search/main.go @@ -15,20 +15,19 @@ func main() { //ollamaUrl := "http://host.docker.internal:11434" //ollamaUrl := "http://bob.local:11434" - var embeddingsModel = "magicoder:latest" - var smallChatModel = "magicoder:latest" + embeddingsModel := "all-minilm" + chatModel := "magicoder:latest" store := embeddings.BboltVectorStore{} store.Initialize("../embeddings.db") systemContent := `You are a Golang developer and an expert in computer programming. - Please make friendly answer for the noobs. Use the provided context to answer. + Please make friendly answer for the noobs. Use the provided context and doc to answer. 
Add source code examples if you can.` // Question for the Chat system //userContent := `[Brief] How to create a stream completion with Parakeet?` - userContent := `[Brief] How to create a stream chat completion with Parakeet?` - + userContent := `How to create a stream chat completion with Parakeet?` // Create an embedding from the user question embeddingFromQuestion, err := embeddings.CreateEmbedding( @@ -44,20 +43,15 @@ func main() { } fmt.Println("πŸ”Ž searching for similarity...") - //similarity, _ := store.SearchMaxSimilarity(embeddingFromQuestion) - similarities, _ := store.SearchSimilarities(embeddingFromQuestion, 0.0) + similarities, _ := store.SearchSimilarities(embeddingFromQuestion, 0.3) - documentsContent := "" - for _, similarity := range similarities { - documentsContent += fmt.Sprintf("%s", similarity.Prompt) - } - documentsContent += "" + documentsContent := embeddings.GenerateContextFromSimilarities(similarities) - fmt.Println("πŸŽ‰ similarities", similarities) + fmt.Println("πŸŽ‰ similarities", len(similarities)) query := llm.Query{ - Model: smallChatModel, + Model: chatModel, Messages: []llm.Message{ {Role: "system", Content: systemContent}, {Role: "system", Content: documentsContent}, diff --git a/completion/completion.go b/completion/completion.go index b6dc80c..80b577c 100644 --- a/completion/completion.go +++ b/completion/completion.go @@ -83,6 +83,7 @@ func completionStream(url string, kindOfCompletion string, query llm.Query, onCh onChunk(llm.Answer{}) } + // ? πŸ€” and if I used answer + error as a parameter? 
err = onChunk(answer) // generate an error to stop the stream diff --git a/content/content_test.go b/content/content_test.go new file mode 100644 index 0000000..fe13d8f --- /dev/null +++ b/content/content_test.go @@ -0,0 +1,79 @@ +package content + +import ( + "fmt" + "strings" + "testing" +) + +func TestGetArrayOfContentFiles(t *testing.T) { + + content, err := GetArrayOfContentFiles("./contents-for-test", ".txt") + + if err != nil { + t.Fatal(err) + } + + fmt.Println("πŸ“ content", content) + + if content[0] != "hello world" { + t.Fatal("hello world not found") + } + if content[1] != "hey people" { + t.Fatal("hey people not found") + } + + if content[2] != "hello world" { + t.Fatal("hello world not found") + } + if content[3] != "hey people" { + t.Fatal("hey people not found") + } + +} + +func TestGetMapOfContentFiles(t *testing.T) { + + content, err := GetMapOfContentFiles("./contents-for-test", ".txt") + + if err != nil { + t.Fatal(err) + } + + fmt.Println("πŸ“ content", content) + + if content["contents-for-test/01/hello.txt"] != "hello world" { + t.Fatal("hello world not found") + } + if content["contents-for-test/02/hey.txt"] != "hey people" { + t.Fatal("hey people not found") + } + if content["contents-for-test/01/hello.txt"] != "hello world" { + t.Fatal("hello world not found") + } + if content["contents-for-test/02/hey.txt"] != "hey people" { + t.Fatal("hey people not found") + } + +} + +func TestGenerateContextFromDocs(t *testing.T) { + content, err := GetArrayOfContentFiles("./contents-for-test", ".txt") + + if err != nil { + t.Fatal(err) + } + + context := GenerateContextFromDocs(content) + + fmt.Println("πŸ“ context", context) + + if strings.Contains(context, "hello world") == false { + t.Fatal("hello world not found") + } + if strings.Contains(context, "hey people") == false { + t.Fatal("hey people not found") + } + +} + diff --git a/content/contents-for-test/01/hello.txt b/content/contents-for-test/01/hello.txt new file mode 100644 index 
0000000..95d09f2 --- /dev/null +++ b/content/contents-for-test/01/hello.txt @@ -0,0 +1 @@ +hello world \ No newline at end of file diff --git a/content/contents-for-test/01/hey.txt b/content/contents-for-test/01/hey.txt new file mode 100644 index 0000000..0d53f29 --- /dev/null +++ b/content/contents-for-test/01/hey.txt @@ -0,0 +1 @@ +hey people \ No newline at end of file diff --git a/content/contents-for-test/02/hello.txt b/content/contents-for-test/02/hello.txt new file mode 100644 index 0000000..95d09f2 --- /dev/null +++ b/content/contents-for-test/02/hello.txt @@ -0,0 +1 @@ +hello world \ No newline at end of file diff --git a/content/contents-for-test/02/hey.txt b/content/contents-for-test/02/hey.txt new file mode 100644 index 0000000..0d53f29 --- /dev/null +++ b/content/contents-for-test/02/hey.txt @@ -0,0 +1 @@ +hey people \ No newline at end of file diff --git a/content/context.go b/content/context.go new file mode 100644 index 0000000..f15d02f --- /dev/null +++ b/content/context.go @@ -0,0 +1,29 @@ +package content + +import "fmt" + +// GenerateContextFromDocs generates the context content from a slice of documents. +// +// Remarks: you can use the generated content to add context to a prompt for an LLM. +// +// Parameters: +// - docs: a slice of strings representing the documents. +// +// Returns: +// - string: the generated context content in XML format. +func GenerateContextFromDocs(docs []string) string { + + documentsContent := "\n" + for _, doc := range docs { + documentsContent += fmt.Sprintf("%s\n", doc) + } + documentsContent += "" + return documentsContent +} +/* +This is a Go function called GenerateContextFromDocs that takes a slice of strings as input and returns a string in XML format. +The function generates the context content from a slice of documents by iterating over each document in the slice and appending it to a string in the format document content. +Finally, the function wraps the entire content in tags and returns it. 
+*/ + +// TODO: GenerateContextWithTags \ No newline at end of file diff --git a/content/files.go b/content/files.go new file mode 100644 index 0000000..482e76e --- /dev/null +++ b/content/files.go @@ -0,0 +1,81 @@ +package content + +import ( + "os" + "path/filepath" +) + +// FindFiles searches for files with a specific extension in the given root directory and its subdirectories. +// +// Parameters: +// - dirPath: The root directory to start the search from. +// - ext: The file extension to search for. +// examples: ".md", ".html", ".txt" (compared exactly against filepath.Ext, so wildcards such as "*.*" do not match) +// +// Returns: +// - []string: A slice of file paths that match the given extension. +// - error: An error if the search encounters any issues. +func FindFiles(dirPath string, ext string) ([]string, error) { + var textFiles []string + err := filepath.Walk(dirPath, func(path string, info os.FileInfo, err error) error { + if err != nil { + return err + } + if !info.IsDir() && filepath.Ext(path) == ext { + textFiles = append(textFiles, path) + } + return nil + }) + return textFiles, err +} +/* +This is a Go function named FindFiles that searches for files with a specific extension in a given root directory and its subdirectories. +It takes two parameters: dirPath (the directory path to start the search from) and ext (the file extension to search for). +It returns a slice of file paths that match the given extension and an error if the search encounters any issues. + +The function uses the filepath.Walk function to iterate over all files in the directory and its subdirectories. +For each file found, it checks if it is not a directory and if its extension matches the given extension. +If it does, it appends the file path to the textFiles slice. + +If there is an error during the search, it is returned. +Otherwise, the textFiles slice and any error encountered during the search are returned. +*/ + +// ForEachFile iterates over all files with a specific extension in a directory and its subdirectories. 
+// +// Parameters: +// - dirPath: The root directory to start the search from. +// - ext: The file extension to search for. +// - callback: A function to be called for each file found. +// +// Returns: +// - []string: A slice of file paths that match the given extension. +// - error: An error if the search encounters any issues. +func ForEachFile(dirPath string, ext string, callback func(string) error) ([]string, error) { + var textFiles []string + err := filepath.Walk(dirPath, func(path string, info os.FileInfo, err error) error { + if err != nil { + return err + } + if !info.IsDir() && filepath.Ext(path) == ext { + textFiles = append(textFiles, path) + err = callback(path) + // generate an error to stop the walk + if err != nil { + return err + } + } + return nil + }) + return textFiles, err +} +/* +This code snippet defines a function called ForEachFile in Go. +It takes three parameters: dirPath (the root directory to start the search from), ext (the file extension to search for), and callback (a function to be called for each file found). + +The function uses the filepath.Walk function to iterate over all files in the directory and its subdirectories. +For each file found, it checks if it is not a directory and if its extension matches the given extension. +If it does, it appends the file path to the textFiles slice and calls the callback function with the file path. + +The function returns a slice of file paths that match the given extension and an error if the search encounters any issues. +*/ diff --git a/content/text.go b/content/text.go new file mode 100644 index 0000000..fe9dfcd --- /dev/null +++ b/content/text.go @@ -0,0 +1,81 @@ +package content + +import ( + "os" +) + +// GetArrayOfContentFiles searches for files with a specific extension in the given directory and its subdirectories. +// +// Parameters: +// - dirPath: The directory path to start the search from. +// - ext: The file extension to search for. 
+// +// Returns: +// - []string: A slice with the contents of the files that match the given extension. +// - error: An error if the search encounters any issues. +func GetArrayOfContentFiles(dirPath string, ext string) ([]string, error) { + content := []string{} + _, err := ForEachFile(dirPath, ext, func(path string) error { + data, err := os.ReadFile(path) + if err != nil { + return err + } + + content = append(content, string(data)) + return nil + }) + if err != nil { + return nil, err + } + + return content, nil +} + +/* +This is a Go function named GetArrayOfContentFiles that searches for files with a specific extension in a given directory and its subdirectories. +It takes two parameters: dirPath (the directory path to start the search from) and ext (the file extension to search for). +It returns a slice with the contents of the files that match the given extension and an error if the search encounters any issues. +The function uses the ForEachFile function to iterate over all files with the given extension in the directory and its subdirectories. +For each file found, it reads the file's content using os.ReadFile and appends it to the content slice. +If there is an error reading the file, it returns the error. +Finally, it returns the content slice and any error encountered during the search. +*/ + +// GetMapOfContentFiles searches for files with a specific extension in the given directory and its subdirectories. +// +// Parameters: +// - dirPath: The directory path to start the search from. +// - ext: The file extension to search for. +// +// Returns: +// - map[string]string: A map of file paths to their contents, where the keys are the paths of the files and the values are the file contents as strings. +// - error: An error if the search encounters any issues. 
+func GetMapOfContentFiles(dirPath string, ext string) (map[string]string, error) { + content := map[string]string{} + _, err := ForEachFile(dirPath, ext, func(path string) error { + data, err := os.ReadFile(path) + if err != nil { + return err + } + + content[path] = string(data) + return nil + }) + if err != nil { + return nil, err + } + + return content, nil +} + +/* +This is a Go function named GetMapOfContentFiles that searches for files with a specific extension in a given directory and its subdirectories. +It takes two parameters: dirPath (the directory path to start the search from) and ext (the file extension to search for). +It returns a map of file paths to their contents, where the keys are the path of the files and the values are the file contents as strings. +If the search encounters any issues, it returns an error. + +The function uses the ForEachFile function to iterate over all files with the given extension in the directory and its subdirectories. +For each file found, it reads the file's content using os.ReadFile and adds it to the content map with the path of the file as the key and the file content as the value. +If there is an error reading the file, it returns the error. +Finally, it returns the content map and any error encountered during the search. +*/ diff --git a/demo/embeddings-from-code/data/James-T-Kirk.txt b/demo/embeddings-from-code/data/James-T-Kirk.txt deleted file mode 100644 index c8b5d04..0000000 --- a/demo/embeddings-from-code/data/James-T-Kirk.txt +++ /dev/null @@ -1,5 +0,0 @@ -James T. Kirk, also known as Captain Kirk, is a fictional character from the Star Trek franchise. - He's the iconic captain of the starship USS Enterprise, - boldly exploring the galaxy with his crew. - Originally played by actor William Shatner, - Kirk has appeared in TV series, movies, and other media. 
\ No newline at end of file diff --git a/demo/embeddings-from-code/data/Jean-Luc-Picard.txt b/demo/embeddings-from-code/data/Jean-Luc-Picard.txt deleted file mode 100644 index d8c28d2..0000000 --- a/demo/embeddings-from-code/data/Jean-Luc-Picard.txt +++ /dev/null @@ -1,6 +0,0 @@ -Jean-Luc Picard is a fictional character in the Star Trek franchise. - He's most famous for being the captain of the USS Enterprise-D, - a starship exploring the galaxy in the 24th century. - Picard is known for his diplomacy, intelligence, and strong moral compass. - He's been portrayed by actor Patrick Stewart. - diff --git a/demo/embeddings-from-code/data/Michael-Burnham.txt b/demo/embeddings-from-code/data/Michael-Burnham.txt deleted file mode 100644 index a7564cf..0000000 --- a/demo/embeddings-from-code/data/Michael-Burnham.txt +++ /dev/null @@ -1,5 +0,0 @@ -Michael Burnham is the main character on the Star Trek series, Discovery. - She's a human raised on the logical planet Vulcan by Spock's father. - Burnham is intelligent and struggles to balance her human emotions with Vulcan logic. - She's become a Starfleet captain known for her determination and problem-solving skills. - Originally played by actress Sonequa Martin-Green \ No newline at end of file diff --git a/demo/embeddings-from-code/data/Philippe-Charriere.txt b/demo/embeddings-from-code/data/Philippe-Charriere.txt deleted file mode 100644 index 9d0eb4f..0000000 --- a/demo/embeddings-from-code/data/Philippe-Charriere.txt +++ /dev/null @@ -1,5 +0,0 @@ -Lieutenant Philippe CharriΓ¨re, known as the **Silent Sentinel** of the USS Discovery, - is the enigmatic programming genius whose codes safeguard the ship's secrets and operations. - His swift problem-solving skills are as legendary as the mysterious aura that surrounds him. - CharriΓ¨re, a man of few words, speaks the language of machines with unrivaled fluency, - making him the crew's unsung guardian in the cosmos. 
His best friend is Spiderman from the Marvel Cinematic Universe. diff --git a/demo/embeddings-from-code/main.go b/demo/embeddings-from-code/main.go deleted file mode 100644 index 6e384b2..0000000 --- a/demo/embeddings-from-code/main.go +++ /dev/null @@ -1,114 +0,0 @@ -package main - -import ( - "fmt" - "log" - "os" - "path/filepath" - "strconv" - - "github.com/parakeet-nest/parakeet/embeddings" - "github.com/parakeet-nest/parakeet/llm" -) - -func createDocFromCode(path string, fileExtension string) ([]string, error) { - textFiles, err := os.ReadDir(path) - if err != nil { - return nil, err - } - docs := []string{} - for _, textFile := range textFiles { - fmt.Println(textFile.Name()) - pathfile := path + "/" + textFile.Name() - extension := filepath.Ext(pathfile) - if extension == fileExtension { - data, err := os.ReadFile(pathfile) - if err != nil { - return nil, err - } - docs = append(docs, string(data)) - } - } - return docs, nil -} - -func main() { - ollamaUrl := "http://localhost:11434" - // if working from a container - //ollamaUrl := "http://host.docker.internal:11434" - var embeddingsModel = "magicoder:latest" - - store := embeddings.BboltVectorStore{} - store.Initialize("../embeddings.db") - - docs := []string{} - - first, err := createDocFromCode("../../examples/01-generate", ".go") - if err != nil { - log.Fatalln("😑:", err) - } - docs = append(docs, first...) - second, err := createDocFromCode("../../examples/02-generate-stream", ".go") - if err != nil { - log.Fatalln("😑:", err) - } - docs = append(docs, second...) - third, err := createDocFromCode("../../examples/03-chat", ".go") - if err != nil { - log.Fatalln("😑:", err) - } - docs = append(docs, third...) - fourth, err := createDocFromCode("../../examples/04-chat-stream", ".go") - if err != nil { - log.Fatalln("😑:", err) - } - docs = append(docs, fourth...) 
- fifth, err := createDocFromCode("../../examples/05-context", ".go") - if err != nil { - log.Fatalln("😑:", err) - } - docs = append(docs, fifth...) - - sixth, err := createDocFromCode("../../examples/10-chat-conversational-memory", ".go") - if err != nil { - log.Fatalln("😑:", err) - } - docs = append(docs, sixth...) - - seventh, err := createDocFromCode("../../examples/11-chat-conversational-bbolt/begin", ".go") - if err != nil { - log.Fatalln("😑:", err) - } - docs = append(docs, seventh...) - - eight, err := createDocFromCode("../../examples/11-chat-conversational-bbolt/resume", ".go") - if err != nil { - log.Fatalln("😑:", err) - } - docs = append(docs, eight...) - - fmt.Println(docs) - - // Create embeddings from documents and save them in the store - for idx, doc := range docs { - fmt.Println("πŸ“ Creating embedding from document ", idx) - embedding, err := embeddings.CreateEmbedding( - ollamaUrl, - llm.Query4Embedding{ - Model: embeddingsModel, - Prompt: doc, - }, - strconv.Itoa(idx), // don't forget the id (unique identifier) - ) - fmt.Println("πŸ“¦", embedding.Id, embedding.Prompt) - if err != nil { - fmt.Println("😑:", err) - } else { - _, err := store.Save(embedding) - if err != nil { - fmt.Println("😑:", err) - } - } - } - -} diff --git a/embeddings/context.go b/embeddings/context.go new file mode 100644 index 0000000..39282f0 --- /dev/null +++ b/embeddings/context.go @@ -0,0 +1,25 @@ +package embeddings + +import ( + "fmt" + + "github.com/parakeet-nest/parakeet/llm" +) + +// GenerateContextFromSimilarities generates the context content from a slice of vector records. +// +// Parameters: +// - similarities: a slice of llm.VectorRecord representing the similarities. +// +// Returns: +// - string: the generated context content in XML format. 
+func GenerateContextFromSimilarities(similarities []llm.VectorRecord) string { + documentsContent := "\n" + for _, similarity := range similarities { + documentsContent += fmt.Sprintf("%s\n", similarity.Prompt) + } + documentsContent += "" + return documentsContent +} + +// TODO: GenerateContextFromSimilaritiesWithTags diff --git a/examples/01-generate/main.go b/examples/01-generate/main.go index 530b442..c85c583 100644 --- a/examples/01-generate/main.go +++ b/examples/01-generate/main.go @@ -14,9 +14,9 @@ import ( ) func main() { - //ollamaUrl := "http://localhost:11434" + ollamaUrl := "http://localhost:11434" // if working from a container - ollamaUrl := "http://host.docker.internal:11434" + //ollamaUrl := "http://host.docker.internal:11434" model := "tinydolphin" options := llm.Options{ diff --git a/examples/02-generate-stream/main.go b/examples/02-generate-stream/main.go index 87d87f2..9bbc3e0 100644 --- a/examples/02-generate-stream/main.go +++ b/examples/02-generate-stream/main.go @@ -14,9 +14,9 @@ import ( ) func main() { - //ollamaUrl := "http://localhost:11434" + ollamaUrl := "http://localhost:11434" // if working from a container - ollamaUrl := "http://host.docker.internal:11434" + //ollamaUrl := "http://host.docker.internal:11434" model := "tinydolphin" diff --git a/examples/03-chat/main.go b/examples/03-chat/main.go index d7650a5..15ed6c5 100644 --- a/examples/03-chat/main.go +++ b/examples/03-chat/main.go @@ -15,9 +15,9 @@ import ( ) func main() { - //ollamaUrl := "http://localhost:11434" + ollamaUrl := "http://localhost:11434" // if working from a container - ollamaUrl := "http://host.docker.internal:11434" + //ollamaUrl := "http://host.docker.internal:11434" model := "deepseek-coder" systemContent := `You are an expert in computer programming. 
diff --git a/examples/04-chat-stream/main.go b/examples/04-chat-stream/main.go index 529d5f2..0038ace 100644 --- a/examples/04-chat-stream/main.go +++ b/examples/04-chat-stream/main.go @@ -15,9 +15,9 @@ import ( ) func main() { - //ollamaUrl := "http://localhost:11434" + ollamaUrl := "http://localhost:11434" // if working from a container - ollamaUrl := "http://host.docker.internal:11434" + //ollamaUrl := "http://host.docker.internal:11434" model := "deepseek-coder" systemContent := `You are an expert in computer programming. diff --git a/examples/05-context/main.go b/examples/05-context/main.go index c8e2ff7..ebb217d 100644 --- a/examples/05-context/main.go +++ b/examples/05-context/main.go @@ -15,9 +15,9 @@ import ( ) func main() { - //ollamaUrl := "http://localhost:11434" + ollamaUrl := "http://localhost:11434" // if working from a container - ollamaUrl := "http://host.docker.internal:11434" + //ollamaUrl := "http://host.docker.internal:11434" //model := "phi3" model := "qwen:0.5b" // https://ollama.com/library/qwen:0.5b diff --git a/examples/06-summary/main.go b/examples/06-summary/main.go index f147101..44611c3 100644 --- a/examples/06-summary/main.go +++ b/examples/06-summary/main.go @@ -9,9 +9,9 @@ import ( ) func main() { - //ollamaUrl := "http://localhost:11434" + ollamaUrl := "http://localhost:11434" // if working from a container - ollamaUrl := "http://host.docker.internal:11434" + //ollamaUrl := "http://host.docker.internal:11434" model := "gemma:2b" systemContent := `Your job is to produce a final concise summary of the provided context.` diff --git a/examples/07-explain/main.go b/examples/07-explain/main.go index 38c43fc..d043827 100644 --- a/examples/07-explain/main.go +++ b/examples/07-explain/main.go @@ -12,9 +12,9 @@ import ( ) func main() { - //ollamaUrl := "http://localhost:11434" + ollamaUrl := "http://localhost:11434" // if working from a container - ollamaUrl := "http://host.docker.internal:11434" + //ollamaUrl := 
"http://host.docker.internal:11434" //model := "deepseek-coder:instruct" model := "gemma:2b-instruct" diff --git a/examples/10-chat-conversational-memory/main.go b/examples/10-chat-conversational-memory/main.go index 1a96f0f..2ea8496 100644 --- a/examples/10-chat-conversational-memory/main.go +++ b/examples/10-chat-conversational-memory/main.go @@ -15,10 +15,10 @@ import ( ) func main() { - //ollamaUrl := "http://localhost:11434" + ollamaUrl := "http://localhost:11434" // if working from a container //ollamaUrl := "http://host.docker.internal:11434" - ollamaUrl := "http://bob.local:11434" // Pi5 + //ollamaUrl := "http://bob.local:11434" // Pi5 //model := "qwen:0.5b" // speed is fast, but the model seems not recognize the history model := "tinydolphin" // fast, and perfect answer (short, brief) diff --git a/examples/11-chat-conversational-bbolt/begin/main.go b/examples/11-chat-conversational-bbolt/begin/main.go index 26cde94..7885aba 100644 --- a/examples/11-chat-conversational-bbolt/begin/main.go +++ b/examples/11-chat-conversational-bbolt/begin/main.go @@ -16,10 +16,10 @@ import ( ) func main() { - //ollamaUrl := "http://localhost:11434" + ollamaUrl := "http://localhost:11434" // if working from a container //ollamaUrl := "http://host.docker.internal:11434" - ollamaUrl := "http://bob.local:11434" // Pi5 + //ollamaUrl := "http://bob.local:11434" // Pi5 model := "tinydolphin" // fast, and perfect answer (short, brief) diff --git a/examples/11-chat-conversational-bbolt/resume/main.go b/examples/11-chat-conversational-bbolt/resume/main.go index 0a1422f..df86f55 100644 --- a/examples/11-chat-conversational-bbolt/resume/main.go +++ b/examples/11-chat-conversational-bbolt/resume/main.go @@ -16,10 +16,10 @@ import ( ) func main() { - //ollamaUrl := "http://localhost:11434" + ollamaUrl := "http://localhost:11434" // if working from a container //ollamaUrl := "http://host.docker.internal:11434" - ollamaUrl := "http://bob.local:11434" // Pi5 + //ollamaUrl := 
"http://bob.local:11434" // Pi5 model := "tinydolphin" // fast, and perfect answer (short, brief) diff --git a/feature.agent.todo b/feature.agent.todo new file mode 100644 index 0000000..1535c54 --- /dev/null +++ b/feature.agent.todo @@ -0,0 +1,13 @@ +a kind of actor? +or find another name (AIActor, GenAIActor, etc.) + +type Agent struct { + Instruction + Context +} + +methods: +- TalkToAgent(agent) +- OnQuestionFromAgent() + +Listening -> http, mqtt, ... \ No newline at end of file diff --git a/feature.options.todo b/feature.options.todo new file mode 100644 index 0000000..a6e3460 --- /dev/null +++ b/feature.options.todo @@ -0,0 +1 @@ +add more options for the LLM diff --git a/feature.wasm.plugins.todo b/feature.wasm.plugins.todo new file mode 100644 index 0000000..cc82425 --- /dev/null +++ b/feature.wasm.plugins.todo @@ -0,0 +1,2 @@ +- add features to Parakeet +(retriever, chunker, ...) diff --git a/feature.yaml.retriever.for.context.todo b/feature.yaml.retriever.for.context.todo new file mode 100644 index 0000000..b740090 --- /dev/null +++ b/feature.yaml.retriever.for.context.todo @@ -0,0 +1 @@ +like the fine-tuning principle but to create context diff --git a/feature.yaml.retriever.for.rag.todo b/feature.yaml.retriever.for.rag.todo new file mode 100644 index 0000000..9e30b58 --- /dev/null +++ b/feature.yaml.retriever.for.rag.todo @@ -0,0 +1,6 @@ +load yaml files +load yaml files with callback + +question +answer +context diff --git a/fix.raise.error.if.llm.not.loded.todo b/fix.raise.error.if.llm.not.loded.todo new file mode 100644 index 0000000..e69de29 diff --git a/go.mod b/go.mod index 58dd7a8..5783557 100644 --- a/go.mod +++ b/go.mod @@ -4,4 +4,7 @@ go 1.22.1 require go.etcd.io/bbolt v1.3.10 -require golang.org/x/sys v0.4.0 // indirect +require ( + golang.org/x/sys v0.4.0 // indirect + gopkg.in/yaml.v2 v2.4.0 +) diff --git a/go.sum b/go.sum index 2310a4e..1163620 100644 --- a/go.sum +++ b/go.sum @@ -10,5 +10,8 @@ golang.org/x/sync v0.5.0
h1:60k92dhOjHxJkrqnwsfl8KuaHbn/5dl0lUPUklKo3qE= golang.org/x/sync v0.5.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.4.0 h1:Zr2JFtRQNX3BCZ8YtxRE9hNJYC8J6I1MVbMg6owUp18= golang.org/x/sys v0.4.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= +gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/go.work b/go.work index 99d3960..316464f 100644 --- a/go.work +++ b/go.work @@ -15,6 +15,7 @@ use( examples/10-chat-conversational-memory examples/11-chat-conversational-bbolt/begin examples/11-chat-conversational-bbolt/resume - demo/embeddings-from-code - demo/search + ama/embeddings-from-code + ama/search + prompting/01-draft ) diff --git a/go.work.sum b/go.work.sum index be3575e..23cb50e 100644 --- a/go.work.sum +++ b/go.work.sum @@ -1,10 +1,3 @@ -github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= -go.etcd.io/bbolt v1.3.10 h1:+BqfJTcCzTItrop8mq/lbzL8wSGtj94UO/3U31shqG0= -go.etcd.io/bbolt v1.3.10/go.mod h1:bK3UQLPJZly7IlNmV7uVHJDxfe5aK9Ll93e/74Y9oEQ= go.etcd.io/gofail v0.1.0/go.mod h1:VZBCXYGZhHAinaBiiqYvuDynvahNsAyLFwB3kEHKz1M= -golang.org/x/sync v0.5.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= -golang.org/x/sys v0.4.0 h1:Zr2JFtRQNX3BCZ8YtxRE9hNJYC8J6I1MVbMg6owUp18= -golang.org/x/sys v0.4.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v2 
v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= +gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= diff --git a/parakeet.go b/parakeet.go index 025a3cf..25b76e6 100644 --- a/parakeet.go +++ b/parakeet.go @@ -1,4 +1,4 @@ package parakeet -var Version = "v0.0.3" -var About = "🦜 Parakeet v0.0.3 🖖 [vulcan]" +var Version = "v0.0.4" +var About = "🦜 Parakeet v0.0.4 📝 [memo]" diff --git a/release.env b/release.env index 275f661..2e80c09 100644 --- a/release.env +++ b/release.env @@ -1,2 +1,2 @@ -TAG="v0.0.3" -ABOUT="🦜 Parakeet ${TAG} 🖖 [vulcan]" +TAG="v0.0.4" +ABOUT="🦜 Parakeet ${TAG} 📝 [memo]"