Skip to content

Commit

Permalink
📦 🦜 Parakeet v0.0.4 📝 [memo]
Browse files Browse the repository at this point in the history
  • Loading branch information
k33g committed May 19, 2024
1 parent f0aa577 commit ed4935d
Show file tree
Hide file tree
Showing 49 changed files with 556 additions and 187 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
ollama-from-scratch
prompting
4 changes: 2 additions & 2 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
{
"workbench.iconTheme": "material-icon-theme",
"workbench.colorTheme": "Catppuccin Frappé",
"editor.fontSize": 18,
"terminal.integrated.fontSize": 18,
"editor.fontSize": 15,
"terminal.integrated.fontSize": 15,
"editor.insertSpaces": true,
"editor.tabSize": 4,
"editor.detectIndentation": true,
Expand Down
116 changes: 116 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -434,6 +434,122 @@ store.Initialize("../embeddings.db")
> - `examples/09-embeddings-bbolt/create-embeddings`: create and populate the vector store
> - `examples/09-embeddings-bbolt/use-embeddings`: search similarities in the vector store

## Create embeddings from text files and Similarity search

### Create embeddings
```golang
ollamaUrl := "http://localhost:11434"
embeddingsModel := "all-minilm"

store := embeddings.BboltVectorStore{}
store.Initialize("../embeddings.db")

// Parse all golang source code of the examples
// Create embeddings from documents and save them in the store
counter := 0
_, err := content.ForEachFile("../../examples", ".go", func(path string) error {
data, err := os.ReadFile(path)
if err != nil {
return err
}

fmt.Println("📝 Creating embedding from:", path)
counter++
embedding, err := embeddings.CreateEmbedding(
ollamaUrl,
llm.Query4Embedding{
Model: embeddingsModel,
Prompt: string(data),
},
strconv.Itoa(counter), // don't forget the id (unique identifier)
)
fmt.Println("📦 Created: ", len(embedding.Embedding))

if err != nil {
fmt.Println("😡:", err)
} else {
_, err := store.Save(embedding)
if err != nil {
fmt.Println("😡:", err)
}
}
return nil
})
if err != nil {
log.Fatalln("😡:", err)
}
```

### Similarity search

```golang
ollamaUrl := "http://localhost:11434"
embeddingsModel := "all-minilm"
chatModel := "magicoder:latest"

store := embeddings.BboltVectorStore{}
store.Initialize("../embeddings.db")

systemContent := `You are a Golang developer and an expert in computer programming.
Please make friendly answer for the noobs. Use the provided context and doc to answer.
Add source code examples if you can.`

// Question for the Chat system
userContent := `How to create a stream chat completion with Parakeet?`

// Create an embedding from the user question
embeddingFromQuestion, err := embeddings.CreateEmbedding(
ollamaUrl,
llm.Query4Embedding{
Model: embeddingsModel,
Prompt: userContent,
},
"question",
)
if err != nil {
log.Fatalln("😡:", err)
}
fmt.Println("🔎 searching for similarity...")

similarities, _ := store.SearchSimilarities(embeddingFromQuestion, 0.3)

// Generate the context from the similarities
// This will generate a string with a content like this one:
// `<context><doc>...</doc><doc>...</doc></context>`
documentsContent := embeddings.GenerateContextFromSimilarities(similarities)

fmt.Println("🎉 similarities", len(similarities))

query := llm.Query{
Model: chatModel,
Messages: []llm.Message{
{Role: "system", Content: systemContent},
{Role: "system", Content: documentsContent},
{Role: "user", Content: userContent},
},
Options: llm.Options{
Temperature: 0.4,
RepeatLastN: 2,
},
Stream: false,
}

fmt.Println("")
fmt.Println("🤖 answer:")

// Answer the question
_, err = completion.ChatStream(ollamaUrl, query,
func(answer llm.Answer) error {
fmt.Print(answer.Message.Content)
return nil
})

if err != nil {
log.Fatal("😡:", err)
}
```

## Demos

- https://github.com/parakeet-nest/parakeet-demo
Expand Down
File renamed without changes.
3 changes: 3 additions & 0 deletions demo/README.md → ama/README.md
Original file line number Diff line number Diff line change
@@ -1 +1,4 @@
# Demo: use RAG to add Parakeet knowledge to LLMs
> ask me anything about Parakeet
> 🚧 wip
File renamed without changes.
File renamed without changes.
63 changes: 63 additions & 0 deletions ama/embeddings-from-code/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
package main

import (
"fmt"
"log"
"os"
"strconv"

"github.com/parakeet-nest/parakeet/content"
"github.com/parakeet-nest/parakeet/embeddings"
"github.com/parakeet-nest/parakeet/llm"
)

// main walks the ".go" files under ../../examples, requests an embedding for
// the content of each file and saves the result in a BboltVectorStore backed
// by ../embeddings.db.
//
// A failure to create or save one embedding is printed and the walk continues
// with the next file; a failure to read a file is returned to ForEachFile and
// ends the program through log.Fatalln.
func main() {
	ollamaURL := "http://localhost:11434"
	// if working from a container
	//ollamaURL := "http://host.docker.internal:11434"
	//var embeddingsModel = "magicoder:latest"

	embeddingsModel := "all-minilm"

	store := embeddings.BboltVectorStore{}
	store.Initialize("../embeddings.db")

	// Parse all source code of the examples
	// Create embeddings from documents and save them in the store

	counter := 0
	_, err := content.ForEachFile("../../examples", ".go", func(path string) error {
		data, err := os.ReadFile(path)
		if err != nil {
			return err
		}

		fmt.Println("📝 Creating embedding from:", path)
		counter++
		embedding, err := embeddings.CreateEmbedding(
			ollamaURL,
			llm.Query4Embedding{
				Model:  embeddingsModel,
				Prompt: string(data),
			},
			strconv.Itoa(counter), // don't forget the id (unique identifier)
		)
		if err != nil {
			// Best effort: report the failure and move on to the next file.
			fmt.Println("😡:", err)
			return nil
		}

		// Report success only once the error has been checked, so a failed
		// request is never announced as "Created" and the result is never
		// read when it is not meaningful.
		fmt.Println("📦 Created: ", len(embedding.Embedding))

		if _, err := store.Save(embedding); err != nil {
			fmt.Println("😡:", err)
		}

		return nil
	})
	if err != nil {
		log.Fatalln("😡:", err)
	}
}
4 changes: 4 additions & 0 deletions ama/search/demo.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
🔎 searching for similarity...
🎉 similarities 15

🤖 answer:
File renamed without changes.
File renamed without changes.
22 changes: 8 additions & 14 deletions demo/search/main.go → ama/search/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,20 +15,19 @@ func main() {
//ollamaUrl := "http://host.docker.internal:11434"
//ollamaUrl := "http://bob.local:11434"

var embeddingsModel = "magicoder:latest"
var smallChatModel = "magicoder:latest"
embeddingsModel := "all-minilm"
chatModel := "magicoder:latest"

store := embeddings.BboltVectorStore{}
store.Initialize("../embeddings.db")

systemContent := `You are a Golang developer and an expert in computer programming.
Please make friendly answer for the noobs. Use the provided context to answer.
Please make friendly answer for the noobs. Use the provided context and doc to answer.
Add source code examples if you can.`

// Question for the Chat system
//userContent := `[Brief] How to create a stream completion with Parakeet?`
userContent := `[Brief] How to create a stream chat completion with Parakeet?`

userContent := `How to create a stream chat completion with Parakeet?`

// Create an embedding from the user question
embeddingFromQuestion, err := embeddings.CreateEmbedding(
Expand All @@ -44,20 +43,15 @@ func main() {
}
fmt.Println("🔎 searching for similarity...")

//similarity, _ := store.SearchMaxSimilarity(embeddingFromQuestion)

similarities, _ := store.SearchSimilarities(embeddingFromQuestion, 0.0)
similarities, _ := store.SearchSimilarities(embeddingFromQuestion, 0.3)

documentsContent := "<context>"
for _, similarity := range similarities {
documentsContent += fmt.Sprintf("<doc>%s</doc>", similarity.Prompt)
}
documentsContent += "</context>"
documentsContent := embeddings.GenerateContextFromSimilarities(similarities)

fmt.Println("🎉 similarities", similarities)
fmt.Println("🎉 similarities", len(similarities))

query := llm.Query{
Model: smallChatModel,
Model: chatModel,
Messages: []llm.Message{
{Role: "system", Content: systemContent},
{Role: "system", Content: documentsContent},
Expand Down
1 change: 1 addition & 0 deletions completion/completion.go
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ func completionStream(url string, kindOfCompletion string, query llm.Query, onCh
onChunk(llm.Answer{})
}

// ? 🤔 and if I used answer + error as a parameter?
err = onChunk(answer)

// generate an error to stop the stream
Expand Down
79 changes: 79 additions & 0 deletions content/content_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
package content

import (
"fmt"
"strings"
"testing"
)

// TestGetArrayOfContentFiles checks that GetArrayOfContentFiles, called on
// ./contents-for-test with the ".txt" extension, returns the fixture contents
// in the expected order.
func TestGetArrayOfContentFiles(t *testing.T) {
	content, err := GetArrayOfContentFiles("./contents-for-test", ".txt")
	if err != nil {
		t.Fatal(err)
	}

	fmt.Println("📝 content", content)

	expected := []string{"hello world", "hey people", "hello world", "hey people"}

	// Guard the indexing below: a short result must fail the test cleanly
	// instead of panicking with an index out of range.
	if len(content) < len(expected) {
		t.Fatalf("got %d contents, want at least %d", len(content), len(expected))
	}

	for i, want := range expected {
		if content[i] != want {
			t.Fatalf("content[%d] = %q, want %q", i, content[i], want)
		}
	}
}

// TestGetMapOfContentFiles checks that GetMapOfContentFiles, called on
// ./contents-for-test with the ".txt" extension, maps each fixture path to
// the content of that file.
func TestGetMapOfContentFiles(t *testing.T) {
	content, err := GetMapOfContentFiles("./contents-for-test", ".txt")
	if err != nil {
		t.Fatal(err)
	}

	fmt.Println("📝 content", content)

	// Every fixture file is checked exactly once. A slice (rather than a map)
	// keeps the order of the checks, and so the first reported failure,
	// deterministic.
	expected := []struct {
		path string
		want string
	}{
		{"contents-for-test/01/hello.txt", "hello world"},
		{"contents-for-test/01/hey.txt", "hey people"},
		{"contents-for-test/02/hello.txt", "hello world"},
		{"contents-for-test/02/hey.txt", "hey people"},
	}

	for _, e := range expected {
		if got := content[e.path]; got != e.want {
			t.Fatalf("content[%q] = %q, want %q", e.path, got, e.want)
		}
	}
}

// TestGenerateContextFromDocs checks that the context generated from the
// ".txt" fixtures contains one <doc> element for each expected content.
func TestGenerateContextFromDocs(t *testing.T) {
	docs, err := GetArrayOfContentFiles("./contents-for-test", ".txt")
	if err != nil {
		t.Fatal(err)
	}

	generated := GenerateContextFromDocs(docs)
	fmt.Println("📝 context", generated)

	// Checked in the same order as before, so the first failure is unchanged.
	for _, text := range []string{"hello world", "hey people"} {
		fragment := "<doc>" + text + "</doc>"
		if !strings.Contains(generated, fragment) {
			t.Fatal(text + " not found")
		}
	}
}

1 change: 1 addition & 0 deletions content/contents-for-test/01/hello.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
hello world
1 change: 1 addition & 0 deletions content/contents-for-test/01/hey.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
hey people
1 change: 1 addition & 0 deletions content/contents-for-test/02/hello.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
hello world
1 change: 1 addition & 0 deletions content/contents-for-test/02/hey.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
hey people
29 changes: 29 additions & 0 deletions content/context.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
package content

import "fmt"

// GenerateContextFromDocs generates the context content from a slice of documents.
//
// Each document is wrapped in a <doc>...</doc> element on its own line, and
// the whole list is enclosed in <context>...</context>. Documents are inserted
// verbatim: no escaping is applied to their content.
//
// Remarks: you can use the generated content to add context to a prompt for an LLM.
//
// Parameters:
//   - docs: a slice of strings representing the documents.
//
// Returns:
//   - string: the generated context content in XML format. For an empty or nil
//     slice this is "<context>\n</context>".
func GenerateContextFromDocs(docs []string) string {
	// Accumulate into a single byte buffer rather than concatenating strings
	// in the loop, which would copy the whole context again for every document.
	buf := []byte("<context>\n")
	for _, doc := range docs {
		buf = fmt.Appendf(buf, "<doc>%s</doc>\n", doc)
	}
	buf = append(buf, "</context>"...)
	return string(buf)
}
/*
This is a Go function called GenerateContextFromDocs that takes a slice of strings as input and returns a string in XML format.
The function generates the context content from a slice of documents by iterating over each document in the slice and appending it to a string in the format <doc>document content</doc>.
Finally, the function wraps the entire content in <context> tags and returns it.
*/

// TODO: GenerateContextWithTags
Loading

0 comments on commit ed4935d

Please sign in to comment.