📦 🦜 Parakeet v0.2.1 🧇 [waffle]

parakeet-nest · Oct 13, 2024 · ce29451 · ce29451
1 parent 1e1a8d7
commit ce29451
Show file tree

Hide file tree

Showing 145 changed files with 2,962 additions and 106 deletions.
diff --git a/LAST_RELEASE.md b/LAST_RELEASE.md
@@ -2,6 +2,99 @@
 
 ## Release notes
 
+## v0.2.1 🧇 [waffle]
+
+#### What's new in v0.2.1?
+
+##### Contextual Retrieval
+
+> Inspired by: [Introducing Contextual Retrieval](https://www.anthropic.com/news/contextual-retrieval)
+
+2 new functions are available in the `content` package:
+
+- `CreateChunkContext`
+- `CreateChunkContextWithPromptTemplate`
+
+`CreateChunkContext` generates a succinct context for a given chunk within the whole document content.
+This context is intended to improve search retrieval of the chunk.
+
+
+`CreateChunkContextWithPromptTemplate` generates a contextual response based on a given prompt template and document content.
+It interpolates the template with the provided document and chunk content, then uses an LLM to generate a response.
+
+
+##### UI Helpers
+
+2 new functions are available in the `ui` package:
+
+
+If you use Parakeet to create CLI applications, you can use the `ui` package to create a (very) simple UI.
+
+- `Input`
+- `Println`
+
+`Input` displays a prompt with the specified color and waits for user input.
+
+`Println` prints the provided strings with the specified color using the lipgloss styling library.
+
+
+##### CLI Helpers
+
+8 new functions are available in the `cli` package:
+
+- `Settings` parses command-line arguments and flags.
+- `FlagValue` retrieves the value of a flag by its name from a slice of Flag structs.
+- `HasArg` checks if an argument with the specified name exists in the provided slice of arguments.
+- `HasFlag` checks if a flag with the specified name exists in the provided slice of flags.
+- `ArgsTail` extracts the names from a slice of Arg structs and returns them as a slice of strings.
+- `FlagsTail` takes a slice of Flag structs and returns a slice of strings containing the names of those flags.
+- `FlagsWithNamesTail` takes a slice of Flag structs and returns a slice of strings, where each string is a formatted pair of the flag's name and value in the form "name=value".
+- `HasSubsequence` checks if the given subsequence of strings (subSeq) is present in the tail of the provided arguments (args).
+
+**Example**:
+
+```go
+// default values
+ollamaUrl := "http://localhost:11434"
+chatModel := "llama3.1:8b"
+embeddingsModel := "bge-m3:latest"
+
+args, flags := cli.Settings()
+
+if cli.HasFlag("url", flags) {
+    ollamaUrl = cli.FlagValue("url", flags)
+}
+
+if cli.HasFlag("chat-model", flags) {
+    chatModel = cli.FlagValue("chat-model", flags)
+}
+
+if cli.HasFlag("embeddings-model", flags) {
+    embeddingsModel = cli.FlagValue("embeddings-model", flags)
+}
+
+switch cmd := cli.ArgsTail(args); cmd[0] {
+case "create-embeddings":
+    fmt.Println(embeddingsModel)
+case "chat":
+    fmt.Println(chatModel)
+default:
+    fmt.Println("Unknown command:", cmd[0])
+}
+```
+
+##### New samples
+
+- 52-constraints: Preventing an LLM from talking about certain things
+- 53-constraints: Preventing an LLM from talking about certain things
+- 54-constraints-webapp: Preventing an LLM from talking about certain things
+- 55-create-npc: Create an NPC with `nemotron-mini` and chat with it
+- 56-jean-luc-picard: Chat with Jean-Luc Picard
+- 57-jean-luc-picard-rag: Chat with Jean-Luc Picard + RAG
+- 58-michael-burnham: Chat with Michael Burnham
+- 59-jean-luc-picard-contextual-retrieval: Chat with Jean-Luc Picard + Contextual Retrieval
+- 60-safety-models: Safety Models fine-tuned for content safety classification of LLM inputs and responses
+
 ### v0.2.0 🍕 [pizza]
 
 #### What's new in v0.2.0?

diff --git a/cli/cli.go b/cli/cli.go
@@ -0,0 +1,189 @@
+package cli
+
+import (
+	"fmt"
+	"os"
+	"strings"
+)
+
+// Arg is a positional command-line argument, i.e. one that Settings did not
+// classify as a flag.
+type Arg struct {
+	Name string // the argument text exactly as given on the command line
+}
+
+// Flag is a command-line flag together with the value supplied for it, if any.
+type Flag struct {
+	Name  string // flag name with the leading "--" removed
+	Value string // value paired with the flag; empty when none was given
+}
+
+// Settings parses the process command line (os.Args) into positional
+// arguments and flags.
+//
+// The program name is skipped. An argument starting with "--" is a flag and
+// may carry its value in either of two forms:
+//
+//   - "--name=value": the text after the first "=" is the value.
+//   - "--name value": the next argument is taken as the value unless it also
+//     starts with "--"; in that case, or when the flag is the last argument,
+//     the value is the empty string.
+//
+// Every other argument is a positional argument. Note that with the
+// space-separated form a value-less flag swallows a positional argument that
+// directly follows it ("--verbose chat" yields verbose="chat"); put such flags
+// last or use the "=" form to avoid the ambiguity.
+//
+// Returns two slices: the positional arguments and the flags, each in
+// command-line order. A slice is nil when nothing of its kind was given.
+func Settings() ([]Arg, []Flag) {
+	// os.Args can be empty when the process is started with an empty argv;
+	// slicing it with [1:] would panic in that case.
+	if len(os.Args) < 2 {
+		return nil, nil
+	}
+	return parseSettings(os.Args[1:]) // skip the program name
+}
+
+// parseSettings splits args (program name already removed) into positional
+// arguments and flags; see Settings for the accepted flag forms.
+func parseSettings(args []string) ([]Arg, []Flag) {
+	var arguments []Arg
+	var flags []Flag
+
+	for i := 0; i < len(args); i++ {
+		current := args[i]
+		if !strings.HasPrefix(current, "--") {
+			arguments = append(arguments, Arg{Name: current})
+			continue
+		}
+
+		name := strings.TrimPrefix(current, "--")
+
+		// Inline form: split on the first "=" only, so values that themselves
+		// contain "=" (e.g. URLs with a query string) are kept intact.
+		if eq := strings.IndexByte(name, '='); eq >= 0 {
+			flags = append(flags, Flag{Name: name[:eq], Value: name[eq+1:]})
+			continue
+		}
+
+		value := ""
+		if i+1 < len(args) && !strings.HasPrefix(args[i+1], "--") {
+			value = args[i+1]
+			i++ // the next argument was consumed as this flag's value
+		}
+		flags = append(flags, Flag{Name: name, Value: value})
+	}
+
+	return arguments, flags
+}
+
+// FlagValue looks up a flag by name and returns its value.
+//
+// Parameters:
+//   - name: The flag name to look for.
+//   - flags: The flags to search.
+//
+// Returns:
+//   The value of the first flag whose name matches, or an empty string when
+//   no flag matches. A matching flag with an empty value also yields an empty
+//   string; use HasFlag to tell the two cases apart.
+func FlagValue(name string, flags []Flag) string {
+	for idx := range flags {
+		if candidate := flags[idx]; candidate.Name == name {
+			return candidate.Value
+		}
+	}
+	return ""
+}
+
+// HasArg reports whether args contains an argument named name.
+//
+// Parameters:
+// - name: The argument name to look for (compared exactly, case-sensitive).
+// - args: The arguments to search.
+//
+// Returns:
+// - bool: True as soon as a matching argument is found, false if none matches
+//   or args is empty.
+func HasArg(name string, args []Arg) bool {
+	found := false
+	for i := 0; i < len(args) && !found; i++ {
+		found = args[i].Name == name
+	}
+	return found
+}
+
+// HasFlag reports whether flags contains a flag named name.
+//
+// Parameters:
+// - name: The flag name to look for (compared exactly, case-sensitive).
+// - flags: The flags to search.
+//
+// Returns:
+// - bool: True if at least one flag has that name, regardless of its value;
+//   false otherwise.
+func HasFlag(name string, flags []Flag) bool {
+	for i := range flags {
+		if flags[i].Name != name {
+			continue
+		}
+		return true
+	}
+	return false
+}
+
+// ArgsTail returns the names of the given arguments as plain strings.
+//
+// Parameters:
+// - args: The Arg values whose names are wanted.
+//
+// Returns:
+// - A new, non-nil slice holding one name per element of args, in the same
+//   order (empty when args is empty).
+func ArgsTail(args []Arg) []string {
+	tail := make([]string, 0, len(args))
+	for idx := range args {
+		tail = append(tail, args[idx].Name)
+	}
+	return tail
+}
+
+// FlagsTail returns the names of the given flags, dropping their values.
+//
+// Parameters:
+//   flags []Flag: The flags whose names are wanted.
+//
+// Returns:
+//   []string: A new, non-nil slice with one name per flag, in the same order
+//   (empty when flags is empty).
+func FlagsTail(flags []Flag) []string {
+	count := len(flags)
+	tail := make([]string, count)
+	for pos := 0; pos < count; pos++ {
+		tail[pos] = flags[pos].Name
+	}
+	return tail
+}
+
+// FlagsWithNamesTail renders each flag as a "name=value" string.
+//
+// Parameters:
+//   flags []Flag - The flags to render.
+//
+// Returns:
+//   []string - A new, non-nil slice with one "name=value" entry per flag, in
+//   the same order. A flag without a value is rendered as "name=".
+func FlagsWithNamesTail(flags []Flag) []string {
+	rendered := make([]string, 0, len(flags))
+	for _, f := range flags {
+		rendered = append(rendered, fmt.Sprintf("%s=%s", f.Name, f.Value))
+	}
+	return rendered
+}
+
+// containsSubsequence reports whether every element of subSeq occurs in
+// mainSeq in the same relative order. The matches do not have to be adjacent.
+//
+// Parameters:
+// - mainSeq: The sequence to search in.
+// - subSeq: The elements to find, in order.
+//
+// Returns:
+// - bool: true if subSeq is a subsequence of mainSeq (an empty subSeq always
+//   is), false otherwise.
+func containsSubsequence(mainSeq, subSeq []string) bool {
+	// pending holds the part of subSeq that has not been matched yet; each
+	// match drops its first element.
+	pending := subSeq
+	for _, candidate := range mainSeq {
+		if len(pending) == 0 {
+			break
+		}
+		if candidate == pending[0] {
+			pending = pending[1:]
+		}
+	}
+	return len(pending) == 0
+}
+
+// HasSubsequence reports whether the strings in subSeq appear among the names
+// of args in the same relative order (not necessarily next to each other).
+//
+// Parameters:
+//   - args: The arguments whose names are searched.
+//   - subSeq: The names to look for, in order.
+//
+// Returns:
+//   - bool: True if subSeq is a subsequence of the argument names, false otherwise.
+func HasSubsequence(args []Arg, subSeq []string) bool {
+	names := ArgsTail(args)
+	return containsSubsequence(names, subSeq)
+}
diff --git a/content/chunk_context.go b/content/chunk_context.go
@@ -0,0 +1,73 @@
+package content
+
+import (
+	"github.com/parakeet-nest/parakeet/completion"
+	"github.com/parakeet-nest/parakeet/llm"
+)
+
+// CreateChunkContextWithPromptTemplate fills a prompt template with a document
+// and one of its chunks, sends the resulting prompt to the model, and returns
+// the model's response.
+//
+// The template is interpolated with two values: "wholeDocument" (the full
+// document text) and "chunkContent" (the chunk's Content field).
+//
+// Parameters:
+//   - promptTpl: The prompt template to interpolate.
+//   - wholeDocumentContent: The content of the entire document.
+//   - chunk: The chunk of the document to generate a context for.
+//   - ollamaUrl: The URL passed to completion.Generate.
+//   - contextualModel: The name of the model to query.
+//   - options: Options forwarded with the query.
+//
+// Returns:
+//   - The response text produced by the model.
+//   - An error if interpolating the template or querying the model fails; the
+//     returned string is empty in that case.
+func CreateChunkContextWithPromptTemplate(promptTpl, wholeDocumentContent string, chunk Chunk, ollamaUrl, contextualModel string, options llm.Options) (string, error) {
+	// Build the prompt from the template.
+	prompt, err := InterpolateString(promptTpl, map[string]interface{}{
+		"wholeDocument": wholeDocumentContent,
+		"chunkContent":  chunk.Content,
+	})
+	if err != nil {
+		return "", err
+	}
+
+	// Ask the model for the context.
+	result, err := completion.Generate(ollamaUrl, llm.GenQuery{
+		Model:   contextualModel,
+		Prompt:  prompt,
+		Options: options,
+	})
+	if err != nil {
+		return "", err
+	}
+	return result.Response, nil
+}
+
+
+// CreateChunkContext generates a succinct context for a given chunk within the whole document content.
+// This context is intended to improve search retrieval of the chunk.
+//
+// It is a convenience wrapper: it supplies a built-in prompt template and
+// delegates everything else to CreateChunkContextWithPromptTemplate.
+//
+// Parameters:
+//   - wholeDocumentContent: The entire content of the document as a string.
+//   - chunk: The specific chunk of the document for which context is to be generated.
+//   - ollamaUrl: The URL for the Ollama service.
+//   - contextualModel: The model used for generating the context.
+//   - options: Additional options for the LLM (Language Model).
+//
+// Returns:
+//   - A string containing the succinct context for the chunk.
+//   - An error if the context generation fails.
+func CreateChunkContext(wholeDocumentContent string, chunk Chunk, ollamaUrl, contextualModel string, options llm.Options) (string, error) {
+	// Built-in prompt. The {{.wholeDocument}} and {{.chunkContent}} placeholders
+	// are the two values CreateChunkContextWithPromptTemplate interpolates.
+	// This is a raw string literal, so its whitespace is part of the prompt.
+	promptTemplateForContext := `<document> 
+	{{.wholeDocument}} 
+	</document> 
+	Here is the below chunk we want to situate within the above whole document 
+	<chunk> 
+	{{.chunkContent}} 
+	</chunk> 
+	Please give a short succinct context to situate this chunk within the overall document for the purposes of improving search retrieval of the chunk. 
+	Answer only with the succinct context and nothing else. 
+	`
+	return CreateChunkContextWithPromptTemplate(promptTemplateForContext, wholeDocumentContent, chunk, ollamaUrl, contextualModel, options)
+
+}