The Enterprise-Grade Production-Ready RAG Framework
🐦 Twitter • 📢 Discord • Swarms Platform • 📙 Documentation
The table below describes each supported RAG backend, its current status, and where to find its documentation and website:
| RAG System | Status | Description | Documentation | Website |
|---|---|---|---|---|
| ChromaDB | Available | A high-performance, distributed database optimized for handling large-scale AI tasks. | ChromaDB Documentation | ChromaDB |
| Pinecone | Available | A fully managed vector database that makes it easy to add vector search to your applications. | Pinecone Documentation | Pinecone |
| Redis | Coming Soon | An open-source, in-memory data structure store, used as a database, cache, and message broker. | Redis Documentation | Redis |
| Faiss | Coming Soon | A library for efficient similarity search and clustering of dense vectors, developed by Facebook AI. | Faiss Documentation | Faiss |
| HNSW | Coming Soon | A graph-based algorithm for approximate nearest neighbor search, known for its speed and accuracy. | HNSW Documentation | HNSW |
Requirements:

- Python 3.10
- A `.env` file with your respective keys, like `PINECONE_API_KEY`; a template can be found in the `.env.examples` file
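
For example, a minimal `.env` for the Pinecone example below might contain just the key for the backend you use (the value is a placeholder):

```
PINECONE_API_KEY=your-pinecone-api-key
```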
Install with pip:

```bash
$ pip install swarms-memory
```
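
A quick way to check the install; these imports are the same ones used in the examples below:

```python
from swarms_memory import PineconeMemory, ChromaDB
from swarms_memory.faiss_wrapper import FAISSDB

print("swarms-memory imported successfully")
```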
The PineconeMemory wrapper supports custom embedding, preprocessing, and postprocessing functions:

```python
from typing import List, Dict, Any

from swarms_memory import PineconeMemory

# Example usage
if __name__ == "__main__":
    from transformers import AutoTokenizer, AutoModel
    import torch

    # Custom embedding function using a HuggingFace model
    def custom_embedding_function(text: str) -> List[float]:
        tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
        model = AutoModel.from_pretrained("bert-base-uncased")
        inputs = tokenizer(
            text,
            return_tensors="pt",
            padding=True,
            truncation=True,
            max_length=512,
        )
        with torch.no_grad():
            outputs = model(**inputs)
        embeddings = (
            outputs.last_hidden_state.mean(dim=1).squeeze().tolist()
        )
        return embeddings

    # Custom preprocessing function
    def custom_preprocess(text: str) -> str:
        return text.lower().strip()

    # Custom postprocessing function
    def custom_postprocess(
        results: List[Dict[str, Any]],
    ) -> List[Dict[str, Any]]:
        for result in results:
            result["custom_score"] = (
                result["score"] * 2
            )  # Example modification
        return results

    # Initialize the wrapper with custom functions
    wrapper = PineconeMemory(
        api_key="your-api-key",
        environment="your-environment",
        index_name="your-index-name",
        embedding_function=custom_embedding_function,
        preprocess_function=custom_preprocess,
        postprocess_function=custom_postprocess,
        logger_config={
            "handlers": [
                {
                    "sink": "custom_rag_wrapper.log",
                    "rotation": "1 GB",
                },
                {
                    "sink": lambda msg: print(
                        f"Custom log: {msg}", end=""
                    )
                },
            ],
        },
    )

    # Adding documents
    wrapper.add(
        "This is a sample document about artificial intelligence.",
        {"category": "AI"},
    )
    wrapper.add(
        "Python is a popular programming language for data science.",
        {"category": "Programming"},
    )

    # Querying
    results = wrapper.query("What is AI?", filter={"category": "AI"})
    for result in results:
        print(
            f"Score: {result['score']}, Custom Score: {result['custom_score']}, Text: {result['metadata']['text']}"
        )
```
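
One note on the custom embedding function above: it reloads the tokenizer and model on every call. In practice you would usually load them once and reuse them; a minimal sketch of that variant (same model, same mean-pooling):

```python
from typing import List

import torch
from transformers import AutoTokenizer, AutoModel

# Load the tokenizer and model once at module scope instead of per call.
_tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
_model = AutoModel.from_pretrained("bert-base-uncased")

def cached_embedding_function(text: str) -> List[float]:
    inputs = _tokenizer(
        text,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=512,
    )
    with torch.no_grad():
        outputs = _model(**inputs)
    # Mean-pool the last hidden states into a single 768-dim vector.
    return outputs.last_hidden_state.mean(dim=1).squeeze().tolist()
```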
The ChromaDB wrapper exposes a simpler interface and can additionally ingest every document in a folder on disk:

```python
from swarms_memory import ChromaDB

chromadb = ChromaDB(
    metric="cosine",
    output_dir="results",
    limit_tokens=1000,
    n_results=2,
    docs_folder="path/to/docs",
    verbose=True,
)

# Add a document
doc_id = chromadb.add("This is a test document.")

# Query the document
result = chromadb.query("This is a test query.")

# Traverse a directory
chromadb.traverse_directory()

# Display the result
print(result)
```
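
The retrieved text can be dropped straight into a prompt. A minimal RAG-style sketch, assuming (as the `print(result)` above suggests) that `query` returns the retrieved passages as text; the prompt template itself is illustrative, not part of the library:

```python
def build_prompt(question: str) -> str:
    # Retrieve context from ChromaDB and wrap it in a simple RAG prompt.
    context = chromadb.query(question)
    return (
        "Answer the question using only the context below.\n\n"
        f"Context:\n{context}\n\n"
        f"Question: {question}"
    )

print(build_prompt("What is this document about?"))
```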
The FAISSDB wrapper provides the same custom-function interface over a local FAISS index:

```python
from typing import List, Dict, Any

from swarms_memory.faiss_wrapper import FAISSDB
from transformers import AutoTokenizer, AutoModel
import torch

# Custom embedding function using a HuggingFace model
def custom_embedding_function(text: str) -> List[float]:
    tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
    model = AutoModel.from_pretrained("bert-base-uncased")
    inputs = tokenizer(
        text,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=512,
    )
    with torch.no_grad():
        outputs = model(**inputs)
    embeddings = (
        outputs.last_hidden_state.mean(dim=1).squeeze().tolist()
    )
    return embeddings

# Custom preprocessing function
def custom_preprocess(text: str) -> str:
    return text.lower().strip()

# Custom postprocessing function
def custom_postprocess(
    results: List[Dict[str, Any]],
) -> List[Dict[str, Any]]:
    for result in results:
        result["custom_score"] = (
            result["score"] * 2
        )  # Example modification
    return results

# Initialize the wrapper with custom functions
wrapper = FAISSDB(
    dimension=768,
    index_type="Flat",
    embedding_function=custom_embedding_function,
    preprocess_function=custom_preprocess,
    postprocess_function=custom_postprocess,
    metric="cosine",
    logger_config={
        "handlers": [
            {
                "sink": "custom_faiss_rag_wrapper.log",
                "rotation": "1 GB",
            },
            {"sink": lambda msg: print(f"Custom log: {msg}", end="")},
        ],
    },
)

# Adding documents
wrapper.add(
    "This is a sample document about artificial intelligence.",
    {"category": "AI"},
)
wrapper.add(
    "Python is a popular programming language for data science.",
    {"category": "Programming"},
)

# Querying
results = wrapper.query("What is AI?")
for result in results:
    print(
        f"Score: {result['score']}, Custom Score: {result['custom_score']}, Text: {result['metadata']['text']}"
    )
```
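
For context on the `metric="cosine"` setting: FAISS has no dedicated cosine index; cosine similarity is conventionally obtained by L2-normalizing vectors and searching an inner-product index. A minimal sketch in raw FAISS (independent of the wrapper), showing what a flat cosine index amounts to underneath:

```python
import faiss
import numpy as np

dim = 768  # matches the bert-base-uncased embedding size used above
index = faiss.IndexFlatIP(dim)  # inner product on unit vectors == cosine

vecs = np.random.rand(10, dim).astype("float32")
faiss.normalize_L2(vecs)  # normalize in place so IP equals cosine similarity
index.add(vecs)

query = np.random.rand(1, dim).astype("float32")
faiss.normalize_L2(query)
scores, ids = index.search(query, k=2)  # top-2 nearest neighbors
print(scores, ids)
```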
This project is licensed under the MIT License.
Please cite Swarms in your paper or project if you found it beneficial in any way! Appreciate you.
```bibtex
@misc{swarms,
  author = {Gomez, Kye},
  title = {{Swarms: The Multi-Agent Collaboration Framework}},
  howpublished = {\url{https://github.com/kyegomez/swarms}},
  year = {2023},
  note = {Accessed: Date}
}
```