Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add rag stack definition, tembo-py #560

Merged
merged 3 commits into from
Feb 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
162 changes: 162 additions & 0 deletions tembo-operator/src/stacks/templates/rag.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
# Stack identity: name/description shown to users, plus the registry the
# per-Postgres-major images are pulled from.
name: rag
description: A Tembo Postgres Stack configured for retrieval augmented generation (RAG).
repository: "quay.io/tembo"
organization: tembo
images:
  # Keys are Postgres major versions; all three tags pin the same
  # standard-cnpg build (a0a5ab5).
  # NOTE(review): keys are unquoted ints — the operator evidently accepts
  # that; do not quote without confirming the deserialized key type.
  14: "standard-cnpg:14-a0a5ab5"
  15: "standard-cnpg:15-a0a5ab5"
  16: "standard-cnpg:16-a0a5ab5"
stack_version: 0.1.0
appServices:
  # Sidecar service that serves sentence-transformer embeddings over HTTP.
  - image: quay.io/tembo/vector-serve:32ce013
    name: embeddings
    env:
      # Model downloads and caches live on the ephemeral /models volume.
      - name: TMPDIR
        value: /models
      - name: BATCH_SIZE
        value: "1000"
      - name: XDG_CACHE_HOME
        value: /models/.cache
    routing:
      # Expose the service under /vectordb on the instance's ingress.
      - port: 3000
        ingressPath: /vectordb
        middlewares:
          - map-embeddings
    middlewares:
      # Rewrite the public /vectordb path to the container's /v1/embeddings
      # endpoint. Regex quoted so the backslashes stay an unambiguous plain
      # string (single quotes: no escape processing, value unchanged).
      - !replacePathRegex
        name: map-embeddings
        config:
          regex: '^\/vectordb\/?'
          replacement: /v1/embeddings
    resources:
      requests:
        cpu: 500m
        memory: 1500Mi
      limits:
        cpu: 4000m
        memory: 1500Mi
    storage:
      volumeMounts:
        - mountPath: /models
          name: hf-data-vol
      volumes:
        # Ephemeral volume for the Hugging Face model cache; recreated on
        # pod restart (models are re-downloaded).
        - ephemeral:
            volumeClaimTemplate:
              spec:
                accessModes:
                  - ReadWriteOnce
                resources:
                  requests:
                    storage: 2Gi
          name: hf-data-vol
# Extensions installed from Trunk at instance creation. Versions here must
# stay in sync with the `extensions` list below.
trunk_installs:
  - name: pgmq
    version: 1.1.1
  - name: vectorize
    version: 0.10.0
  - name: pgvector
    version: 0.6.0
  - name: pg_stat_statements
    version: 1.10.0
# Extensions enabled in the `postgres` database. `vector` (pgvector) and
# `pg_cron` back the `vectorize` extension's embeddings and scheduling.
extensions:
  - name: vector
    locations:
      - database: postgres
        enabled: true
        version: 0.6.0
  - name: pg_cron
    locations:
      - database: postgres
        enabled: true
        version: 1.5.2
  - name: pgmq
    locations:
      - database: postgres
        enabled: true
        version: 1.1.1
  - name: vectorize
    locations:
      - database: postgres
        enabled: true
        version: 0.10.0
  - name: pg_stat_statements
    locations:
      - database: postgres
        enabled: true
        version: 1.10.0
# Allowed CPU/memory combinations per instance class, keeping fixed
# memory-per-CPU ratios: GeneralPurpose 4Gi/cpu, ComputeOptimized 2Gi/cpu,
# MemoryOptimized 8Gi/cpu.
compute_templates:
  - cpu: 0.25
    memory: 1Gi
    instance_class: GeneralPurpose
  - cpu: 0.5
    memory: 2Gi
    instance_class: GeneralPurpose
  - cpu: 1
    memory: 4Gi
    instance_class: GeneralPurpose
  - cpu: 2
    memory: 8Gi
    instance_class: GeneralPurpose
  - cpu: 4
    memory: 16Gi
    instance_class: GeneralPurpose
  - cpu: 8
    memory: 32Gi
    instance_class: GeneralPurpose
  - cpu: 0.5
    memory: 1Gi
    instance_class: ComputeOptimized
  - cpu: 1
    memory: 2Gi
    instance_class: ComputeOptimized
  - cpu: 2
    memory: 4Gi
    instance_class: ComputeOptimized
  - cpu: 4
    memory: 8Gi
    instance_class: ComputeOptimized
  - cpu: 8
    memory: 16Gi
    instance_class: ComputeOptimized
  - cpu: 0.5
    memory: 4Gi
    instance_class: MemoryOptimized
  - cpu: 1
    memory: 8Gi
    instance_class: MemoryOptimized
  - cpu: 2
    memory: 16Gi
    instance_class: MemoryOptimized
  - cpu: 4
    memory: 32Gi
    instance_class: MemoryOptimized
postgres_config_engine: standard
# Postgres settings applied to the instance. `cron.host` / `vectorize.host`
# point at the in-pod Unix socket directory used by the operator.
postgres_config:
  - name: cron.host
    value: /controller/run
  - name: vectorize.host
    value: postgresql:///postgres?host=/controller/run
  # Aggressive autovacuum tuning for write-heavy embedding workloads.
  # NOTE(review): -1 and 0.05 are unquoted and parse as numbers — confirm the
  # operator's config schema accepts non-string values here.
  - name: autovacuum_vacuum_cost_limit
    value: -1
  - name: autovacuum_vacuum_scale_factor
    value: 0.05
  - name: autovacuum_vacuum_insert_scale_factor
    value: 0.05
  - name: autovacuum_analyze_scale_factor
    value: 0.05
  - name: checkpoint_timeout
    value: 10min
  - name: track_activity_query_size
    value: 2048
  # 'on' is quoted deliberately — unquoted it parses as YAML boolean true.
  - name: wal_compression
    value: 'on'
  - name: track_io_timing
    value: 'on'
  - name: log_min_duration_statement # https://www.postgresql.org/docs/15/runtime-config-logging.html
    value: 1000
  - name: pg_stat_statements.track
    value: all
  - name: shared_preload_libraries
    value: vectorize,pg_stat_statements,pg_cron
  # ${NAMESPACE} is substituted by the operator at deploy time; routes
  # embedding requests to the `embeddings` appService defined above.
  - name: vectorize.embedding_service_url
    value: http://${NAMESPACE}-embeddings.${NAMESPACE}.svc.cluster.local:3000/v1/embeddings
26 changes: 26 additions & 0 deletions tembo-py/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# local data directories
data/

# Python bytecode caches
**/*/__pycache__/
# test, coverage, and type-check artifacts
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
coverage-junit-unit-tests.xml
coverage-report-unit-tests.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cov.xml
# editor settings
.vscode
*.json

# py files on root of project
*.py

# files and dirs beginning with an underscore
_*
23 changes: 23 additions & 0 deletions tembo-py/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Developer tasks for the tembo-py package (poetry-managed).
SOURCE_OBJECTS=tembo_py tests

# None of these targets produce a file of the same name; declare them phony
# so make always runs them (and never skips because such a file exists).
.PHONY: format check test run.postgres

# Auto-fix lint findings, then reformat the source tree.
format:
	poetry run ruff check --silent --fix --exit-zero ${SOURCE_OBJECTS}
	poetry run black ${SOURCE_OBJECTS}


# Lint, format-check, and type-check without modifying files (CI gate).
check:
	poetry run ruff check ${SOURCE_OBJECTS}
	poetry run black --check ${SOURCE_OBJECTS}
	poetry run mypy ${SOURCE_OBJECTS}

# Run unit tests with coverage; integration tests are excluded.
test:
	poetry run pytest -s \
		--ignore=tests/integration_tests \
		--cov=./ \
		--cov-report=xml:coverage-report-unit-tests.xml \
		--junitxml=coverage-junit-unit-tests.xml \
		--cov-report term


# Start a local Postgres container with the vectorize extension preinstalled.
run.postgres:
	docker run -p 5432:5432 --name tembo-postgres -e POSTGRES_PASSWORD=postgres -d quay.io/tembo/vectorize-pg:latest
12 changes: 12 additions & 0 deletions tembo-py/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# tembo-py

The official Python client for Tembo.io


## Installation

```bash
pip install tembo-py
```


23 changes: 23 additions & 0 deletions tembo-py/pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
[tool.poetry]
name = "tembo-py"
version = "0.1.0"
description = "The official Python client for Tembo.io"
authors = ["Adam Hendel <adam@tembo.io>"]
# Valid SPDX identifier (was "Apache-2", which is not a recognized SPDX id).
license = "Apache-2.0"
readme = "README.md"

[tool.poetry.dependencies]
# NOTE(review): tembo_py/rag.py uses builtin generics (list[tuple[...]])
# which require Python >= 3.9 at runtime — consider raising this floor or
# adding `from __future__ import annotations` there.
python = ">=3.8.1,<3.12"
llama-index = "^0.10.9"
llama-index-vector-stores-postgres = "^0.1.1"
tiktoken = "^0.6.0"
psycopg = "^3.1.18"

[tool.poetry.group.dev.dependencies]
ruff = "^0.2.2"
black = "^24.2.0"
mypy = "^1.8.0"

[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
Empty file added tembo-py/tembo_py/__init__.py
Empty file.
144 changes: 144 additions & 0 deletions tembo-py/tembo_py/rag.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
from dataclasses import dataclass, field
import json
import logging
from typing import Optional

from llama_index.core import SimpleDirectoryReader
from llama_index.core.node_parser import SentenceSplitter
import psycopg


@dataclass
class TemboRAGcontroller:
    """Prepares documents and loads them into a Tembo Postgres instance for
    retrieval augmented generation (RAG) via the `vectorize` extension.

    Typical flow::

        rag = TemboRAGcontroller("my_project", connection_string=conn_str)
        chunks = rag.prepare_from_directory("./docs")
        rag.load_documents(chunks)
        rag.init_rag()

    Attributes:
        project_name: Name of the RAG project; also used to derive the
            backing table name (``vectorize._data_<project_name>``).
        chunk_size: Maximum chunk size for the sentence splitter; defaults
            to the chat model's context window (see ``get_context_size``).
        chat_model: Chat model name used only to pick a default chunk size.
        sentence_transformer: Embedding model passed to ``vectorize.init_rag``.
        connection_string: Default Postgres connection string; methods accept
            a per-call override.
    """

    project_name: str
    chunk_size: Optional[int] = None
    chat_model: str = "gpt-3.5-turbo"
    sentence_transformer: str = "sentence-transformers/all-MiniLM-L12-v2"
    connection_string: Optional[str] = None
    # Template for the backing table; formatted with project_name before use.
    _table_name: str = "vectorize._data_{project_name}"

    # Constructed in __post_init__ once the effective chunk size is known.
    sentence_splitter: SentenceSplitter = field(
        default_factory=SentenceSplitter, init=False
    )

    def __post_init__(self):
        # Fall back to the chat model's context window when no explicit
        # chunk size was provided.
        chunk_size = self.chunk_size or get_context_size(self.chat_model)
        self.sentence_splitter = SentenceSplitter(chunk_size=chunk_size)
        self.chunk_size = chunk_size

    def prepare_from_directory(
        self, document_dir: str, **kwargs
    ) -> list[tuple[str, str, str, str]]:
        """Read all documents in `document_dir` and split them into chunks.

        Extra keyword arguments are forwarded to
        ``SentenceSplitter.get_nodes_from_documents``.

        Returns:
            Rows of ``(document_name, chunk_id, metadata_json, content)``
            ready for ``load_documents``.
        """
        documents = SimpleDirectoryReader(document_dir).load_data()
        chunks = self.sentence_splitter.get_nodes_from_documents(documents, **kwargs)
        # json.dumps returns a str, so each row is (str, str, str, str) —
        # matching the declared return type (the metadata element was
        # previously mis-annotated as dict).
        chunks_for_copy: list[tuple[str, str, str, str]] = []
        for chunk in chunks:
            chunks_for_copy.append(
                (
                    chunk.metadata["file_name"],
                    chunk.id_,
                    json.dumps(chunk.metadata),
                    chunk.get_content(),
                )
            )
        logging.info("Prepared %s chunks", len(chunks_for_copy))
        return chunks_for_copy

    def load_documents(
        self,
        documents: list[tuple[str, str, str, str]],
        connection_string: Optional[str] = None,
    ):
        """Create the project table if needed, then COPY the rows into it.

        Args:
            documents: Rows produced by ``prepare_from_directory``.
            connection_string: Overrides the instance default when given.

        Raises:
            ValueError: If no connection string is available.
        """
        connection_string = connection_string or self.connection_string
        if not connection_string:
            raise ValueError("No connection string provided")
        self._init_table(self.project_name, connection_string)
        table = self._table_name.format(project_name=self.project_name)
        self._load_docs(table, documents, connection_string)

    def init_rag(
        self, connection_string: Optional[str] = None, transformer: Optional[str] = None
    ):
        """Register the project with ``vectorize.init_rag`` in Postgres.

        Args:
            connection_string: Overrides the instance default when given.
            transformer: Overrides ``self.sentence_transformer`` when given.

        Raises:
            ValueError: If no connection string is available.
        """
        connection_string = connection_string or self.connection_string
        if not connection_string:
            raise ValueError("No connection string provided")

        xformer = transformer or self.sentence_transformer
        q = """
        SELECT vectorize.init_rag(
            agent_name => %s,
            table_name => %s,
            schema => %s,
            unique_record_id => 'record_id',
            "column" => 'content',
            transformer => %s
        );
        """
        schema, table = self._table_name.format(project_name=self.project_name).split(
            "."
        )
        with psycopg.connect(connection_string, autocommit=True) as conn:
            cur = conn.cursor()
            cur.execute(q, (self.project_name, table, schema, xformer))

    def _load_docs(
        self,
        table: str,
        documents: list[tuple[str, str, str, str]],
        connection_string: str,
    ):
        """COPY the prepared rows into `table`, logging progress ~every 10%."""
        with psycopg.connect(connection_string, autocommit=True) as conn:
            cur = conn.cursor()
            sql = f"COPY {table} (document_name, chunk_id, meta, content) FROM STDIN"
            num_chunks = len(documents)
            # Log every ~10% of rows. max(1, ...) guards against fewer than
            # 10 documents, where num_chunks // 10 == 0 and `i % deca` would
            # raise ZeroDivisionError.
            deca = max(1, num_chunks // 10)
            with cur.copy(sql) as copy:
                for i, row in enumerate(documents):
                    if i % deca == 0:
                        logging.info("writing row %s / %s", i, num_chunks)
                    copy.write_row(row)

    def _init_table(self, project_name: str, connection_string: str):
        """Create the project's chunk table if it does not already exist."""
        table = self._table_name.format(project_name=project_name)
        # NOTE(review): the table name is interpolated into DDL (identifiers
        # cannot be parameterized); project_name must come from trusted
        # callers, not end-user input.
        q = f"""
        CREATE TABLE IF NOT EXISTS {table} (
            record_id BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY,
            document_name TEXT NOT NULL,
            chunk_id TEXT NOT NULL,
            meta JSONB,
            content TEXT NOT NULL
        )
        """
        with psycopg.connect(connection_string, autocommit=True) as conn:
            cur = conn.cursor()
            cur.execute(q)


def get_context_size(model):
    """Return the context window size (in tokens) for a known model name.

    Prefix rules handle dated/variant names (e.g. "gpt-4-0613"); legacy
    models are matched exactly. Unknown models fall back to 4096.
    """
    # Ordered prefix table: more specific prefixes must precede their
    # shorter counterparts (e.g. "gpt-4-32k" before "gpt-4").
    prefix_sizes = (
        ("gpt-4-1106", 128000),
        ("gpt-4-32k", 32768),
        ("gpt-4", 8192),
        ("gpt-3.5-turbo-16k", 16384),
        ("gpt-3.5-turbo", 4096),
    )
    for prefix, size in prefix_sizes:
        if model.startswith(prefix):
            return size

    # Exact matches for legacy completion/embedding models.
    exact_sizes = {
        "text-davinci-002": 4097,
        "text-davinci-003": 4097,
        "ada": 2049,
        "babbage": 2049,
        "curie": 2049,
        "code-cushman-001": 2048,
        "code-davinci-002": 8001,
        "davinci": 2049,
        "text-ada-001": 2049,
        "text-babbage-001": 2049,
        "text-curie-001": 2049,
        "text-embedding-ada-002": 8192,
    }
    return exact_sizes.get(model, 4096)
Loading