Skip to content

Commit

Permalink
Merge pull request #51 from bossjones/feature-maybe-langgraph
Browse files Browse the repository at this point in the history
feat: chroma fixes
  • Loading branch information
bossjones authored Aug 21, 2024
2 parents f297381 + 321384a commit b6150bc
Show file tree
Hide file tree
Showing 55 changed files with 12,764 additions and 1,156 deletions.
4 changes: 4 additions & 0 deletions .github/dependabot/requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -543,6 +543,8 @@ jmespath==1.0.1
joblib==1.4.2
# via nltk
# via scikit-learn
jq==1.8.0
# via goob-ai
jsbeautifier==1.15.1
# via mkdocs-mermaid2-plugin
json5==0.9.25
Expand Down Expand Up @@ -1190,6 +1192,8 @@ pygments==2.18.0
# via rich
pyinspect==0.1.0
# via goob-ai
pyinstrument==4.7.2
# via goob-ai
pyinvoke==1.0.4
# via goob-ai
pylint==3.2.6
Expand Down
4 changes: 4 additions & 0 deletions .github/dependabot/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -454,6 +454,8 @@ jmespath==1.0.1
joblib==1.4.2
# via nltk
# via scikit-learn
jq==1.8.0
# via goob-ai
json5==0.9.25
# via jupyterlab-server
jsonpatch==1.33
Expand Down Expand Up @@ -942,6 +944,8 @@ pygments==2.18.0
# via rich
pyinspect==0.1.0
# via goob-ai
pyinstrument==4.7.2
# via goob-ai
pyinvoke==1.0.4
# via goob-ai
pymongo==4.8.0
Expand Down
8 changes: 5 additions & 3 deletions .github/workflows/ci-upgrade.yml
Original file line number Diff line number Diff line change
Expand Up @@ -76,9 +76,10 @@ jobs:
- name: Start Chroma
id: chroma-docker-compose-up
run: |
git clone --depth 1 --branch feature-boss-chroma https://github.com/bossjones/chroma.git vendored_chroma
cd vendored_chroma
docker compose up -d --build
mkdir -p ./src/goob_ai/data/chroma/vectorstorage || true
# git clone --depth 1 --branch feature-boss-chroma https://github.com/bossjones/chroma.git vendored_chroma
# cd vendored_chroma
docker compose up -d
docker ps -a
continue-on-error: true

Expand Down Expand Up @@ -119,6 +120,7 @@ jobs:
run: |
sudo apt-get install -y tesseract-ocr tesseract-ocr-por libyaml-dev poppler-utils
sudo apt install ffmpeg -y
sudo apt-get install autoconf automake build-essential libtool python3-dev libsqlite3-dev -y
# Allow debugging with tmate
- name: Setup tmate session
Expand Down
8 changes: 5 additions & 3 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -93,9 +93,10 @@ jobs:
- name: Start Chroma
id: chroma-docker-compose-up
run: |
git clone --depth 1 --branch feature-boss-chroma https://github.com/bossjones/chroma.git vendored_chroma
cd vendored_chroma
docker compose up -d --build
mkdir -p ./src/goob_ai/data/chroma/vectorstorage || true
# git clone --depth 1 --branch feature-boss-chroma https://github.com/bossjones/chroma.git vendored_chroma
# cd vendored_chroma
docker compose up -d
docker ps -a
continue-on-error: true

Expand Down Expand Up @@ -138,6 +139,7 @@ jobs:
run: |
sudo apt-get install -y tesseract-ocr tesseract-ocr-por libyaml-dev poppler-utils
sudo apt install ffmpeg -y
sudo apt-get install autoconf automake build-essential libtool python3-dev libsqlite3-dev -y
- name: Install dependencies
# if: steps.cached-rye-dependencies.outputs.cache-hit != 'true'
Expand Down
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ myenv/
venv/

# local dev
docker/
# docker/

# Unit test / coverage reports
.cache
Expand Down
11 changes: 11 additions & 0 deletions Justfile
Original file line number Diff line number Diff line change
Expand Up @@ -254,3 +254,14 @@ ci:

manhole-shell:
./scripts/manhole-shell

# List every directory named `cassettes` that fd finds.
find-cassettes-dirs:
    fd -td cassettes

# Delete every `cassettes` directory fd finds; `rm -ri` prompts before removing.
delete-cassettes:
    fd -td cassettes -X rm -ri

# Delete the cassettes (via delete-cassettes), then run the
# `unittests-vcr-record-final` and `unittests-debug` rye scripts.
regenerate-cassettes: delete-cassettes
    rye run unittests-vcr-record-final
    rye run unittests-debug
2 changes: 2 additions & 0 deletions REFERENCES.md
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,7 @@ example prompt:

source: <https://www.youtube.com/watch?v=AeASAsPp9LE>


# july 2024

- <https://medium.com/langchain-0-2-insights/langchain-0-2-insights-building-a-versatile-cli-assistant-with-langchain-and-langgraph-bc81a4570ba2>
Expand All @@ -161,3 +162,4 @@ source: <https://www.youtube.com/watch?v=AeASAsPp9LE>
- https://github.com/codingjoe/relint - `Write your own linting rules using regular expressions.`
- https://github.com/ionelmc/python-manhole/ - `Debugging manhole for python applications.`
- https://github.com/langchain-ai/langchain/blob/master/cookbook/Multi_modal_RAG.ipynb
- https://github.com/SAMAD101/Chino/blob/e38f3d9d38702beaed37229f66d79e86a7acab26/src/chino/query.py (write a query module maybe)
266 changes: 263 additions & 3 deletions docker-compose.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,266 @@
version: '3.9'
---
# Shared json-file logging defaults: files capped at 5 MB, at most 2 files,
# tagged with the container name. Anchored as `logging` so a service can reuse
# it with `logging: *logging`; the commented-out services in this file use that
# alias and would hit an undefined-alias error without the anchor if re-enabled.
x-default-logging: &logging
  driver: "json-file"
  options:
    max-size: "5m"
    max-file: "2"
    tag: "{{.Name}}"

# User-defined bridge network that the `server` and `chromadb-admin` services
# attach to.
networks:
  net:
    driver: bridge

services:

# postgres:
# image: postgres:14-alpine
# ports:
# - 5433:5432
# volumes:
# - postgres-vector-admin-data:/var/lib/postgresql/data
# environment:
# POSTGRES_USER: vectoradmin
# POSTGRES_PASSWORD: password
# POSTGRES_DB: vdbms
# networks:
# - net
# logging: *logging

# zipkin:
# image: openzipkin/zipkin
# ports:
# - "9411:9411" # you can access Zipkin UI at http://localhost:9411
# # depends_on: [otel-collector]
# environment:
# - JAVA_OPTS=-Xms1024m -Xmx1024m -XX:+ExitOnOutOfMemoryError
# depends_on:
# otel-collector:
# condition: service_started
# networks:
# - net

# otel-collector:
# image: ${COLLECTOR_CONTRIB_IMAGE}
# command: ["--config=/etc/otel-collector-config.yaml"]
# container_name: otel-collector
# volumes:
# - ${PWD}/examples/observability/otel-collector-config.yaml:/etc/otel-collector-config.yaml
# ports:
# - "4317:4317" # OTLP gRPC receiver
# - "4318:4318" # HTTP
# - "55681:55681" # Legacy
# - "1888:1888" # pprof extension
# - "8888:8888" # Prometheus metrics exposed by the collector
# - "8889:8889" # Prometheus exporter metrics
# - "13133:13133" # health_check extension
# - "55679:55679" # zpages extension
# depends_on:
# - jaeger
# logging: *logging
# networks:
# - net

server:
  # Chroma server container, listening on and published at port 8010.
  # image: server
  # NOTE(review): `latest` is a floating tag; consider pinning a version so
  # local and CI runs use the same image.
  image: chromadb/chroma:latest
  container_name: chroma
  # build:
  #   context: .
  #   dockerfile: Dockerfile
  volumes:
    # Be aware that indexed data are located in "/chroma/chroma/"
    # Default configuration for persist_directory in chromadb/config.py
    # Read more about deployments: https://docs.trychroma.com/deployment
    # - chroma-data:/chroma/chroma
    - ./src/goob_ai/data/chroma/vectorstorage:/chroma/chroma:rw

  # Folded scalar: the lines below are joined with single spaces into one
  # argument string (same value as the previous backslash-continued form).
  command: >-
    --workers 1 --host 0.0.0.0 --port 8010 --proxy-headers
    --log-config chromadb/log_config.yml --timeout-keep-alive 30
  environment:
    # Entries written as ${NAME} are filled in by Compose variable
    # interpolation; PERSIST_DIRECTORY falls back to /chroma/chroma.
    - IS_PERSISTENT=TRUE
    - ALLOW_RESET=TRUE
    - CHROMA_SERVER_AUTHN_PROVIDER=${CHROMA_SERVER_AUTHN_PROVIDER}
    - CHROMA_SERVER_AUTHN_CREDENTIALS_FILE=${CHROMA_SERVER_AUTHN_CREDENTIALS_FILE}
    - CHROMA_SERVER_AUTHN_CREDENTIALS=${CHROMA_SERVER_AUTHN_CREDENTIALS}
    - CHROMA_AUTH_TOKEN_TRANSPORT_HEADER=${CHROMA_AUTH_TOKEN_TRANSPORT_HEADER}
    - PERSIST_DIRECTORY=${PERSIST_DIRECTORY:-/chroma/chroma}
    # - CHROMA_OTEL_COLLECTION_ENDPOINT=http://otel-collector:4318/
    # - CHROMA_OTEL_EXPORTER_HEADERS=${CHROMA_OTEL_EXPORTER_HEADERS}
    # - CHROMA_OTEL_SERVICE_NAME=${CHROMA_OTEL_SERVICE_NAME:-chroma}
    # - CHROMA_OTEL_GRANULARITY=${CHROMA_OTEL_GRANULARITY:-all}
    # - OTEL_EXPORTER_OTLP_ENDPOINT
    # - OTEL_EXPORTER_OTLP_METRICS_TEMPORALITY_PREFERENCE
    # - OTEL_RESOURCE_ATTRIBUTES
    # - OTEL_SERVICE_NAME=chroma
    - CHROMA_SERVER_CORS_ALLOW_ORIGINS=["*"]
    # - PUBLIC_OTEL_EXPORTER_OTLP_TRACES_ENDPOINT
    # - CHROMA_OTEL_EXPORTER_ENDPOINT=${CHROMA_OTEL_EXPORTER_ENDPOINT}
    # - CHROMA_OTEL_EXPORTER_HEADERS=${CHROMA_OTEL_EXPORTER_HEADERS}
    # - CHROMA_OTEL_SERVICE_NAME=${CHROMA_OTEL_SERVICE_NAME}
    # - CHROMA_OTEL_GRANULARITY=${CHROMA_OTEL_GRANULARITY}
    - CHROMA_SERVER_NOFILE=${CHROMA_SERVER_NOFILE}
  ulimits:
    memlock:
      soft: -1
      hard: -1
    nofile:
      soft: 65536
      hard: 65536
  restart: unless-stopped  # possible values are: "no", "always", "on-failure", "unless-stopped"

  # depends_on:
  #   otel-collector:
  #     condition: service_started
  ports:
    - "8010:8010"
  healthcheck:
    # Adjust below to match your container port
    test:
      - "CMD"
      - "curl"
      - "-f"
      - "http://localhost:8010/api/v1/heartbeat"
    interval: 30s
    timeout: 10s
    retries: 3
  networks:
    - net

# NOTE: Use http://host.docker.internal:8010 to access the server from the admin container
chromadb-admin:
  # Runs the fengzhichao/chromadb-admin image and publishes it on host port 3000.
  image: "fengzhichao/chromadb-admin:latest"
  container_name: chromadb-admin
  # For DHCP it is recommended to remove these ports and instead add: network_mode: "host"
  # hostname: 'chromadb-admin'
  ports:
    - "3000:3000/tcp"
  expose:
    - 3000
  restart: unless-stopped
  networks:
    - net
  # Long-form dependency: wait until the `server` container has been started
  # (this does not wait for its healthcheck to pass).
  # depends_on:
  #   - server
  depends_on:
    server:
      condition: service_started

# # ********************
# # Telemetry Components
# # ********************
# # Jaeger
# jaeger:
# image: ${JAEGERTRACING_IMAGE}
# container_name: jaeger
# command:
# - "--memory.max-traces=5000"
# - "--query.base-path=/jaeger/ui"
# - "--prometheus.server-url=http://${PROMETHEUS_ADDR}"
# - "--prometheus.query.normalize-calls=true"
# - "--prometheus.query.normalize-duration=true"
# deploy:
# resources:
# limits:
# memory: 400M
# restart: unless-stopped
# ports:
# # - "${JAEGER_SERVICE_PORT}" # Jaeger UI
# # - "${OTEL_COLLECTOR_PORT_GRPC}"
# # SOURCE: https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/main/examples/demo/docker-compose.yaml
# - "16686:16686"
# - "14268"
# - "14250"
# environment:
# - METRICS_STORAGE_TYPE=prometheus
# logging: *logging
# networks:
# - net

# # Grafana
# grafana:
# image: ${GRAFANA_IMAGE}
# container_name: grafana
# deploy:
# resources:
# limits:
# memory: 100M
# restart: unless-stopped
# environment:
# - "GF_INSTALL_PLUGINS=grafana-opensearch-datasource"
# volumes:
# - ./src/grafana/grafana.ini:/etc/grafana/grafana.ini
# - ./src/grafana/provisioning/:/etc/grafana/provisioning/
# ports:
# - "${GRAFANA_SERVICE_PORT}"
# logging: *logging
# networks:
# - net
# # Valkey used by Cart service
# valkey-cart:
# image: ${VALKEY_IMAGE}
# container_name: valkey-cart
# user: valkey
# deploy:
# resources:
# limits:
# memory: 20M
# restart: unless-stopped
# ports:
# - "${VALKEY_PORT}"
# logging: *logging
# networks:
# - net
# # # OpenTelemetry Collector
# # otelcol:
# # image: ${COLLECTOR_CONTRIB_IMAGE}
# # container_name: otel-col
# # deploy:
# # resources:
# # limits:
# # memory: 200M
# # restart: unless-stopped
# # command: [ "--config=/etc/otelcol-config.yml", "--config=/etc/otelcol-config-extras.yml" ]
# # user: 0:0
# # volumes:
# # - ${DOCKER_SOCK}:/var/run/docker.sock:ro
# # - ${OTEL_COLLECTOR_CONFIG}:/etc/otelcol-config.yml
# # - ${OTEL_COLLECTOR_CONFIG_EXTRAS}:/etc/otelcol-config-extras.yml
# # ports:
# # - "${OTEL_COLLECTOR_PORT_GRPC}"
# # - "${OTEL_COLLECTOR_PORT_HTTP}"
# # depends_on:
# # - jaeger
# # logging: *logging
# # environment:
# # - ENVOY_PORT

# # Prometheus
# prometheus:
# image: ${PROMETHEUS_IMAGE}
# container_name: prometheus
# command:
# - --web.console.templates=/etc/prometheus/consoles
# - --web.console.libraries=/etc/prometheus/console_libraries
# - --storage.tsdb.retention.time=1h
# - --config.file=/etc/prometheus/prometheus-config.yaml
# - --storage.tsdb.path=/prometheus
# - --web.enable-lifecycle
# - --web.route-prefix=/
# - --enable-feature=exemplar-storage
# - --enable-feature=otlp-write-receiver
# volumes:
# - ./src/prometheus/prometheus-config.yaml:/etc/prometheus/prometheus-config.yaml
# deploy:
# resources:
# limits:
# memory: 300M
# restart: unless-stopped
# ports:
# - "${PROMETHEUS_SERVICE_PORT}:${PROMETHEUS_SERVICE_PORT}"
# logging: *logging
# networks:
# - net

redis:
image: bitnami/redis:6.2.10
hostname: "goob-redis"
Expand All @@ -14,9 +274,9 @@ services:
timeout: 3s
retries: 50
ports:
- "7600:7600"
- "7600:7600"
volumes:
- 'goob_redis_data:/bitnami/redis/data'
- 'goob_redis_data:/bitnami/redis/data'

volumes:
goob_redis_data:
Expand Down
Loading

0 comments on commit b6150bc

Please sign in to comment.