Skip to content
This repository has been archived by the owner on Nov 13, 2024. It is now read-only.

Cleanup indexes in case of failure #232

Merged
merged 18 commits into from
Dec 20, 2023
25 changes: 22 additions & 3 deletions .github/workflows/PR.yml
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,6 @@ jobs:
matrix:
python-version: [3.9, '3.10', 3.11]
pinecone-plan: ["paid", "starter"]

steps:
- uses: actions/checkout@v3
- name: Set up Python ${{ matrix.python-version }}
Expand All @@ -66,6 +65,7 @@ jobs:
echo "${SUFFIX}"
echo "INDEX_NAME_SUFFIX=${SUFFIX}" >> $GITHUB_OUTPUT
- name: Run system tests
id: system_tests
if: github.event_name == 'merge_group'
env:
INDEX_NAME: system-${{ steps.gen_suffix.outputs.INDEX_NAME_SUFFIX }}
Expand All @@ -74,17 +74,36 @@ jobs:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
ANYSCALE_API_KEY: ${{ secrets.ANYSCALE_API_KEY }}
CO_API_KEY: ${{ secrets.CO_API_KEY }}
run: poetry run pytest -n 3 --dist loadscope --html=report_system.html --self-contained-html tests/system
run: |
run_id=$(uuidgen | tr -d '-' | tr '[:upper:]' '[:lower:]')
echo "run_id=${run_id}" >> $GITHUB_OUTPUT
echo "Test Run ID: ${run_id}"
poetry run pytest -n 3 --dist loadscope --testrunuid $run_id --html=report_system.html --self-contained-html tests/system
- name: Run e2e tests
id: e2e_tests
if: github.event_name == 'merge_group'
env:
INDEX_NAME: e2e-${{ steps.gen_suffix.outputs.INDEX_NAME_SUFFIX }}
PINECONE_ENVIRONMENT: ${{ matrix.pinecone-plan == 'paid' && secrets.PINECONE_ENVIRONMENT_3 || secrets.PINECONE_ENVIRONMENT_4 }}
PINECONE_API_KEY: ${{ matrix.pinecone-plan == 'paid' && secrets.PINECONE_API_KEY_3 || secrets.PINECONE_API_KEY_4 }}
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
ANYSCALE_API_KEY: ${{ secrets.ANYSCALE_API_KEY }}
CO_API_KEY: ${{ secrets.CO_API_KEY }}
CE_LOG_FILENAME: e2e.log
run: poetry run pytest -n 3 --dist loadscope --html=report_e2e.html --self-contained-html tests/e2e
run: |
run_id=$(uuidgen | tr -d '-' | tr '[:upper:]' '[:lower:]')
echo "run_id=${run_id}" >> $GITHUB_OUTPUT
echo "Test Run ID: ${run_id}"
poetry run pytest -n 3 --dist loadscope --testrunuid $run_id --html=report_e2e.html --self-contained-html tests/e2e
- name: Cleanup indexes
if: (cancelled() || failure()) && github.event_name == 'merge_group'
env:
PINECONE_ENVIRONMENT: ${{ matrix.pinecone-plan == 'paid' && secrets.PINECONE_ENVIRONMENT_3 || secrets.PINECONE_ENVIRONMENT_4 }}
PINECONE_API_KEY: ${{ matrix.pinecone-plan == 'paid' && secrets.PINECONE_API_KEY_3 || secrets.PINECONE_API_KEY_4 }}
run: |
export PYTHONPATH=.
poetry run python scripts/cleanup_indexes.py "${{ steps.e2e_tests.outputs.run_id }}"
poetry run python scripts/cleanup_indexes.py "${{ steps.system_tests.outputs.run_id }}"
- name: upload pytest report.html
uses: actions/upload-artifact@v3
if: always()
Expand Down
2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -90,3 +90,5 @@ skip-checking-raises = true
[tool.poetry.scripts]
canopy = "canopy_cli.cli:cli"

[tool.pytest.ini_options]
log_cli = true
23 changes: 23 additions & 0 deletions scripts/cleanup_indexes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import logging
import sys
from tests.util import cleanup_indexes


def main():
    """Delete the test indexes that belong to a single test run.

    Expects exactly one command-line argument, the test run uid. With any
    other argument count, a usage line is logged and the process exits with
    status 1. An empty uid is logged and treated as a no-op.
    """
    logging.basicConfig(level=logging.INFO)
    log = logging.getLogger(__name__)

    cli_args = sys.argv[1:]
    if len(cli_args) != 1:
        log.info("Usage: python scripts/cleanup_indexes.py <testrun_uid>")
        sys.exit(1)

    # Reviewer note: if this script ever grows a second parameter, switch
    # the manual sys.argv handling over to argparse.
    (testrun_uid,) = cli_args

    # Guard clause: an empty uid means there is nothing to clean up.
    if not testrun_uid:
        log.info("Passed testrun_uid is empty.")
        return

    log.info(f"Cleaning up indexes for testrun_uid '{testrun_uid}'")
    cleanup_indexes(testrun_uid)


if __name__ == '__main__':
main()
15 changes: 6 additions & 9 deletions tests/e2e/test_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,19 @@
import os
from typing import List

from datetime import datetime

import pinecone
import pytest
from fastapi.testclient import TestClient
from tenacity import retry, stop_after_attempt, wait_fixed

from canopy.knowledge_base import KnowledgeBase

from canopy_server.app import app, API_VERSION
from canopy_server.models.v1.api_models import (
HealthStatus,
ContextUpsertRequest,
ContextQueryRequest)
from .. import Tokenizer
from ..util import create_e2e_tests_index_name

upsert_payload = ContextUpsertRequest(
documents=[
Expand Down Expand Up @@ -45,17 +43,15 @@ def assert_vector_ids_not_exist(vector_ids: List[str],


@pytest.fixture(scope="module")
def index_name(testrun_uid):
today = datetime.today().strftime("%Y-%m-%d")
return f"test-app-{testrun_uid[-6:]}-{today}"
def index_name(testrun_uid: str):
return create_e2e_tests_index_name(testrun_uid)


@pytest.fixture(scope="module", autouse=True)
def knowledge_base(index_name):
    """Module-scoped fixture: create a Canopy index and return its KnowledgeBase."""
    pinecone.init()
    canopy_kb = KnowledgeBase(index_name=index_name)
    canopy_kb.create_canopy_index(indexed_fields=["test"])
    return canopy_kb


Expand Down Expand Up @@ -84,6 +80,7 @@ def teardown_knowledge_base(knowledge_base):
if index_name in pinecone.list_indexes():
pinecone.delete_index(index_name)


# TODO: the following test is a complete e2e test, this is not the final design
# for the e2e tests, however there were some issues
# with the fixtures that will be resolved
Expand All @@ -93,8 +90,8 @@ def test_health(client):
health_response = client.get("/health")
assert health_response.is_success
assert (
health_response.json()
== HealthStatus(pinecone_status="OK", llm_status="OK").dict()
health_response.json()
== HealthStatus(pinecone_status="OK", llm_status="OK").dict()
)


Expand Down
20 changes: 9 additions & 11 deletions tests/system/knowledge_base/test_knowledge_base.py
Original file line number Diff line number Diff line change
@@ -1,29 +1,29 @@
import os
import random

import pytest
import pinecone
import numpy as np
import pinecone
import pytest
from dotenv import load_dotenv
from tenacity import (
retry,
stop_after_delay,
wait_fixed,
wait_chain,
)
from dotenv import load_dotenv
from datetime import datetime

from canopy.knowledge_base import KnowledgeBase, list_canopy_indexes
from canopy.knowledge_base.chunker import Chunker
from canopy.knowledge_base.knowledge_base import INDEX_NAME_PREFIX
from canopy.knowledge_base.models import DocumentWithScore
from canopy.knowledge_base.record_encoder import RecordEncoder
from canopy.knowledge_base.reranker import Reranker
from canopy.models.data_models import Document, Query
from tests.unit.stubs.stub_record_encoder import StubRecordEncoder
from tests.unit.stubs.stub_dense_encoder import StubDenseEncoder
from tests.unit.stubs.stub_chunker import StubChunker
from tests.unit import random_words

from tests.unit.stubs.stub_chunker import StubChunker
from tests.unit.stubs.stub_dense_encoder import StubDenseEncoder
from tests.unit.stubs.stub_record_encoder import StubRecordEncoder
from tests.util import create_system_tests_index_name

load_dotenv()

Expand All @@ -42,8 +42,7 @@ def retry_decorator():

@pytest.fixture(scope="module")
def index_name(testrun_uid):
today = datetime.today().strftime("%Y-%m-%d")
return f"test-kb-{testrun_uid[-6:]}-{today}"
return create_system_tests_index_name(testrun_uid)


@pytest.fixture(scope="module")
Expand Down Expand Up @@ -299,7 +298,6 @@ def test_update_documents(encoder,
documents,
encoded_chunks,
knowledge_base):

index_name = knowledge_base._index_name

# chunker/kb that produces fewer chunks per doc
Expand Down
35 changes: 35 additions & 0 deletions tests/util.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
import logging
from datetime import datetime

import pinecone

logger = logging.getLogger(__name__)


def create_index_name(testrun_uid: str, prefix: str) -> str:
    """Build a test index name of the form ``<prefix>-<uid tail>-<YYYY-MM-DD>``.

    Args:
        testrun_uid: Identifier of the test run; only its last six
            characters are used.
        prefix: Leading part of the index name.

    Returns:
        The prefix, the uid tail and today's date joined with dashes.
    """
    uid_tail = testrun_uid[-6:]
    date_stamp = datetime.today().strftime("%Y-%m-%d")
    return "-".join((prefix, uid_tail, date_stamp))


def create_system_tests_index_name(testrun_uid: str) -> str:
    """Return the index name used by the system tests for this test run."""
    return create_index_name(testrun_uid=testrun_uid, prefix="test-kb")


def create_e2e_tests_index_name(testrun_uid: str) -> str:
    """Return the index name used by the e2e tests for this test run."""
    return create_index_name(testrun_uid=testrun_uid, prefix="test-app")


def cleanup_indexes(testrun_uid: str):
    """Delete the system and e2e test indexes created for a test run.

    Index names end with the date on which they were generated. Cleanup can
    run on a later calendar day than the tests that created the index (for
    example when a run crosses midnight), in which case a name rebuilt now
    would no longer match. Existing indexes are therefore matched on the
    date-less ``<prefix>-<uid tail>-`` stem rather than on the full name.

    Args:
        testrun_uid: Identifier of the test run whose indexes are removed.
    """
    pinecone.init()
    index_names = (
        create_system_tests_index_name(testrun_uid),
        create_e2e_tests_index_name(testrun_uid),
    )
    logger.info(f"Preparing to cleanup indexes: {index_names}")
    current_indexes = pinecone.list_indexes()
    for index_name in index_names:
        # Drop the trailing "YYYY-MM-DD" (three dash-separated fields) so the
        # match does not depend on the day the cleanup happens to run.
        stem = index_name.rsplit("-", 3)[0] + "-"
        matching = [name for name in current_indexes if name.startswith(stem)]
        if not matching:
            logger.info(f"Index '{index_name}' does not exist.")
            continue
        for existing_name in matching:
            logger.info(f"Deleting index '{existing_name}'...")
            pinecone.delete_index(existing_name)
            logger.info(f"Index '{existing_name}' deleted.")