Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/celery refactor #2813

Merged
merged 27 commits into from
Oct 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
1f8bafd
fresh indexing feature branch
rkuo-danswer Oct 14, 2024
2a62422
cherry pick test
rkuo-danswer Oct 14, 2024
57823a2
Revert "cherry pick test"
rkuo-danswer Oct 14, 2024
77710cf
Merge branch 'main' of https://github.com/danswer-ai/danswer into fea…
rkuo-danswer Oct 14, 2024
d8f19f2
set multitenant so that vespa fields match when indexing
rkuo-danswer Oct 14, 2024
96f68a6
cleanup pass
rkuo-danswer Oct 15, 2024
e8f132d
mypy
rkuo-danswer Oct 15, 2024
58a42a2
pass through env var to control celery indexing concurrency
rkuo-danswer Oct 15, 2024
9a8a54a
comments on task kickoff and some logging improvements
rkuo-danswer Oct 15, 2024
409099b
disentangle configuration for different workers and beats.
rkuo-danswer Oct 15, 2024
3d2ccd8
use get_session_with_tenant
rkuo-danswer Oct 16, 2024
5af90c8
comment out all of update.py
rkuo-danswer Oct 16, 2024
d3d0d62
Merge branch 'main' of https://github.com/danswer-ai/danswer into fea…
rkuo-danswer Oct 18, 2024
2b0f09a
rename to RedisConnectorIndexingFenceData
rkuo-danswer Oct 18, 2024
4296f02
first check num_indexing_workers
rkuo-danswer Oct 18, 2024
9921251
refactor RedisConnectorIndexingFenceData
rkuo-danswer Oct 18, 2024
744f3c4
comment out on_worker_process_init
rkuo-danswer Oct 18, 2024
16e8d02
Merge branch 'main' of https://github.com/danswer-ai/danswer into fea…
rkuo-danswer Oct 18, 2024
6ddd5db
Merge branch 'feature/background_indexing_2' of https://github.com/da…
rkuo-danswer Oct 18, 2024
4aec91f
missed a file
rkuo-danswer Oct 18, 2024
649b357
scope db sessions to short lengths
rkuo-danswer Oct 19, 2024
f6acef8
Merge branch 'main' of https://github.com/danswer-ai/danswer into fea…
rkuo-danswer Oct 21, 2024
71efa82
update launch.json template
rkuo-danswer Oct 21, 2024
25432d5
Merge branch 'main' of https://github.com/danswer-ai/danswer into fea…
rkuo-danswer Oct 22, 2024
26bd6c8
fix types
rkuo-danswer Oct 22, 2024
2e83f09
code review
rkuo-danswer Oct 22, 2024
d515e60
Merge branch 'main' of https://github.com/danswer-ai/danswer into fea…
rkuo-danswer Oct 22, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
264 changes: 235 additions & 29 deletions .vscode/launch.template.jsonc
Original file line number Diff line number Diff line change
Expand Up @@ -6,19 +6,69 @@
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"compounds": [
{
// Dummy entry used to label the group
"name": "--- Compound ---",
"configurations": [
"--- Individual ---"
],
"presentation": {
"group": "1",
}
},
{
"name": "Run All Danswer Services",
"configurations": [
"Web Server",
"Model Server",
"API Server",
"Indexing",
"Background Jobs",
"Slack Bot"
]
}
"Slack Bot",
"Celery primary",
"Celery light",
"Celery heavy",
"Celery indexing",
"Celery beat",
],
"presentation": {
"group": "1",
}
},
{
"name": "Web / Model / API",
"configurations": [
"Web Server",
"Model Server",
"API Server",
],
"presentation": {
"group": "1",
}
},
{
"name": "Celery (all)",
"configurations": [
"Celery primary",
"Celery light",
"Celery heavy",
"Celery indexing",
"Celery beat"
],
"presentation": {
"group": "1",
}
}
],
"configurations": [
{
// Dummy entry used to label the group
"name": "--- Individual ---",
"type": "node",
"request": "launch",
"presentation": {
"group": "2",
"order": 0
}
},
{
"name": "Web Server",
"type": "node",
Expand All @@ -29,7 +79,11 @@
"runtimeArgs": [
"run", "dev"
],
"console": "integratedTerminal"
"presentation": {
"group": "2",
},
"console": "integratedTerminal",
"consoleTitle": "Web Server Console"
},
{
"name": "Model Server",
Expand All @@ -48,7 +102,11 @@
"--reload",
"--port",
"9000"
]
],
"presentation": {
"group": "2",
},
"consoleTitle": "Model Server Console"
},
{
"name": "API Server",
Expand All @@ -68,57 +126,171 @@
"--reload",
"--port",
"8080"
]
],
"presentation": {
"group": "2",
},
"consoleTitle": "API Server Console"
},
// For the listener to access the Slack API,
// DANSWER_BOT_SLACK_APP_TOKEN & DANSWER_BOT_SLACK_BOT_TOKEN need to be set in .env file located in the root of the project
{
"name": "Indexing",
"consoleName": "Indexing",
"name": "Slack Bot",
"consoleName": "Slack Bot",
"type": "debugpy",
"request": "launch",
"program": "danswer/background/update.py",
"program": "danswer/danswerbot/slack/listener.py",
"cwd": "${workspaceFolder}/backend",
"envFile": "${workspaceFolder}/.vscode/.env",
"env": {
"ENABLE_MULTIPASS_INDEXING": "false",
"LOG_LEVEL": "DEBUG",
"PYTHONUNBUFFERED": "1",
"PYTHONPATH": "."
}
},
"presentation": {
"group": "2",
},
"consoleTitle": "Slack Bot Console"
},
// Celery and all async jobs, usually would include indexing as well but this is handled separately above for dev
{
"name": "Background Jobs",
"consoleName": "Background Jobs",
"name": "Celery primary",
"type": "debugpy",
"request": "launch",
"program": "scripts/dev_run_background_jobs.py",
"module": "celery",
"cwd": "${workspaceFolder}/backend",
"envFile": "${workspaceFolder}/.vscode/.env",
"env": {
"LOG_DANSWER_MODEL_INTERACTIONS": "True",
"LOG_LEVEL": "INFO",
"PYTHONUNBUFFERED": "1",
"PYTHONPATH": "."
},
"args": [
"-A",
"danswer.background.celery.versioned_apps.primary",
"worker",
"--pool=threads",
"--concurrency=4",
"--prefetch-multiplier=1",
"--loglevel=INFO",
"--hostname=primary@%n",
"-Q",
"celery",
],
"presentation": {
"group": "2",
},
"consoleTitle": "Celery primary Console"
},
{
"name": "Celery light",
"type": "debugpy",
"request": "launch",
"module": "celery",
"cwd": "${workspaceFolder}/backend",
"envFile": "${workspaceFolder}/.vscode/.env",
"env": {
"LOG_LEVEL": "INFO",
"PYTHONUNBUFFERED": "1",
"PYTHONPATH": "."
},
"args": [
"-A",
"danswer.background.celery.versioned_apps.light",
"worker",
"--pool=threads",
"--concurrency=64",
"--prefetch-multiplier=8",
"--loglevel=INFO",
"--hostname=light@%n",
"-Q",
"vespa_metadata_sync,connector_deletion",
],
"presentation": {
"group": "2",
},
"consoleTitle": "Celery light Console"
},
{
"name": "Celery heavy",
"type": "debugpy",
"request": "launch",
"module": "celery",
"cwd": "${workspaceFolder}/backend",
"envFile": "${workspaceFolder}/.vscode/.env",
"env": {
"LOG_LEVEL": "INFO",
"PYTHONUNBUFFERED": "1",
"PYTHONPATH": "."
},
"args": [
"-A",
"danswer.background.celery.versioned_apps.heavy",
"worker",
"--pool=threads",
"--concurrency=4",
"--prefetch-multiplier=1",
"--loglevel=INFO",
"--hostname=heavy@%n",
"-Q",
"connector_pruning",
],
"presentation": {
"group": "2",
},
"consoleTitle": "Celery heavy Console"
},
{
"name": "Celery indexing",
"type": "debugpy",
"request": "launch",
"module": "celery",
"cwd": "${workspaceFolder}/backend",
"envFile": "${workspaceFolder}/.vscode/.env",
"env": {
"ENABLE_MULTIPASS_INDEXING": "false",
"LOG_LEVEL": "DEBUG",
"PYTHONUNBUFFERED": "1",
"PYTHONPATH": "."
},
"args": [
"--no-indexing"
]
"-A",
"danswer.background.celery.versioned_apps.indexing",
"worker",
"--pool=threads",
"--concurrency=1",
"--prefetch-multiplier=1",
"--loglevel=INFO",
"--hostname=indexing@%n",
"-Q",
"connector_indexing",
],
"presentation": {
"group": "2",
},
"consoleTitle": "Celery indexing Console"
},
// For the listner to access the Slack API,
// DANSWER_BOT_SLACK_APP_TOKEN & DANSWER_BOT_SLACK_BOT_TOKEN need to be set in .env file located in the root of the project
{
"name": "Slack Bot",
"consoleName": "Slack Bot",
"name": "Celery beat",
"type": "debugpy",
"request": "launch",
"program": "danswer/danswerbot/slack/listener.py",
"module": "celery",
"cwd": "${workspaceFolder}/backend",
"envFile": "${workspaceFolder}/.vscode/.env",
"env": {
"LOG_LEVEL": "DEBUG",
"PYTHONUNBUFFERED": "1",
"PYTHONPATH": "."
}
},
"args": [
"-A",
"danswer.background.celery.versioned_apps.beat",
"beat",
"--loglevel=INFO",
],
"presentation": {
"group": "2",
},
"consoleTitle": "Celery beat Console"
},
{
"name": "Pytest",
Expand All @@ -137,8 +309,22 @@
"-v"
// Specify a sepcific module/test to run or provide nothing to run all tests
//"tests/unit/danswer/llm/answering/test_prune_and_merge.py"
]
],
"presentation": {
"group": "2",
},
"consoleTitle": "Pytest Console"
},
{
// Dummy entry used to label the group
"name": "--- Tasks ---",
"type": "node",
"request": "launch",
"presentation": {
"group": "3",
"order": 0
}
},
{
"name": "Clear and Restart External Volumes and Containers",
"type": "node",
Expand All @@ -147,7 +333,27 @@
"runtimeArgs": ["${workspaceFolder}/backend/scripts/restart_containers.sh"],
"cwd": "${workspaceFolder}",
"console": "integratedTerminal",
"stopOnEntry": true
}
"stopOnEntry": true,
"presentation": {
"group": "3",
},
},
{
// Celery jobs launched through a single background script (legacy)
// Recommend using the "Celery (all)" compound launch instead.
"name": "Background Jobs",
"consoleName": "Background Jobs",
"type": "debugpy",
"request": "launch",
"program": "scripts/dev_run_background_jobs.py",
"cwd": "${workspaceFolder}/backend",
"envFile": "${workspaceFolder}/.vscode/.env",
"env": {
"LOG_DANSWER_MODEL_INTERACTIONS": "True",
"LOG_LEVEL": "DEBUG",
"PYTHONUNBUFFERED": "1",
"PYTHONPATH": "."
},
},
]
}
Loading