diff --git a/.gitignore b/.gitignore
index 3452faf..9512c53 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,4 +4,4 @@ models
 
 venv*
 __pycache__
-.idea
\ No newline at end of file
+**/.env
\ No newline at end of file
diff --git a/Dockerfile b/Dockerfile
index 26723e2..0e8696d 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -31,8 +31,4 @@ RUN python -m pip install --no-cache-dir -r requirements.txt
 RUN wget -q http://share.laklak.eu/model_vits_ca/best_model.pth -P /app/models/vits_ca/
 
 COPY . .
-ENV SPEECH_SPEED=1.0
-
-ENV MP_WORKERS=2
-
-ENTRYPOINT python server/server.py --speech_speed ${SPEECH_SPEED} --mp_workers ${MP_WORKERS}
+ENTRYPOINT python server/server.py --speech_speed ${SPEECH_SPEED} --mp_workers ${MP_WORKERS} --use_cuda ${USE_CUDA} --use_mp ${USE_MP}
diff --git a/Makefile b/Makefile
index 3620b5c..0d46d34 100644
--- a/Makefile
+++ b/Makefile
@@ -1,8 +1,7 @@
-speech_speed ?= 1.0
-mp_workers ?= 4
-
 deploy:
-	speech_speed=$(speech_speed) mp_workers=$(mp_workers) docker compose up -d --build
+	docker compose --env-file .env up -d --build
+deploy-gpu:
+	docker compose -f docker-compose-gpu.yml --env-file .env up -d --build
 undeploy:
 	docker compose down
 stop:
diff --git a/README.md b/README.md
index ebc74fb..78acc5d 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-# TTS Api
+# TTS API
 
 RestFUL api and web interface to serve coqui TTS models
 
@@ -130,7 +130,6 @@ POST /api/tts
 }
 ```
 
-## Deployment
 #### Command line deployment arguments
 
 | **Argument** | **Type** | **Default** | **Description** |
@@ -143,18 +142,61 @@ POST /api/tts
 
 - The "speech_speed" argument refers to a parameter that adjusts the rate at which speech sounds in an audio output, with higher values resulting in faster speech, and lower values leading to slower speech.
 
-#### Deployment via docker compose
+## Deployment
+
+
+### Environment Variables
+
+To deploy this project, you will need to add the following environment variables to your .env file
+
+`SPEECH_SPEED`
+
+`MP_WORKERS`
+`USE_CUDA`
+
+`USE_MP`
+
+`SHM_SIZE`
+
+
+Example of .env file
 
 ```bash
-make deploy
+SPEECH_SPEED=1.0
+MP_WORKERS=4
+USE_CUDA=False
+USE_MP=True
+SHM_SIZE=2gb
 ```
 
-Example of deployment changing speech_speed parameter
+
+### Deployment via docker compose
+
+#### Prerequisites
+
+- Make
+
+- [Docker](https://docs.docker.com/engine/install/ubuntu/)
+
+- [Docker compose](https://docs.docker.com/compose/install/)
+
+To deploy this app
 
 ```bash
-make deploy speech_speed=1.5
+make deploy
 ```
 
-The example docker-compose file shows also the build-arg usage for the speech_speed parameter.
+To deploy this app using GPU
+```bash
+make deploy-gpu
+```
+To stop deployment run
+```bash
+make stop
+```
+To delete deployment run
+```bash
+make undeploy
+```
 
 #### Deployment via Helm
@@ -188,7 +230,7 @@ helm upgrade --install aina-tts-api --create-namespace \
 ```
 
 ## Authors and acknowledgment
-Developed by the Text Mining Unit in Barcelona Supercomputing Center. The code is based on Coqui TTS server.py that has a Mozilla Public License 2.0.
+Developed by the Language Technologies Unit in Barcelona Supercomputing Center. The code is based on Coqui TTS server.py that has a Mozilla Public License 2.0.
 
 ## License
 Mozilla Public License 2.0
diff --git a/docker-compose-gpu.yml b/docker-compose-gpu.yml
new file mode 100644
index 0000000..c434532
--- /dev/null
+++ b/docker-compose-gpu.yml
@@ -0,0 +1,21 @@
+version: '3.9'
+services:
+  server:
+    build:
+      context: .
+    environment:
+      - SPEECH_SPEED=${SPEECH_SPEED}
+      - MP_WORKERS=${MP_WORKERS}
+      - USE_CUDA=True
+      - USE_MP=${USE_MP}
+    restart: unless-stopped
+    ports:
+      - '8080:8000'
+    shm_size: ${SHM_SIZE}
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: all
+              capabilities: [gpu]
\ No newline at end of file
diff --git a/docker-compose.yml b/docker-compose.yml
index b40fadf..59a5fea 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -4,9 +4,10 @@ services:
     build:
       context: .
     environment:
-      - SPEECH_SPEED=${speech_speed}
-      - MP_WORKERS=${mp_workers}
+      - SPEECH_SPEED=${SPEECH_SPEED}
+      - MP_WORKERS=${MP_WORKERS}
+      - USE_MP=${USE_MP}
     restart: unless-stopped
     ports:
       - '8080:8000'
-    shm_size: '1gb'
\ No newline at end of file
+    shm_size: ${SHM_SIZE}
diff --git a/server/server.py b/server/server.py
index f094d2f..4dddf3c 100644
--- a/server/server.py
+++ b/server/server.py
@@ -102,11 +102,12 @@ def convert_boolean(x):
     parser.add_argument("--speakers_file_path", type=str, help="JSON file for multi-speaker model.", default=None)
     parser.add_argument("--port", type=int, default=8000, help="port to listen on.")
     parser.add_argument("--host", type=str, default="0.0.0.0", help="host ip to listen.")
-    parser.add_argument("--use_cuda", type=convert_boolean, default=False, help="true to use CUDA.")
-    parser.add_argument("--mp_workers", action=MpWorkersAction ,type=int, default=mp.cpu_count(), help="number of CPUs used for multiprocessing")
+    parser.add_argument("--use_mp", type=convert_boolean, default=True, nargs='?', const=True, help="true to use Python multiprocessing.")
+    parser.add_argument("--use_cuda", type=convert_boolean, default=False, nargs='?', const=False, help="true to use CUDA.")
+    parser.add_argument("--mp_workers", action=MpWorkersAction ,type=int, default=mp.cpu_count(), nargs='?', const=mp.cpu_count(), help="number of CPUs used for multiprocessing")
     parser.add_argument("--debug", type=convert_boolean, default=False, help="true to enable Flask debug mode.")
     parser.add_argument("--show_details", type=convert_boolean, default=False, help="Generate model detail page.")
-    parser.add_argument("--speech_speed", type=float, default=1.0, help="Change speech speed.")
+    parser.add_argument("--speech_speed", type=float, default=1.0, nargs='?', const=1.0, help="Change speech speed.")
     return parser
 
 
@@ -277,7 +278,7 @@ async def speaker_exception_handler(request: Request, exc: LanguageException):
 @app.get("/startup-parameters")
 async def parameters():
     return JSONResponse(
-        content={"speech_speed": args.speech_speed, "mp_workers": args.mp_workers},
+        content={"speech_speed": args.speech_speed, "mp_workers": args.mp_workers, "use_cuda": args.use_cuda, "use_mp": args.use_mp},
     )
 
 @app.get("/api/available-voices")
@@ -370,24 +371,30 @@ async def tts(request: TTSRequestModel):
 
     model = app.state.synthesizer
 
-    sentences = segmenter.segment(text)
+    if args.use_cuda or not args.use_mp:
+        wavs = worker(text, speaker_id=speaker_id, model=model, use_aliases=speaker_config_attributes["use_aliases"], new_speaker_ids=speaker_config_attributes["new_speaker_ids"])
+        out = io.BytesIO()
+        model.save_wav(wavs, out)
+    else:
+
+        sentences = segmenter.segment(text)
 
-    mp_workers = args.mp_workers
-    worker_with_args = partial(worker, speaker_id=speaker_id, model=model, use_aliases=speaker_config_attributes["use_aliases"], new_speaker_ids=speaker_config_attributes["new_speaker_ids"])
+        mp_workers = args.mp_workers
+        worker_with_args = partial(worker, speaker_id=speaker_id, model=model, use_aliases=speaker_config_attributes["use_aliases"], new_speaker_ids=speaker_config_attributes["new_speaker_ids"])
 
-    pool = mp.Pool(processes=mp_workers)
+        pool = mp.Pool(processes=mp_workers)
 
-    results = pool.map(worker_with_args, [sentence.strip() for sentence in sentences if sentence])
+        results = pool.map(worker_with_args, [sentence.strip() for sentence in sentences if sentence])
 
-    # Close the pool to indicate that no more tasks will be submitted
-    pool.close()
-    # Wait for all processes to complete
-    pool.join()
-    merged_wavs = list(chain(*results))
+        # Close the pool to indicate that no more tasks will be submitted
+        pool.close()
+        # Wait for all processes to complete
+        pool.join()
+        merged_wavs = list(chain(*results))
 
-    out = io.BytesIO()
+        out = io.BytesIO()
 
-    model.save_wav(merged_wavs, out)
+        model.save_wav(merged_wavs, out)
 
     return StreamingResponse(out, media_type="audio/wav")
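As a quick smoke test of the new configuration flow, the sketch below queries the extended /startup-parameters endpoint from server/server.py above. It assumes the stack was brought up with `make deploy`, that the host port is 8080 as mapped in docker-compose.yml, and that the .env file matches the example added to the README; the values returned will mirror whatever your .env contains.

```bash
# Query the /startup-parameters endpoint, which now also reports use_cuda and use_mp
curl -s http://localhost:8080/startup-parameters
# Expected shape of the response (values depend on the .env used at deploy time), e.g.:
# {"speech_speed": 1.0, "mp_workers": 4, "use_cuda": false, "use_mp": true}
```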