
Commit ca65154: Add cuda capabilities
PaulNdrei committed Dec 16, 2023 · 1 parent 76c49e2
Showing 7 changed files with 103 additions and 37 deletions.
2 changes: 1 addition & 1 deletion .gitignore
@@ -4,4 +4,4 @@
 models
 venv*
 __pycache__
-.idea
+**/.env
6 changes: 1 addition & 5 deletions Dockerfile
@@ -31,8 +31,4 @@ RUN python -m pip install --no-cache-dir -r requirements.txt
 RUN wget -q http://share.laklak.eu/model_vits_ca/best_model.pth -P /app/models/vits_ca/
 COPY . .
 
-ENV SPEECH_SPEED=1.0
-
-ENV MP_WORKERS=2
-
-ENTRYPOINT python server/server.py --speech_speed ${SPEECH_SPEED} --mp_workers ${MP_WORKERS}
+ENTRYPOINT python server/server.py --speech_speed ${SPEECH_SPEED} --mp_workers ${MP_WORKERS} --use_cuda ${USE_CUDA} --use_mp ${USE_MP}
7 changes: 3 additions & 4 deletions Makefile
@@ -1,8 +1,7 @@
-speech_speed ?= 1.0
-mp_workers ?= 4
-
 deploy:
-	speech_speed=$(speech_speed) mp_workers=$(mp_workers) docker compose up -d --build
+	docker compose --env-file .env up -d --build
+deploy-gpu:
+	docker compose -f docker-compose-gpu.yml --env-file .env up -d --build
 undeploy:
 	docker compose down
 stop:
58 changes: 50 additions & 8 deletions README.md
@@ -1,4 +1,4 @@
-# TTS Api
+# TTS API
 
 RestFUL api and web interface to serve coqui TTS models
 
@@ -130,7 +130,6 @@ POST /api/tts
 }
 ```
 
-## Deployment
 
 #### Command line deployment arguments
 | **Argument** | **Type** | **Default** | **Description** |
@@ -143,18 +142,61 @@ POST /api/tts
 - The "speech_speed" argument refers to a parameter that adjusts the rate at which speech sounds in an audio output, with higher values resulting in faster speech, and lower values leading to slower speech.
 
 
-#### Deployment via docker compose
+## Deployment
 
+
+### Environment Variables
+
+To deploy this project, you will need to add the following environment variables to your .env file
+
+`SPEECH_SPEED`
+
+`MP_WORKERS`
+
+`USE_CUDA`
+
+`USE_MP`
+
+`SHM_SIZE`
+
+
+Example of .env file
 ```bash
-make deploy
+SPEECH_SPEED=1.0
+MP_WORKERS=4
+USE_CUDA=False
+USE_MP=True
+SHM_SIZE=2gb
 ```
-Example of deployment changing speech_speed parameter
 
+
+### Deployment via docker compose
+
+#### Prerequisites
+
+- Make
+
+- [Docker](https://docs.docker.com/engine/install/ubuntu/)
+
+- [Docker compose](https://docs.docker.com/compose/install/)
+
+To deploy this app
 ```bash
-make deploy speech_speed=1.5
+make deploy
 ```
-
-The example docker-compose file shows also the build-arg usage for the speech_speed parameter.
+To deploy this app using GPU
+```bash
+make deploy-gpu
+```
+To stop deployment run
+```bash
+make stop
+```
+To delete deployment run
+```bash
+make undeploy
+```
 
 #### Deployment via Helm
 
@@ -188,7 +230,7 @@ helm upgrade --install aina-tts-api --create-namespace \
 ```
 
 ## Authors and acknowledgment
-Developed by the Text Mining Unit in Barcelona Supercomputing Center. The code is based on Coqui TTS server.py that has a Mozilla Public License 2.0.
+Developed by the Language Technologies Unit in Barcelona Supercomputing Center. The code is based on Coqui TTS server.py that has a Mozilla Public License 2.0.
 
 ## License
 Mozilla Public License 2.0
21 changes: 21 additions & 0 deletions docker-compose-gpu.yml
@@ -0,0 +1,21 @@
+version: '3.9'
+services:
+  server:
+    build:
+      context: .
+    environment:
+      - SPEECH_SPEED=${SPEECH_SPEED}
+      - MP_WORKERS=${MP_WORKERS}
+      - USE_CUDA=True
+      - USE_MP=${USE_MP}
+    restart: unless-stopped
+    ports:
+      - '8080:8000'
+    shm_size: ${SHM_SIZE}
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: all
+              capabilities: [gpu]
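The `driver: nvidia` device reservation requires the NVIDIA Container Toolkit on the Docker host. A quick sanity check that CUDA is actually visible inside the running container, assuming the image ships PyTorch (a Coqui TTS dependency):

```python
# Sanity check for GPU visibility inside the container.
# Assumes PyTorch is installed in the image (Coqui TTS depends on it);
# run it e.g. via `docker compose -f docker-compose-gpu.yml exec server python`.
import torch

print("CUDA available:", torch.cuda.is_available())
print("Device count:", torch.cuda.device_count())
if torch.cuda.is_available():
    print("Device 0:", torch.cuda.get_device_name(0))
```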
7 changes: 4 additions & 3 deletions docker-compose.yml
@@ -4,9 +4,10 @@ services:
     build:
       context: .
     environment:
-      - SPEECH_SPEED=${speech_speed}
-      - MP_WORKERS=${mp_workers}
+      - SPEECH_SPEED=${SPEECH_SPEED}
+      - MP_WORKERS=${MP_WORKERS}
+      - USE_MP=${USE_MP}
     restart: unless-stopped
     ports:
       - '8080:8000'
-    shm_size: '1gb'
+    shm_size: ${SHM_SIZE}
39 changes: 23 additions & 16 deletions server/server.py
@@ -102,11 +102,12 @@ def convert_boolean(x):
     parser.add_argument("--speakers_file_path", type=str, help="JSON file for multi-speaker model.", default=None)
     parser.add_argument("--port", type=int, default=8000, help="port to listen on.")
     parser.add_argument("--host", type=str, default="0.0.0.0", help="host ip to listen.")
-    parser.add_argument("--use_cuda", type=convert_boolean, default=False, help="true to use CUDA.")
-    parser.add_argument("--mp_workers", action=MpWorkersAction ,type=int, default=mp.cpu_count(), help="number of CPUs used for multiprocessing")
+    parser.add_argument("--use_mp", type=convert_boolean, default=True, nargs='?', const=True, help="true to use Python multiprocessing.")
+    parser.add_argument("--use_cuda", type=convert_boolean, default=False, nargs='?', const=False, help="true to use CUDA.")
+    parser.add_argument("--mp_workers", action=MpWorkersAction ,type=int, default=mp.cpu_count(), nargs='?', const=mp.cpu_count(), help="number of CPUs used for multiprocessing")
     parser.add_argument("--debug", type=convert_boolean, default=False, help="true to enable Flask debug mode.")
     parser.add_argument("--show_details", type=convert_boolean, default=False, help="Generate model detail page.")
-    parser.add_argument("--speech_speed", type=float, default=1.0, help="Change speech speed.")
+    parser.add_argument("--speech_speed", type=float, default=1.0, nargs='?', const=1.0, help="Change speech speed.")
     return parser
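The boolean flags arrive from the Dockerfile ENTRYPOINT as literal strings (`USE_CUDA=False` becomes the string "False", and `bool("False")` is truthy), which is why the arguments use a `convert_boolean` type. Its body sits above this hunk; a minimal sketch of what such a converter typically looks like (the actual implementation in server.py may differ):

```python
import argparse

def convert_boolean(x):
    # Hypothetical sketch: argparse hands over the raw string ("False", "true", "1", ...),
    # so map common spellings explicitly instead of calling bool() on the string.
    value = str(x).strip().lower()
    if value in ("true", "1", "yes"):
        return True
    if value in ("false", "0", "no"):
        return False
    raise argparse.ArgumentTypeError(f"expected a boolean, got {x!r}")
```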


@@ -277,7 +278,7 @@ async def speaker_exception_handler(request: Request, exc: LanguageException):
 @app.get("/startup-parameters")
 async def parameters():
     return JSONResponse(
-        content={"speech_speed": args.speech_speed, "mp_workers": args.mp_workers},
+        content={"speech_speed": args.speech_speed, "mp_workers": args.mp_workers, "use_cuda": args.use_cuda, "use_mp": args.use_mp},
     )
 
 @app.get("/api/available-voices")
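Given the compose port mapping `'8080:8000'`, the extended response can be checked from the host; a small usage sketch using only the Python standard library (host, port, and the printed values are assumptions from the compose files and your .env):

```python
import json
import urllib.request

# Port 8080 comes from the compose mapping '8080:8000'; adjust if you changed it.
with urllib.request.urlopen("http://localhost:8080/startup-parameters") as resp:
    params = json.load(resp)

# Per the handler above, expect keys: speech_speed, mp_workers, use_cuda, use_mp.
print(params)
```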
@@ -370,24 +371,30 @@ async def tts(request: TTSRequestModel):
 
     model = app.state.synthesizer
 
-    sentences = segmenter.segment(text)
+    if args.use_cuda or not args.use_mp:
+        wavs = worker(text, speaker_id=speaker_id, model=model, use_aliases=speaker_config_attributes["use_aliases"], new_speaker_ids=speaker_config_attributes["new_speaker_ids"])
+        out = io.BytesIO()
+        model.save_wav(wavs, out)
+    else:
+
+        sentences = segmenter.segment(text)
 
-    mp_workers = args.mp_workers
-    worker_with_args = partial(worker, speaker_id=speaker_id, model=model, use_aliases=speaker_config_attributes["use_aliases"], new_speaker_ids=speaker_config_attributes["new_speaker_ids"])
+        mp_workers = args.mp_workers
+        worker_with_args = partial(worker, speaker_id=speaker_id, model=model, use_aliases=speaker_config_attributes["use_aliases"], new_speaker_ids=speaker_config_attributes["new_speaker_ids"])
 
-    pool = mp.Pool(processes=mp_workers)
+        pool = mp.Pool(processes=mp_workers)
 
-    results = pool.map(worker_with_args, [sentence.strip() for sentence in sentences if sentence])
+        results = pool.map(worker_with_args, [sentence.strip() for sentence in sentences if sentence])
 
-    # Close the pool to indicate that no more tasks will be submitted
-    pool.close()
-    # Wait for all processes to complete
-    pool.join()
-    merged_wavs = list(chain(*results))
+        # Close the pool to indicate that no more tasks will be submitted
+        pool.close()
+        # Wait for all processes to complete
+        pool.join()
+        merged_wavs = list(chain(*results))
 
-    out = io.BytesIO()
+        out = io.BytesIO()
 
-    model.save_wav(merged_wavs, out)
+        model.save_wav(merged_wavs, out)
 
     return StreamingResponse(out, media_type="audio/wav")
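The new branch keeps synthesis in a single process when CUDA is enabled or multiprocessing is disabled, since CUDA contexts generally cannot be shared across forked pool workers; only the CPU path fans sentences out over an `mp.Pool`. A self-contained sketch of that dispatch pattern, with a dummy worker and naive sentence splitting standing in for the real model and segmenter:

```python
import multiprocessing as mp
from itertools import chain

def worker(sentence):
    # Stand-in for the real TTS worker: returns a list of audio chunks.
    return [f"<wav:{sentence}>"]

def synthesize(text, use_cuda=False, use_mp=True, mp_workers=2):
    # CUDA (or use_mp=False): synthesize the whole text in-process.
    if use_cuda or not use_mp:
        return worker(text)
    # CPU path: split into sentences and fan out over a process pool.
    sentences = [s.strip() for s in text.split(".") if s.strip()]
    with mp.Pool(processes=mp_workers) as pool:
        results = pool.map(worker, sentences)
    return list(chain(*results))

if __name__ == "__main__":
    print(synthesize("Hello there. General Kenobi.", use_cuda=False, use_mp=True))
```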

