
Commit ca65154: Add cuda capabilities
PaulNdrei committed Dec 16, 2023 · 1 parent 76c49e2
Showing 7 changed files with 103 additions and 37 deletions.
2 changes: 1 addition & 1 deletion .gitignore
@@ -4,4 +4,4 @@
 models
 venv*
 __pycache__
-.idea
+**/.env
6 changes: 1 addition & 5 deletions Dockerfile
@@ -31,8 +31,4 @@ RUN python -m pip install --no-cache-dir -r requirements.txt
 RUN wget -q http://share.laklak.eu/model_vits_ca/best_model.pth -P /app/models/vits_ca/
 COPY . .
 
-ENV SPEECH_SPEED=1.0
-
-ENV MP_WORKERS=2
-
-ENTRYPOINT python server/server.py --speech_speed ${SPEECH_SPEED} --mp_workers ${MP_WORKERS}
+ENTRYPOINT python server/server.py --speech_speed ${SPEECH_SPEED} --mp_workers ${MP_WORKERS} --use_cuda ${USE_CUDA} --use_mp ${USE_MP}
7 changes: 3 additions & 4 deletions Makefile
@@ -1,8 +1,7 @@
-speech_speed ?= 1.0
-mp_workers ?= 4
-
 deploy:
-	speech_speed=$(speech_speed) mp_workers=$(mp_workers) docker compose up -d --build
+	docker compose --env-file .env up -d --build
+deploy-gpu:
+	docker compose -f docker-compose-gpu.yml --env-file .env up -d --build
 undeploy:
 	docker compose down
 stop:
58 changes: 50 additions & 8 deletions README.md
@@ -1,4 +1,4 @@
-# TTS Api
+# TTS API
 
 RestFUL api and web interface to serve coqui TTS models
 
@@ -130,7 +130,6 @@ POST /api/tts
 }
 ```
 
-## Deployment
 
 #### Command line deployment arguments
 | **Argument** | **Type** | **Default** | **Description** |
@@ -143,18 +142,61 @@ POST /api/tts
 - The "speech_speed" argument refers to a parameter that adjusts the rate at which speech sounds in an audio output, with higher values resulting in faster speech, and lower values leading to slower speech.
 
 
-#### Deployment via docker compose
+## Deployment
 
+
+### Environment Variables
+
+To deploy this project, you will need to add the following environment variables to your .env file
+
+`SPEECH_SPEED`
+
+`MP_WORKERS`
+
+`USE_CUDA`
+
+`USE_MP`
+
+`SHM_SIZE`
+
+
+Example of .env file
 ```bash
-make deploy
+SPEECH_SPEED=1.0
+MP_WORKERS=4
+USE_CUDA=False
+USE_MP=True
+SHM_SIZE=2gb
 ```
-Example of deployment changing speech_speed parameter
 
+
+### Deployment via docker compose
+
+#### Prerequisites
+
+- Make
+
+- [Docker](https://docs.docker.com/engine/install/ubuntu/)
+
+- [Docker compose](https://docs.docker.com/compose/install/)
+
+To deploy this app
 ```bash
-make deploy speech_speed=1.5
+make deploy
 ```
-
-The example docker-compose file shows also the build-arg usage for the speech_speed parameter.
+To deploy this app using GPU
+```bash
+make deploy-gpu
+```
+To stop deployment run
+```bash
+make stop
+```
+To delete deployment run
+```bash
+make undeploy
+```
 
 #### Deployment via Helm
 
@@ -188,7 +230,7 @@ helm upgrade --install aina-tts-api --create-namespace \
 ```
 
 ## Authors and acknowledgment
-Developed by the Text Mining Unit in Barcelona Supercomputing Center. The code is based on Coqui TTS server.py that has a Mozilla Public License 2.0.
+Developed by the Language Technologies Unit in Barcelona Supercomputing Center. The code is based on Coqui TTS server.py that has a Mozilla Public License 2.0.
 
 ## License
 Mozilla Public License 2.0
21 changes: 21 additions & 0 deletions docker-compose-gpu.yml
@@ -0,0 +1,21 @@
+version: '3.9'
+services:
+  server:
+    build:
+      context: .
+    environment:
+      - SPEECH_SPEED=${SPEECH_SPEED}
+      - MP_WORKERS=${MP_WORKERS}
+      - USE_CUDA=True
+      - USE_MP=${USE_MP}
+    restart: unless-stopped
+    ports:
+      - '8080:8000'
+    shm_size: ${SHM_SIZE}
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: all
+              capabilities: [gpu]
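The `driver: nvidia` device reservation requires the NVIDIA Container Toolkit on the Docker host. A quick sanity check that CUDA is actually visible inside the running container, assuming the image ships PyTorch (a Coqui TTS dependency):

```python
# Sanity check for GPU visibility inside the container.
# Assumes PyTorch is installed in the image (Coqui TTS depends on it);
# run it e.g. via `docker compose -f docker-compose-gpu.yml exec server python`.
import torch

print("CUDA available:", torch.cuda.is_available())
print("Device count:", torch.cuda.device_count())
if torch.cuda.is_available():
    print("Device 0:", torch.cuda.get_device_name(0))
```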
7 changes: 4 additions & 3 deletions docker-compose.yml
@@ -4,9 +4,10 @@ services:
     build:
       context: .
     environment:
-      - SPEECH_SPEED=${speech_speed}
-      - MP_WORKERS=${mp_workers}
+      - SPEECH_SPEED=${SPEECH_SPEED}
+      - MP_WORKERS=${MP_WORKERS}
+      - USE_MP=${USE_MP}
     restart: unless-stopped
     ports:
       - '8080:8000'
-    shm_size: '1gb'
+    shm_size: ${SHM_SIZE}
39 changes: 23 additions & 16 deletions server/server.py
@@ -102,11 +102,12 @@ def convert_boolean(x):
     parser.add_argument("--speakers_file_path", type=str, help="JSON file for multi-speaker model.", default=None)
     parser.add_argument("--port", type=int, default=8000, help="port to listen on.")
     parser.add_argument("--host", type=str, default="0.0.0.0", help="host ip to listen.")
-    parser.add_argument("--use_cuda", type=convert_boolean, default=False, help="true to use CUDA.")
-    parser.add_argument("--mp_workers", action=MpWorkersAction ,type=int, default=mp.cpu_count(), help="number of CPUs used for multiprocessing")
+    parser.add_argument("--use_mp", type=convert_boolean, default=True, nargs='?', const=True, help="true to use Python multiprocessing.")
+    parser.add_argument("--use_cuda", type=convert_boolean, default=False, nargs='?', const=False, help="true to use CUDA.")
+    parser.add_argument("--mp_workers", action=MpWorkersAction ,type=int, default=mp.cpu_count(), nargs='?', const=mp.cpu_count(), help="number of CPUs used for multiprocessing")
     parser.add_argument("--debug", type=convert_boolean, default=False, help="true to enable Flask debug mode.")
     parser.add_argument("--show_details", type=convert_boolean, default=False, help="Generate model detail page.")
-    parser.add_argument("--speech_speed", type=float, default=1.0, help="Change speech speed.")
+    parser.add_argument("--speech_speed", type=float, default=1.0, nargs='?', const=1.0, help="Change speech speed.")
     return parser
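The boolean flags arrive from the Dockerfile ENTRYPOINT as literal strings (`USE_CUDA=False` becomes the string "False", and `bool("False")` is truthy), which is why the arguments use a `convert_boolean` type. Its body sits above this hunk; a minimal sketch of what such a converter typically looks like (the actual implementation in server.py may differ):

```python
import argparse

def convert_boolean(x):
    # Hypothetical sketch: argparse hands over the raw string ("False", "true", "1", ...),
    # so map common spellings explicitly instead of calling bool() on the string.
    value = str(x).strip().lower()
    if value in ("true", "1", "yes"):
        return True
    if value in ("false", "0", "no"):
        return False
    raise argparse.ArgumentTypeError(f"expected a boolean, got {x!r}")
```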


@@ -277,7 +278,7 @@ async def speaker_exception_handler(request: Request, exc: LanguageException):
 @app.get("/startup-parameters")
 async def parameters():
     return JSONResponse(
-        content={"speech_speed": args.speech_speed, "mp_workers": args.mp_workers},
+        content={"speech_speed": args.speech_speed, "mp_workers": args.mp_workers, "use_cuda": args.use_cuda, "use_mp": args.use_mp},
     )
 
 @app.get("/api/available-voices")
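Given the compose port mapping `'8080:8000'`, the extended response can be checked from the host; a small usage sketch using only the Python standard library (host, port, and the printed values are assumptions from the compose files and your .env):

```python
import json
import urllib.request

# Port 8080 comes from the compose mapping '8080:8000'; adjust if you changed it.
with urllib.request.urlopen("http://localhost:8080/startup-parameters") as resp:
    params = json.load(resp)

# Per the handler above, expect keys: speech_speed, mp_workers, use_cuda, use_mp.
print(params)
```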
@@ -370,24 +371,30 @@ async def tts(request: TTSRequestModel):
 
     model = app.state.synthesizer
 
-    sentences = segmenter.segment(text)
+    if args.use_cuda or not args.use_mp:
+        wavs = worker(text, speaker_id=speaker_id, model=model, use_aliases=speaker_config_attributes["use_aliases"], new_speaker_ids=speaker_config_attributes["new_speaker_ids"])
+        out = io.BytesIO()
+        model.save_wav(wavs, out)
+    else:
+
+        sentences = segmenter.segment(text)
 
-    mp_workers = args.mp_workers
-    worker_with_args = partial(worker, speaker_id=speaker_id, model=model, use_aliases=speaker_config_attributes["use_aliases"], new_speaker_ids=speaker_config_attributes["new_speaker_ids"])
+        mp_workers = args.mp_workers
+        worker_with_args = partial(worker, speaker_id=speaker_id, model=model, use_aliases=speaker_config_attributes["use_aliases"], new_speaker_ids=speaker_config_attributes["new_speaker_ids"])
 
-    pool = mp.Pool(processes=mp_workers)
+        pool = mp.Pool(processes=mp_workers)
 
-    results = pool.map(worker_with_args, [sentence.strip() for sentence in sentences if sentence])
+        results = pool.map(worker_with_args, [sentence.strip() for sentence in sentences if sentence])
 
-    # Close the pool to indicate that no more tasks will be submitted
-    pool.close()
-    # Wait for all processes to complete
-    pool.join()
-    merged_wavs = list(chain(*results))
+        # Close the pool to indicate that no more tasks will be submitted
+        pool.close()
+        # Wait for all processes to complete
+        pool.join()
+        merged_wavs = list(chain(*results))
 
-    out = io.BytesIO()
+        out = io.BytesIO()
 
-    model.save_wav(merged_wavs, out)
+        model.save_wav(merged_wavs, out)
 
     return StreamingResponse(out, media_type="audio/wav")
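The new branch keeps synthesis in a single process when CUDA is enabled or multiprocessing is disabled, since CUDA contexts generally cannot be shared across forked pool workers; only the CPU path fans sentences out over an `mp.Pool`. A self-contained sketch of that dispatch pattern, with a dummy worker and naive sentence splitting standing in for the real model and segmenter:

```python
import multiprocessing as mp
from itertools import chain

def worker(sentence):
    # Stand-in for the real TTS worker: returns a list of audio chunks.
    return [f"<wav:{sentence}>"]

def synthesize(text, use_cuda=False, use_mp=True, mp_workers=2):
    # CUDA (or use_mp=False): synthesize the whole text in-process.
    if use_cuda or not use_mp:
        return worker(text)
    # CPU path: split into sentences and fan out over a process pool.
    sentences = [s.strip() for s in text.split(".") if s.strip()]
    with mp.Pool(processes=mp_workers) as pool:
        results = pool.map(worker, sentences)
    return list(chain(*results))

if __name__ == "__main__":
    print(synthesize("Hello there. General Kenobi.", use_cuda=False, use_mp=True))
```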

