Various README/.env updates for locally running models #301

Merged
9 changes: 7 additions & 2 deletions .env
@@ -30,8 +30,13 @@ DEBUG=True
# Local models:
# You can also link a local folder path to use a custom model. If you do so, you should also mount the folder in the
# docker run command as a volume, or include the model directory in your Dockerfile to bake it into the image.
# e.g. WHISPER_MODEL="/app/models/custom"
# docker cmd: -v /path/to/custom/model:/app/models/custom
# Note that for the default tensorrt-llm whisper engine, the simplest way to get a converted model is to use
# hatch to start the server locally once. Specify the WHISPER_MODEL and ALIGN_MODEL here, then run
# "hatch run runtime:launch" in your terminal. This will download and convert these models.
# You'll then find the converted models in cloned_wordcab_transcribe_repo/src/wordcab_transcribe/whisper_models
# Then in your Dockerfile, copy the converted models to the /app/src/wordcab_transcribe/whisper_models folder.
# Example for WHISPER_MODEL: COPY cloned_wordcab_transcribe_repo/src/wordcab_transcribe/whisper_models/large-v3 /app/src/wordcab_transcribe/whisper_models/large-v3
# Example for ALIGN_MODEL: COPY cloned_wordcab_transcribe_repo/src/wordcab_transcribe/whisper_models/tiny /app/src/wordcab_transcribe/whisper_models/tiny
WHISPER_MODEL="large-v3"
# You can specify one of two engines, "faster-whisper" or "tensorrt-llm". At the moment, "faster-whisper" is more
# stable, adjustable, and accurate, while "tensorrt-llm" is faster but less accurate and less adjustable.
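The local-path behavior described in these comments can be sketched as a small resolver. This is a hypothetical helper (the function name and default directory are assumptions based on the comments above, not the engine's actual code):

```python
import os

# Default conversion directory mentioned in the comments above (an assumption
# for this sketch).
DEFAULT_MODELS_DIR = "/app/src/wordcab_transcribe/whisper_models"


def resolve_whisper_model(value: str, models_dir: str = DEFAULT_MODELS_DIR) -> str:
    """Treat a value containing '/' as a local model folder; map a bare
    name like 'large-v3' into the default conversion directory."""
    if "/" in value:
        # e.g. WHISPER_MODEL="/app/models/custom", mounted via `docker run -v`
        return value
    return os.path.join(models_dir, value)
```

For example, `resolve_whisper_model("large-v3")` would yield `/app/src/wordcab_transcribe/whisper_models/large-v3`.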
20 changes: 10 additions & 10 deletions README.md
@@ -227,18 +227,18 @@ with open("youtube_video_output.json", "w", encoding="utf-8") as f:

## Running Local Models

-To run the API with local models, you need to mount a volume to the container or
-include the models in the image. You then need to modify the `.env` file to point to the local model,
-as shown below:
+You can link a local folder path to use a custom model. If you do so, you should mount the folder in the
+docker run command as a volume, or include the model directory in your Dockerfile to bake it into the image.

-```
-WHISPER_MODEL="/app/models/custom"
-```
+**Note** that for the default `tensorrt-llm` whisper engine, the simplest way to get a converted model is to use
+`hatch` to start the server locally once. Specify the `WHISPER_MODEL` and `ALIGN_MODEL` in `.env`, then run
+`hatch run runtime:launch` in your terminal. This will download and convert these models.

+You'll then find the converted models in `cloned_wordcab_transcribe_repo/src/wordcab_transcribe/whisper_models`.
+Then in your Dockerfile, copy the converted models to the `/app/src/wordcab_transcribe/whisper_models` directory.

-Note that if you're using the `tensorrt_llm` whisper engine, and these are not located in the
-container, the default directory these models will be saved to is `/app/src/wordcab_transcribe/whisper_models`.
-If you're saving/mounting models to this directory, be sure to see the supported models in the `.env` file,
-so your self-hosted model does not conflict with the default model names.
+Example Dockerfile line for `WHISPER_MODEL`: `COPY cloned_wordcab_transcribe_repo/src/wordcab_transcribe/whisper_models/large-v3 /app/src/wordcab_transcribe/whisper_models/large-v3`
+Example Dockerfile line for `ALIGN_MODEL`: `COPY cloned_wordcab_transcribe_repo/src/wordcab_transcribe/whisper_models/tiny /app/src/wordcab_transcribe/whisper_models/tiny`
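The workflow above (convert locally, then `COPY` into the image) can be sanity-checked with a short script that verifies each converted model folder exists in the cloned repo and emits the matching `COPY` line. This is a sketch with a hypothetical helper name, and it assumes the Docker build context is the repo root:

```python
from pathlib import Path


def dockerfile_copy_lines(repo_root: str, model_names: list[str]) -> list[str]:
    """Emit one Dockerfile COPY instruction per converted model folder,
    failing early if a folder is missing from the cloned repo."""
    dst_base = "/app/src/wordcab_transcribe/whisper_models"
    lines = []
    for name in model_names:
        # Path relative to the build context, mirroring the examples above.
        rel = Path("src") / "wordcab_transcribe" / "whisper_models" / name
        if not (Path(repo_root) / rel).is_dir():
            raise FileNotFoundError(f"Converted model not found: {rel}")
        lines.append(f"COPY {rel.as_posix()} {dst_base}/{name}")
    return lines
```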

## 🚀 Contributing

2 changes: 1 addition & 1 deletion pyproject.toml
@@ -111,7 +111,7 @@ features = [
]

[tool.hatch.envs.runtime.scripts]
-launch = "uvicorn --host='::' --port=5001 src.wordcab_transcribe.main:app"
+launch = "uvicorn --host=0.0.0.0 --port=5001 src.wordcab_transcribe.main:app"
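The host change matters for local runs: `'::'` is the IPv6 wildcard address, which can fail to bind (or be unreachable over IPv4) in environments without IPv6 support, such as many default Docker networks, while `0.0.0.0` listens on all IPv4 interfaces. A minimal illustration of what the new flag binds:

```python
import socket

# Bind the IPv4 wildcard, as `--host=0.0.0.0` does; port 0 asks the OS
# for a free ephemeral port.
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
sock.bind(("0.0.0.0", 0))
host, port = sock.getsockname()
sock.close()
```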

[tool.hatch.envs.quality]
features = [
36 changes: 20 additions & 16 deletions src/wordcab_transcribe/config.py
@@ -93,22 +93,26 @@ def project_name_must_not_be_none(cls, value: str):  # noqa: B902, N805
    @field_validator("whisper_model")
    def whisper_model_compatibility_check(cls, value: str):  # noqa: B902, N805
        """Check that the whisper engine is compatible."""
-        if value.lower() not in [
-            "tiny",
-            "tiny.en",
-            "base",
-            "base.en",
-            "small",
-            "small.en",
-            "medium",
-            "medium.en",
-            "large",
-            "large-v1",
-            "large-v2",
-            "large-v3",
-            "distil-large-v2",
-            "distil-large-v3",
-        ]:
+        if (
+            value.lower()
+            not in [
+                "tiny",
+                "tiny.en",
+                "base",
+                "base.en",
+                "small",
+                "small.en",
+                "medium",
+                "medium.en",
+                "large",
+                "large-v1",
+                "large-v2",
+                "large-v3",
+                "distil-large-v2",
+                "distil-large-v3",
+            ]
+            and "/" not in value
+        ):
            raise ValueError(
                "The whisper models must be one of `tiny`, `tiny.en`, `base`,"
                " `base.en`, `small`, `small.en`, `medium`, `medium.en`, `large`,"
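The rewritten condition accepts either a known model name or any value containing a slash (i.e. a local path). The rule can be sketched as a standalone predicate (a simplification of the validator, with a hypothetical function name):

```python
SUPPORTED_WHISPER_MODELS = {
    "tiny", "tiny.en", "base", "base.en", "small", "small.en",
    "medium", "medium.en", "large", "large-v1", "large-v2", "large-v3",
    "distil-large-v2", "distil-large-v3",
}


def is_valid_whisper_model(value: str) -> bool:
    """Mirror the validator: a bare name must be in the supported set,
    while anything containing '/' is treated as a local model path."""
    return value.lower() in SUPPORTED_WHISPER_MODELS or "/" in value
```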