Various README/.env updates, support for IPV6 #299

Merged 1 commit on Apr 4, 2024
.env (7 additions, 7 deletions)
@@ -23,13 +23,13 @@ DEBUG=True
 # The whisper_model parameter is used to control the model used for ASR.
 #
 # Cloud models:
-# The available models are: tiny, tiny.en, base, base.en, small, small.en, medium, medium.en, large-v1, or large-v2
-# You can try different model size, but you should see a trade-off between performance and speed. Note that the
-# "distil" whisper models do not support languages other than English.
+# The available models are: tiny, tiny.en, base, base.en, small, small.en, medium, medium.en, large-v1, large-v2
+# large-v3, distil-large-v2, and distil-large-v3. Note that the distil models only support "en" as a source_lang.
+# You can try different model size, but you should see a trade-off between performance and speed.
 #
 # Local models:
 # You can also link a local folder path to use a custom model. If you do so, you should also mount the folder in the
-# docker run command as a volume.
+# docker run command as a volume, or include the model directory in your Dockerfile to bake it into the image.
 # e.g. WHISPER_MODEL="/app/models/custom"
 # docker cmd: -v /path/to/custom/model:/app/models/custom
 WHISPER_MODEL="large-v3"
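For reference, the same WHISPER_MODEL value can cover both the cloud and the local case because faster-whisper's WhisperModel accepts either a model size name or a local directory path. A minimal sketch, assuming the faster-whisper API and a CUDA device; the project's actual loading code may differ:

from os import getenv

from faster_whisper import WhisperModel

# e.g. WHISPER_MODEL="large-v3" (downloaded on first use) or
# WHISPER_MODEL="/app/models/custom" (a directory mounted into the container
# or baked into the image).
whisper_model = getenv("WHISPER_MODEL", "large-v3")

# device and compute_type here are illustrative defaults, not the project's settings.
model = WhisperModel(whisper_model, device="cuda", compute_type="float16")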
@@ -53,9 +53,9 @@ TOKENIZERS_PARALLELISM=False
 # --------------------------------------------------- DIARIZATION ---------------------------------------------------- #
 #
 # The diarization_backend parameter is used to control the diarization model used. The available options are:
-# "longform_diarizer" or "default_diarizer". It's suggested to use "default_diarizer" for better stability.
-# The "longform_diarizer" is still being developed.
-DIARIZATION_BACKEND="default_diarizer"
+# "longform-diarizer" or "default-diarizer". It's suggested to use "default-diarizer" for better stability.
+# The "longform-diarizer" is still being developed.
+DIARIZATION_BACKEND="default-diarizer"
 # In a MSDD (Multiscale Diarization Decoder) model, the diarization model is trained on multiple window lengths.
 # The window_lengths are specified in seconds, and separated by a comma. If not specified, the default value will
 # be "1.5, 1.25, 1.0, 0.75, 0.5".
pyproject.toml (1 addition, 1 deletion)
@@ -111,7 +111,7 @@ features = [
 ]

 [tool.hatch.envs.runtime.scripts]
-launch = "uvicorn --host=0.0.0.0 --port=5001 src.wordcab_transcribe.main:app"
+launch = "uvicorn --host='::' --port=5001 src.wordcab_transcribe.main:app"

 [tool.hatch.envs.quality]
 features = [
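The launch script now binds uvicorn to "::" (the IPv6 wildcard) instead of the IPv4-only 0.0.0.0; on hosts where dual-stack sockets are enabled (the Linux default), such a socket also accepts IPv4 clients via IPv4-mapped addresses. A programmatic equivalent, sketched with uvicorn's Python API:

import uvicorn

if __name__ == "__main__":
    # host="::" listens on the IPv6 wildcard address; with dual-stack sockets
    # this serves IPv4 clients as well, whereas host="0.0.0.0" is IPv4-only.
    uvicorn.run("src.wordcab_transcribe.main:app", host="::", port=5001)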
src/wordcab_transcribe/config.py (2 additions, 2 deletions)
@@ -142,7 +142,7 @@ def align_model_compatibility_check(cls, value: str): # noqa: B902, N805
     @field_validator("diarization_backend")
     def diarization_backend_compatibility_check(cls, value: str): # noqa: B902, N805
         """Check that the diarization engine is compatible."""
-        if value.lower() not in ["default_diarizer", "longform_diarizer"]:
+        if value.lower() not in ["default-diarizer", "longform-diarizer"]:
             raise ValueError(
                 "The diarization backend must be one of `default_diarizer` or"
                 " `longform_diarizer`."
@@ -323,7 +323,7 @@ def __post_init__(self):
     extra_languages=extra_languages,
     extra_languages_model_paths=extra_languages_model_paths,
     # Diarization
-    diarization_backend=getenv("DIARIZATION_BACKEND", "longform_diarizer"),
+    diarization_backend=getenv("DIARIZATION_BACKEND", "longform-diarizer"),
     window_lengths=window_lengths,
     shift_lengths=shift_lengths,
     multiscale_weights=multiscale_weights,
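For context, here is a standalone sketch of the backend validation touched above, assuming pydantic v2; the real Settings class in config.py carries many more fields and validators:

from pydantic import BaseModel, field_validator


class DiarizationSettings(BaseModel):
    """Illustrative stand-in for the project's much larger Settings class."""

    diarization_backend: str = "default-diarizer"

    @field_validator("diarization_backend")
    def diarization_backend_compatibility_check(cls, value: str):  # noqa: B902, N805
        """Check that the diarization backend is one of the supported values."""
        if value.lower() not in ["default-diarizer", "longform-diarizer"]:
            raise ValueError(
                "The diarization backend must be one of `default-diarizer` or"
                " `longform-diarizer`."
            )
        return value


DiarizationSettings(diarization_backend="longform-diarizer")    # passes validation
# DiarizationSettings(diarization_backend="longform_diarizer") would raise ValidationError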
src/wordcab_transcribe/services/asr_service.py (3 additions, 3 deletions)
@@ -335,7 +335,7 @@ def create_transcription_local_service(self) -> None:

     def create_diarization_local_service(self) -> None:
         """Create a local diarization service."""
-        if settings.diarization_backend == "longform_diarizer":
+        if settings.diarization_backend == "longform-diarizer":
             self.local_services.diarization = LongFormDiarizeService(
                 device=self.device,
             )
@@ -653,7 +653,7 @@ async def process_diarization(self, task: ASRTask, debug_mode: bool) -> None:
         """
         try:
             if isinstance(task.diarization.execution, LocalExecution):
-                if settings.diarization_backend == "longform_diarizer":
+                if settings.diarization_backend == "longform-diarizer":
                     out = await time_and_tell_async(
                         lambda: self.local_services.diarization(
                             waveform=task.audio,
@@ -1101,7 +1101,7 @@ def __init__(
         """Initialize the ASRDiarizationOnly class."""
         super().__init__()

-        if settings.diarization_backend == "longform_diarizer":
+        if settings.diarization_backend == "longform-diarizer":
             self.diarization_service = LongFormDiarizeService(
                 device=self.device,
             )
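All three hunks in this file gate on the same setting, so the change boils down to a small dispatch on the backend string. A self-contained sketch of that dispatch; both classes below are trivial placeholders, since neither LongFormDiarizeService's real constructor nor the class behind the default backend is shown in this diff:

class LongFormDiarizeService:
    """Placeholder for the project's long-form diarization service."""

    def __init__(self, device: str) -> None:
        self.device = device


class DefaultDiarizeService:
    """Hypothetical stand-in for the 'default-diarizer' backend."""

    def __init__(self, device: str) -> None:
        self.device = device


def create_diarization_service(backend: str, device: str):
    """Pick the local diarization service from the DIARIZATION_BACKEND value."""
    if backend == "longform-diarizer":
        return LongFormDiarizeService(device=device)
    return DefaultDiarizeService(device=device)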