Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update the api #213

Merged
merged 12 commits into from
Aug 14, 2023
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -35,4 +35,4 @@ COPY ./.env /app/.env

WORKDIR /app

CMD ["uvicorn", "--reload", "--host=0.0.0.0", "--port=5001", "wordcab_transcribe.main:app"]
CMD ["uvicorn", "--host=0.0.0.0", "--port=5001", "wordcab_transcribe.main:app"]
1 change: 0 additions & 1 deletion notebooks/async_inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
"diarization": False, # Longer processing time but speaker segment attribution
"source_lang": "en", # optional, default is "en"
"timestamps": "s", # optional, default is "s". Can be "s", "ms" or "hms".
"use_batch": False, # optional, default is False
"internal_vad": False, # optional, default is False
"word_timestamps": True, # optional, default is False
}
Expand Down
5 changes: 2 additions & 3 deletions notebooks/youtube_inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,19 +7,18 @@
# params = {"url": "https://youtu.be/vAvcxeXtBz0"}
# params = {"url": "https://youtu.be/pmjrj_TrOEI"}
# params = {"url": "https://youtu.be/SVwLEocqK0E"} # 2h - 3 speakers
# params = {"url": "https://youtu.be/ry9SYnV3svc"} # eng sample - 2 speakers
params = {"url": "https://youtu.be/ry9SYnV3svc"} # eng sample - 2 speakers
# params = {"url": "https://youtu.be/oAhVu3HvWnw"}
# params = {"url": "https://youtu.be/sfQMxf9Dm8I"}
# params = {"url": "https://youtu.be/uLBZf9eS4Y0"}
params = {"url": "https://youtu.be/JJbtS8CMr80"} # 4h - multiple speakers
# params = {"url": "https://youtu.be/JJbtS8CMr80"} # 4h - multiple speakers

data = {
"alignment": False, # Longer processing time but better timestamps
"num_speakers": -1, # Leave at -1 to guess the number of speakers
"diarization": True, # Longer processing time but speaker segment attribution
"source_lang": "nl", # optional, default is "en"
"timestamps": "s", # optional, default is "s". Can be "s", "ms" or "hms".
"use_batch": False, # optional, default is False
"internal_vad": False, # optional, default is False
"word_timestamps": False, # optional, default is False
}
Expand Down
169 changes: 153 additions & 16 deletions tests/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,30 @@
CortexPayload,
CortexUrlResponse,
CortexYoutubeResponse,
ProcessTimes,
Timestamps,
Utterance,
Word,
YouTubeResponse,
)


def test_process_times() -> None:
    """Check that ProcessTimes exposes the timings it was built with."""
    # Steps passed as None are expected to read back as None.
    timings = {
        "total": 10.0,
        "transcription": 5.0,
        "diarization": None,
        "alignment": None,
        "post_processing": 2.0,
    }
    process_times = ProcessTimes(**timings)

    for field, expected in timings.items():
        actual = getattr(process_times, field)
        if expected is None:
            assert actual is None
        else:
            assert actual == expected


def test_timestamps() -> None:
"""Test the Timestamps enum."""
assert Timestamps.seconds == "s"
Expand Down Expand Up @@ -142,11 +159,14 @@ def test_audio_request() -> None:
assert request.dual_channel is True
assert request.source_lang == "en"
assert request.timestamps == "s"
assert request.use_batch is False
assert request.vocab == []
assert request.word_timestamps is False
assert request.internal_vad is False
assert request.repetition_penalty == 1.2
assert request.compression_ratio_threshold == 2.4
assert request.log_prob_threshold == -1.0
assert request.no_speech_threshold == 0.6
assert request.condition_on_previous_text is True


def test_audio_response() -> None:
Expand All @@ -160,11 +180,19 @@ def test_audio_response() -> None:
dual_channel=False,
source_lang="en",
timestamps="s",
use_batch=False,
vocab=["custom company", "custom product"],
word_timestamps=False,
internal_vad=False,
repetition_penalty=1.2,
compression_ratio_threshold=1.8,
log_prob_threshold=-1.0,
no_speech_threshold=0.4,
condition_on_previous_text=False,
process_times = ProcessTimes(
total=10.0,
transcription=5.0,
post_processing=2.0,
)
)
assert response.utterances == []
assert response.audio_duration == 0.0
Expand All @@ -174,11 +202,19 @@ def test_audio_response() -> None:
assert response.dual_channel is False
assert response.source_lang == "en"
assert response.timestamps == "s"
assert response.use_batch is False
assert response.vocab == ["custom company", "custom product"]
assert response.word_timestamps is False
assert response.internal_vad is False
assert response.repetition_penalty == 1.2
assert response.compression_ratio_threshold == 1.8
assert response.log_prob_threshold == -1.0
assert response.no_speech_threshold == 0.4
assert response.condition_on_previous_text is False
assert response.process_times == ProcessTimes(
total=10.0,
transcription=5.0,
post_processing=2.0
)

response = AudioResponse(
utterances=[
Expand All @@ -204,11 +240,19 @@ def test_audio_response() -> None:
dual_channel=True,
source_lang="en",
timestamps="s",
use_batch=False,
vocab=["custom company", "custom product"],
word_timestamps=True,
internal_vad=False,
repetition_penalty=1.2,
compression_ratio_threshold=1.8,
log_prob_threshold=-1.0,
no_speech_threshold=0.4,
condition_on_previous_text=False,
process_times = ProcessTimes(
total=10.0,
transcription=5.0,
post_processing=2.0,
)
)
assert response.utterances == [
Utterance(
Expand All @@ -233,11 +277,19 @@ def test_audio_response() -> None:
assert response.dual_channel is True
assert response.source_lang == "en"
assert response.timestamps == "s"
assert response.use_batch is False
assert response.vocab == ["custom company", "custom product"]
assert response.word_timestamps is True
assert response.internal_vad is False
assert response.repetition_penalty == 1.2
assert response.compression_ratio_threshold == 1.8
assert response.log_prob_threshold == -1.0
assert response.no_speech_threshold == 0.4
assert response.condition_on_previous_text is False
assert response.process_times == ProcessTimes(
total=10.0,
transcription=5.0,
post_processing=2.0
)


def test_base_request_valid() -> None:
Expand All @@ -261,10 +313,13 @@ def test_base_request_default() -> None:
assert req.diarization is False
assert req.source_lang == "en"
assert req.timestamps == "s"
assert req.use_batch is False
assert req.word_timestamps is False
assert req.internal_vad is False
assert req.repetition_penalty == 1.2
assert req.compression_ratio_threshold == 2.4
assert req.log_prob_threshold == -1.0
assert req.no_speech_threshold == 0.6
assert req.condition_on_previous_text is True


def test_base_request_invalid() -> None:
Expand Down Expand Up @@ -298,11 +353,21 @@ def test_base_response() -> None:
diarization=False,
source_lang="en",
timestamps="s",
use_batch=False,
vocab=["custom company", "custom product"],
word_timestamps=False,
internal_vad=False,
repetition_penalty=1.2,
compression_ratio_threshold=1.8,
log_prob_threshold=-1.0,
no_speech_threshold=0.4,
condition_on_previous_text=False,
process_times = ProcessTimes(
total=10.0,
transcription=5.0,
diarization=2.0,
alignment=2.0,
post_processing=1.0,
)
)
assert response.utterances == [
Utterance(
Expand All @@ -326,11 +391,21 @@ def test_base_response() -> None:
assert response.diarization is False
assert response.source_lang == "en"
assert response.timestamps == "s"
assert response.use_batch is False
assert response.vocab == ["custom company", "custom product"]
assert response.word_timestamps is False
assert response.internal_vad is False
assert response.repetition_penalty == 1.2
assert response.compression_ratio_threshold == 1.8
assert response.log_prob_threshold == -1.0
assert response.no_speech_threshold == 0.4
assert response.condition_on_previous_text is False
assert response.process_times == ProcessTimes(
total=10.0,
transcription=5.0,
diarization=2.0,
alignment=2.0,
post_processing=1.0,
)


def test_cortex_error() -> None:
Expand All @@ -353,7 +428,6 @@ def test_cortex_payload() -> None:
dual_channel=False,
source_lang="en",
timestamps="s",
use_batch=False,
word_timestamps=False,
internal_vad=False,
repetition_penalty=1.2,
Expand All @@ -369,11 +443,14 @@ def test_cortex_payload() -> None:
assert payload.dual_channel is False
assert payload.source_lang == "en"
assert payload.timestamps == "s"
assert payload.use_batch is False
assert payload.vocab == []
assert payload.word_timestamps is False
assert payload.internal_vad is False
assert payload.repetition_penalty == 1.2
assert payload.compression_ratio_threshold == 2.4
assert payload.log_prob_threshold == -1.0
assert payload.no_speech_threshold == 0.6
assert payload.condition_on_previous_text is True
assert payload.job_name == "test_job"
assert payload.ping is False

Expand Down Expand Up @@ -403,11 +480,21 @@ def test_cortex_url_response() -> None:
diarization=False,
source_lang="en",
timestamps="s",
use_batch=False,
vocab=["custom company", "custom product"],
word_timestamps=False,
internal_vad=False,
repetition_penalty=1.2,
compression_ratio_threshold=1.8,
log_prob_threshold=-1.0,
no_speech_threshold=0.4,
condition_on_previous_text=False,
process_times = ProcessTimes(
total=10.0,
transcription=5.0,
diarization=2.0,
alignment=2.0,
post_processing=1.0,
),
dual_channel=False,
job_name="test_job",
request_id="test_request_id",
Expand All @@ -434,11 +521,21 @@ def test_cortex_url_response() -> None:
assert response.diarization is False
assert response.source_lang == "en"
assert response.timestamps == "s"
assert response.use_batch is False
assert response.vocab == ["custom company", "custom product"]
assert response.word_timestamps is False
assert response.internal_vad is False
assert response.repetition_penalty == 1.2
assert response.compression_ratio_threshold == 1.8
assert response.log_prob_threshold == -1.0
assert response.no_speech_threshold == 0.4
assert response.condition_on_previous_text is False
assert response.process_times == ProcessTimes(
total=10.0,
transcription=5.0,
diarization=2.0,
alignment=2.0,
post_processing=1.0,
)
assert response.dual_channel is False
assert response.job_name == "test_job"
assert response.request_id == "test_request_id"
Expand Down Expand Up @@ -469,11 +566,21 @@ def test_cortex_youtube_response() -> None:
diarization=False,
source_lang="en",
timestamps="s",
use_batch=False,
vocab=["custom company", "custom product"],
word_timestamps=False,
internal_vad=False,
repetition_penalty=1.2,
compression_ratio_threshold=1.8,
log_prob_threshold=-1.0,
no_speech_threshold=0.4,
condition_on_previous_text=False,
process_times = ProcessTimes(
total=10.0,
transcription=5.0,
diarization=2.0,
alignment=2.0,
post_processing=1.0,
),
video_url="https://www.youtube.com/watch?v=dQw4w9WgXcQ",
job_name="test_job",
request_id="test_request_id",
Expand All @@ -500,11 +607,21 @@ def test_cortex_youtube_response() -> None:
assert response.diarization is False
assert response.source_lang == "en"
assert response.timestamps == "s"
assert response.use_batch is False
assert response.vocab == ["custom company", "custom product"]
assert response.word_timestamps is False
assert response.internal_vad is False
assert response.repetition_penalty == 1.2
assert response.compression_ratio_threshold == 1.8
assert response.log_prob_threshold == -1.0
assert response.no_speech_threshold == 0.4
assert response.condition_on_previous_text is False
assert response.process_times == ProcessTimes(
total=10.0,
transcription=5.0,
diarization=2.0,
alignment=2.0,
post_processing=1.0,
)
assert response.video_url == "https://www.youtube.com/watch?v=dQw4w9WgXcQ"
assert response.job_name == "test_job"
assert response.request_id == "test_request_id"
Expand Down Expand Up @@ -535,11 +652,21 @@ def test_youtube_response() -> None:
diarization=False,
source_lang="en",
timestamps="s",
use_batch=False,
vocab=["custom company", "custom product"],
word_timestamps=False,
internal_vad=False,
repetition_penalty=1.2,
compression_ratio_threshold=1.8,
log_prob_threshold=-1.0,
no_speech_threshold=0.4,
condition_on_previous_text=False,
process_times = ProcessTimes(
total=10.0,
transcription=5.0,
diarization=2.0,
alignment=2.0,
post_processing=1.0,
),
video_url="https://www.youtube.com/watch?v=dQw4w9WgXcQ",
)
assert response.utterances == [
Expand All @@ -564,9 +691,19 @@ def test_youtube_response() -> None:
assert response.diarization is False
assert response.source_lang == "en"
assert response.timestamps == "s"
assert response.use_batch is False
assert response.vocab == ["custom company", "custom product"]
assert response.word_timestamps is False
assert response.internal_vad is False
assert response.repetition_penalty == 1.2
assert response.compression_ratio_threshold == 1.8
assert response.log_prob_threshold == -1.0
assert response.no_speech_threshold == 0.4
assert response.condition_on_previous_text is False
assert response.process_times == ProcessTimes(
total=10.0,
transcription=5.0,
diarization=2.0,
alignment=2.0,
post_processing=1.0,
)
assert response.video_url == "https://www.youtube.com/watch?v=dQw4w9WgXcQ"
Loading