-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #489 from classtranscribe/UpdateForWhisper
Update for whisper
- Loading branch information
Showing
25 changed files
with
1,573 additions
and
64 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,5 @@ | ||
{ | ||
"sdk": { | ||
"version": "8.0.201" | ||
"version": "8.0" | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,5 @@ | ||
{ | ||
"sdk": { | ||
"version": "8.0.401" | ||
"version": "8.0" | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
venv/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,126 @@ | ||
import os | ||
import subprocess | ||
import json | ||
from time import perf_counter | ||
from ffmpy import FFmpeg | ||
import utils | ||
|
||
# Whisper command to invoke. Taken from the WHISPER_EXE environment variable,
# falling back to the bare command name 'whisper' when it is not set.
WHISPER_EXECUTABLE = os.getenv('WHISPER_EXE', 'whisper')
# Model file handed to Whisper. Taken from the WHISPER_MODEL environment
# variable, falling back to the relative path 'models/ggml-base.en.bin'.
MODEL = os.getenv('WHISPER_MODEL', 'models/ggml-base.en.bin')
|
||
def convert_video_to_wav(input_filepath, offset=None):
    """
    Extract the audio of a media file into a WAV file using ffmpy.

    The ffmpeg options request 16-bit PCM (pcm_s16le), one channel, at a
    16000 Hz sample rate, starting `offset` seconds into the input.

    Args:
        input_filepath: Path of the media file to read.
        offset: Value passed to ffmpeg's '-ss' input option; None means 0.0.

    Returns:
        A tuple (output_filepath, '.wav'). The output path comes from
        utils.getTmpFile() (presumably a temporary file path - confirm
        against utils).

    Raises:
        Exception: Any error raised while preparing or running the
            conversion is printed and then re-raised unchanged.
    """
    try:
        started = perf_counter()
        offset = 0.0 if offset is None else offset

        thread_count = utils.getMaxThreads()

        print(f"Converting video '{input_filepath}' to WAV with offset {offset} using {thread_count} thread(s).")
        wav_path = utils.getTmpFile()

        # '-y' lets ffmpeg overwrite the output path if it already exists.
        converter = FFmpeg(
            global_options=f"-hide_banner -loglevel error -nostats -threads {thread_count}",
            inputs={input_filepath: f'-ss {offset}'},
            outputs={wav_path: '-c:a pcm_s16le -ac 1 -y -ar 16000 -f wav'},
        )
        print(f"Starting conversion. Audio output will be saved in {wav_path}")
        converter.run()

        elapsed_seconds = int(perf_counter() - started)
        print(f"Conversion complete. Duration: {elapsed_seconds} seconds")
        return wav_path, '.wav'
    except Exception as err:
        # Log for the operator, then let the caller see the original error.
        print("Exception during conversion:" + str(err))
        raise
|
||
def transcribe_audio(media_filepath, testing=False):
    """
    Transcribe a media file with the Whisper executable and return its JSON.

    Args:
        media_filepath: Path of the media file. A path that does not end in
            '.wav' is first converted with convert_video_to_wav(). The
            special value 'TEST-transcribe_example_result' skips
            transcription and returns the content of
            'transcribe_exampleffmp_result.json' instead.
        testing: When true, skip transcription and return the stored result
            at '/PythonRpcServer/transcribe_hellohellohello.wav.json'.

    Returns:
        The object parsed from the JSON file written by Whisper (or from the
        stored test file for the two test shortcuts).

    Raises:
        FileNotFoundError: If the media file does not exist, or Whisper
            exited successfully without producing the expected JSON file.
        Exception: If the Whisper process exits with a non-zero status; the
            message carries the process's stderr.
    """
    if testing:
        json_output_path = "/PythonRpcServer/transcribe_hellohellohello.wav.json"
        with open(json_output_path, 'r', encoding='utf-8') as json_file:
            transcription_result = json.load(json_file)

        # Print the transcription result (testing purpose)
        print("Transcription result:")
        print(json.dumps(transcription_result, indent=4))

        return transcription_result

    if media_filepath == 'TEST-transcribe_example_result':
        result_json_file = 'transcribe_exampleffmp_result.json'
        with open(result_json_file, 'r', encoding='utf-8') as json_file:
            transcription_result = json.load(json_file)
        return transcription_result

    # Ensure the media file exists
    if not os.path.exists(media_filepath):
        raise FileNotFoundError(f"Media file not found: {media_filepath}")

    # Convert to WAV if needed, remembering whether this call created the
    # file so that only files created here are deleted afterwards.
    wav_created = False
    if not media_filepath.endswith('.wav'):
        media_filepath, _ = convert_video_to_wav(media_filepath)
        wav_created = True

    # Path to the output JSON file that Whisper will generate
    json_output_path = f"{media_filepath}.json"

    try:
        # Remove a stale result so the existence check below only ever sees
        # output produced by this run.
        if os.path.exists(json_output_path):
            os.remove(json_output_path)

        whisper_command = [
            WHISPER_EXECUTABLE,    # Path to Whisper executable
            '-ojf',                # Output as JSON file
            '-f', media_filepath,  # Media file path
            '-m', MODEL,
        ]

        print("Running Whisper transcription inside the container...")

        result = subprocess.run(whisper_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)

        if result.returncode != 0:
            # errors='replace' keeps undecodable stderr bytes from raising a
            # UnicodeDecodeError that would hide the real failure.
            stderr_text = result.stderr.decode('utf-8', errors='replace')
            raise Exception(f"Whisper failed with error:\n{stderr_text}")

        print(f"Checking for JSON output at: {json_output_path}")
        if not os.path.exists(json_output_path):
            raise FileNotFoundError(f"Expected JSON output file not found: {json_output_path}")

        # Pin the encoding so parsing does not depend on the process locale
        # (the output is expected to be UTF-8).
        with open(json_output_path, 'r', encoding='utf-8') as json_file:
            transcription_result = json.load(json_file)

        # Print the transcription result (testing purpose)
        print("Transcription result:")
        print(json.dumps(transcription_result, indent=4))

        return transcription_result
    finally:
        # Runs on success and on every failure path, so neither the JSON
        # output nor a WAV created above is left behind. Cleanup is
        # best-effort: a failed delete must not replace the real outcome.
        if os.path.exists(json_output_path):
            try:
                os.remove(json_output_path)
                print(f"Deleted the JSON file: {json_output_path}")
            except OSError as e:
                print(f"Error deleting JSON file: {str(e)}")

        if wav_created:
            try:
                os.remove(media_filepath)
                print(f"Deleted the WAV file: {media_filepath}")
            except Exception as e:
                print(f"Error deleting WAV file: {str(e)}")
|
||
# Example usage
if __name__ == '__main__':
    # Loads an already-stored Whisper JSON result from a fixed absolute path
    # and pretty-prints it; no transcription is run here.
    sample_path = "/PythonRpcServer/transcribe_hellohellohello.wav.json"
    with open(sample_path, 'r') as sample_file:
        sample_result = json.load(sample_file)

    formatted_result = json.dumps(sample_result, indent=4)
    print("Transcription Result:", formatted_result)
|
Oops, something went wrong.