diff --git a/ClassTranscribeDatabase/CommonUtils.cs b/ClassTranscribeDatabase/CommonUtils.cs index 1e178e23..4713fd64 100644 --- a/ClassTranscribeDatabase/CommonUtils.cs +++ b/ClassTranscribeDatabase/CommonUtils.cs @@ -23,7 +23,7 @@ public enum TaskType DownloadPlaylistInfo = 3, DownloadMedia = 4, ConvertMedia = 5, - TranscribeVideo = 6, + // TranscribeVideo = 6, ProcessVideo = 7, Aggregator = 8, GenerateVTTFile = 9, @@ -39,7 +39,9 @@ public enum TaskType PythonCrawler = 19, DescribeVideo = 20, - DescribeImage = 21 + DescribeImage = 21, + AzureTranscribeVideo = 22, + LocalTranscribeVideo = 23 } diff --git a/ClassTranscribeDatabase/global.json b/ClassTranscribeDatabase/global.json index 4100a4a8..215288b9 100644 --- a/ClassTranscribeDatabase/global.json +++ b/ClassTranscribeDatabase/global.json @@ -1,5 +1,5 @@ { "sdk": { - "version": "8.0.201" + "version": "8.0" } } \ No newline at end of file diff --git a/ClassTranscribeServer/Controllers/PlaylistsController.cs b/ClassTranscribeServer/Controllers/PlaylistsController.cs index 118ad68c..e8083935 100644 --- a/ClassTranscribeServer/Controllers/PlaylistsController.cs +++ b/ClassTranscribeServer/Controllers/PlaylistsController.cs @@ -170,7 +170,7 @@ public async Task>> GetPlaylists2(string o JsonMetadata = m.JsonMetadata, CreatedAt = m.CreatedAt, SceneDetectReady = m.Video.HasSceneObjectData(), - Ready = m.Video != null && "NoError" == m.Video.TranscriptionStatus , + Ready = m.Video != null && Video.TranscriptionStatusMessages.NOERROR == m.Video.TranscriptionStatus , SourceType = m.SourceType, Duration = m.Video?.Duration, PublishStatus = m.PublishStatus, @@ -265,7 +265,7 @@ public async Task> GetPlaylist(string id) PublishStatus = m.PublishStatus, Options = m.GetOptionsAsJson(), SceneDetectReady = m.Video != null && m.Video.HasSceneObjectData(), - Ready = m.Video != null && "NoError" == m.Video.TranscriptionStatus , + Ready = m.Video != null && Video.TranscriptionStatusMessages.NOERROR == m.Video.TranscriptionStatus , 
Video = m.Video == null ? null : new VideoDTO { Id = m.Video.Id, diff --git a/ClassTranscribeServer/Utils/WakeDownloader.cs b/ClassTranscribeServer/Utils/WakeDownloader.cs index b2c939a7..22f6479a 100644 --- a/ClassTranscribeServer/Utils/WakeDownloader.cs +++ b/ClassTranscribeServer/Utils/WakeDownloader.cs @@ -104,7 +104,7 @@ public void TranscribeVideo(string videoOrMediaId, bool deleteExisting) { JObject msg = new JObject { - { "Type", TaskType.TranscribeVideo.ToString() }, + { "Type", TaskType.LocalTranscribeVideo.ToString() }, { "videoOrMediaId", videoOrMediaId }, { "DeleteExisting", deleteExisting } }; diff --git a/ClassTranscribeServer/global.json b/ClassTranscribeServer/global.json index a679dd12..215288b9 100644 --- a/ClassTranscribeServer/global.json +++ b/ClassTranscribeServer/global.json @@ -1,5 +1,5 @@ { "sdk": { - "version": "8.0.401" + "version": "8.0" } } \ No newline at end of file diff --git a/PythonRpcServer/.gitignore b/PythonRpcServer/.gitignore new file mode 100644 index 00000000..f7275bbb --- /dev/null +++ b/PythonRpcServer/.gitignore @@ -0,0 +1 @@ +venv/ diff --git a/PythonRpcServer/requirements.txt b/PythonRpcServer/requirements.txt index a95812ba..8e8d7258 100644 --- a/PythonRpcServer/requirements.txt +++ b/PythonRpcServer/requirements.txt @@ -32,8 +32,9 @@ wcwidth==0.2.13 # Not versioned numpy -pytube # if not available, use the tar.gz package (see Dockerfile) - +# No longer maintained pytube # if not available, use the tar.gz package (see Dockerfile) +yt-dlp +#Always get latest # protobuf version 3.18.3 causes NotImplementedError("To be implemented") in PythonRpcServer/mediaprovider.py # Likely need to coordinate updating the C# version too diff --git a/PythonRpcServer/server.py b/PythonRpcServer/server.py index 943c2c5c..c568d32c 100644 --- a/PythonRpcServer/server.py +++ b/PythonRpcServer/server.py @@ -12,6 +12,9 @@ from echo import EchoProvider from kaltura import KalturaProvider from mediaprovider import InvalidPlaylistInfoException 
+from transcribe import transcribe_audio + +import json import hasher import ffmpeg # import phrasehinter @@ -41,6 +44,18 @@ def LogWorker(logId, worker): class PythonServerServicer(ct_pb2_grpc.PythonServerServicer): + # Transcribe it into a json string from the transcribe text + # Make it returns a json string + # change name to TranscribeRPC + # def CaptionRPC(self, request, context): + # #See CaptionRequest + # print( f"CaptionRPC({request.logId};{request.refId};{request.filePath};{request.phraseHints};{request.courseHints};{request.outputLanguages})") + # kalturaprovider = KalturaProvider() + # result = LogWorker(f"CaptionRPC({request.filePath})", lambda: kalturaprovider.getCaptions(request.refId)) + # return ct_pb2.JsonString(json = result) + + + def GetScenesRPC(self, request, context): raise NotImplementedError('Implementation now in pyapi') # res = scenedetector.find_scenes(request.filePath) @@ -113,6 +128,23 @@ def ComputeFileHash(self, request, context): def GetMediaInfoRPC(self, request, context): result = LogWorker(f"GetMediaInfo({request.filePath})", lambda: ffmpeg.getMediaInfo(request.filePath)) return ct_pb2.JsonString(json = result) + + + def TranscribeAudioRPC(self, request, context): + print(f"TranscribeAudioRPC({request.logId};{request.filePath})") + try: + logging.info(f"Starting transcription for file: {request.filePath}") + transcription_result = LogWorker( + f"TranscribeAudioRPC({request.filePath})", + lambda: transcribe_audio(request.filePath, request.testing) + ) + logging.info(f"Transcription completed successfully for: {request.filePath}") + return ct_pb2.JsonString(json=json.dumps(transcription_result)) + + except Exception as e: + context.set_code(grpc.StatusCode.INTERNAL) + context.set_details(f"Transcription failed: {str(e)}") + return ct_pb2.JsonString(json=json.dumps({"error": str(e)})) def serve(): print("Python RPC Server Starting") @@ -120,7 +152,7 @@ def serve(): # Until we can ensure no timeouts on remote services, the default 
here is set to a conservative low number # This is to ensure we can still make progress even if every python tasks tries to use all cpu cores. max_workers=int(os.getenv('NUM_PYTHON_WORKERS', 3)) - print(f"max_workers={max_workers}") + print(f"max_workers={max_workers}. Starting up grpc server...") server = grpc.server(futures.ThreadPoolExecutor(max_workers=max_workers)) diff --git a/PythonRpcServer/transcribe.py b/PythonRpcServer/transcribe.py new file mode 100644 index 00000000..71344698 --- /dev/null +++ b/PythonRpcServer/transcribe.py @@ -0,0 +1,126 @@ +import os +import subprocess +import json +from time import perf_counter +from ffmpy import FFmpeg +import utils + +# Path to the Whisper executable inside the container +WHISPER_EXECUTABLE = os.environ.get('WHISPER_EXE','whisper') # Executable 'main' is assumed to be in the same directory as this script +MODEL = os.environ.get('WHISPER_MODEL','models/ggml-base.en.bin') + +def convert_video_to_wav(input_filepath, offset=None): + """ + Converts a video file to WAV format using ffmpy. + """ + try: + start_time = perf_counter() + if offset is None: + offset = 0.0 + + nthreads = utils.getMaxThreads() + + print(f"Converting video '{input_filepath}' to WAV with offset {offset} using {nthreads} thread(s).") + output_filepath = utils.getTmpFile() + ext = '.wav' + + ff = FFmpeg( + global_options=f"-hide_banner -loglevel error -nostats -threads {nthreads}", + inputs={input_filepath: f'-ss {offset}'}, + outputs={output_filepath: '-c:a pcm_s16le -ac 1 -y -ar 16000 -f wav'} + ) + print(f"Starting conversion. Audio output will be saved in {output_filepath}") + ff.run() + end_time = perf_counter() + print(f"Conversion complete. 
Duration: {int(end_time - start_time)} seconds") + return output_filepath, ext + except Exception as e: + print("Exception during conversion:" + str(e)) + raise e + +def transcribe_audio(media_filepath, testing=False): + if testing: + json_output_path = f"/PythonRpcServer/transcribe_hellohellohello.wav.json" + with open(json_output_path, 'r') as json_file: + transcription_result = json.load(json_file) + + # Print the transcription result (testing purpose) + print("Transcription result:") + print(json.dumps(transcription_result, indent=4)) + + return transcription_result + + if media_filepath == 'TEST-transcribe_example_result': + result_json_file = 'transcribe_exampleffmp_result.json' + with open(result_json_file, 'r') as json_file: + transcription_result = json.load(json_file) + return transcription_result + + # Ensure the media file exists + if not os.path.exists(media_filepath): + raise FileNotFoundError(f"Media file not found: {media_filepath}") + + # convert video to wav if needed + wav_created = False # Track if WAV was created + if not media_filepath.endswith('.wav'): + media_filepath, _ = convert_video_to_wav(media_filepath) + wav_created = True # WAV file was created + + + # Path to the output JSON file that Whisper will generate + json_output_path = f"{media_filepath}.json" + if os.path.exists(json_output_path): + os.remove(json_output_path) + + # Command to run Whisper.cpp inside the container using the main executable + whisper_command = [ + WHISPER_EXECUTABLE, # Path to Whisper executable + '-ojf', # Output as JSON file + '-f', media_filepath, # Media file path + '-m', MODEL + ] + + print("Running Whisper transcription inside the container...") + + # Execute the Whisper command + result = subprocess.run(whisper_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + + # Handle command failure + if result.returncode != 0: + raise Exception(f"Whisper failed with error:\n{result.stderr.decode('utf-8')}") + + # Check if the output JSON file was 
generated + print(f"Checking for JSON output at: {json_output_path}") + if not os.path.exists(json_output_path): + raise FileNotFoundError(f"Expected JSON output file not found: {json_output_path}") + + # Load the JSON transcription result + with open(json_output_path, 'r') as json_file: + transcription_result = json.load(json_file) + + # Print the transcription result (testing purpose) + print("Transcription result:") + print(json.dumps(transcription_result, indent=4)) + + # Delete the JSON file after reading it + os.remove(json_output_path) + print(f"Deleted the JSON file: {json_output_path}") + + if wav_created: + try: + os.remove(media_filepath) + print(f"Deleted the WAV file: {media_filepath}") + except Exception as e: + print(f"Error deleting WAV file: {str(e)}") + + return transcription_result + +# Example usage +if __name__ == '__main__': + # Example media file path inside the container (the actual path will depend on where the file is located) + json_output_path = f"/PythonRpcServer/transcribe_hellohellohello.wav.json" + with open(json_output_path, 'r') as json_file: + transcription_result = json.load(json_file) + + print("Transcription Result:", json.dumps(transcription_result, indent=4)) + \ No newline at end of file diff --git a/PythonRpcServer/transcribe_example_result.json b/PythonRpcServer/transcribe_example_result.json new file mode 100644 index 00000000..1726e02d --- /dev/null +++ b/PythonRpcServer/transcribe_example_result.json @@ -0,0 +1,1077 @@ +{ + "systeminfo": "AVX = 0 | AVX2 = 0 | AVX512 = 0 | FMA = 0 | NEON = 1 | ARM_FMA = 1 | METAL = 1 | F16C = 0 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 0 | SSSE3 = 0 | VSX = 0 | CUDA = 0 | COREML = 0 | OPENVINO = 0 | CANN = 0", + "model": { + "type": "base", + "multilingual": false, + "vocab": 51864, + "audio": { + "ctx": 1500, + "state": 512, + "head": 8, + "layer": 6 + }, + "text": { + "ctx": 448, + "state": 512, + "head": 8, + "layer": 6 + }, + "mels": 80, + "ftype": 1 + }, + "params": { + 
"model": "models/ggml-base.en.bin", + "language": "en", + "translate": false + }, + "result": { + "language": "en" + }, + "transcription": [ + { + "timestamps": { + "from": "00:00:00.000", + "to": "00:00:07.320" + }, + "offsets": { + "from": 0, + "to": 7320 + }, + "text": " Reading homeworks are due early Tuesday at 9pm unless announced otherwise.", + "tokens": [ + { + "text": "[_BEG_]", + "timestamps": { + "from": "00:00:00.000", + "to": "00:00:00.000" + }, + "offsets": { + "from": 0, + "to": 0 + }, + "id": 50363, + "p": 0.848947, + "t_dtw": -1 + }, + { + "text": " Reading", + "timestamps": { + "from": "00:00:00.000", + "to": "00:00:00.750" + }, + "offsets": { + "from": 0, + "to": 750 + }, + "id": 11725, + "p": 0.407652, + "t_dtw": -1 + }, + { + "text": " hom", + "timestamps": { + "from": "00:00:00.750", + "to": "00:00:01.070" + }, + "offsets": { + "from": 750, + "to": 1070 + }, + "id": 3488, + "p": 0.254302, + "t_dtw": -1 + }, + { + "text": "eworks", + "timestamps": { + "from": "00:00:01.080", + "to": "00:00:01.710" + }, + "offsets": { + "from": 1080, + "to": 1710 + }, + "id": 19653, + "p": 0.981512, + "t_dtw": -1 + }, + { + "text": " are", + "timestamps": { + "from": "00:00:01.710", + "to": "00:00:01.970" + }, + "offsets": { + "from": 1710, + "to": 1970 + }, + "id": 389, + "p": 0.708301, + "t_dtw": -1 + }, + { + "text": " due", + "timestamps": { + "from": "00:00:02.060", + "to": "00:00:02.350" + }, + "offsets": { + "from": 2060, + "to": 2350 + }, + "id": 2233, + "p": 0.54323, + "t_dtw": -1 + }, + { + "text": " early", + "timestamps": { + "from": "00:00:02.350", + "to": "00:00:02.880" + }, + "offsets": { + "from": 2350, + "to": 2880 + }, + "id": 1903, + "p": 0.315304, + "t_dtw": -1 + }, + { + "text": " Tuesday", + "timestamps": { + "from": "00:00:02.880", + "to": "00:00:03.630" + }, + "offsets": { + "from": 2880, + "to": 3630 + }, + "id": 3431, + "p": 0.614094, + "t_dtw": -1 + }, + { + "text": " at", + "timestamps": { + "from": "00:00:03.630", + "to": 
"00:00:03.840" + }, + "offsets": { + "from": 3630, + "to": 3840 + }, + "id": 379, + "p": 0.960146, + "t_dtw": -1 + }, + { + "text": " 9", + "timestamps": { + "from": "00:00:03.840", + "to": "00:00:04.150" + }, + "offsets": { + "from": 3840, + "to": 4150 + }, + "id": 860, + "p": 0.923494, + "t_dtw": -1 + }, + { + "text": "pm", + "timestamps": { + "from": "00:00:04.160", + "to": "00:00:04.370" + }, + "offsets": { + "from": 4160, + "to": 4370 + }, + "id": 4426, + "p": 0.398094, + "t_dtw": -1 + }, + { + "text": " unless", + "timestamps": { + "from": "00:00:04.370", + "to": "00:00:05.010" + }, + "offsets": { + "from": 4370, + "to": 5010 + }, + "id": 4556, + "p": 0.787165, + "t_dtw": -1 + }, + { + "text": " announced", + "timestamps": { + "from": "00:00:05.010", + "to": "00:00:05.970" + }, + "offsets": { + "from": 5010, + "to": 5970 + }, + "id": 3414, + "p": 0.980173, + "t_dtw": -1 + }, + { + "text": " otherwise", + "timestamps": { + "from": "00:00:05.970", + "to": "00:00:07.000" + }, + "offsets": { + "from": 5970, + "to": 7000 + }, + "id": 4306, + "p": 0.942659, + "t_dtw": -1 + }, + { + "text": ".", + "timestamps": { + "from": "00:00:07.000", + "to": "00:00:07.320" + }, + "offsets": { + "from": 7000, + "to": 7320 + }, + "id": 13, + "p": 0.86153, + "t_dtw": -1 + }, + { + "text": "[_TT_366]", + "timestamps": { + "from": "00:00:07.320", + "to": "00:00:07.320" + }, + "offsets": { + "from": 7320, + "to": 7320 + }, + "id": 50729, + "p": 0.0230646, + "t_dtw": -1 + } + ] + }, + { + "timestamps": { + "from": "00:00:07.320", + "to": "00:00:12.600" + }, + "offsets": { + "from": 7320, + "to": 12600 + }, + "text": " We post each week's homework at least one full week before it's due date.", + "tokens": [ + { + "text": " We", + "timestamps": { + "from": "00:00:07.320", + "to": "00:00:07.490" + }, + "offsets": { + "from": 7320, + "to": 7490 + }, + "id": 775, + "p": 0.961556, + "t_dtw": -1 + }, + { + "text": " post", + "timestamps": { + "from": "00:00:07.490", + "to": "00:00:07.840" + 
}, + "offsets": { + "from": 7490, + "to": 7840 + }, + "id": 1281, + "p": 0.940814, + "t_dtw": -1 + }, + { + "text": " each", + "timestamps": { + "from": "00:00:07.840", + "to": "00:00:08.190" + }, + "offsets": { + "from": 7840, + "to": 8190 + }, + "id": 1123, + "p": 0.990758, + "t_dtw": -1 + }, + { + "text": " week", + "timestamps": { + "from": "00:00:08.190", + "to": "00:00:08.540" + }, + "offsets": { + "from": 8190, + "to": 8540 + }, + "id": 1285, + "p": 0.98414, + "t_dtw": -1 + }, + { + "text": "'s", + "timestamps": { + "from": "00:00:08.540", + "to": "00:00:08.710" + }, + "offsets": { + "from": 8540, + "to": 8710 + }, + "id": 338, + "p": 0.852609, + "t_dtw": -1 + }, + { + "text": " homework", + "timestamps": { + "from": "00:00:08.710", + "to": "00:00:09.420" + }, + "offsets": { + "from": 8710, + "to": 9420 + }, + "id": 26131, + "p": 0.9842, + "t_dtw": -1 + }, + { + "text": " at", + "timestamps": { + "from": "00:00:09.420", + "to": "00:00:09.590" + }, + "offsets": { + "from": 9420, + "to": 9590 + }, + "id": 379, + "p": 0.989681, + "t_dtw": -1 + }, + { + "text": " least", + "timestamps": { + "from": "00:00:09.590", + "to": "00:00:10.020" + }, + "offsets": { + "from": 9590, + "to": 10020 + }, + "id": 1551, + "p": 0.992646, + "t_dtw": -1 + }, + { + "text": " one", + "timestamps": { + "from": "00:00:10.030", + "to": "00:00:10.290" + }, + "offsets": { + "from": 10030, + "to": 10290 + }, + "id": 530, + "p": 0.768113, + "t_dtw": -1 + }, + { + "text": " full", + "timestamps": { + "from": "00:00:10.290", + "to": "00:00:10.640" + }, + "offsets": { + "from": 10290, + "to": 10640 + }, + "id": 1336, + "p": 0.589241, + "t_dtw": -1 + }, + { + "text": " week", + "timestamps": { + "from": "00:00:10.640", + "to": "00:00:10.990" + }, + "offsets": { + "from": 10640, + "to": 10990 + }, + "id": 1285, + "p": 0.992968, + "t_dtw": -1 + }, + { + "text": " before", + "timestamps": { + "from": "00:00:10.990", + "to": "00:00:11.510" + }, + "offsets": { + "from": 10990, + "to": 11510 + }, + 
"id": 878, + "p": 0.988001, + "t_dtw": -1 + }, + { + "text": " it", + "timestamps": { + "from": "00:00:11.530", + "to": "00:00:11.690" + }, + "offsets": { + "from": 11530, + "to": 11690 + }, + "id": 340, + "p": 0.670674, + "t_dtw": -1 + }, + { + "text": "'s", + "timestamps": { + "from": "00:00:11.690", + "to": "00:00:11.850" + }, + "offsets": { + "from": 11690, + "to": 11850 + }, + "id": 338, + "p": 0.866573, + "t_dtw": -1 + }, + { + "text": " due", + "timestamps": { + "from": "00:00:11.860", + "to": "00:00:12.120" + }, + "offsets": { + "from": 11860, + "to": 12120 + }, + "id": 2233, + "p": 0.991537, + "t_dtw": -1 + }, + { + "text": " date", + "timestamps": { + "from": "00:00:12.120", + "to": "00:00:12.600" + }, + "offsets": { + "from": 12120, + "to": 12600 + }, + "id": 3128, + "p": 0.652784, + "t_dtw": -1 + }, + { + "text": ".", + "timestamps": { + "from": "00:00:12.600", + "to": "00:00:12.600" + }, + "offsets": { + "from": 12600, + "to": 12600 + }, + "id": 13, + "p": 0.929698, + "t_dtw": -1 + }, + { + "text": "[_TT_630]", + "timestamps": { + "from": "00:00:12.600", + "to": "00:00:12.600" + }, + "offsets": { + "from": 12600, + "to": 12600 + }, + "id": 50993, + "p": 0.0345322, + "t_dtw": -1 + } + ] + }, + { + "timestamps": { + "from": "00:00:12.600", + "to": "00:00:17.280" + }, + "offsets": { + "from": 12600, + "to": 17280 + }, + "text": " We post solutions almost a day after extended due date.", + "tokens": [ + { + "text": " We", + "timestamps": { + "from": "00:00:12.600", + "to": "00:00:12.790" + }, + "offsets": { + "from": 12600, + "to": 12790 + }, + "id": 775, + "p": 0.99553, + "t_dtw": -1 + }, + { + "text": " post", + "timestamps": { + "from": "00:00:12.790", + "to": "00:00:13.180" + }, + "offsets": { + "from": 12790, + "to": 13180 + }, + "id": 1281, + "p": 0.991259, + "t_dtw": -1 + }, + { + "text": " solutions", + "timestamps": { + "from": "00:00:13.180", + "to": "00:00:14.060" + }, + "offsets": { + "from": 13180, + "to": 14060 + }, + "id": 8136, + "p": 
0.965852, + "t_dtw": -1 + }, + { + "text": " almost", + "timestamps": { + "from": "00:00:14.060", + "to": "00:00:14.650" + }, + "offsets": { + "from": 14060, + "to": 14650 + }, + "id": 2048, + "p": 0.395074, + "t_dtw": -1 + }, + { + "text": " a", + "timestamps": { + "from": "00:00:14.650", + "to": "00:00:14.740" + }, + "offsets": { + "from": 14650, + "to": 14740 + }, + "id": 257, + "p": 0.984885, + "t_dtw": -1 + }, + { + "text": " day", + "timestamps": { + "from": "00:00:14.740", + "to": "00:00:15.030" + }, + "offsets": { + "from": 14740, + "to": 15030 + }, + "id": 1110, + "p": 0.997579, + "t_dtw": -1 + }, + { + "text": " after", + "timestamps": { + "from": "00:00:15.030", + "to": "00:00:15.520" + }, + "offsets": { + "from": 15030, + "to": 15520 + }, + "id": 706, + "p": 0.997904, + "t_dtw": -1 + }, + { + "text": " extended", + "timestamps": { + "from": "00:00:15.520", + "to": "00:00:16.300" + }, + "offsets": { + "from": 15520, + "to": 16300 + }, + "id": 7083, + "p": 0.59029, + "t_dtw": -1 + }, + { + "text": " due", + "timestamps": { + "from": "00:00:16.300", + "to": "00:00:16.590" + }, + "offsets": { + "from": 16300, + "to": 16590 + }, + "id": 2233, + "p": 0.986455, + "t_dtw": -1 + }, + { + "text": " date", + "timestamps": { + "from": "00:00:16.590", + "to": "00:00:17.040" + }, + "offsets": { + "from": 16590, + "to": 17040 + }, + "id": 3128, + "p": 0.994649, + "t_dtw": -1 + }, + { + "text": ".", + "timestamps": { + "from": "00:00:17.040", + "to": "00:00:17.280" + }, + "offsets": { + "from": 17040, + "to": 17280 + }, + "id": 13, + "p": 0.967296, + "t_dtw": -1 + }, + { + "text": "[_TT_864]", + "timestamps": { + "from": "00:00:17.280", + "to": "00:00:17.280" + }, + "offsets": { + "from": 17280, + "to": 17280 + }, + "id": 51227, + "p": 0.0279233, + "t_dtw": -1 + } + ] + }, + { + "timestamps": { + "from": "00:00:17.280", + "to": "00:00:21.120" + }, + "offsets": { + "from": 17280, + "to": 21120 + }, + "text": " Links to future homeworks and solutions are placeholders.", 
+ "tokens": [ + { + "text": " Links", + "timestamps": { + "from": "00:00:17.280", + "to": "00:00:17.640" + }, + "offsets": { + "from": 17280, + "to": 17640 + }, + "id": 21691, + "p": 0.970288, + "t_dtw": -1 + }, + { + "text": " to", + "timestamps": { + "from": "00:00:17.640", + "to": "00:00:17.780" + }, + "offsets": { + "from": 17640, + "to": 17780 + }, + "id": 284, + "p": 0.997626, + "t_dtw": -1 + }, + { + "text": " future", + "timestamps": { + "from": "00:00:17.780", + "to": "00:00:18.220" + }, + "offsets": { + "from": 17780, + "to": 18220 + }, + "id": 2003, + "p": 0.99132, + "t_dtw": -1 + }, + { + "text": " hom", + "timestamps": { + "from": "00:00:18.220", + "to": "00:00:18.440" + }, + "offsets": { + "from": 18220, + "to": 18440 + }, + "id": 3488, + "p": 0.86544, + "t_dtw": -1 + }, + { + "text": "eworks", + "timestamps": { + "from": "00:00:18.440", + "to": "00:00:18.880" + }, + "offsets": { + "from": 18440, + "to": 18880 + }, + "id": 19653, + "p": 0.993508, + "t_dtw": -1 + }, + { + "text": " and", + "timestamps": { + "from": "00:00:18.880", + "to": "00:00:19.100" + }, + "offsets": { + "from": 18880, + "to": 19100 + }, + "id": 290, + "p": 0.988634, + "t_dtw": -1 + }, + { + "text": " solutions", + "timestamps": { + "from": "00:00:19.100", + "to": "00:00:19.760" + }, + "offsets": { + "from": 19100, + "to": 19760 + }, + "id": 8136, + "p": 0.978441, + "t_dtw": -1 + }, + { + "text": " are", + "timestamps": { + "from": "00:00:19.760", + "to": "00:00:19.980" + }, + "offsets": { + "from": 19760, + "to": 19980 + }, + "id": 389, + "p": 0.998055, + "t_dtw": -1 + }, + { + "text": " place", + "timestamps": { + "from": "00:00:19.980", + "to": "00:00:20.340" + }, + "offsets": { + "from": 19980, + "to": 20340 + }, + "id": 1295, + "p": 0.853392, + "t_dtw": -1 + }, + { + "text": "holders", + "timestamps": { + "from": "00:00:20.340", + "to": "00:00:20.850" + }, + "offsets": { + "from": 20340, + "to": 20850 + }, + "id": 10476, + "p": 0.982725, + "t_dtw": -1 + }, + { + "text": ".", + 
"timestamps": { + "from": "00:00:20.850", + "to": "00:00:21.120" + }, + "offsets": { + "from": 20850, + "to": 21120 + }, + "id": 13, + "p": 0.942825, + "t_dtw": -1 + }, + { + "text": "[_TT_1056]", + "timestamps": { + "from": "00:00:21.120", + "to": "00:00:21.120" + }, + "offsets": { + "from": 21120, + "to": 21120 + }, + "id": 51419, + "p": 0.0691018, + "t_dtw": -1 + } + ] + }, + { + "timestamps": { + "from": "00:00:21.120", + "to": "00:00:24.520" + }, + "offsets": { + "from": 21120, + "to": 24520 + }, + "text": " Tucks of future homeworks are subject to change.", + "tokens": [ + { + "text": " T", + "timestamps": { + "from": "00:00:21.120", + "to": "00:00:21.200" + }, + "offsets": { + "from": 21120, + "to": 21200 + }, + "id": 309, + "p": 0.433385, + "t_dtw": -1 + }, + { + "text": "ucks", + "timestamps": { + "from": "00:00:21.200", + "to": "00:00:21.530" + }, + "offsets": { + "from": 21200, + "to": 21530 + }, + "id": 6238, + "p": 0.424658, + "t_dtw": -1 + }, + { + "text": " of", + "timestamps": { + "from": "00:00:21.530", + "to": "00:00:21.690" + }, + "offsets": { + "from": 21530, + "to": 21690 + }, + "id": 286, + "p": 0.985274, + "t_dtw": -1 + }, + { + "text": " future", + "timestamps": { + "from": "00:00:21.690", + "to": "00:00:22.180" + }, + "offsets": { + "from": 21690, + "to": 22180 + }, + "id": 2003, + "p": 0.997737, + "t_dtw": -1 + }, + { + "text": " hom", + "timestamps": { + "from": "00:00:22.180", + "to": "00:00:22.420" + }, + "offsets": { + "from": 22180, + "to": 22420 + }, + "id": 3488, + "p": 0.975576, + "t_dtw": -1 + }, + { + "text": "eworks", + "timestamps": { + "from": "00:00:22.420", + "to": "00:00:22.910" + }, + "offsets": { + "from": 22420, + "to": 22910 + }, + "id": 19653, + "p": 0.993154, + "t_dtw": -1 + }, + { + "text": " are", + "timestamps": { + "from": "00:00:22.910", + "to": "00:00:23.150" + }, + "offsets": { + "from": 22910, + "to": 23150 + }, + "id": 389, + "p": 0.998262, + "t_dtw": -1 + }, + { + "text": " subject", + "timestamps": { + 
"from": "00:00:23.150", + "to": "00:00:23.730" + }, + "offsets": { + "from": 23150, + "to": 23730 + }, + "id": 2426, + "p": 0.991766, + "t_dtw": -1 + }, + { + "text": " to", + "timestamps": { + "from": "00:00:23.730", + "to": "00:00:23.890" + }, + "offsets": { + "from": 23730, + "to": 23890 + }, + "id": 284, + "p": 0.908062, + "t_dtw": -1 + }, + { + "text": " change", + "timestamps": { + "from": "00:00:23.890", + "to": "00:00:24.440" + }, + "offsets": { + "from": 23890, + "to": 24440 + }, + "id": 1487, + "p": 0.997778, + "t_dtw": -1 + }, + { + "text": ".", + "timestamps": { + "from": "00:00:24.440", + "to": "00:00:24.520" + }, + "offsets": { + "from": 24440, + "to": 24520 + }, + "id": 13, + "p": 0.981104, + "t_dtw": -1 + }, + { + "text": "[_TT_1226]", + "timestamps": { + "from": "00:00:24.520", + "to": "00:00:24.520" + }, + "offsets": { + "from": 24520, + "to": 24520 + }, + "id": 51589, + "p": 0.0327583, + "t_dtw": -1 + } + ] + } + ] +} \ No newline at end of file diff --git a/PythonRpcServer/transcribe_hellohellohello.wav b/PythonRpcServer/transcribe_hellohellohello.wav new file mode 100644 index 00000000..fd2a335f Binary files /dev/null and b/PythonRpcServer/transcribe_hellohellohello.wav differ diff --git a/PythonRpcServer/youtube.py b/PythonRpcServer/youtube.py index 45841871..dec0fa7f 100644 --- a/PythonRpcServer/youtube.py +++ b/PythonRpcServer/youtube.py @@ -1,12 +1,17 @@ -from pytube.extract import playlist_id +# from pytube.extract import playlist_id + +# from yt_dlp import YoutubeDL +import yt_dlp + import requests -from utils import encode, decode, getRandomString, download_file +from utils import getRandomString import os import json from time import perf_counter +import datetime #from pytube import YouTube -import pytube +# import pytube from mediaprovider import MediaProvider, InvalidPlaylistInfoException @@ -42,7 +47,10 @@ def get_youtube_channel(self, identifier): print(f'get_youtube_channel({identifier})') url = YOUTUBE_CHANNEL_BASE_URL+ identifier 
- channel = pytube.Channel(url) + # Use yt_dlp to create a channel, + + channel = yt_dlp.Youtube(url).get_channel() + ## channel.playlist_id = channel.playlist_id.replace('UC', 'UU') playlist_id = channel.playlist_id #according to one StackOver and one test, channels-to-playlists can also be converted with string replace UCXXXX to UUXXXX @@ -53,26 +61,33 @@ def get_youtube_playlist(self, identifier): try: start_time = perf_counter() - url= YOUTUBE_PLAYLIST_BASE_URL+ identifier + url= YOUTUBE_PLAYLIST_BASE_URL + identifier print(f"get_youtube_playlist(identifier): {url}") - playlist = pytube.Playlist(url) - + + ydl_opts = { + 'quiet': True, + 'extract_flat': 'in_playlist', # Ensure we are extracting playlist entries + 'force_generic_extractor': True, + } medias = [] - for v in playlist.videos: - - published_at = v.publish_date.strftime('%Y/%m/%d') - media = { - #"channelTitle": channelTitle, - "channelId": v.channel_id, - "playlistId": identifier, - "title": v.title, - "description": v.description, - "publishedAt": published_at, - "videoUrl": v.watch_url, - "videoId": v.video_id, - "createdAt": published_at - } - medias.append(media) + # Current time in iso date time format + now = datetime.datetime.now().isoformat() + with yt_dlp.YoutubeDL(ydl_opts) as ydl: + info_dict = ydl.extract_info(url, download=False) + for entry in info_dict.get( 'entries', []): + print(entry) + published_at = entry.get('upload_date', now) + media = { + "channelId": entry['channel_id'], + "playlistId": identifier, + "title": entry['title'], + "description": entry['description'], + "publishedAt": published_at, + "videoUrl": "https://youtube.com/watch?v="+entry['id'], + "videoId": entry['id'], + "createdAt": published_at + } + medias.append(media) end_time = perf_counter() print(f'Youtube playlist {identifier}: Returning {len(medias)} items. 
Processing time {end_time - start_time :.2f} seconds') return medias @@ -86,7 +101,21 @@ def download_youtube_video(self, youtubeUrl): start_time = perf_counter() extension = '.mp4' filename = getRandomString(8) - filepath = pytube.YouTube(youtubeUrl).streams.filter(subtype='mp4').get_highest_resolution().download(output_path = DATA_DIRECTORY, filename = filename) + filepath =f'{DATA_DIRECTORY}/{filename}' + ydl_opts = { + 'quiet': True, + 'format': 'best[ext=mp4]', + 'outtmpl': filepath, + 'cachedir' : False, + 'progress_hooks': [], + 'call_home': False, + 'no_color': True, + 'noprogress': True, + } + with yt_dlp.YoutubeDL(ydl_opts) as ydl: + x = ydl.download([youtubeUrl]) + print(x) + #filepath = yt_dlp.YoutubeDL(ydl_opts).streams.filter(subtype='mp4').get_highest_resolution().download(output_path = DATA_DIRECTORY, filename = filename) end_time = perf_counter() print(f"download_youtube_video({youtubeUrl}): Done. Downloaded in {end_time - start_time :.2f} seconds") return filepath, extension diff --git a/PythonRpcServer/youtube_test.py b/PythonRpcServer/youtube_test.py index a7ddf125..a838004d 100644 --- a/PythonRpcServer/youtube_test.py +++ b/PythonRpcServer/youtube_test.py @@ -5,7 +5,7 @@ import youtube -def test_youtube(): +def test_youtube1(): print("Test 1/2: Download playlist") yt=youtube.YoutubeProvider() pl=yt.get_youtube_playlist('PLBgxzZMu3GpPb35BDIU5eeopR4MhBOZw_') @@ -17,7 +17,9 @@ def test_youtube(): assert 'STAT 385' in pl[0]['title'] +def test_youtube2(): print("Test 2/2: Download video") + yt=youtube.YoutubeProvider() onevid = yt.download_youtube_video('https://youtube.com/watch?v=DqHMh8nqCPw') # 24-72 seconds typical print(onevid) assert len(onevid) == 2 @@ -34,4 +36,5 @@ def test_youtube(): print("All tests completed") if __name__ == "__main__": - test_youtube() + test_youtube1() + test_youtube2() diff --git a/TaskEngine.Dockerfile b/TaskEngine.Dockerfile index 6ff8d946..a99244c9 100644 --- a/TaskEngine.Dockerfile +++ b/TaskEngine.Dockerfile @@ 
-1,6 +1,8 @@ FROM mcr.microsoft.com/dotnet/sdk:8.0-bookworm-slim as build # See https://mcr.microsoft.com/en-us/product/dotnet/sdk/tags #See more comments in API.Dockerfile +# RUN ls +RUN dotnet --list-sdks WORKDIR / RUN git clone https://github.com/eficode/wait-for.git @@ -8,6 +10,8 @@ RUN git clone https://github.com/eficode/wait-for.git WORKDIR /src COPY ./ClassTranscribeDatabase/ClassTranscribeDatabase.csproj ./ClassTranscribeDatabase/ClassTranscribeDatabase.csproj # --verbosity normal|diagnostic + + RUN dotnet restore --verbosity diagnostic ./ClassTranscribeDatabase/ClassTranscribeDatabase.csproj COPY ./TaskEngine/TaskEngine.csproj ./TaskEngine/TaskEngine.csproj diff --git a/TaskEngine/Program.cs b/TaskEngine/Program.cs index a8e1c405..d8210111 100644 --- a/TaskEngine/Program.cs +++ b/TaskEngine/Program.cs @@ -81,7 +81,8 @@ public static void SetupServices() .AddSingleton() .AddSingleton() .AddSingleton() - .AddSingleton() + .AddSingleton() + .AddSingleton() .AddSingleton() // .AddSingleton() .AddSingleton() @@ -132,7 +133,7 @@ static void runQueueAwakerForever() { _logger.LogInformation("Pausing {0} minutes before first periodicCheck", initialPauseInterval); // Thread.Sleep(initialPauseInterval); - Task.Delay(initialPauseInterval).Wait(); + // Task.Delay(initialPauseInterval).Wait(); // Check for new tasks every "timeInterval". // The periodic check will discover all undone tasks // TODO/REVIEW: However some tasks also publish the next items @@ -147,6 +148,23 @@ static void runQueueAwakerForever() { } catch (Exception e) { _logger.LogError(e, "Error in Periodic Check"); } + + // Hacky testing... 
+ // try { + // var videoId = "ddceb720-a9d6-417d-b5ea-e94c6c0a86c6"; + // _logger.LogInformation("Transcription Task Initiated"); + // queueAwakerTask.Publish(new JObject + // { + // { "Type", TaskType.LocalTranscribeVideo.ToString() }, + // { "videoOrMediaId", videoId } + // }); + + // _logger.LogInformation("Transcription Task Published Successfully"); + // } catch (Exception e) { + // _logger.LogError(e, "Error in Transcription Task"); + // } + + // Thread.Sleep(timeInterval); Task.Delay(timeInterval).Wait(); _logger.LogInformation("Pausing {0} minutes before next periodicCheck", periodicCheck); @@ -175,7 +193,7 @@ static void createTaskQueues() { // Transcription Related _logger.LogInformation($"Creating TranscriptionTask consumers. Concurrency={concurrent_transcriptions} "); - _serviceProvider.GetService().Consume(concurrent_transcriptions); + _serviceProvider.GetService().Consume(concurrent_transcriptions); // no more! - _serviceProvider.GetService().Consume(concurrent_transcriptions); @@ -191,7 +209,7 @@ static void createTaskQueues() { _serviceProvider.GetService().Consume(DISABLED_TASK); // We dont want concurrency for these tasks - _logger.LogInformation("Creating QueueAwakerTask and Box token tasks consumers."); + _logger.LogInformation("Creating QueueAwakerTask and Box token tasks consumers!"); _serviceProvider.GetService().Consume(NO_CONCURRENCY); //TODO TOREVIEW: NO_CONCURRENCY? 
// does nothing at the moment _serviceProvider.GetService().Consume(NO_CONCURRENCY); _serviceProvider.GetService().Consume(NO_CONCURRENCY); // calls _box.CreateAccessTokenAsync(authCode); diff --git a/TaskEngine/Tasks/TranscriptionTask.cs b/TaskEngine/Tasks/AzureTranscriptionTask.cs similarity index 97% rename from TaskEngine/Tasks/TranscriptionTask.cs rename to TaskEngine/Tasks/AzureTranscriptionTask.cs index 217fa633..420cfc94 100644 --- a/TaskEngine/Tasks/TranscriptionTask.cs +++ b/TaskEngine/Tasks/AzureTranscriptionTask.cs @@ -21,7 +21,7 @@ namespace TaskEngine.Tasks /// This task produces the transcriptions for a Video item. /// [SuppressMessage("Microsoft.Performance", "CA1812:MarkMembersAsStatic")] // This class is never directly instantiated - class TranscriptionTask : RabbitMQTask + class AzureTranscriptionTask : RabbitMQTask { private readonly MSTranscriptionService _msTranscriptionService; @@ -29,10 +29,10 @@ class TranscriptionTask : RabbitMQTask private readonly CaptionQueries _captionQueries; - public TranscriptionTask(RabbitMQConnection rabbitMQ, MSTranscriptionService msTranscriptionService, + public AzureTranscriptionTask(RabbitMQConnection rabbitMQ, MSTranscriptionService msTranscriptionService, // GenerateVTTFileTask generateVTTFileTask, - ILogger logger, CaptionQueries captionQueries) - : base(rabbitMQ, TaskType.TranscribeVideo, logger) + ILogger logger, CaptionQueries captionQueries) + : base(rabbitMQ, TaskType.AzureTranscribeVideo, logger) { _msTranscriptionService = msTranscriptionService; // nope _generateVTTFileTask = generateVTTFileTask; diff --git a/TaskEngine/Tasks/ConvertVideoToWavTask.cs b/TaskEngine/Tasks/ConvertVideoToWavTask.cs index a8e2f363..5ec7475c 100644 --- a/TaskEngine/Tasks/ConvertVideoToWavTask.cs +++ b/TaskEngine/Tasks/ConvertVideoToWavTask.cs @@ -21,13 +21,13 @@ namespace TaskEngine.Tasks class ConvertVideoToWavTask : RabbitMQTask { private readonly RpcClient _rpcClient; - private readonly TranscriptionTask 
_transcriptionTask; + private readonly LocalTranscriptionTask _localTranscriptionTask; - public ConvertVideoToWavTask(RabbitMQConnection rabbitMQ, RpcClient rpcClient, TranscriptionTask transcriptionTask, ILogger logger) + public ConvertVideoToWavTask(RabbitMQConnection rabbitMQ, RpcClient rpcClient, LocalTranscriptionTask localTranscriptionTask, ILogger logger) : base(rabbitMQ, TaskType.ConvertMedia, logger) { _rpcClient = rpcClient; - _transcriptionTask = transcriptionTask; + _localTranscriptionTask = localTranscriptionTask; } protected override Task OnConsume(string videoId, TaskParameters taskParameters, ClientActiveTasks cleanup) @@ -72,11 +72,10 @@ private async Task OldOnConsumeNotUsed(string videoId) videoLatest.Audio = fileRecord; await _context.SaveChangesAsync(); - // If no transcriptions present, produce transcriptions. if (!videoLatest.Transcriptions.Any()) { - _transcriptionTask.Publish(videoLatest.Id); + _localTranscriptionTask.Publish(videoLatest.Id); } } } diff --git a/TaskEngine/Tasks/LocalTranscriptionTask.cs b/TaskEngine/Tasks/LocalTranscriptionTask.cs new file mode 100644 index 00000000..b20ee82b --- /dev/null +++ b/TaskEngine/Tasks/LocalTranscriptionTask.cs @@ -0,0 +1,199 @@ +using Microsoft.EntityFrameworkCore; +using Microsoft.Extensions.Logging; +using System; +using System.Collections.Generic; +using System.Diagnostics.CodeAnalysis; +using System.Linq; +using System.Threading.Tasks; +using Grpc.Core; +using Newtonsoft.Json.Linq; + + +using ClassTranscribeDatabase; +using ClassTranscribeDatabase.Models; +using ClassTranscribeDatabase.Services; + +using static ClassTranscribeDatabase.CommonUtils; + +#pragma warning disable CA2007 +// https://learn.microsoft.com/en-us/dotnet/fundamentals/code-analysis/quality-rules/ca2007 +// We are okay awaiting on a task in the same thread + +namespace TaskEngine.Tasks +{ + /// + /// This task produces the transcriptions for a Video item. 
+ /// + [SuppressMessage("Microsoft.Performance", "CA1812:MarkMembersAsStatic")] // This class is never directly instantiated + class LocalTranscriptionTask : RabbitMQTask + { + // NOTE(review): _captionQueries is injected and stored but nothing in this class reads it + private readonly CaptionQueries _captionQueries; + private readonly RpcClient _rpcClient; + + // Passes TaskType.LocalTranscribeVideo to the RabbitMQTask base and keeps the RPC client used by OnConsume + public LocalTranscriptionTask(RabbitMQConnection rabbitMQ, + RpcClient rpcClient, + // GenerateVTTFileTask generateVTTFileTask, + ILogger logger, CaptionQueries captionQueries) + : base(rabbitMQ, TaskType.LocalTranscribeVideo, logger) + { + _rpcClient = rpcClient; + _captionQueries = captionQueries; + } + // Transcribes one video through the Python RPC server (TranscribeAudioRPC) and stores the returned cues as a Caption transcription + protected async override Task OnConsume(string videoId, TaskParameters taskParameters, ClientActiveTasks cleanup) + { + RegisterTask(cleanup, videoId); // may throw AlreadyInProgress exception + + const string SOURCEINTERNALREF= "ClassTranscribe/Local"; // Do not change me; this is a key inside the database + // to indicate the source of the captions was this code + + + using (var _context = CTDbContext.CreateDbContext()) + { + + // TODO: taskParameters.Force should wipe all captions and reset the Transcription Status + + Video video = await _context.Videos.Include(v => v.Video1).Where(v => v.Id == videoId).FirstAsync(); + // ! 
Note the 'Include' ; we don't build the whole tree of related Entities + + if (video.TranscriptionStatus == Video.TranscriptionStatusMessages.NOERROR) + { + GetLogger().LogInformation($"{videoId}:Skipping Transcribing of- already complete"); + return; + } + var medias = await _context.Medias.Include(m=>m.Playlist).Where(m=>m.VideoId == videoId && m.Playlist != null).ToListAsync(); + if(medias.Count == 0) { + GetLogger().LogInformation($"{videoId}:Skipping Transcribing - no media / playlist cares about this video"); + return; + } + + GetLogger().LogInformation($"{videoId}: Has new Phrase Hints: {video.HasPhraseHints()}"); + + string phraseHints = ""; + if (video.HasPhraseHints()) { + var data = await _context.TextData.FindAsync(video.PhraseHintsDataId); + phraseHints = data?.Text ?? ""; // FindAsync returns null when the row is missing; fall back to no hints instead of throwing + } else + { // deprecated + phraseHints = video.PhraseHints ?? ""; + } + + GetLogger().LogInformation($"{videoId}:Using Phrase Hints length = {phraseHints.Length}"); + // GetKey can throw if the video.Id is currently being transcribed + // However registerTask should have already detected that + var key = TaskEngineGlobals.KeyProvider.GetKey(video.Id); + // NOTE(review): the key is only released by the finally around the RPC call below; an exception raised before that call would leave it held - confirm + video.TranscribingAttempts += 10; + await _context.SaveChangesAsync(); + GetLogger().LogInformation($"{videoId}: Updated TranscribingAttempts = {video.TranscribingAttempts}"); + try + { + var mockWhisperResult = Globals.appSettings.MOCK_RECOGNITION == "MOCK"; + + GetLogger().LogInformation($"{videoId}: Calling RecognitionWithVideoStreamAsync( mock={mockWhisperResult})"); + + var request = new CTGrpc.TranscriptionRequest + { + LogId = videoId, + FilePath = video.Video1.VMPath, + Model = "en", + Language = "en", + Testing = mockWhisperResult + // PhraseHints = phraseHints, + // CourseHints = "", + // 
OutputLanguages = "en" + }; + var jsonString = ""; + try { + jsonString = (await _rpcClient.PythonServerClient.TranscribeAudioRPCAsync(request)).Json; + } + catch (RpcException e) + { + // Log every RPC failure: previously only InvalidArgument was logged and + 
// every other status code returned without leaving a trace in the logs + GetLogger().LogError(e, $"TranscribeAudioRPCAsync=({videoId}):{e.Message}"); + // Abandon this attempt; the finally block below still releases the key + return; + } finally { + GetLogger().LogInformation($"{videoId} Transcribe - rpc complete"); + TaskEngineGlobals.KeyProvider.ReleaseKey(key, video.Id); + } + + JObject jObject = JObject.Parse(jsonString); + // JArray jArray = JArray.Parse(jsonString); + var theLanguage = jObject["result"]["language"].ToString(); // plain ToString(): the Formatting.None overload returns JSON text, i.e. the value wrapped in double quotes + var theCaptionsAsJson = jObject["transcription"]; + + var theCaptions = new List(); + int cueCount = 0; + + foreach (var jsonCue in theCaptionsAsJson) { + // var caption = new Caption() { + // Index = cueCount ++, + // Begin = TimeSpan.Parse(jsonCue["timestamps"]["from"].ToString(Newtonsoft.Json.Formatting.None).Replace(",",".")), + // End = TimeSpan.Parse(jsonCue["timestamps"]["to"].ToString(Newtonsoft.Json.Formatting.None).Replace(",",".")) , + // Text = jsonCue["text"] .ToString(Newtonsoft.Json.Formatting.None).Trim() + // }; + var fromTimestamp = jsonCue["timestamps"]["from"].ToString().Replace(",", "."); + var toTimestamp = jsonCue["timestamps"]["to"].ToString().Replace(",", "."); + + // Parse the timestamps directly + var caption = new Caption() { + Index = cueCount++, + Begin = TimeSpan.Parse(fromTimestamp), // Expecting "HH:mm:ss.fff" + End = TimeSpan.Parse(toTimestamp), // Expecting "HH:mm:ss.fff" + Text = jsonCue["text"].ToString().Trim() + }; + + theCaptions.Add(caption); + } + if (theCaptions.Count > 0) + { + GetLogger().LogInformation($"{videoId}: Created {theCaptions.Count} captions objects"); + // NOTE(review): rows created below are stored with Language "en-US", so filtering on theLanguage looks like it can never match an existing row - confirm which value is intended + var t = await 
_context.Transcriptions.SingleOrDefaultAsync(t => t.VideoId == video.Id && t.SourceInternalRef == SOURCEINTERNALREF && t.Language == theLanguage && t.TranscriptionType == TranscriptionType.Caption); + GetLogger().LogInformation($"Find Existing Transcriptions null={t == null}"); + // Did we get the default or an existing Transcription entity? 
+ if (t == null) + { + t = new Transcription() + { + TranscriptionType = TranscriptionType.Caption, + Captions = theCaptions, + Language = "en-US" , /* Must be en-US for FrontEnd; Can't be just "en" */ + VideoId = video.Id, + Label = $"{theLanguage} (ClassTranscribe)", + SourceInternalRef = SOURCEINTERNALREF, // + SourceLabel = "ClassTranscribe (Local" + (phraseHints.Length>0 ?" with phrase hints)" : ")") + // Todo store the entire Whisper result here + }; + _context.Add(t); + } + else + { + t.Captions.AddRange(theCaptions); + } + } + + + video.TranscriptionStatus = Video.TranscriptionStatusMessages.NOERROR; + // video.JsonMetadata["LastSuccessfulTime"] = result.LastSuccessTime.ToString(); + + GetLogger().LogInformation($"{videoId}: Saving captions"); + await _context.SaveChangesAsync(); + } + catch (Exception ex) + { + GetLogger().LogError(ex, $"{videoId}: Transcription Exception:{ex.StackTrace}"); + video.TranscribingAttempts += 1000; + await _context.SaveChangesAsync(); + throw; + } + + } + } + + } +} \ No newline at end of file diff --git a/TaskEngine/Tasks/QueueAwakerTask.cs b/TaskEngine/Tasks/QueueAwakerTask.cs index ed1d7225..78c21704 100644 --- a/TaskEngine/Tasks/QueueAwakerTask.cs +++ b/TaskEngine/Tasks/QueueAwakerTask.cs @@ -22,7 +22,7 @@ class QueueAwakerTask : RabbitMQTask private readonly DownloadPlaylistInfoTask _downloadPlaylistInfoTask; private readonly DownloadMediaTask _downloadMediaTask; // private readonly ConvertVideoToWavTask _convertVideoToWavTask; - private readonly TranscriptionTask _transcriptionTask; + private readonly LocalTranscriptionTask _transcriptionTask; // nope private readonly GenerateVTTFileTask _generateVTTFileTask; private readonly ProcessVideoTask _processVideoTask; private readonly SceneDetectionTask _sceneDetectionTask; @@ -39,7 +39,7 @@ public QueueAwakerTask() { } public QueueAwakerTask(RabbitMQConnection rabbitMQ, DownloadPlaylistInfoTask downloadPlaylistInfoTask, DownloadMediaTask downloadMediaTask, - TranscriptionTask 
transcriptionTask, ProcessVideoTask processVideoTask, + LocalTranscriptionTask transcriptionTask, ProcessVideoTask processVideoTask, // GenerateVTTFileTask generateVTTFileTask, SceneDetectionTask sceneDetectionTask, CreateBoxTokenTask createBoxTokenTask,// UpdateBoxTokenTask updateBoxTokenTask, @@ -401,10 +401,10 @@ protected async override Task OnConsume(JObject jObject, TaskParameters taskPara var sourceId = jObject["SourceId"].ToString(); _pythonCrawlerTask.Publish(sourceId); } - else if (type == TaskType.TranscribeVideo.ToString()) + else if (type == TaskType.LocalTranscribeVideo.ToString()) { var id = jObject["videoOrMediaId"].ToString(); - + GetLogger().LogInformation($"{type}:{id}"); var video = await _context.Videos.FindAsync(id); diff --git a/TaskEngine/Tasks/SceneDetectionTask.cs b/TaskEngine/Tasks/SceneDetectionTask.cs index 1baf80c0..711bc92c 100644 --- a/TaskEngine/Tasks/SceneDetectionTask.cs +++ b/TaskEngine/Tasks/SceneDetectionTask.cs @@ -19,13 +19,13 @@ namespace TaskEngine.Tasks class SceneDetectionTask : RabbitMQTask { private readonly RpcClient _rpcClient; - private readonly TranscriptionTask _transcriptionTask; + private readonly LocalTranscriptionTask _transcriptionTask; - public SceneDetectionTask(RabbitMQConnection rabbitMQ,TranscriptionTask transcriptionTask, RpcClient rpcClient, ILogger logger) + public SceneDetectionTask(RabbitMQConnection rabbitMQ,LocalTranscriptionTask localTanscriptionTask, RpcClient rpcClient, ILogger logger) : base(rabbitMQ, TaskType.SceneDetection, logger) { _rpcClient = rpcClient; - _transcriptionTask = transcriptionTask; + _transcriptionTask = localTanscriptionTask; } /// Extracts scene information for a video. 
/// Beware: It is possible to start another scene task while the first one is still running diff --git a/TaskEngine/TempCode.cs b/TaskEngine/TempCode.cs index 896d2f72..34af0142 100644 --- a/TaskEngine/TempCode.cs +++ b/TaskEngine/TempCode.cs @@ -24,7 +24,7 @@ class TempCode private readonly PythonCrawlerTask _pythonCrawlerTask; private readonly ProcessVideoTask _processVideoTask; // private readonly GenerateVTTFileTask _generateVTTFileTask; - private readonly TranscriptionTask _transcriptionTask; + private readonly LocalTranscriptionTask _transcriptionTask; private readonly ConvertVideoToWavTask _convertVideoToWavTask; private readonly DownloadMediaTask _downloadMediaTask; private readonly DownloadPlaylistInfoTask _downloadPlaylistInfoTask; @@ -34,7 +34,7 @@ class TempCode public TempCode(CTDbContext c, CreateBoxTokenTask createBoxTokenTask, //UpdateBoxTokenTask updateBoxTokenTask, SceneDetectionTask ePubGeneratorTask, ProcessVideoTask processVideoTask, - TranscriptionTask transcriptionTask, ConvertVideoToWavTask convertVideoToWavTask, DownloadMediaTask downloadMediaTask, + LocalTranscriptionTask localTranscriptionTask, ConvertVideoToWavTask convertVideoToWavTask, DownloadMediaTask downloadMediaTask, DownloadPlaylistInfoTask downloadPlaylistInfoTask, QueueAwakerTask queueAwakerTask, CleanUpElasticIndexTask cleanUpElasticIndexTask, RpcClient rpcClient, PythonCrawlerTask pythonCrawlerTask) @@ -45,7 +45,7 @@ public TempCode(CTDbContext c, CreateBoxTokenTask createBoxTokenTask, //UpdateBo _sceneDetectionTask = ePubGeneratorTask; _processVideoTask = processVideoTask; // _generateVTTFileTask = generateVTTFileTask; - _transcriptionTask = transcriptionTask; + _transcriptionTask = localTranscriptionTask; _convertVideoToWavTask = convertVideoToWavTask; _downloadMediaTask = downloadMediaTask; _downloadPlaylistInfoTask = downloadPlaylistInfoTask; diff --git a/TaskEngine/global.json b/TaskEngine/global.json index a679dd12..215288b9 100644 --- a/TaskEngine/global.json +++ 
b/TaskEngine/global.json @@ -1,5 +1,5 @@ { "sdk": { - "version": "8.0.401" + "version": "8.0" } } \ No newline at end of file diff --git a/ct.proto b/ct.proto index 512975ec..17958231 100644 --- a/ct.proto +++ b/ct.proto @@ -20,6 +20,16 @@ service PythonServer { rpc ComputeFileHash (FileHashRequest) returns (FileHashResponse) {} rpc GetMediaInfoRPC(File) returns (JsonString) {} + + rpc TranscribeAudioRPC (TranscriptionRequest) returns (JsonString) {} +} + +message TranscriptionRequest { + string filePath = 1; // Path to the audio/video file to be transcribed + string model = 2; // Whisper model to use (e.g., 'base-en', 'tiny-en') + string language = 3; // Language in audio. + string logId = 4; + bool testing = 5; } @@ -31,7 +41,6 @@ message JsonString { // The response message containing the greetings. message PlaylistRequest { string Url = 1; - int32 stream = 2; JsonString metadata = 3; } diff --git a/pythonrpcserver.Dockerfile b/pythonrpcserver.Dockerfile index 443ccc6d..c3e6498b 100644 --- a/pythonrpcserver.Dockerfile +++ b/pythonrpcserver.Dockerfile @@ -8,19 +8,26 @@ WORKDIR /whisper.cpp RUN git clone https://github.com/ggerganov/whisper.cpp . 
&& make RUN bash ./models/download-ggml-model.sh base.en + RUN bash ./models/download-ggml-model.sh tiny.en + RUN bash ./models/download-ggml-model.sh large-v3 - # ------------------------------ - # Stage 2: Setup Python RPC Server - # ------------------------------ +# ------------------------------ +# Stage 2: Setup Python RPC Server +# ------------------------------ FROM --platform=linux/amd64 python:3.8.15-slim-buster AS rpcserver RUN apt-get update && \ apt-get install -y curl gcc g++ make libglib2.0-0 libsm6 libxext6 libxrender-dev ffmpeg ENV OMP_THREAD_LIMIT=1 COPY --from=whisperbuild /whisper.cpp/main /usr/local/bin/whisper - COPY --from=whisperbuild /whisper.cpp/models/ggml-base.en.bin /usr/local/bin/models/ggml-base.en.bin + COPY --from=whisperbuild /whisper.cpp/models /PythonRpcServer/models WORKDIR /PythonRpcServer + # Don't copy any py files here, so that we don't need to re-run whisper + COPY ./PythonRpcServer/transcribe_hellohellohello.wav . + # The output of this whisper run is used when we set MOCK_RECOGNITION=MOCK for quick testing + RUN whisper -ojf -f transcribe_hellohellohello.wav + COPY ./PythonRpcServer/requirements.txt requirements.txt RUN pip install --no-cache-dir --upgrade pip && \ pip install --no-cache-dir -r requirements.txt @@ -30,6 +37,7 @@ COPY ./PythonRpcServer . + CMD [ "nice", "-n", "18", "ionice", "-c", "2", "-n", "6", "python3", "-u", "/PythonRpcServer/server.py" ] diff --git a/randomvoice_16kHz.json b/randomvoice_16kHz.json new file mode 100644 index 00000000..c3053a9b --- /dev/null +++ b/randomvoice_16kHz.json @@ -0,0 +1 @@ +{"text": " Hello? Hello? Hello?", "segments": [{"id": 0, "seek": 0, "start": 0.0, "end": 3.0, "text": " Hello? Hello? Hello?", "tokens": [50363, 18435, 30, 18435, 30, 18435, 30, 50513], "temperature": 0.0, "avg_logprob": -0.636968559688992, "compression_ratio": 1.1764705882352942, "no_speech_prob": 0.22877301275730133}], "language": "en"} \ No newline at end of file