Skip to content

Commit

Permalink
Merge pull request #456 from classtranscribe/staging
Browse files Browse the repository at this point in the history
Push to production
  • Loading branch information
angrave authored Jan 26, 2024
2 parents 3120c5c + fa176f2 commit d3524e7
Show file tree
Hide file tree
Showing 8 changed files with 202 additions and 143 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -191,8 +191,8 @@ private async Task<MSTResult> performRecognitionAsync(string logId, string fileP

if (verboseLogging)
{
_logger.LogInformation($"{logId}: Begin={begin.Minutes}:{begin.Seconds},{begin.Milliseconds}", begin);
_logger.LogInformation($"{logId}: End={end.Minutes}:{end.Seconds},{end.Milliseconds}");
_logger.LogInformation($"{logId}: Begin={begin.Minutes}:{begin.Seconds}.{begin.Milliseconds}", begin);
_logger.LogInformation($"{logId}: End={end.Minutes}:{end.Seconds}.{end.Milliseconds}");
}
// TODO/TOREVIEW:
// ToCaptionEntitiesWithWordTiming vs ToCaptionEntitiesInterpolate
Expand Down Expand Up @@ -243,7 +243,8 @@ private async Task<MSTResult> performRecognitionAsync(string logId, string fileP
}
else if (e.Result.Reason == ResultReason.NoMatch)
{
_logger.LogInformation($"{logId}: NOMATCH: Speech could not be recognized.");
TimeSpan begin = (new TimeSpan(e.Result.OffsetInTicks)).Add(restartOffset);
_logger.LogInformation($"{logId}: NOMATCH: ({begin.Minutes}:{begin.Seconds}) Speech could not be recognized.");
}
};

Expand Down
80 changes: 42 additions & 38 deletions ClassTranscribeServer/Controllers/AdminController.cs
Original file line number Diff line number Diff line change
Expand Up @@ -70,51 +70,55 @@ public ActionResult UpdateAllPlaylists()
/// <summary>
/// Regenerate one Caption (vtt, srt) file of the given Transcription
/// </summary>
[HttpPost("UpdateVTTFile")]
[Authorize(Roles = Globals.ROLE_ADMIN)]
public ActionResult UpdateVTTFile(string transcriptionId)
{
_logger.LogInformation($"Enqueueing {transcriptionId} caption regeneration");
_wakeDownloader.UpdateVTTFile(transcriptionId);
return Ok();
}
/// will be deleted soon - We now generate vtt files dynamically.
// [HttpPost("UpdateVTTFile")]
// [Authorize(Roles = Globals.ROLE_ADMIN)]
// public ActionResult UpdateVTTFile(string transcriptionId)
// {
// _logger.LogInformation($"Enqueueing {transcriptionId} caption regeneration");
// _wakeDownloader.UpdateVTTFile(transcriptionId);
// return Ok();
// }

/// <summary>
/// Regenerate all Caption (vtt, srt) files of the given course offering
/// </summary>
[HttpPost("UpdateVTTFilesInCourseOffering")]
[Authorize(Roles = Globals.ROLE_ADMIN)]
public async Task<ActionResult> UpdateVTTFilesInCourseOffering(string offeringId = null)
{

var playlistIds = await _context.Playlists.Where(p => p.OfferingId == offeringId).Select(p => p.Id).ToListAsync();
_logger.LogInformation($"UpdateVTTFilesinPlaylist(${offeringId}): Found {playlistIds.Count} playlists");

var videoIds = await _context.Medias.Where(m => playlistIds.Contains(m.PlaylistId)).Select(m => m.VideoId).ToListAsync();
_logger.LogInformation($"UpdateVTTFilesinPlaylist(): Found {videoIds.Count} videos");
var transcriptionIds = await _context.Transcriptions.Where(t => videoIds.Contains(t.VideoId)).Select(t => t.Id).ToListAsync();
_logger.LogInformation($"UpdateVTTFilesinPlaylist(): Found {transcriptionIds.Count} vtt transcriptions to regenerate");
foreach (var t in transcriptionIds)
{
_wakeDownloader.UpdateVTTFile(t);
}
return Ok($"Requested {transcriptionIds.Count} Transcriptions to be regenerated from {videoIds.Count} videos in {playlistIds.Count} playlists");
}
/// Will be deleted soon - we no longer store vtt files
// [HttpPost("UpdateVTTFilesInCourseOffering")]
// [Authorize(Roles = Globals.ROLE_ADMIN)]
// public async Task<ActionResult> UpdateVTTFilesInCourseOffering(string offeringId = null)
// {

// var playlistIds = await _context.Playlists.Where(p => p.OfferingId == offeringId).Select(p => p.Id).ToListAsync();
// _logger.LogInformation($"UpdateVTTFilesinPlaylist(${offeringId}): Found {playlistIds.Count} playlists");

// var videoIds = await _context.Medias.Where(m => playlistIds.Contains(m.PlaylistId)).Select(m => m.VideoId).ToListAsync();
// _logger.LogInformation($"UpdateVTTFilesinPlaylist(): Found {videoIds.Count} videos");
// var transcriptionIds = await _context.Transcriptions.Where(t => videoIds.Contains(t.VideoId)).Select(t => t.Id).ToListAsync();
// _logger.LogInformation($"UpdateVTTFilesinPlaylist(): Found {transcriptionIds.Count} vtt transcriptions to regenerate");
// foreach (var t in transcriptionIds)
// {
// _wakeDownloader.UpdateVTTFile(t);
// }
// return Ok($"Requested {transcriptionIds.Count} Transcriptions to be regenerated from {videoIds.Count} videos in {playlistIds.Count} playlists");
// }

/// <summary>
/// Regenerate all Caption (vtt, srt) files of all transcriptions
/// </summary>
[HttpPost("UpdateAllVTTFiles")]
[Authorize(Roles = Globals.ROLE_ADMIN)]
public async Task<ActionResult> UpdateAllVTTFiles()
{
var transcriptionIds = await _context.Transcriptions.Select(t => t.Id).ToListAsync();
_logger.LogInformation($"UpdateAllVTTFiles: Enqueueing {transcriptionIds.Count} vtt transcriptions to regenerate");
foreach (var t in transcriptionIds)
{
_wakeDownloader.UpdateVTTFile(t);
}
return Ok();
}
/// will be deleted soon - we no longer store vtt files
// [HttpPost("UpdateAllVTTFiles")]
// [Authorize(Roles = Globals.ROLE_ADMIN)]
// public async Task<ActionResult> UpdateAllVTTFiles()
// {
// var transcriptionIds = await _context.Transcriptions.Select(t => t.Id).ToListAsync();
// _logger.LogInformation($"UpdateAllVTTFiles: Enqueueing {transcriptionIds.Count} vtt transcriptions to regenerate");
// foreach (var t in transcriptionIds)
// {
// _wakeDownloader.UpdateVTTFile(t);
// }
// return Ok();
// }


/// <summary>
Expand Down
20 changes: 12 additions & 8 deletions TaskEngine/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -42,11 +42,14 @@ public static void Main()
builder.AddConsole();
builder.AddFilter<Microsoft.Extensions.Logging.ApplicationInsights.ApplicationInsightsLoggerProvider>
("", LogLevel.Warning);
string insightKey = configuration.GetValue<string>("APPLICATION_INSIGHTS_KEY");
if (!String.IsNullOrEmpty(insightKey) && insightKey.Trim().Length>1)
{
builder.AddApplicationInsights(insightKey);
}
// If we use Application Insights (A.I.) in the future -
// Use the AddApplicationInsights() overload which accepts Action<TelemetryConfiguration> and set TelemetryConfiguration.ConnectionString. See https://github.com/microsoft/ApplicationInsights-dotnet/issues/2560 for more details.

// string insightKey = configuration.GetValue<string>("APPLICATION_INSIGHTS_KEY");
// if (!String.IsNullOrEmpty(insightKey) && insightKey.Trim().Length>1)
// {
// builder.AddApplicationInsights(insightKey);
// }
})
.AddOptions()
.Configure<AppSettings>(configuration)
Expand All @@ -58,7 +61,7 @@ public static void Main()
.AddSingleton<ConvertVideoToWavTask>()
.AddSingleton<TranscriptionTask>()
.AddSingleton<QueueAwakerTask>()
.AddSingleton<GenerateVTTFileTask>()
// .AddSingleton<GenerateVTTFileTask>()
.AddSingleton<RpcClient>()
.AddSingleton<ProcessVideoTask>()
.AddSingleton<MSTranscriptionService>()
Expand Down Expand Up @@ -116,10 +119,11 @@ public static void Main()
serviceProvider.GetService<DownloadMediaTask>().Consume(concurrent_synctasks);

// Transcription Related
_logger.LogInformation($"Creating TranscriptionTask & GenerateVTTFileTask consumers. Concurrency={concurrent_transcriptions} ");
_logger.LogInformation($"Creating TranscriptionTask consumers. Concurrency={concurrent_transcriptions} ");

serviceProvider.GetService<TranscriptionTask>().Consume(concurrent_transcriptions);
serviceProvider.GetService<GenerateVTTFileTask>().Consume(concurrent_transcriptions);

// no more! - serviceProvider.GetService<GenerateVTTFileTask>().Consume(concurrent_transcriptions);

// Video Processing Related
_logger.LogInformation($"Creating ProcessVideoTask consumer. Concurrency={concurrent_videotasks} ");
Expand Down
83 changes: 65 additions & 18 deletions TaskEngine/Tasks/DownloadMediaTask.cs
Original file line number Diff line number Diff line change
Expand Up @@ -134,17 +134,45 @@ protected async Task<bool> updateMediaWithVideo(string mediaId, Video newVideo)
await newVideo.DeleteVideoAsync(_context);
return false;
}
GetLogger().LogInformation($"Media ({mediaId}): media.Video == null is {media.Video == null}");
GetLogger().LogInformation($"Media ({mediaId}): existing media.Video {media.Video != null}");
GetLogger().LogInformation($"Media ({mediaId}): media.Video?.Video1.Id={media.Video?.Video1.Id} ...Video2.Id={media.Video?.Video2.Id} ");

GetLogger().LogInformation($"Media ({mediaId}): downloaded: newVideo.Video1={newVideo.Video1} ...Video2={newVideo.Video2} ");
GetLogger().LogInformation($"Media ({mediaId}): downloaded: newVideo.Video1.Hash={newVideo.Video1?.Hash} ...Hash2={newVideo.Video2?.Hash} ");

// Don't add video if there are already videos for the given media.
//
if(newVideo.Id != null) {
GetLogger().LogError($"Media ({mediaId}): Huh? newVideo should not have an Id yet - that's my job!");
return false;
}
if (media.Video != null)

if (media.VideoId != null)
{
GetLogger().LogInformation($"Media ({mediaId}): Surprise - media already has video set (race condition?)- no further processing required.Discarding new files");
// Normally a DownloadMediaTask is only triggered if the video is null.
// So this code is run when a manual DownloadMediaTask is requested again
var changed = false;
var v = media.Video;
GetLogger().LogInformation($"Media ({mediaId}): Media already has video with video1Id <{media.VideoId}> Cherrypicking new files");
var pickVideo2 = newVideo.Video2 != null && (v.Video2Id == null || newVideo.Video2.Hash != v.Video2.Hash);
GetLogger().LogInformation($"Media ({mediaId}):pickVideo2={pickVideo2}");

if( newVideo.Video2 != null && (v.Video2Id == null || newVideo.Video2.Hash != v.Video2.Hash)){
_context.FileRecords.Add(newVideo.Video2);
_context.SaveChanges(); // now v2 has an Id, so we can use below
v.Video2 = newVideo.Video2;
newVideo.Video2 = null;
changed = true;
}
if(newVideo.ASLVideo != null && ( v.ASLVideoId == null || newVideo.ASLVideo.Hash != v.ASLVideo.Hash)) {
_context.FileRecords.Add(newVideo.ASLVideo);
_context.SaveChanges(); // now the ASL video record has an Id, so we can use it below
v.ASLVideo = newVideo.ASLVideo;
newVideo.ASLVideo = null;
changed = true;
}
if(changed) _context.SaveChanges();
await newVideo.DeleteVideoAsync(_context);
return false;
return changed;
}
// Time to find out what we have in the database
// Important idea: the newVideo and its filerecords are not yet part of the database.
Expand All @@ -155,7 +183,7 @@ protected async Task<bool> updateMediaWithVideo(string mediaId, Video newVideo)
var existingPrimaryVideo = existingPrimaryVideos?.FirstOrDefault(); // If non null we expect 0 or 1

GetLogger().LogInformation($"Media ({mediaId}): {matchingFiles.Count} FileRecord hash match found");
GetLogger().LogInformation($"Media ({mediaId}): {existingPrimaryVideos?.Count ?? 0} existing Videos found");
GetLogger().LogInformation($"Media ({mediaId}): {existingPrimaryVideos?.Count ?? 0} existing Primary Videos found");

// cherrypick case (see comment below)
if (existingPrimaryVideo != null)
Expand Down Expand Up @@ -240,48 +268,67 @@ public async Task<Video> DownloadKalturaVideo(string subdir, Media media)
string temp = video1Url;
video1Url = video2Url;
video2Url = temp;
GetLogger().LogInformation($"DownloadKalturaVideo ({media.Id}): swapped streams to 1:<{video1Url}> and 2:<{video2Url}>");
}
}
catch (Exception) { };

GetLogger().LogInformation($"DownloadKalturaVideo ({media.Id}): Requesting download of video1 ({video1Url})");
var mediaResponse = await _rpcClient.PythonServerClient.DownloadKalturaVideoRPCAsync(new CTGrpc.MediaRequest
{
VideoUrl = video1Url
});
GetLogger().LogInformation($"DownloadKalturaVideo ({media.Id}): Video1 downloaded to ({mediaResponse.FilePath})");

Video video;
if (FileRecord.IsValidFile(mediaResponse.FilePath))
// Sanity check that the downloaded file is valid (i.e. it has at least a few bytes).
var isValid = FileRecord.IsValidFile(mediaResponse.FilePath);
GetLogger().LogInformation($"DownloadKalturaVideo ({media.Id}): video1 is valid: {isValid}");
if (isValid)
{
GetLogger().LogInformation($"DownloadKalturaVideo ({media.Id}): GetNewFileRecordAsync");
var video1record = await FileRecord.GetNewFileRecordAsync(mediaResponse.FilePath, mediaResponse.Ext, subdir);
GetLogger().LogInformation($"DownloadKalturaVideo ({media.Id}): {video1record}");
video = new Video
{
Video1 = await FileRecord.GetNewFileRecordAsync(mediaResponse.FilePath, mediaResponse.Ext, subdir)
Video1 = video1record
};
try
{
if (media.JsonMetadata["child"] != null && media.JsonMetadata["child"]["downloadUrl"] != null)
if (video2Url != null)
{
GetLogger().LogInformation($"Media ({media.Id}): Downloading child video");
GetLogger().LogInformation($"Media ({media.Id}): Downloading second video ({video2Url})");

var childMediaR = await _rpcClient.PythonServerClient.DownloadKalturaVideoRPCAsync(new CTGrpc.MediaRequest
var secondMediaR = await _rpcClient.PythonServerClient.DownloadKalturaVideoRPCAsync(new CTGrpc.MediaRequest
{
VideoUrl = video2Url
VideoUrl = video2Url //might be swapped
});
if (FileRecord.IsValidFile(childMediaR.FilePath))
GetLogger().LogInformation($"DownloadKalturaVideo ({media.Id}): Video2 downloaded to ({secondMediaR.FilePath})");

// Sanity check that the downloaded file is valid (i.e. it has at least a few bytes).

var video2Valid = FileRecord.IsValidFile(secondMediaR.FilePath);
GetLogger().LogInformation($"Media ({media.Id}): Second video downloaded ({secondMediaR.FilePath}) is valid: {video2Valid}");
if (video2Valid)
{
video.Video2 = await FileRecord.GetNewFileRecordAsync(childMediaR.FilePath, childMediaR.Ext, subdir);
}
var video2record = await FileRecord.GetNewFileRecordAsync(secondMediaR.FilePath, secondMediaR.Ext, subdir);
GetLogger().LogInformation($"Media ({media.Id}): Second video record {video2record} ");
video.Video2 = video2record;
}
} else {
GetLogger().LogInformation($"Media ({media.Id}): No second video to download");
}
}
catch (Exception ignored)
{
GetLogger().LogInformation(ignored, $"Couldnt download second video for {media.Id}");
GetLogger().LogError(ignored, $"Media ({media.Id}): Exception {ignored}");
}
}
else
{
GetLogger().LogInformation($"DownloadKalturaVideo ({media.Id}): first downloaded file ({mediaResponse.FilePath}) was not valid>");
throw new Exception("DownloadKalturaVideo Failed + " + media.Id);
}

GetLogger().LogInformation($"Media ({media.Id}): DownloadKalturaVideo done Video1={video.Video1?.Id} Video2={video.Video2?.Id}");
return video;
}

Expand Down
Loading

0 comments on commit d3524e7

Please sign in to comment.