Skip to content

Commit

Permalink
Merge pull request #449 from classtranscribe/staging
Browse files Browse the repository at this point in the history
Push to production
  • Loading branch information
angrave authored Jan 20, 2024
2 parents c724547 + 6f41677 commit 734d000
Show file tree
Hide file tree
Showing 17 changed files with 167 additions and 79 deletions.
26 changes: 23 additions & 3 deletions API.Dockerfile
Original file line number Diff line number Diff line change
@@ -1,14 +1,34 @@
FROM mcr.microsoft.com/dotnet/sdk:8.0.100-1-bookworm-slim as build
# Also remove platform from docker-compose.override.yml for api and taskengine
# Uncomment build context in docker-compose.override.yml for api and taskengine

# e.g.,
# taskengine:
# image: classtranscribe/taskengine:staging
# #xx platform: linux/amd64 # Nope - Causes SDK "dotnet restore" to hang on M1 Mac
# build:
# context: ../../WebAPI
# target: publish
# dockerfile: ./TaskEngine.Dockerfile
#


#FROM mcr.microsoft.com/dotnet/sdk:8.0-bookworm-slim-amd64 as build
FROM mcr.microsoft.com/dotnet/sdk:8.0-bookworm-slim as build
# See https://mcr.microsoft.com/en-us/product/dotnet/sdk/tags

# FROM mcr.microsoft.com/dotnet/core/sdk:3.1.201-bionic as build
# Running the AMD64 version of the SDK is broken
# https://github.com/dotnet/dotnet-docker/discussions/4285
# https://github.com/NuGet/Home/issues/13062

RUN apt-get -q update && apt-get -qy install git
WORKDIR /
RUN git clone https://github.com/eficode/wait-for.git

WORKDIR /src
COPY ./ClassTranscribeDatabase/ClassTranscribeDatabase.csproj ./ClassTranscribeDatabase/ClassTranscribeDatabase.csproj
RUN dotnet restore ./ClassTranscribeDatabase/ClassTranscribeDatabase.csproj
# Did not help ENV DOTNET_NUGET_SIGNATURE_VERIFICATION=false
# Add --verbosity normal|diagnostic
RUN dotnet restore --verbosity diagnostic ./ClassTranscribeDatabase/ClassTranscribeDatabase.csproj

COPY ./ClassTranscribeServer/ClassTranscribeServer.csproj ./ClassTranscribeServer/ClassTranscribeServer.csproj
RUN dotnet restore ./ClassTranscribeServer/ClassTranscribeServer.csproj
Expand Down
2 changes: 1 addition & 1 deletion ClassTranscribeDatabase/ClassTranscribeDatabase.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
<PackageReference Include="Microsoft.AspNet.WebApi.Client" Version="6.0.0" />
<PackageReference Include="Microsoft.AspNetCore.Http.Abstractions" Version="2.2.0" />
<PackageReference Include="Microsoft.AspNetCore.Identity.EntityFrameworkCore" Version="8.0.0" />
<PackageReference Include="Microsoft.CognitiveServices.Speech" Version="1.34.0" />
<PackageReference Include="Microsoft.CognitiveServices.Speech" Version="1.34.1" />
<PackageReference Include="Microsoft.EntityFrameworkCore" Version="8.0.0" />
<PackageReference Include="Microsoft.EntityFrameworkCore.Design" Version="8.0.0">
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
Expand Down
4 changes: 3 additions & 1 deletion ClassTranscribeServer/Controllers/MediaController.cs
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,9 @@ public MediaController(IAuthorizationService authorizationService,
[HttpGet("{id}")]
public async Task<ActionResult<MediaDTO>> GetMedia(string id)
{
var media = await _context.Medias.FindAsync(id);
var media = await _context.Medias.
Include(m => m.Video).ThenInclude(v => v.Transcriptions).
Where(m => m.Id == id).FirstOrDefaultAsync();

if (media == null)
{
Expand Down
9 changes: 9 additions & 0 deletions ClassTranscribeServer/Controllers/PlaylistsController.cs
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,13 @@ public async Task<ActionResult<IEnumerable<PlaylistDTO>>> GetPlaylists2(string o
}

var playLists = await _context.Playlists
.Include(p=>p.Medias).ThenInclude(m=>m.Video).ThenInclude(v=>v.Transcriptions)
.Include(p=>p.Medias).ThenInclude(m=>m.Video).ThenInclude(v=>v.ProcessedVideo1)
.Include(p=>p.Medias).ThenInclude(m=>m.Video).ThenInclude(v=>v.ProcessedVideo2)
.Include(p=>p.Medias).ThenInclude(m=>m.Video).ThenInclude(v=>v.Video1)
.Include(p=>p.Medias).ThenInclude(m=>m.Video).ThenInclude(v=>v.Video2)
.Include(p=>p.Medias).ThenInclude(m=>m.Video).ThenInclude(v=>v.ASLVideo)

.Where(p => p.OfferingId == offeringId)
.OrderBy(p => p.Index)
.ThenBy(p => p.CreatedAt).ToListAsync();
Expand Down Expand Up @@ -216,6 +223,8 @@ public async Task<ActionResult<IEnumerable<MediaSearchDTO>>> SearchForMedia(stri
public async Task<ActionResult<PlaylistDTO>> GetPlaylist(string id)
{
var p = await _context.Playlists.FindAsync(id);
// Media are explicitly loaded below, so LoadAsync is not needed

var user = await _userUtils.GetUser(User);
if (p == null)
{
Expand Down
4 changes: 3 additions & 1 deletion ClassTranscribeServer/Utils/Authorization.cs
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,9 @@ protected override async Task HandleRequirementAsync(AuthorizationHandlerContext
}
else if (offering != null && offering.AccessType == AccessTypes.UniversityOnly && user != null)
{
var universityId = await _ctDbContext.CourseOfferings.Where(co => co.OfferingId == offering.Id)
var universityId = await _ctDbContext.CourseOfferings
.Include(co=>co.Course).ThenInclude(c=>c.Department)
.Where(co => co.OfferingId == offering.Id)
.Select(c => c.Course.Department.UniversityId).FirstAsync();
if (user.UniversityId == universityId)
{
Expand Down
81 changes: 45 additions & 36 deletions PythonRpcServer/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,49 +1,58 @@
backcall==0.1.0
certifi==2023.7.22
chardet==3.0.4
click==7.1.1
decorator==4.4.2
ffmpy==0.2.2
grpcio==1.28.1
grpcio-tools==1.28.1
idna==2.9
ipython==7.16.3
ipython-genutils==0.2.0
jedi==0.17.0
KalturaApiClient==18.11.0
lxml==4.9.1

#numpy==1.21.6
numpy
#Dependabot suggested 1.22 ; but that's not generally available yet
# 2024-1-17 Removed Pygments, ipython-genutils and ipython
# Also removed jedi. why autocomplete?? jedi==0.19.1
#Why? prompt-toolkit==3.0.43

#Did not try updating (maybe nexttime)
#protobuf==3.15.0 #4.25.2
protobuf==4.25.2

#opencv-python==4.2.0.34
# replaced by opencv-contrib-python
parso==0.7.0
pexpect==4.8.0
#Following updated to latest 2024-1-17:
certifi==2023.11.17
backcall==0.2.0
chardet==5.2.0
click==8.1.7
decorator==5.1.1
ffmpy==0.3.1
grpcio==1.60.0
grpcio-tools==1.60.0
idna==3.6
KalturaApiClient==19.3.0
lxml==5.1.0
parso==0.8.3
pexpect==4.9.0
pickleshare==0.7.5
prompt-toolkit==3.0.5
protobuf==3.15.0
ptyprocess==0.7.0
requests==2.31.0
requests-toolbelt==1.0.0
six==1.16.0
tqdm==4.66.1
traitlets==4.3.3
urllib3==2.1.0
wcwidth==0.2.13

# Not versioned
numpy
pytube # if not available, use the tar.gz package (see Dockerfile)


# protobuf version 3.18.3 causes NotImplementedError("To be implemented") in PythonRpcServer/mediaprovider.py
# Likely need to coordinate updating the C# version too

ptyprocess==0.6.0
Pygments==2.7.4
requests==2.23.0
requests-toolbelt==0.9.1

#############
# Gone
#ipython==7.16.3
#ipython-genutils==0.2.0
# Pygments==2.7.4

# No longer needed for remaining pythonrpcserver tasks
#scenedetect==0.5.2
six==1.14.0
tqdm==4.45.0
traitlets==4.3.3
urllib3==1.25.8
wcwidth==0.1.9
#scikit-image==0.17.2
#nltk==3.6.6
#pytesseract==0.3.7
#prefixspan==0.5.2
#opencv-contrib-python==4.5.3.56
#mtcnn-opencv==1.0.2
#decord==0.6.0

# Use latest version
pytube # if not available, use the tar.gz package (see Dockerfile)
#opencv-python==4.2.0.34; replaced by opencv-contrib-python
#
24 changes: 12 additions & 12 deletions TaskEngine.Dockerfile
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
FROM mcr.microsoft.com/dotnet/sdk:8.0.100-1-bookworm-slim as build
FROM mcr.microsoft.com/dotnet/sdk:8.0-bookworm-slim as build
# See https://mcr.microsoft.com/en-us/product/dotnet/sdk/tags
# 7.0.404-1 as build
# FROM mcr.microsoft.com/dotnet/core/sdk:3.1.201-bionic as build
#See more comments in API.Dockerfile

WORKDIR /
RUN git clone https://github.com/eficode/wait-for.git

WORKDIR /src
COPY ./ClassTranscribeDatabase/ClassTranscribeDatabase.csproj ./ClassTranscribeDatabase/ClassTranscribeDatabase.csproj
RUN dotnet restore ./ClassTranscribeDatabase/ClassTranscribeDatabase.csproj
# --verbosity normal|diagnostic
RUN dotnet restore --verbosity diagnostic ./ClassTranscribeDatabase/ClassTranscribeDatabase.csproj

COPY ./TaskEngine/TaskEngine.csproj ./TaskEngine/TaskEngine.csproj
RUN dotnet restore ./TaskEngine/TaskEngine.csproj
Expand All @@ -20,8 +20,11 @@ COPY ./TaskEngine ./TaskEngine
WORKDIR /src/TaskEngine
RUN dotnet publish TaskEngine.csproj -c Release -o /app --no-restore

FROM mcr.microsoft.com/dotnet/aspnet:8.0-bookworm-slim as publish_base
# FROM mcr.microsoft.com/dotnet/core/runtime:3.1.3-bionic as publish_base
#FROM mcr.microsoft.com/dotnet/aspnet:8.0-bookworm-slim as publish_base
FROM mcr.microsoft.com/dotnet/aspnet:8.0 as publish_base
# https://hub.docker.com/_/microsoft-dotnet-aspnet/

# force AMD64 build here: the ssl1.1.1 workaround below assumes amd64
# Install prerequisites for Azure Speech Services: build-essential libssl-dev ca-certificates libasound2 wget
# See https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/quickstarts/setup-platform

Expand All @@ -30,12 +33,9 @@ apt-get install -y netcat-traditional && apt-get -q update

# Microsoft 8.0 issue: https://github.com/Azure-Samples/cognitive-services-speech-sdk/issues/2204
# This will install OpenSSL 1.1.1 because it is needed by the Speech SDK.
RUN \
wget http://security.ubuntu.com/ubuntu/pool/main/o/openssl/libssl1.1_1.1.1f-1ubuntu2.20_amd64.deb && \
wget http://security.ubuntu.com/ubuntu/pool/main/o/openssl/libssl-dev_1.1.1f-1ubuntu2.20_amd64.deb && \
dpkg -i libssl1.1_1.1.1f-1ubuntu2.20_amd64.deb && \
dpkg -i libssl-dev_1.1.1f-1ubuntu2.20_amd64.deb && \
rm libssl1.1_1.1.1f-1ubuntu2.20_amd64.deb libssl-dev_1.1.1f-1ubuntu2.20_amd64.deb
# RUN ARCH=$(dpkg --print-architecture)
COPY ./install-speech-hack-libssl1.sh /
RUN /install-speech-hack-libssl1.sh


FROM publish_base as publish
Expand Down
15 changes: 9 additions & 6 deletions TaskEngine/Tasks/DescribeVideoTask.cs
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
using ClassTranscribeDatabase;
using ClassTranscribeDatabase.Models;
using ClassTranscribeDatabase.Services;using Microsoft.Extensions.Logging;
using Newtonsoft.Json.Linq;
using System.Collections.Generic;
using System.Collections.Generic;
using System.Diagnostics.CodeAnalysis;
using System;
using System.Linq;
using System.Threading.Tasks;
using Microsoft.Extensions.Logging;
using Microsoft.EntityFrameworkCore;
using Newtonsoft.Json.Linq;
using ClassTranscribeDatabase;
using ClassTranscribeDatabase.Models;
using ClassTranscribeDatabase.Services;
using static ClassTranscribeDatabase.CommonUtils;


Expand Down Expand Up @@ -35,13 +37,14 @@ protected async override Task OnConsume(string videoId, TaskParameters taskParam

using var _context = CTDbContext.CreateDbContext();
Video video = await _context.Videos.FindAsync(videoId);

if (!video.HasSceneObjectData())
{
GetLogger().LogInformation($"Describe Video {videoId}: Early return - no scene data to process");
return;
}
TextData td = await _context.TextData.FindAsync(video.SceneObjectDataId);
await _context.Transcriptions.Where(t => t.VideoId == videoId).LoadAsync();

JObject sceneData = td.GetAsJSON() as JObject;
JArray scenes = sceneData["Scenes"] as JArray;
Expand Down
11 changes: 9 additions & 2 deletions TaskEngine/Tasks/DownloadMediaTask.cs
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ protected override async Task OnConsume(string mediaId, TaskParameters taskParam
media = await _context.Medias.Where(m => m.Id == mediaId)
.Include(m => m.Playlist).FirstAsync();
GetLogger().LogInformation($"Downloading media id=({media.Id}), UniqueMediaIdentifier={media.UniqueMediaIdentifier}");
subdir = ToCourseOfferingSubDirectory(_context, media); // e.g. "/data/2203-abcd"
subdir = ToCourseOfferingSubDirectory(_context, media.Playlist); // e.g. "/data/2203-abcd"
}
Video video = new Video();
switch (media.SourceType)
Expand All @@ -72,7 +72,14 @@ protected override async Task OnConsume(string mediaId, TaskParameters taskParam

using (var _context = CTDbContext.CreateDbContext())
{
var latestMedia = await _context.Medias.FindAsync(media.Id);
var latestMedia = await _context.Medias
.Include(m=>m.Video).ThenInclude(v=>v.Video2)
.Include(m=>m.Video).ThenInclude(v=>v.Video1)
.FirstOrDefaultAsync(m => m.Id==media.Id); // Find does not support Include
if(latestMedia == null) { // should never happen...
GetLogger().LogInformation($"Media ({media.Id}): latestMedia == null !?");
return;
}
GetLogger().LogInformation($"Media ({media.Id}): latestMedia.Video == null is {latestMedia.Video == null}");

// Don't add video if there are already videos for the given media.
Expand Down
2 changes: 1 addition & 1 deletion TaskEngine/Tasks/TranscriptionTask.cs
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ public TranscriptionTask(RabbitMQConnection rabbitMQ, MSTranscriptionService msT
_captionQueries = captionQueries;

}
private async void buildMockCaptions(string videoId)
private async Task buildMockCaptions(string videoId)
{
GetLogger().LogInformation($"Building Mock Captions for video {videoId}");

Expand Down
4 changes: 3 additions & 1 deletion TestAzureCognitiveServices/.dockerignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,6 @@
**/*.*proj.user
**/charts
**/bin
**/obj
**/obj
*Dockerfile
*cproj
2 changes: 1 addition & 1 deletion TestAzureCognitiveServices/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ static void Main(string[] args)
}

async static Task<string> useAzureTranslationAsync() {
var defaultKeys = "996885bc424b4fda9df983c404e7309c,westus";
var defaultKeys = "123,westus";
Console.WriteLine($"Environment variable AZURE_SUBSCRIPTION_KEYS=key,region;...");
var keys = System.Environment.GetEnvironmentVariable("AZURE_SUBSCRIPTION_KEYS") ?? defaultKeys;

Expand Down
19 changes: 8 additions & 11 deletions TestAzureCognitiveServices/TestAzure.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,11 @@ COPY . .
RUN dotnet restore ./TestAzureCognitiveServices.csproj
RUN dotnet publish ./TestAzureCognitiveServices.csproj -c Release -o /app --no-restore

FROM mcr.microsoft.com/dotnet/aspnet:8.0-bookworm-slim as publish_base1
#FROM mcr.microsoft.com/dotnet/aspnet:8.0-bookworm-slim as publish_base1
# FROM mcr.microsoft.com/dotnet/aspnet:8.0-bookworm-slim-arm64v8 as publish_base1
FROM mcr.microsoft.com/dotnet/aspnet:8.0 as publish_base1

#COPY ./Program.cs /

# Grrr AzureServices does not work on dotnet8 on Debian 12 because it won't link to libssl3 - the fix below is needed short-term

Expand All @@ -17,16 +21,9 @@ RUN apt-get -y install build-essential libssl-dev libasound2 wget

# Microsoft 8.0 issue: https://github.com/Azure-Samples/cognitive-services-speech-sdk/issues/2204
# This will install OpenSSL 1.1.1 because it is needed by the Speech SDK.
RUN \
wget http://security.ubuntu.com/ubuntu/pool/main/o/openssl/libssl1.1_1.1.1f-1ubuntu2.20_amd64.deb && \
wget http://security.ubuntu.com/ubuntu/pool/main/o/openssl/libssl-dev_1.1.1f-1ubuntu2.20_amd64.deb && \
dpkg -i libssl1.1_1.1.1f-1ubuntu2.20_amd64.deb && \
dpkg -i libssl-dev_1.1.1f-1ubuntu2.20_amd64.deb && \
rm libssl1.1_1.1.1f-1ubuntu2.20_amd64.deb libssl-dev_1.1.1f-1ubuntu2.20_amd64.deb




#RUN ARCH=$(dpkg --print-architecture)
COPY ./install-libssl1.sh /
RUN /install-libssl1.sh

FROM publish_base1 as publish1
WORKDIR /app
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
</PropertyGroup>

<ItemGroup>
<PackageReference Include="Microsoft.CognitiveServices.Speech" Version="1.34.0" />
<PackageReference Include="Microsoft.CognitiveServices.Speech" Version="1.34.1" />
</ItemGroup>

</Project>
17 changes: 17 additions & 0 deletions TestAzureCognitiveServices/install-libssl1.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#!/bin/sh
# Temporary 2024 hack so the MS Speech SDK can run on dotnet8 images:
# downloads and installs the OpenSSL 1.1.1 packages (libssl1.1 and libssl-dev)
# for the architecture reported by dpkg, then removes the downloaded .deb files.
#
# Abort on the first failing command or unset variable, so that a failed
# download or install fails the Docker build instead of being masked by the
# exit status of the final "rm".
set -eu

ARCH=$(dpkg --print-architecture)
VERSION="1.1.1f-1ubuntu2.20"

# arm64 packages are fetched from the Ubuntu ports mirror; every other
# architecture is fetched from security.ubuntu.com.
# No trailing slash here: the separator is added when the URL is built.
if [ "$ARCH" = "arm64" ]; then
    BASE="http://ports.ubuntu.com/ubuntu-ports/pool/main/o/openssl"
else
    BASE="http://security.ubuntu.com/ubuntu/pool/main/o/openssl"
fi

LIBSSL_DEB="libssl1.1_${VERSION}_${ARCH}.deb"
LIBSSL_DEV_DEB="libssl-dev_${VERSION}_${ARCH}.deb"

# NOTE(review): packages are fetched over plain HTTP without a checksum check;
# consider pinning SHA-256 sums if this workaround becomes long-lived.
wget "$BASE/$LIBSSL_DEB"
wget "$BASE/$LIBSSL_DEV_DEB"

# Install the runtime library first; libssl-dev depends on it.
dpkg -i "$LIBSSL_DEB"
dpkg -i "$LIBSSL_DEV_DEB"

# Clean up the downloaded archives so they do not bloat the image layer.
rm "$LIBSSL_DEB" "$LIBSSL_DEV_DEB"


Loading

0 comments on commit 734d000

Please sign in to comment.