Skip to content

Commit

Permalink
Add Transcription Enabled Storage project (#740)
Browse files Browse the repository at this point in the history
* add transcription enabled storage proj

* add CreateByTimer

* update CreateByTimerFunction

* address comments

* update .csprojs

* address comments

* update guide and address final comments
  • Loading branch information
Henry van der Vegte authored Sep 4, 2020
1 parent 84c6ddb commit d09689f
Show file tree
Hide file tree
Showing 98 changed files with 5,508 additions and 0 deletions.
1 change: 1 addition & 0 deletions samples/batch/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,5 @@ Available samples:
| Language | Directory | Description |
| ---------- | -------- | ----------- |
| C# | [csharp](csharp) | C# calling batch transcription REST API through System.Net.Http |
| C# | [Transcription Enabled Storage](transcription-enabled-storage) | Project that automatically transcribes all audio files added to an Azure Storage container. Set up via an ARM template. |
| Python | [python](python) | Python client calling batch transcription REST API |
207 changes: 207 additions & 0 deletions samples/batch/transcription-enabled-storage/Connector/BatchClient.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,207 @@
// <copyright file="BatchClient.cs" company="Microsoft Corporation">
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
// </copyright>

namespace Connector
{
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net;
using System.Net.Http;
using System.Text;
using System.Threading.Tasks;
using Connector.Serializable.TranscriptionFiles;
using Microsoft.Extensions.Logging;
using Newtonsoft.Json;

/// <summary>
/// Thin client for the Speech-to-Text v3.0 batch transcription REST endpoints:
/// list, get, list files, create and delete transcriptions.
/// </summary>
/// <remarks>
/// Responses whose status code satisfies <see cref="IsThrottledOrTimeoutStatusCode"/> are surfaced as
/// <see cref="TimeoutException"/>; every other failure is surfaced as <see cref="WebException"/>.
/// </remarks>
public class BatchClient
{
    private const string SpeechToTextBasePath = "speechtotext/v3.0/";

    private readonly string SubscriptionKey;

    // Concatenated directly with the relative API path, so it has to end with a trailing slash.
    private readonly string HostName;

    private readonly ILogger Log;

    /// <summary>
    /// Initializes a new instance of the <see cref="BatchClient"/> class.
    /// </summary>
    /// <param name="subscriptionKey">Value sent in the <c>Ocp-Apim-Subscription-Key</c> header of every request.</param>
    /// <param name="hostName">Absolute base URL of the service; the relative API path is appended to it verbatim.</param>
    /// <param name="log">Logger that receives request and failure messages.</param>
    public BatchClient(string subscriptionKey, string hostName, ILogger log)
    {
        SubscriptionKey = subscriptionKey;
        HostName = hostName;
        Log = log;
    }

    /// <summary>
    /// Tells whether a status code is one this client reports as a <see cref="TimeoutException"/>.
    /// </summary>
    /// <param name="statusCode">Status code of the received response.</param>
    /// <returns><c>true</c> for 429, 504, 408, 502 and 404; otherwise <c>false</c>.</returns>
    public static bool IsThrottledOrTimeoutStatusCode(HttpStatusCode statusCode)
    {
        // NOTE(review): NotFound is kept in this list to preserve the existing contract;
        // confirm that treating 404 like a throttling response is intended.
        switch (statusCode)
        {
            case HttpStatusCode.TooManyRequests:
            case HttpStatusCode.GatewayTimeout:
            case HttpStatusCode.RequestTimeout:
            case HttpStatusCode.BadGateway:
            case HttpStatusCode.NotFound:
                return true;
            default:
                return false;
        }
    }

    /// <summary>
    /// Requests the transcription collection.
    /// </summary>
    /// <returns>The deserialized response of the <c>Transcriptions</c> endpoint.</returns>
    public Task<IEnumerable<Transcription>> GetTranscriptionsAsync()
    {
        var path = $"{HostName}{SpeechToTextBasePath}Transcriptions";
        return this.GetAsync<IEnumerable<Transcription>>(path);
    }

    /// <summary>
    /// Requests a single transcription.
    /// </summary>
    /// <param name="id">Identifier of the transcription.</param>
    /// <returns>The deserialized transcription.</returns>
    public Task<Transcription> GetTranscriptionAsync(Guid id)
    {
        var path = $"{HostName}{SpeechToTextBasePath}Transcriptions/{id}";
        return this.GetAsync<Transcription>(path);
    }

    /// <summary>
    /// Requests all files of a transcription, following <c>NextLink</c> until no further page is reported.
    /// </summary>
    /// <param name="id">Identifier of the transcription.</param>
    /// <returns>A single <see cref="TranscriptionFiles"/> holding the files of every page and no next link.</returns>
    public async Task<TranscriptionFiles> GetTranscriptionFilesAsync(Guid id)
    {
        var path = $"{HostName}{SpeechToTextBasePath}Transcriptions/{id}/files";

        var combinedTranscriptionFiles = new List<TranscriptionFile>();

        do
        {
            var transcriptionFiles = await this.GetAsync<TranscriptionFiles>(path).ConfigureAwait(false);
            combinedTranscriptionFiles.AddRange(transcriptionFiles.Values);

            // The next link is used as-is for the following request.
            path = transcriptionFiles.NextLink;
        }
        while (!string.IsNullOrEmpty(path));

        return new TranscriptionFiles(combinedTranscriptionFiles, null);
    }

    /// <summary>
    /// Creates a new transcription.
    /// </summary>
    /// <param name="name">Name of the transcription.</param>
    /// <param name="description">Description of the transcription.</param>
    /// <param name="locale">Locale of the audio.</param>
    /// <param name="properties">Properties forwarded with the transcription definition.</param>
    /// <param name="contentUrls">URLs of the audio content to transcribe.</param>
    /// <param name="modelIds">Identifiers of the models to use; <c>null</c> is treated as an empty list.</param>
    /// <returns>The URI taken from the <c>Location</c> header of the response.</returns>
    public Task<Uri> PostTranscriptionAsync(string name, string description, string locale, Dictionary<string, string> properties, IEnumerable<string> contentUrls, IEnumerable<Guid> modelIds)
    {
        var models = (modelIds ?? Enumerable.Empty<Guid>()).Select(m => ModelIdentity.Create(m)).ToList();
        var path = $"{SpeechToTextBasePath}Transcriptions/";

        var transcriptionDefinition = TranscriptionDefinition.Create(name, description, locale, contentUrls, properties, models);
        return this.PostAsJsonAsync(path, transcriptionDefinition);
    }

    /// <summary>
    /// Deletes a transcription.
    /// </summary>
    /// <param name="id">Identifier of the transcription.</param>
    /// <returns>A task that completes once the service has answered successfully.</returns>
    public Task DeleteTranscriptionAsync(Guid id)
    {
        var path = $"{SpeechToTextBasePath}Transcriptions/{id}";
        return this.DeleteAsync(path);
    }

    // Reads the Location header of a POST response as a URI.
    private static Uri GetLocationFromPostResponseAsync(WebHeaderCollection headers)
    {
        return new Uri(headers["Location"]);
    }

    // Sends the prepared request and returns the response only when it is 201 or 202.
    // The caller owns the returned response and has to dispose it.
    private async Task<HttpWebResponse> GetHttpWebResponseAsync(HttpWebRequest request)
    {
        HttpWebResponse response;

        try
        {
            response = (HttpWebResponse)await request.GetResponseAsync().ConfigureAwait(false);
        }
        catch (WebException ex) when (ex.Response is HttpWebResponse errorResponse)
        {
            // HttpWebRequest reports 4xx/5xx answers by throwing, so throttling has to be detected here;
            // the status checks further down only ever see responses that did not throw.
            var statusCode = errorResponse.StatusCode;

            if (IsThrottledOrTimeoutStatusCode(statusCode))
            {
                errorResponse.Dispose();
                throw new TimeoutException($"Request was throttled or timed out: Status Code {statusCode}", ex);
            }

            // The response is left undisposed on purpose: it stays attached to the rethrown exception.
            Log.LogInformation($"Failure: Status Code {statusCode}, {errorResponse.StatusDescription}");
            throw;
        }

        if (IsThrottledOrTimeoutStatusCode(response.StatusCode))
        {
            response.Dispose();
            throw new TimeoutException();
        }

        if (response.StatusCode != HttpStatusCode.Accepted && response.StatusCode != HttpStatusCode.Created)
        {
            // Build the message before disposing; the response must not be read afterwards.
            var failureMessage = $"Failure: Status Code {response.StatusCode}, {response.StatusDescription}";
            response.Dispose();
            Log.LogInformation(failureMessage);
            throw new WebException(failureMessage);
        }

        return response;
    }

    // Creates a client preconfigured with base address, timeout and subscription key header.
    // NOTE(review): a new client is created and disposed per request; consider sharing one instance.
    private HttpClient CreateHttpClient()
    {
        var client = HttpClientFactory.Create();
        client.Timeout = TimeSpan.FromMinutes(25);
        client.BaseAddress = new Uri(HostName);
        client.DefaultRequestHeaders.TransferEncodingChunked = null;
        client.DefaultRequestHeaders.Add("Ocp-Apim-Subscription-Key", SubscriptionKey);
        return client;
    }

    // Posts the definition as JSON and returns the URI from the Location header of the response.
    private async Task<Uri> PostAsJsonAsync(string path, TranscriptionDefinition payload)
    {
        var request = await BuildPostWebRequestAsync(path, payload).ConfigureAwait(false);

        // Log method and target; concatenating the request object would only print its type name.
        Log.LogInformation($"Request: {request.Method} {request.RequestUri}");

        using (var webResponse = await GetHttpWebResponseAsync(request).ConfigureAwait(false))
        {
            Log.LogInformation("StatusCode: " + webResponse.StatusCode);
            return GetLocationFromPostResponseAsync(webResponse.Headers);
        }
    }

    // Builds the POST request and writes the JSON serialized payload into its request stream.
    private async Task<HttpWebRequest> BuildPostWebRequestAsync(string path, TranscriptionDefinition payload)
    {
        var request = (HttpWebRequest)WebRequest.Create(new Uri(HostName + path));

        request.ContentType = "application/json; charset=UTF-8";
        request.Accept = "application/json";
        request.Method = "POST";
        request.Headers.Add("Ocp-Apim-Subscription-Key", SubscriptionKey);

        var payloadString = JsonConvert.SerializeObject(payload);

        // Must match the charset declared in ContentType; ASCII would replace every non-ASCII character with '?'.
        var data = Encoding.UTF8.GetBytes(payloadString);
        request.ContentLength = data.Length;

        using (var stream = await request.GetRequestStreamAsync().ConfigureAwait(false))
        {
            await stream.WriteAsync(data, 0, data.Length).ConfigureAwait(false);
        }

        return request;
    }

    // Sends a DELETE for the relative path and throws unless the service answers with a success code.
    private async Task DeleteAsync(string path)
    {
        Log.LogInformation($"Creating DELETE request for {HostName + path}");

        using (var httpClient = CreateHttpClient())
        using (var response = await httpClient.DeleteAsync(new Uri(HostName + path)).ConfigureAwait(false))
        {
            if (IsThrottledOrTimeoutStatusCode(response.StatusCode))
            {
                throw new TimeoutException();
            }

            if (!response.IsSuccessStatusCode)
            {
                var failureMessage = $"Failure: Status Code {response.StatusCode}, {response.Content?.Headers}";
                Log.LogInformation(failureMessage);
                throw new WebException(failureMessage);
            }
        }
    }

    // Sends a GET to the absolute URL and deserializes a successful JSON answer into TResponse.
    private async Task<TResponse> GetAsync<TResponse>(string path)
    {
        Log.LogInformation($"Creating GET request for {path}");

        using (var httpClient = CreateHttpClient())
        using (var response = await httpClient.GetAsync(new Uri(path)).ConfigureAwait(false))
        {
            if (IsThrottledOrTimeoutStatusCode(response.StatusCode))
            {
                throw new TimeoutException();
            }

            // A response without a Content-Type header has no ContentType object; treat it as "not JSON".
            var mediaType = response.Content?.Headers.ContentType?.MediaType;
            if (response.IsSuccessStatusCode && string.Equals(mediaType, "application/json", StringComparison.OrdinalIgnoreCase))
            {
                var result = await response.Content.ReadAsJsonAsync<TResponse>().ConfigureAwait(false);
                return result;
            }

            var failureMessage = $"Failure: Status Code {response.StatusCode}, {response.Content?.Headers}";
            Log.LogInformation(failureMessage);
            throw new WebException(failureMessage);
        }
    }
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
<Project Sdk="Microsoft.NET.Sdk">
<!-- SDK-style project targeting .NET Core 3.1. -->

<PropertyGroup>
<TargetFramework>netcoreapp3.1</TargetFramework>
</PropertyGroup>

<!-- Debug and Release use the same analyzer rule set and language version; they differ in debug symbol type and output path. -->
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|AnyCPU'">
<CodeAnalysisRuleSet>..\TranscriptionEnabledStorage.ruleset</CodeAnalysisRuleSet>
<LangVersion>latest</LangVersion>
<DebugType>full</DebugType>
<OutputPath>bin\Debug\</OutputPath>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|AnyCPU'">
<CodeAnalysisRuleSet>..\TranscriptionEnabledStorage.ruleset</CodeAnalysisRuleSet>
<LangVersion>latest</LangVersion>
<DebugType>pdbonly</DebugType>
<OutputPath>bin\Release\</OutputPath>
</PropertyGroup>

<!-- NuGet dependencies. The two analyzer packages (FxCop, StyleCop) are marked PrivateAssets=all so they do not flow to projects that reference this one. -->
<ItemGroup>
<PackageReference Include="Microsoft.Azure.WebJobs.Extensions.ServiceBus" Version="4.1.1" />
<PackageReference Include="Microsoft.CodeAnalysis.FxCopAnalyzers" Version="2.9.8">
<PrivateAssets>all</PrivateAssets>
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
</PackageReference>
<PackageReference Include="Microsoft.CognitiveServices.Speech" Version="1.11.0" />
<PackageReference Include="Microsoft.NET.Sdk.Functions" Version="3.0.5" />
<PackageReference Include="NAudio" Version="1.10.0" />
<PackageReference Include="StyleCop.Analyzers" Version="1.1.118">
<PrivateAssets>all</PrivateAssets>
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
</PackageReference>
<PackageReference Include="System.Data.SqlClient" Version="4.8.1" />
<PackageReference Include="WindowsAzure.Storage" Version="9.3.3" />
</ItemGroup>

<!-- connectorsettings.json is copied to the output directory on every build. -->
<ItemGroup>
<None Update="connectorsettings.json">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
</ItemGroup>
<!-- The stylecop.json from the parent directory is handed to the analyzers as an additional file and shown in the project as stylecop.json. -->
<ItemGroup>
<AdditionalFiles Include="..\stylecop.json">
<Link>stylecop.json</Link>
</AdditionalFiles>
</ItemGroup>
</Project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
// <copyright file="CostEstimation.cs" company="Microsoft Corporation">
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
// </copyright>

namespace Connector
{
using System;

/// <summary>
/// Estimates the price of transcribing audio of a given duration.
/// </summary>
public static class CostEstimation
{
    // All rates below are expressed in euro per hour of audio.
    private const double STTCostPerHour = 0.844d;

    private const double STTCustomModelCostPerHour = 1.181d;

    private const double TextAnalyticsCostPerHour = 0.72d;

    /// <summary>
    /// Calculates the estimated cost for one piece of audio.
    /// </summary>
    /// <param name="timeSpan">Duration of the audio.</param>
    /// <param name="numberOfChannels">Number of channels; the per-channel cost is multiplied by this value.</param>
    /// <param name="isCustomModel">Selects the custom model rate instead of the standard rate.</param>
    /// <param name="sentimentAnalysisAdded">Adds the text analytics rate once when set.</param>
    /// <param name="entityRedactionAdded">Adds the text analytics rate once more when set.</param>
    /// <returns>The estimated cost in euro.</returns>
    public static double GetCostEstimation(
        TimeSpan timeSpan,
        int numberOfChannels,
        bool isCustomModel,
        bool sentimentAnalysisAdded,
        bool entityRedactionAdded)
    {
        double hours = timeSpan.TotalHours;

        // Transcription itself: custom models are billed at their own rate.
        double total = hours * (isCustomModel ? STTCustomModelCostPerHour : STTCostPerHour);

        // Each enabled text analytics feature is billed separately at the same rate.
        double textAnalyticsCost = hours * TextAnalyticsCostPerHour;

        if (sentimentAnalysisAdded)
        {
            total += textAnalyticsCost;
        }

        if (entityRedactionAdded)
        {
            total += textAnalyticsCost;
        }

        return total * numberOfChannels;
    }
}
}
Loading

0 comments on commit d09689f

Please sign in to comment.