Handle audio up to 30 seconds

Tomkarho 2024-05-08 13:14:57 +03:00
parent 24b726e82a
commit a6e5838d78
Signed by: tomkarho
GPG key ID: 8A4E9CBB072D6B19
4 changed files with 56 additions and 1 deletion

.gitignore

@@ -1,2 +1,3 @@
bin
obj
Data/*

AzureAi.Transcriber.csproj

@@ -6,4 +6,8 @@
    <ImplicitUsings>enable</ImplicitUsings>
  </PropertyGroup>

  <ItemGroup>
    <PackageReference Include="Microsoft.CognitiveServices.Speech" Version="1.37.0" />
  </ItemGroup>

</Project>

Program.cs

@@ -1,4 +1,5 @@
using AzureAi.Transcriber.Components;
using AzureAi.Transcriber.Services;

var builder = WebApplication.CreateBuilder(args);
@@ -6,8 +7,14 @@ var builder = WebApplication.CreateBuilder(args);
builder.Services.AddRazorComponents()
    .AddInteractiveServerComponents();

builder.Services.AddSingleton<IFileService, FileService>();
builder.Services.AddSingleton<ITranscribeService, TranscribeService>();

var app = builder.Build();

// Warmup: resolve the transcribe service once at startup so a missing
// SPEECH_KEY or SPEECH_REGION fails fast instead of on the first request.
app.Services.GetRequiredService<ITranscribeService>();

// Configure the HTTP request pipeline.
if (!app.Environment.IsDevelopment())
{

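The warmup call above only resolves ITranscribeService once at startup; at request time the service is meant to be injected wherever transcription is needed. Below is a minimal sketch of a hypothetical consumer, assuming the project's implicit usings; the TranscriptionRunner class and RunAsync name are illustrative and not part of this commit.

// Sketch only: a hypothetical consumer of the ITranscribeService registered above.
using AzureAi.Transcriber.Services;
using Microsoft.CognitiveServices.Speech;

public class TranscriptionRunner
{
    private readonly ITranscribeService _transcribeService;

    public TranscriptionRunner(ITranscribeService transcribeService)
    {
        _transcribeService = transcribeService;
    }

    public async Task<string> RunAsync(string wavPath)
    {
        SpeechRecognitionResult result = await _transcribeService.Transcribe(wavPath);

        // Only return text when the recognizer actually matched speech.
        return result.Reason == ResultReason.RecognizedSpeech
            ? result.Text
            : $"Recognition failed: {result.Reason}";
    }
}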
Services/TranscribeService.cs

@@ -0,0 +1,43 @@
using Microsoft.CognitiveServices.Speech;
using Microsoft.CognitiveServices.Speech.Audio;

namespace AzureAi.Transcriber.Services;

public interface ITranscribeService
{
    Task<SpeechRecognitionResult> Transcribe(string filePath);
}

public class TranscribeService : ITranscribeService
{
    private static readonly string _speechKey = Environment.GetEnvironmentVariable("SPEECH_KEY") ?? string.Empty;
    private static readonly string _speechRegion = Environment.GetEnvironmentVariable("SPEECH_REGION") ?? string.Empty;

    private readonly ILogger<TranscribeService> Logger;

    public TranscribeService(ILogger<TranscribeService> logger)
    {
        Logger = logger;

        // Fail fast when the Azure Speech credentials are not configured.
        if (string.IsNullOrWhiteSpace(_speechKey) || string.IsNullOrWhiteSpace(_speechRegion))
        {
            throw new InvalidOperationException("Speech key and region must be set in environment variables.");
        }
    }

    public async Task<SpeechRecognitionResult> Transcribe(string filePath)
    {
        Logger.LogInformation("Transcribing {filePath}", filePath);
        // Build the speech configuration from the subscription key and region.
        var speechConfig = SpeechConfig.FromSubscription(_speechKey, _speechRegion);
        speechConfig.SpeechRecognitionLanguage = "en-US";

        // Read the WAV file and run single-shot recognition; RecognizeOnceAsync
        // stops after the first utterance or roughly 30 seconds of audio.
        using var audioConfig = AudioConfig.FromWavFileInput(filePath);
        using var recognizer = new SpeechRecognizer(speechConfig, audioConfig);

        var result = await recognizer.RecognizeOnceAsync();
        return result;
    }
}
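Because RecognizeOnceAsync is single-shot, the service handles clips of up to about 30 seconds per call, which is what the commit title refers to. If longer recordings ever need to be supported, the Speech SDK's continuous-recognition API is the usual route; the sketch below shows that alternative as a hypothetical extra method on TranscribeService (TranscribeLong and its shape are illustrative, not part of this commit).

    // Sketch only, not part of this commit: continuous recognition for audio
    // longer than ~30 seconds, as a hypothetical companion to Transcribe().
    public async Task<string> TranscribeLong(string filePath)
    {
        var speechConfig = SpeechConfig.FromSubscription(_speechKey, _speechRegion);
        speechConfig.SpeechRecognitionLanguage = "en-US";

        using var audioConfig = AudioConfig.FromWavFileInput(filePath);
        using var recognizer = new SpeechRecognizer(speechConfig, audioConfig);

        var transcript = new System.Text.StringBuilder();
        var done = new TaskCompletionSource<bool>();

        // Append each recognized utterance as it arrives.
        recognizer.Recognized += (_, e) =>
        {
            if (e.Result.Reason == ResultReason.RecognizedSpeech)
            {
                transcript.AppendLine(e.Result.Text);
            }
        };

        // Stop waiting once the session ends or recognition is canceled.
        recognizer.SessionStopped += (_, _) => done.TrySetResult(true);
        recognizer.Canceled += (_, _) => done.TrySetResult(true);

        await recognizer.StartContinuousRecognitionAsync();
        await done.Task;
        await recognizer.StopContinuousRecognitionAsync();

        return transcript.ToString();
    }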