Handle audio up to 30 seconds

Tomkarho 2024-05-08 13:14:57 +03:00
parent 24b726e82a
commit a6e5838d78
Signed by: tomkarho
GPG key ID: 8A4E9CBB072D6B19
4 changed files with 56 additions and 1 deletion

.gitignore

@@ -1,2 +1,3 @@
bin
obj
Data/*

AzureAi.Transcriber.csproj

@@ -6,4 +6,8 @@
    <ImplicitUsings>enable</ImplicitUsings>
  </PropertyGroup>

  <ItemGroup>
    <PackageReference Include="Microsoft.CognitiveServices.Speech" Version="1.37.0" />
  </ItemGroup>

</Project>

Program.cs

@@ -1,4 +1,5 @@
using AzureAi.Transcriber.Components;
using AzureAi.Transcriber.Services;

var builder = WebApplication.CreateBuilder(args);
@@ -6,8 +7,14 @@ var builder = WebApplication.CreateBuilder(args);
builder.Services.AddRazorComponents()
    .AddInteractiveServerComponents();

builder.Services.AddSingleton<IFileService, FileService>();
builder.Services.AddSingleton<ITranscribeService, TranscribeService>();

var app = builder.Build();

// Warmup: resolve the transcribe service once at startup so a missing
// SPEECH_KEY or SPEECH_REGION fails fast instead of on the first request.
app.Services.GetRequiredService<ITranscribeService>();

// Configure the HTTP request pipeline.
if (!app.Environment.IsDevelopment())
{

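The warmup call above only resolves ITranscribeService once at startup; at request time the service is meant to be injected wherever transcription is needed. Below is a minimal sketch of a hypothetical consumer, assuming the project's implicit usings; the TranscriptionRunner class and RunAsync name are illustrative and not part of this commit.

// Sketch only: a hypothetical consumer of the ITranscribeService registered above.
using AzureAi.Transcriber.Services;
using Microsoft.CognitiveServices.Speech;

public class TranscriptionRunner
{
    private readonly ITranscribeService _transcribeService;

    public TranscriptionRunner(ITranscribeService transcribeService)
    {
        _transcribeService = transcribeService;
    }

    public async Task<string> RunAsync(string wavPath)
    {
        SpeechRecognitionResult result = await _transcribeService.Transcribe(wavPath);

        // Only return text when the recognizer actually matched speech.
        return result.Reason == ResultReason.RecognizedSpeech
            ? result.Text
            : $"Recognition failed: {result.Reason}";
    }
}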
Services/TranscribeService.cs

@@ -0,0 +1,43 @@
using Microsoft.CognitiveServices.Speech;
using Microsoft.CognitiveServices.Speech.Audio;

namespace AzureAi.Transcriber.Services;

public interface ITranscribeService
{
    Task<SpeechRecognitionResult> Transcribe(string filePath);
}

public class TranscribeService : ITranscribeService
{
    private static readonly string _speechKey = Environment.GetEnvironmentVariable("SPEECH_KEY") ?? string.Empty;
    private static readonly string _speechRegion = Environment.GetEnvironmentVariable("SPEECH_REGION") ?? string.Empty;

    private readonly ILogger<TranscribeService> Logger;

    public TranscribeService(ILogger<TranscribeService> logger)
    {
        Logger = logger;

        // Fail fast when the Azure Speech credentials are not configured.
        if (string.IsNullOrWhiteSpace(_speechKey) || string.IsNullOrWhiteSpace(_speechRegion))
        {
            throw new InvalidOperationException("Speech key and region must be set in environment variables.");
        }
    }

    public async Task<SpeechRecognitionResult> Transcribe(string filePath)
    {
        Logger.LogInformation("Transcribing {filePath}", filePath);
        // Build the speech configuration from the subscription key and region.
        var speechConfig = SpeechConfig.FromSubscription(_speechKey, _speechRegion);
        speechConfig.SpeechRecognitionLanguage = "en-US";

        // Read the WAV file and run single-shot recognition; RecognizeOnceAsync
        // stops after the first utterance or roughly 30 seconds of audio.
        using var audioConfig = AudioConfig.FromWavFileInput(filePath);
        using var recognizer = new SpeechRecognizer(speechConfig, audioConfig);

        var result = await recognizer.RecognizeOnceAsync();
        return result;
    }
}
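Because RecognizeOnceAsync is single-shot, the service handles clips of up to about 30 seconds per call, which is what the commit title refers to. If longer recordings ever need to be supported, the Speech SDK's continuous-recognition API is the usual route; the sketch below shows that alternative as a hypothetical extra method on TranscribeService (TranscribeLong and its shape are illustrative, not part of this commit).

    // Sketch only, not part of this commit: continuous recognition for audio
    // longer than ~30 seconds, as a hypothetical companion to Transcribe().
    public async Task<string> TranscribeLong(string filePath)
    {
        var speechConfig = SpeechConfig.FromSubscription(_speechKey, _speechRegion);
        speechConfig.SpeechRecognitionLanguage = "en-US";

        using var audioConfig = AudioConfig.FromWavFileInput(filePath);
        using var recognizer = new SpeechRecognizer(speechConfig, audioConfig);

        var transcript = new System.Text.StringBuilder();
        var done = new TaskCompletionSource<bool>();

        // Append each recognized utterance as it arrives.
        recognizer.Recognized += (_, e) =>
        {
            if (e.Result.Reason == ResultReason.RecognizedSpeech)
            {
                transcript.AppendLine(e.Result.Text);
            }
        };

        // Stop waiting once the session ends or recognition is canceled.
        recognizer.SessionStopped += (_, _) => done.TrySetResult(true);
        recognizer.Canceled += (_, _) => done.TrySetResult(true);

        await recognizer.StartContinuousRecognitionAsync();
        await done.Task;
        await recognizer.StopContinuousRecognitionAsync();

        return transcript.ToString();
    }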