azure-ai/AzureAi.Transcriber/Services/TranscribeService.cs
2024-05-08 15:29:14 +03:00

93 lines
No EOL
3.2 KiB
C#

using Microsoft.CognitiveServices.Speech;
using Microsoft.CognitiveServices.Speech.Audio;
namespace AzureAi.Transcriber.Services;
/// <summary>
/// Transcribes speech from WAV audio files using the Azure Cognitive Services Speech SDK.
/// </summary>
public interface ITranscribeService
{
/// <summary>
/// Transcribes a single utterance from the start of the given WAV file
/// (single-shot recognition; does not process the whole file).
/// </summary>
/// <param name="filePath">Path to a WAV audio file.</param>
/// <returns>The recognized text of the first utterance; may be empty if nothing was recognized.</returns>
Task<string> TranscribeSnippet(string filePath);
/// <summary>
/// Transcribes the entire WAV file using continuous recognition.
/// </summary>
/// <param name="filePath">Path to a WAV audio file.</param>
/// <returns>All recognized utterances, one per line.</returns>
Task<string> TranscribeFull(string filePath);
}
/// <summary>
/// Default <see cref="ITranscribeService"/> backed by the Azure Speech SDK.
/// Credentials are read from the SPEECH_KEY / SPEECH_REGION environment variables.
/// </summary>
public class TranscribeService: ITranscribeService
{
// Read once at type initialization; validated per instance in the constructor.
private static readonly string _speechKey = Environment.GetEnvironmentVariable("SPEECH_KEY") ?? string.Empty;
private static readonly string _speechRegion = Environment.GetEnvironmentVariable("SPEECH_REGION") ?? string.Empty;
private readonly ILogger<TranscribeService> _logger;

/// <summary>
/// Creates the service and validates that Speech credentials are configured.
/// </summary>
/// <param name="logger">Logger for diagnostics.</param>
/// <exception cref="InvalidOperationException">
/// Thrown when SPEECH_KEY or SPEECH_REGION is missing or blank.
/// </exception>
public TranscribeService(ILogger<TranscribeService> logger)
{
_logger = logger;
if (string.IsNullOrWhiteSpace(_speechKey) || string.IsNullOrWhiteSpace(_speechRegion))
{
throw new InvalidOperationException("Speech key and region must be set in environment variables.");
}
}

/// <summary>
/// Transcribes a single utterance from the start of <paramref name="filePath"/>
/// using single-shot recognition.
/// </summary>
/// <param name="filePath">Path to a WAV audio file.</param>
/// <returns>The recognized text; empty when nothing was recognized or recognition was canceled.</returns>
public async Task<string> TranscribeSnippet(string filePath)
{
_logger.LogInformation("Transcribing snippet of {filePath}", filePath);
var speechConfig = SpeechConfig.FromSubscription(_speechKey, _speechRegion);
speechConfig.SpeechRecognitionLanguage = "en-US";
using var audioConfig = AudioConfig.FromWavFileInput(filePath);
using var recognizer = new SpeechRecognizer(speechConfig, audioConfig);
var result = await recognizer.RecognizeOnceAsync();
// Surface failures in the logs instead of silently returning empty text.
if (result.Reason == ResultReason.NoMatch)
{
_logger.LogWarning("No speech could be recognized in {filePath}.", filePath);
}
else if (result.Reason == ResultReason.Canceled)
{
var cancellation = CancellationDetails.FromResult(result);
_logger.LogWarning("Recognition canceled: {reason}", cancellation.Reason);
if (cancellation.Reason == CancellationReason.Error)
{
_logger.LogError("Error: {error}", cancellation.ErrorDetails);
}
}
return result.Text;
}

/// <summary>
/// Transcribes the entire file at <paramref name="filePath"/> using continuous
/// recognition, collecting each finalized utterance.
/// </summary>
/// <param name="filePath">Path to a WAV audio file.</param>
/// <returns>All recognized utterances, one per line.</returns>
public async Task<string> TranscribeFull(string filePath)
{
_logger.LogInformation("Transcribing full length of {filePath}", filePath);
var speechConfig = SpeechConfig.FromSubscription(_speechKey, _speechRegion);
speechConfig.SpeechRecognitionLanguage = "en-US";
using var audioConfig = AudioConfig.FromWavFileInput(filePath);
using var recognizer = new SpeechRecognizer(speechConfig, audioConfig);
// RunContinuationsAsynchronously keeps our continuation off the SDK's event thread.
var stopRecognition = new TaskCompletionSource<int>(TaskCreationOptions.RunContinuationsAsynchronously);
var result = new System.Text.StringBuilder();
recognizer.Recognizing += (s, e) =>
{
// Interim hypotheses; only final results are appended below.
Logger.LogTrace("Recognizing: {text}", e.Result.Text);
};
recognizer.Recognized += (s, e) =>
{
if (e.Result.Reason == ResultReason.RecognizedSpeech)
{
_logger.LogTrace("Recognized: {text}", e.Result.Text);
result.AppendLine(e.Result.Text);
}
else if (e.Result.Reason == ResultReason.NoMatch)
{
_logger.LogWarning("No match found.");
}
};
recognizer.Canceled += (s, e) =>
{
// Canceled fires both on errors and on reaching end-of-stream.
_logger.LogWarning("Canceled: {reason}", e.Reason);
if (e.Reason == CancellationReason.Error)
{
_logger.LogError("Error: {error}", e.ErrorDetails);
}
stopRecognition.TrySetResult(0);
};
recognizer.SessionStopped += (s, e) =>
{
_logger.LogInformation("Session stopped.");
stopRecognition.TrySetResult(0);
};
await recognizer.StartContinuousRecognitionAsync();
// Await instead of Task.WaitAny: blocking inside an async method risks
// thread-pool starvation and deadlocks.
await stopRecognition.Task;
// Cleanly stop the session rather than abandoning it.
await recognizer.StopContinuousRecognitionAsync();
return result.ToString();
}
}