using Microsoft.CognitiveServices.Speech;
using Microsoft.CognitiveServices.Speech.Audio;
namespace AzureAi.Transcriber.Services;
/// <summary>
/// Transcribes WAV audio files using the Azure Cognitive Services Speech SDK.
/// </summary>
public interface ITranscribeService
{
    /// <summary>
    /// Transcribes only the first recognized utterance of the file
    /// (single-shot recognition).
    /// </summary>
    /// <param name="filePath">Path to a WAV file on disk.</param>
    /// <returns>The recognized text of the first utterance.</returns>
    Task<string> TranscribeSnippet(string filePath);

    /// <summary>
    /// Transcribes the entire file using continuous recognition.
    /// </summary>
    /// <param name="filePath">Path to a WAV file on disk.</param>
    /// <returns>The full transcript, one recognized phrase per line.</returns>
    Task<string> TranscribeFull(string filePath);
}
/// <summary>
/// Default <see cref="ITranscribeService"/> implementation backed by the Azure
/// Cognitive Services Speech SDK. Credentials are read from the SPEECH_KEY and
/// SPEECH_REGION environment variables.
/// </summary>
public class TranscribeService : ITranscribeService
{
    // Read once at type initialization; validated for presence in the constructor.
    private static readonly string _speechKey = Environment.GetEnvironmentVariable("SPEECH_KEY") ?? string.Empty;

    private static readonly string _speechRegion = Environment.GetEnvironmentVariable("SPEECH_REGION") ?? string.Empty;

    private readonly ILogger<TranscribeService> _logger;

    /// <summary>
    /// Creates the service and validates that speech credentials are configured.
    /// </summary>
    /// <param name="logger">Logger for diagnostic output.</param>
    /// <exception cref="ArgumentNullException">Thrown when <paramref name="logger"/> is null.</exception>
    /// <exception cref="InvalidOperationException">
    /// Thrown when SPEECH_KEY or SPEECH_REGION is missing or blank.
    /// </exception>
    public TranscribeService(ILogger<TranscribeService> logger)
    {
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));

        if (string.IsNullOrWhiteSpace(_speechKey) || string.IsNullOrWhiteSpace(_speechRegion))
        {
            throw new InvalidOperationException("Speech key and region must be set in environment variables.");
        }
    }

    /// <summary>
    /// Transcribes only the first recognized utterance of the file
    /// (single-shot recognition, up to ~30 seconds of audio per SDK behavior —
    /// TODO confirm against SDK docs for the pinned version).
    /// </summary>
    /// <param name="filePath">Path to a WAV file on disk.</param>
    /// <returns>The recognized text; empty when nothing was recognized.</returns>
    public async Task<string> TranscribeSnippet(string filePath)
    {
        _logger.LogInformation("Transcribing snippet of {filePath}", filePath);

        var speechConfig = SpeechConfig.FromSubscription(_speechKey, _speechRegion);
        speechConfig.SpeechRecognitionLanguage = "en-US";

        using var audioConfig = AudioConfig.FromWavFileInput(filePath);
        using var recognizer = new SpeechRecognizer(speechConfig, audioConfig);

        var result = await recognizer.RecognizeOnceAsync();

        // Surface failures in the log instead of silently returning empty text.
        if (result.Reason == ResultReason.Canceled)
        {
            var cancellation = CancellationDetails.FromResult(result);
            _logger.LogError("Recognition canceled: {reason} {details}", cancellation.Reason, cancellation.ErrorDetails);
        }

        return result.Text;
    }

    /// <summary>
    /// Transcribes the entire file using continuous recognition, collecting each
    /// recognized phrase on its own line until the session stops or is canceled.
    /// </summary>
    /// <param name="filePath">Path to a WAV file on disk.</param>
    /// <returns>The full transcript, one recognized phrase per line.</returns>
    public async Task<string> TranscribeFull(string filePath)
    {
        _logger.LogInformation("Transcribing full length of {filePath}", filePath);

        var speechConfig = SpeechConfig.FromSubscription(_speechKey, _speechRegion);
        // Match TranscribeSnippet: pin the recognition language explicitly.
        speechConfig.SpeechRecognitionLanguage = "en-US";

        using var audioConfig = AudioConfig.FromWavFileInput(filePath);
        using var recognizer = new SpeechRecognizer(speechConfig, audioConfig);

        // RunContinuationsAsynchronously keeps our continuation off the SDK's
        // event-callback thread when TrySetResult fires.
        var stopRecognition = new TaskCompletionSource<int>(TaskCreationOptions.RunContinuationsAsynchronously);

        var transcript = new System.Text.StringBuilder();

        recognizer.Recognizing += (s, e) =>
        {
            _logger.LogTrace("Recognizing: {text}", e.Result.Text);
        };

        recognizer.Recognized += (s, e) =>
        {
            if (e.Result.Reason == ResultReason.RecognizedSpeech)
            {
                _logger.LogTrace("Recognized: {text}", e.Result.Text);
                transcript.AppendLine(e.Result.Text);
            }
            else if (e.Result.Reason == ResultReason.NoMatch)
            {
                _logger.LogWarning("No match found.");
            }
        };

        recognizer.Canceled += (s, e) =>
        {
            _logger.LogWarning("Canceled: {reason}", e.Reason);

            if (e.Reason == CancellationReason.Error)
            {
                _logger.LogError("Error: {error}", e.ErrorDetails);
            }

            stopRecognition.TrySetResult(0);
        };

        recognizer.SessionStopped += (s, e) =>
        {
            _logger.LogInformation("Session stopped.");
            stopRecognition.TrySetResult(0);
        };

        await recognizer.StartContinuousRecognitionAsync();

        // BUG FIX: the original used Task.WaitAny, which synchronously blocks a
        // thread-pool thread inside an async method (starvation/deadlock risk).
        // Await the completion source instead.
        await stopRecognition.Task;

        // Cleanly end the continuous-recognition session before disposing.
        await recognizer.StopContinuousRecognitionAsync();

        return transcript.ToString();
    }
}