Spaces:
Runtime error
Runtime error
import csv | |
import json | |
import os | |
import time | |
import azure.cognitiveservices.speech as speechsdk | |
from dotenv import load_dotenv | |
from pydub import AudioSegment | |
from app.config import settings | |
def text_details(response_json):
    """Extract mispronounced words and the display text from a recognition result.

    Args:
        response_json: Parsed recognition-result JSON (a dict). Only the first
            entry of its "NBest" list is inspected.

    Returns:
        A dict with two keys:
            "mispronunced_words": one dict per word whose
                PronunciationAssessment.ErrorType is "Mispronunciation",
                holding the "word", its "offset" and its "position_in_text"
                (index of the word within the "Words" list).
            "display_text": the "Display" string of the first N-best entry,
                or "" when it is absent.
        If the payload has no "NBest" entries, the list is empty and the
        text is "".
    """
    nbest_candidates = response_json.get("NBest") or []
    if not nbest_candidates:
        # No N-best entry to inspect: return an empty summary instead of
        # failing with IndexError on [0].
        return {"mispronunced_words": [], "display_text": ""}

    top_result = nbest_candidates[0]  # Only the top result is used
    word_list = top_result.get("Words") or []

    # enumerate() gives each word its true position; list.index() would
    # rescan the list per word and return the first *equal* entry.
    mispronunced_words = [
        {
            "word": word_info["Word"],
            "offset": word_info["Offset"],
            "position_in_text": position,
        }
        for position, word_info in enumerate(word_list)
        if (word_info.get("PronunciationAssessment") or {}).get("ErrorType")
        == "Mispronunciation"
    ]

    return {
        "mispronunced_words": mispronunced_words,
        "display_text": top_result.get("Display", ""),
    }
def pronunciation_assessment(file_path, language):
    """Run Azure pronunciation assessment on an audio file.

    An input whose extension is ".mp3" (any letter case) is first converted
    to a WAV file written next to it; that WAV file is what gets recognized
    and what is reported under "File".

    Args:
        file_path: Path of the audio file to assess.
        language: Value assigned to the speech config's
            speech_recognition_language.

    Returns:
        A dict with the keys "File", "pronunciation_score", "accuracy_score",
        "fluency_score", "completeness_score", "mispronunced_words" and
        "display_text" when the result reason is RecognizedSpeech;
        otherwise None.
    """
    source_path = str(file_path)
    stem, extension = os.path.splitext(source_path)
    if extension.lower() == ".mp3":
        # Swap only the trailing extension. str.replace would also rewrite
        # ".mp3" appearing elsewhere in the path, and on a pathlib.Path
        # `.replace` renames the file.
        wav_path = stem + ".wav"
        AudioSegment.from_mp3(source_path).export(wav_path, format="wav")
        file_path = wav_path

    # Initialize speech config
    speech_config = speechsdk.SpeechConfig(
        subscription=settings.AZURE_AI_SUBSCRIPTION_KEY,
        region=settings.AZURE_AI_REGION,
    )
    speech_config.speech_recognition_language = language

    # Create pronunciation assessment config
    # NOTE(review): no reference text is supplied here; confirm against the
    # Azure docs whether enable_miscue has any effect without one.
    pronunciation_config = speechsdk.PronunciationAssessmentConfig(
        grading_system=speechsdk.PronunciationAssessmentGradingSystem.HundredMark,
        granularity=speechsdk.PronunciationAssessmentGranularity.Phoneme,
        enable_miscue=True,
    )

    # Build the recognizer on the audio file and attach the assessment config
    audio_config = speechsdk.audio.AudioConfig(filename=file_path)
    recognizer = speechsdk.SpeechRecognizer(
        speech_config=speech_config, audio_config=audio_config
    )
    pronunciation_config.apply_to(recognizer)

    # Perform recognition and assessment
    result = recognizer.recognize_once()
    if result.reason != speechsdk.ResultReason.RecognizedSpeech:
        # Nothing was recognized: skip parsing the result payload, which is
        # only inspected on the success path.
        return None

    # Parse the payload only after recognition succeeded
    audio_text_details = text_details(json.loads(result.json))
    pronunciation_result = speechsdk.PronunciationAssessmentResult(result)

    return {
        "File": file_path,
        "pronunciation_score": pronunciation_result.pronunciation_score,
        "accuracy_score": pronunciation_result.accuracy_score,
        "fluency_score": pronunciation_result.fluency_score,
        "completeness_score": pronunciation_result.completeness_score,
        "mispronunced_words": audio_text_details.get("mispronunced_words", []),
        "display_text": audio_text_details.get("display_text", ""),
    }