from collections import Counter
import spacy
import speech_recognition as sr
from pydub import AudioSegment


# Comprehension Score Evaluation (No Reference Text)
def evaluate_comprehension(text, doc):
    """Heuristically score spoken-text comprehension without a reference transcript.

    Args:
        text: Raw transcript string. Currently unused; kept so the call
            signature stays compatible with existing callers.
        doc: A spaCy ``Doc`` built from the transcript. Only ``doc.sents``,
            token iteration, and token attributes (``text``, ``is_alpha``)
            are used, so any duck-typed object exposing those also works.

    Returns:
        dict with keys ``listening_comprehension``, ``topic_relevance``,
        ``understanding``, and ``comprehension_score`` (the mean of the
        other three), each rounded to 2 decimal places.
    """
    # Materialize sentences once: spaCy's doc.sents is a generator, and
    # iterating it twice (len + sum) re-runs sentence segmentation.
    sentences = list(doc.sents)
    sentence_count = len(sentences)

    # 1. Listening comprehension: shorter sentences are treated as clearer.
    if sentence_count == 0:
        # Empty input previously defaulted avg length to 0 and fell into the
        # "<= 10" branch, awarding the MAXIMUM score to nothing; score 0.
        listening_comprehension_score = 0
    else:
        sentence_length_avg = sum(len(sent) for sent in sentences) / sentence_count
        if sentence_length_avg <= 10:      # short, simple sentences
            listening_comprehension_score = 10
        elif sentence_length_avg <= 20:    # moderate length
            listening_comprehension_score = 8
        else:                              # long sentences: less clarity
            listening_comprehension_score = 6

    # 2. Topic relevance: lexical diversity (type/token ratio) as a proxy
    #    for on-topic speech; heavy repetition is penalized.
    word_list = [token.text.lower() for token in doc if token.is_alpha]
    word_count = len(word_list)
    if word_count == 0:
        topic_relevance_score = 0
    else:
        lexical_diversity = len(set(word_list)) / word_count
        if lexical_diversity > 0.7:        # high diversity, likely relevant
            topic_relevance_score = 10
        elif lexical_diversity >= 0.4:     # moderate diversity
            topic_relevance_score = 7
        else:                              # low diversity: repetition
            topic_relevance_score = 4

    # 3. Understanding: reuses the clarity-based listening score, since it
    #    is already derived from sentence simplicity.
    understanding_score = listening_comprehension_score

    # Final score: simple mean of the three components.
    comprehension_score = (
        listening_comprehension_score + topic_relevance_score + understanding_score
    ) / 3

    return {
        "listening_comprehension": round(listening_comprehension_score, 2),
        "topic_relevance": round(topic_relevance_score, 2),
        "understanding": round(understanding_score, 2),
        "comprehension_score": round(comprehension_score, 2),
    }