# speech-analysis/app/utils/comprehension.py
import spacy
import speech_recognition as sr
from pydub import AudioSegment
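
# The imports above suggest this module also handles audio-to-text, though no
# such code ships in this file. The helper below is only a minimal sketch of
# that step, not the app's actual implementation: the helper name, the temp
# WAV path, and the choice of Google's free recognizer are all assumptions.
def transcribe_audio(audio_path, wav_path="temp.wav"):
    """Sketch: normalize audio to WAV with pydub, then transcribe it with
    speech_recognition; raises sr.UnknownValueError on unintelligible speech."""
    AudioSegment.from_file(audio_path).export(wav_path, format="wav")
    recognizer = sr.Recognizer()
    with sr.AudioFile(wav_path) as source:
        audio = recognizer.record(source)
    return recognizer.recognize_google(audio)
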
# Comprehension score evaluation (no reference text required)
def evaluate_comprehension(text, doc):
    """Score comprehension heuristically; `doc` is the spaCy parse of `text`
    (`text` itself is currently unused). Returns 0-10 scores per dimension
    plus their average."""
    # 1. Listening comprehension: average sentence length (in tokens) as a
    # rough proxy for clarity and coherence
    sentences = list(doc.sents)
    sentence_count = len(sentences)
    sentence_length_avg = sum(len(s) for s in sentences) / sentence_count if sentence_count > 0 else 0
    if sentence_length_avg <= 10:  # short, simple sentences
        listening_comprehension_score = 10
    elif sentence_length_avg <= 20:  # moderate-length sentences
        listening_comprehension_score = 8
    else:  # very long sentences may indicate reduced clarity
        listening_comprehension_score = 6
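    # Worked example: an average of 15 tokens per sentence falls in the
    # moderate band, so the listening score would be 8.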
    # 2. Topic relevance: lexical diversity as a proxy for internal consistency
    word_list = [token.text.lower() for token in doc if token.is_alpha]
    word_count = len(word_list)
    unique_words = len(set(word_list))
    if word_count == 0:
        topic_relevance_score = 0
    else:
        lexical_diversity = unique_words / word_count
        if lexical_diversity > 0.7:  # high diversity suggests varied, on-topic speech
            topic_relevance_score = 10
        elif lexical_diversity >= 0.4:  # moderate diversity
            topic_relevance_score = 7
        else:  # heavy repetition may indicate off-topic or filler speech
            topic_relevance_score = 4
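    # Worked example (illustrative): "the cat sat on the mat" has 6 alphabetic
    # tokens, 5 of them unique, so lexical_diversity = 5/6 ≈ 0.83 -> score 10.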
    # 3. Understanding: reuse the clarity-based listening score, since no
    # separate grammatical-structure signal is computed here
    understanding_score = listening_comprehension_score
    # Final comprehension score: mean of the three component scores
    comprehension_score = (listening_comprehension_score + topic_relevance_score + understanding_score) / 3
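    # e.g., component scores of 10, 7, and 10 average to 9.0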
    return {
        "listening_comprehension": round(listening_comprehension_score, 2),
        "topic_relevance": round(topic_relevance_score, 2),
        "understanding": round(understanding_score, 2),
        "comprehension_score": round(comprehension_score, 2),
    }
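

# Minimal usage sketch. Assumptions not in the original file: spaCy's
# `en_core_web_sm` model is installed, and the sample transcript below is
# invented purely for illustration.
if __name__ == "__main__":
    nlp = spacy.load("en_core_web_sm")
    sample = (
        "The team reviewed the quarterly results. Revenue grew steadily. "
        "Most questions focused on the new pricing model."
    )
    print(evaluate_comprehension(sample, nlp(sample)))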