File size: 2,112 Bytes
79b7942
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
from collections import Counter

import spacy
import speech_recognition as sr
from pydub import AudioSegment


# Comprehension Score Evaluation (No Reference Text)
def evaluate_comprehension(text, doc):
    """Compute heuristic comprehension scores for a parsed transcript.

    No reference text is used: every score is derived from the structure
    of ``doc`` itself (average sentence length and lexical diversity).

    Args:
        text: Unused by this function; kept so existing callers that pass
            it positionally continue to work.
        doc: Parsed document. It must expose ``sents`` (an iterable of
            sentences supporting ``len()``) and be iterable over tokens
            that provide ``text`` and ``is_alpha``.
            NOTE(review): presumably a spaCy ``Doc`` -- confirm with caller.

    Returns:
        dict with the keys ``"listening_comprehension"``,
        ``"topic_relevance"``, ``"understanding"`` and
        ``"comprehension_score"``; the last one is the mean of the other
        three, and every value is rounded to two decimals.
    """
    # Materialise once: the sentences are needed for both count and lengths.
    sentences = list(doc.sents)

    # 1. Listening comprehension: shorter sentences are treated as clearer.
    if not sentences:
        # Nothing was said, so there is no clarity to reward. Without this
        # guard an average length of 0 would fall in the "short sentence"
        # bucket and an empty document would earn the top score.
        listening_comprehension_score = 0
    else:
        avg_sentence_length = sum(len(sent) for sent in sentences) / len(sentences)
        if avg_sentence_length <= 10:  # Short and simple sentences
            listening_comprehension_score = 10
        elif avg_sentence_length <= 20:  # Moderate length sentences
            listening_comprehension_score = 8
        else:  # Longer sentences may indicate less clarity
            listening_comprehension_score = 6

    # 2. Topic relevance: lexical diversity (unique / total alphabetic
    #    words, compared case-insensitively).
    words = [token.text.lower() for token in doc if token.is_alpha]
    if not words:
        topic_relevance_score = 0
    else:
        lexical_diversity = len(set(words)) / len(words)
        if lexical_diversity > 0.7:  # High diversity
            topic_relevance_score = 10
        elif lexical_diversity >= 0.4:  # Moderate diversity
            topic_relevance_score = 7
        else:  # Low diversity indicates repetition
            topic_relevance_score = 4

    # 3. Understanding: deliberately reuses the clarity-based listening
    #    score; there is no separate signal for it.
    understanding_score = listening_comprehension_score

    # Final score: plain mean of the three components.
    comprehension_score = (
        listening_comprehension_score + topic_relevance_score + understanding_score
    ) / 3

    return {
        "listening_comprehension": round(listening_comprehension_score, 2),
        "topic_relevance": round(topic_relevance_score, 2),
        "understanding": round(understanding_score, 2),
        "comprehension_score": round(comprehension_score, 2),
    }