import spacy
import speech_recognition as sr
from pydub import AudioSegment


# Enhanced Sentence Structure Scoring
def evaluate_sentence_structure(doc):
    sentence_structure_score = 0
    sentence_count = len(list(doc.sents))

    if sentence_count == 0:
        return 0

    for sentence in doc.sents:
        # Score sentence length in tokens (moderate-length sentences score highest)
        sentence_length = len(sentence)
        if 10 <= sentence_length <= 20:
            sentence_length_score = 10
        elif 20 < sentence_length <= 30:
            sentence_length_score = 8
        elif sentence_length > 30:
            sentence_length_score = 5
        else:
            sentence_length_score = 2

        # Syntactic complexity: count unique dependency types in the sentence
        unique_dependencies = len({token.dep_ for token in sentence if token.dep_ != "punct"})
        if unique_dependencies > 6:  # Complex sentence
            syntax_complexity_score = 10
        elif 3 <= unique_dependencies <= 6:  # Moderately complex
            syntax_complexity_score = 7
        else:
            syntax_complexity_score = 4  # Simple sentence

        # Average the length and syntax scores
        sentence_score = (sentence_length_score + syntax_complexity_score) / 2
        sentence_structure_score += sentence_score

    # Average across all sentences, capped at the 10-point maximum
    return min(sentence_structure_score / sentence_count, 10)
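
# Illustrative spot-check for the scorer above (a sketch; "en_core_web_sm" is
# an assumed model name, any English spaCy pipeline with a parser behaves the
# same way):
#
#     nlp = spacy.load("en_core_web_sm")
#     evaluate_sentence_structure(nlp("The boy is hungry."))
#     # -> low score: 5 tokens, few dependency types
#     evaluate_sentence_structure(nlp(
#         "Although it was raining heavily, the children still walked to "
#         "school because the bus had already left."
#     ))
#     # -> higher score: moderate length, many unique dependency types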


# Grammar scoring based on Sentence Structure, Grammar Usage, and Vocabulary Range
def evaluate_grammar(text, doc):
    # 1. Sentence Structure
    sentence_structure_score = evaluate_sentence_structure(doc)
    errors = []

    # 2. Grammar Usage: a lightweight dependency-based heuristic.
    # Example:
    # - "The boy hungry."    # wrong: subject with no verb
    # - "The boy is hungry." # correct
    grammar_usage_score = 10  # Assume perfect grammar initially
    for token in doc:
        # A nominal subject should attach to a verb or an auxiliary; checking
        # only VERB would wrongly flag correct copular sentences such as
        # "The boy is hungry.", where the subject's head is the AUX "is".
        if token.dep_ == "nsubj" and token.head.pos_ not in ("VERB", "AUX"):
            errors.append(
                {
                    "word": token.text,
                    "position_in_text": token.i,
                    "error": "Subject without a verb",
                    "suggestion": "Ensure the subject is followed by a verb.",
                }
            )
            grammar_usage_score -= 2  # Deduct points for each detected mistake
    grammar_usage_score = max(grammar_usage_score, 0)  # Don't let the score go negative

    # 3. Vocabulary Range: type-token ratio over alphabetic tokens (punctuation
    # and digits are excluded so they don't dilute the ratio; guard against an
    # empty document to avoid division by zero)
    alpha_tokens = [token.text.lower() for token in doc if token.is_alpha]
    unique_words = set(alpha_tokens)
    vocabulary_range_score = (len(unique_words) / len(alpha_tokens)) * 10 if alpha_tokens else 0
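    # Worked example (illustrative): "The cat sat on the mat" yields 6
    # alphabetic tokens, 5 of them unique ("the" repeats), so the score is
    # 5 / 6 * 10 ≈ 8.33.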

    # Final Grammar Score using the formula: (Sentence Structure + Grammar Usage + Vocabulary Range) / 3
    grammar_score = (sentence_structure_score + grammar_usage_score + vocabulary_range_score) / 3

    return {
        "sentence_structure": round(sentence_structure_score, 2),
        "grammar_usage": round(grammar_usage_score, 2),
        "vocabulary_range": round(vocabulary_range_score, 2),
        "grammar_score": round(grammar_score, 2),
        "errors": errors,
        "text": text,
    }
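

# Minimal usage sketch (an assumption, not part of the original module: the
# "en_core_web_sm" model must be installed first, e.g. with
# `python -m spacy download en_core_web_sm`).
if __name__ == "__main__":
    nlp = spacy.load("en_core_web_sm")
    sample_text = (
        "The boy is hungry. He walks to the bakery because he wants fresh "
        "bread, although the shop closes early on Sundays."
    )
    report = evaluate_grammar(sample_text, nlp(sample_text))
    print("Sentence structure:", report["sentence_structure"])
    print("Grammar usage:     ", report["grammar_usage"])
    print("Vocabulary range:  ", report["vocabulary_range"])
    print("Grammar score:     ", report["grammar_score"])
    for err in report["errors"]:
        print("Flagged:", err)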