import spacy import speech_recognition as sr from pydub import AudioSegment # Enhanced Sentence Structure Scoring def evaluate_sentence_structure(doc): sentence_structure_score = 0 sentence_count = len(list(doc.sents)) if sentence_count == 0: return 0 for sentence in doc.sents: # Check sentence length (moderate-length sentences should score higher) sentence_length = len(sentence) if 10 <= sentence_length <= 20: sentence_length_score = 10 elif 20 < sentence_length <= 30: sentence_length_score = 8 else: sentence_length_score = 5 if sentence_length > 30 else 2 # Syntactic complexity: Count unique dependency types in the sentence unique_dependencies = len(set([token.dep_ for token in sentence if token.dep_ != "punct"])) if unique_dependencies > 6: # Complex sentence syntax_complexity_score = 10 elif 3 <= unique_dependencies <= 6: # Moderately complex syntax_complexity_score = 7 else: syntax_complexity_score = 4 # Simple sentence # Average the length and syntax scores sentence_score = (sentence_length_score + syntax_complexity_score) / 2 sentence_structure_score += sentence_score # Final average score for all sentences in the document return min((sentence_structure_score / sentence_count), 10) # Grammar scoring based on Sentence Structure, Grammar Usage, and Vocabulary Range def evaluate_grammar(text, doc): # 1. Sentence Structure sentence_structure_score = evaluate_sentence_structure(doc) errors = [] # 2. Grammar Usage: Use the same logic for now # Example: # - The boy hungry. # Wrong # - The boy is hungry # Correct grammar_usage_score = 10 # Assume perfect grammar initially for token in doc: if token.dep_ == "nsubj" and token.head.pos_ != "VERB": errors.append( { "word": token.text, "position_in_text": token.i, "error": "Subject without a verb", "suggestion": "Ensure the subject is followed by a verb.", } ) grammar_usage_score -= 2 # Deduct points for common grammar mistakes # 3. Vocabulary Range unique_words = set([token.text.lower() for token in doc if token.is_alpha]) # Get Unique words in lower case vocabulary_range_score = min(len(unique_words) / len(doc), 1) * 10 # Lexical diversity # Final Grammar Score using the formula: (Sentence Structure + Grammar Usage + Vocabulary Range) / 3 grammar_score = (sentence_structure_score + grammar_usage_score + vocabulary_range_score) / 3 return { "sentence_structure": round(sentence_structure_score, 2), "grammar_usage": round(grammar_usage_score, 2), "vocabulary_range": round(vocabulary_range_score, 2), "grammar_score": round(grammar_score, 2), "errors": errors, "text": text, }