Spaces:
Runtime error
Runtime error
import spacy | |
import speech_recognition as sr | |
from pydub import AudioSegment | |
# Enhanced sentence structure scoring
def evaluate_sentence_structure(doc):
    """Score the sentence structure of a parsed document on a 0-10 scale.

    Each sentence receives two sub-scores that are averaged:

    * a length score based on its token count (10-20 tokens score best), and
    * a syntactic-complexity score based on how many distinct dependency
      labels (ignoring ``"punct"``) occur in it.

    Args:
        doc: Parsed document exposing ``sents``; every sentence must support
            ``len()`` and iterate over tokens that have a ``dep_`` attribute
            (presumably a spaCy ``Doc`` -- confirm against the caller).

    Returns:
        The mean per-sentence score, capped at 10, or ``0`` when the
        document contains no sentences.
    """
    total_sentences = sum(1 for _ in doc.sents)
    if not total_sentences:
        return 0

    running_total = 0
    for sent in doc.sents:
        # Length sub-score: moderate-length sentences score highest.
        n_tokens = len(sent)
        if n_tokens < 10:
            length_points = 2
        elif n_tokens <= 20:
            length_points = 10
        elif n_tokens <= 30:
            length_points = 8
        else:
            length_points = 5

        # Complexity sub-score: number of distinct non-punctuation
        # dependency labels used in the sentence.
        dep_labels = {tok.dep_ for tok in sent if tok.dep_ != "punct"}
        n_labels = len(dep_labels)
        if n_labels < 3:
            syntax_points = 4  # simple sentence
        elif n_labels <= 6:
            syntax_points = 7  # moderately complex
        else:
            syntax_points = 10  # complex sentence

        running_total += (length_points + syntax_points) / 2

    # Average over all sentences in the document.
    return min(running_total / total_sentences, 10)
# Grammar scoring based on sentence structure, grammar usage, and vocabulary range
def evaluate_grammar(text, doc):
    """Compute a composite grammar score for a parsed text.

    The final score is the mean of three components, each on a 0-10 scale:

    1. Sentence structure, from ``evaluate_sentence_structure(doc)``.
    2. Grammar usage: starts at 10 and loses 2 points for every nominal
       subject whose head is neither a verb nor an auxiliary, never
       dropping below 0.
    3. Vocabulary range: unique lower-cased alphabetic words divided by the
       total token count, scaled to 10.

    Args:
        text: The original text; echoed back unchanged in the result.
        doc: Parsed document for ``text``. It must support ``len()`` and
            iteration over tokens exposing ``text``, ``i``, ``dep_``,
            ``is_alpha`` and ``head.pos_`` (presumably a spaCy ``Doc`` --
            confirm against the caller).

    Returns:
        A dict with the keys ``sentence_structure``, ``grammar_usage``,
        ``vocabulary_range``, ``grammar_score`` (all rounded to 2 decimals),
        ``errors`` (a list of dicts describing each flagged subject) and
        ``text``.
    """
    # 1. Sentence structure
    sentence_structure_score = evaluate_sentence_structure(doc)

    # 2. Grammar usage
    # Example:
    # - The boy hungry.    # Wrong
    # - The boy is hungry  # Correct
    # Copulas and auxiliaries ("is", "was", "has", ...) are tagged AUX rather
    # than VERB under the Universal POS scheme, so both tags count as a
    # valid predicate for the subject.
    errors = []
    for token in doc:
        if token.dep_ == "nsubj" and token.head.pos_ not in ("VERB", "AUX"):
            errors.append(
                {
                    "word": token.text,
                    "position_in_text": token.i,
                    "error": "Subject without a verb",
                    "suggestion": "Ensure the subject is followed by a verb.",
                }
            )
    # Deduct 2 points per mistake, but keep the component within 0-10 so a
    # long, error-heavy text cannot push the overall score below zero.
    grammar_usage_score = max(10 - 2 * len(errors), 0)

    # 3. Vocabulary range (lexical diversity)
    unique_words = {token.text.lower() for token in doc if token.is_alpha}
    token_count = len(doc)
    if token_count:
        vocabulary_range_score = min(len(unique_words) / token_count, 1) * 10
    else:
        # Empty document (e.g. empty transcription): avoid dividing by zero.
        vocabulary_range_score = 0.0

    # Final score: (sentence structure + grammar usage + vocabulary range) / 3
    grammar_score = (
        sentence_structure_score + grammar_usage_score + vocabulary_range_score
    ) / 3

    return {
        "sentence_structure": round(sentence_structure_score, 2),
        "grammar_usage": round(grammar_usage_score, 2),
        "vocabulary_range": round(vocabulary_range_score, 2),
        "grammar_score": round(grammar_score, 2),
        "errors": errors,
        "text": text,
    }