import spacy
import speech_recognition as sr
from pydub import AudioSegment


# Enhanced sentence-structure scoring
def evaluate_sentence_structure(doc):
    sentence_structure_score = 0
    sentence_count = len(list(doc.sents))
    if sentence_count == 0:
        return 0
    for sentence in doc.sents:
        # Sentence length in tokens (moderate-length sentences score higher)
        sentence_length = len(sentence)
        if 10 <= sentence_length <= 20:
            sentence_length_score = 10
        elif 20 < sentence_length <= 30:
            sentence_length_score = 8
        else:
            sentence_length_score = 5 if sentence_length > 30 else 2
        # Syntactic complexity: count unique dependency labels in the sentence
        unique_dependencies = len({token.dep_ for token in sentence if token.dep_ != "punct"})
        if unique_dependencies > 6:  # Complex sentence
            syntax_complexity_score = 10
        elif 3 <= unique_dependencies <= 6:  # Moderately complex
            syntax_complexity_score = 7
        else:  # Simple sentence
            syntax_complexity_score = 4
        # Average the length and syntax scores for this sentence
        sentence_score = (sentence_length_score + syntax_complexity_score) / 2
        sentence_structure_score += sentence_score
    # Final average score across all sentences, capped at 10
    return min(sentence_structure_score / sentence_count, 10)
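# Worked example (illustrative, not part of the original file): a 15-token
# sentence earns a length score of 10; if it also contains 7 unique
# dependency labels, complexity scores 10, so it contributes (10 + 10) / 2 = 10.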


# Grammar scoring based on sentence structure, grammar usage, and vocabulary range
def evaluate_grammar(text, doc):
    # 1. Sentence structure
    sentence_structure_score = evaluate_sentence_structure(doc)
    errors = []
    # 2. Grammar usage: a simple dependency heuristic for now.
    # Example:
    # - "The boy hungry."    -> wrong (subject without a verb)
    # - "The boy is hungry." -> correct
    grammar_usage_score = 10  # Assume perfect grammar initially
    for token in doc:
        # AUX covers copulas such as "is", which spaCy v3 does not tag as VERB
        if token.dep_ == "nsubj" and token.head.pos_ not in ("VERB", "AUX"):
            errors.append(
                {
                    "word": token.text,
                    "position_in_text": token.i,
                    "error": "Subject without a verb",
                    "suggestion": "Ensure the subject is followed by a verb.",
                }
            )
            grammar_usage_score -= 2  # Deduct points for each detected mistake
    grammar_usage_score = max(grammar_usage_score, 0)  # Never go below zero
    # 3. Vocabulary range: lexical diversity (unique lowercase words / total tokens)
    unique_words = {token.text.lower() for token in doc if token.is_alpha}
    vocabulary_range_score = min(len(unique_words) / len(doc), 1) * 10 if len(doc) else 0
    # Final grammar score: (sentence structure + grammar usage + vocabulary range) / 3
    grammar_score = (sentence_structure_score + grammar_usage_score + vocabulary_range_score) / 3
    return {
        "sentence_structure": round(sentence_structure_score, 2),
        "grammar_usage": round(grammar_usage_score, 2),
        "vocabulary_range": round(vocabulary_range_score, 2),
        "grammar_score": round(grammar_score, 2),
        "errors": errors,
        "text": text,
    }
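

# Minimal usage sketch (not part of the original file). Assumes the
# en_core_web_sm model is installed: python -m spacy download en_core_web_sm
if __name__ == "__main__":
    nlp = spacy.load("en_core_web_sm")
    sample_text = "The boy hungry. The boy is hungry."
    result = evaluate_grammar(sample_text, nlp(sample_text))
    print(result)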