import spacy
import speech_recognition as sr
from pydub import AudioSegment


# Enhanced sentence-structure scoring
def evaluate_sentence_structure(doc):
    sentence_structure_score = 0
    sentence_count = len(list(doc.sents))
    if sentence_count == 0:
        return 0
    for sentence in doc.sents:
        # Sentence length in tokens (moderate-length sentences score higher)
        sentence_length = len(sentence)
        if 10 <= sentence_length <= 20:
            sentence_length_score = 10
        elif 20 < sentence_length <= 30:
            sentence_length_score = 8
        else:
            sentence_length_score = 5 if sentence_length > 30 else 2
        # Syntactic complexity: count unique dependency labels in the sentence
        unique_dependencies = len({token.dep_ for token in sentence if token.dep_ != "punct"})
        if unique_dependencies > 6:  # Complex sentence
            syntax_complexity_score = 10
        elif 3 <= unique_dependencies <= 6:  # Moderately complex
            syntax_complexity_score = 7
        else:  # Simple sentence
            syntax_complexity_score = 4
        # Average the length and syntax scores for this sentence
        sentence_score = (sentence_length_score + syntax_complexity_score) / 2
        sentence_structure_score += sentence_score
    # Final average score across all sentences, capped at 10
    return min(sentence_structure_score / sentence_count, 10)
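# Worked example (illustrative, not part of the original file): a 15-token
# sentence earns a length score of 10; if it also contains 7 unique
# dependency labels, complexity scores 10, so it contributes (10 + 10) / 2 = 10.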


# Grammar scoring based on sentence structure, grammar usage, and vocabulary range
def evaluate_grammar(text, doc):
    # 1. Sentence structure
    sentence_structure_score = evaluate_sentence_structure(doc)
    errors = []
    # 2. Grammar usage: a simple dependency heuristic for now.
    # Example:
    # - "The boy hungry."    -> wrong (subject without a verb)
    # - "The boy is hungry." -> correct
    grammar_usage_score = 10  # Assume perfect grammar initially
    for token in doc:
        # AUX covers copulas such as "is", which spaCy v3 does not tag as VERB
        if token.dep_ == "nsubj" and token.head.pos_ not in ("VERB", "AUX"):
            errors.append(
                {
                    "word": token.text,
                    "position_in_text": token.i,
                    "error": "Subject without a verb",
                    "suggestion": "Ensure the subject is followed by a verb.",
                }
            )
            grammar_usage_score -= 2  # Deduct points for each detected mistake
    grammar_usage_score = max(grammar_usage_score, 0)  # Never go below zero
    # 3. Vocabulary range: lexical diversity (unique lowercase words / total tokens)
    unique_words = {token.text.lower() for token in doc if token.is_alpha}
    vocabulary_range_score = min(len(unique_words) / len(doc), 1) * 10 if len(doc) else 0
    # Final grammar score: (sentence structure + grammar usage + vocabulary range) / 3
    grammar_score = (sentence_structure_score + grammar_usage_score + vocabulary_range_score) / 3
    return {
        "sentence_structure": round(sentence_structure_score, 2),
        "grammar_usage": round(grammar_usage_score, 2),
        "vocabulary_range": round(vocabulary_range_score, 2),
        "grammar_score": round(grammar_score, 2),
        "errors": errors,
        "text": text,
    }
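

# Minimal usage sketch (not part of the original file). Assumes the
# en_core_web_sm model is installed: python -m spacy download en_core_web_sm
if __name__ == "__main__":
    nlp = spacy.load("en_core_web_sm")
    sample_text = "The boy hungry. The boy is hungry."
    result = evaluate_grammar(sample_text, nlp(sample_text))
    print(result)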