from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, pipeline
import difflib
import spacy
import re
from nltk.sentiment import SentimentIntensityAnalyzer
import nltk
from collections import Counter
import uvicorn
import os
import torch

# Download NLTK resources
try:
    nltk.download('vader_lexicon', quiet=True)
    nltk.download('punkt', quiet=True)
    nltk.download('stopwords', quiet=True)
except Exception:
    print("Could not download NLTK resources. Some features may be limited.")

app = FastAPI()

# Configure CORS
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # Allows all origins
    allow_credentials=True,
    allow_methods=["*"],  # Allows all methods
    allow_headers=["*"],  # Allows all headers
)

# Global variable for the pipeline
humanize_pipe = None

# Load NLP models
try:
    # Load spaCy model
    nlp = spacy.load("en_core_web_sm")

    # Initialize sentiment analyzer
    sentiment_analyzer = SentimentIntensityAnalyzer()
    print("NLP models loaded successfully!")
except Exception as e:
    print(f"Error loading NLP models: {e}")

    # Create fallback functions if models fail to load
    def mock_function(text):
        return "Model could not be loaded. This is a fallback response."


def get_humanize_pipeline():
    """
    Lazy-load the humanization pipeline on first use.
    Uses standard settings that don't require accelerate.
    """
    global humanize_pipe

    if humanize_pipe is None:
        try:
            print("Loading the humanizer model on CPU...")

            # Force CPU usage
            device = torch.device("cpu")

            # Load model with basic settings (no accelerate needed)
            model = AutoModelForSeq2SeqLM.from_pretrained(
                "danibor/flan-t5-base-humanizer",
                torch_dtype=torch.float32  # Use float32 instead of float16 for CPU
            )
            tokenizer = AutoTokenizer.from_pretrained("danibor/flan-t5-base-humanizer")

            # Create pipeline with basic settings
            humanize_pipe = pipeline(
                "text2text-generation",
                model=model,
                tokenizer=tokenizer,
                device=device  # Explicitly specify CPU
            )
            print("Humanizer model loaded successfully!")
            return humanize_pipe
        except Exception as e:
            print(f"Error loading humanizer model: {e}")

            # Create a simple pipeline-like function that just returns the input
            def simple_pipeline(text, **kwargs):
                return [{"generated_text": f"Could not process: {text} (Model failed to load)"}]

            humanize_pipe = simple_pipeline
            return humanize_pipe

    return humanize_pipe


# Define request models
class TextRequest(BaseModel):
    text: str


class HumanizeResponse(BaseModel):
    original_text: str
    humanized_text: str
    diff: list
    original_word_count: int
    humanized_word_count: int
    nlp_analysis: dict


class AnalyzeResponse(BaseModel):
    text: str
    word_count: int
    sentiment: dict
    entities: dict
    key_phrases: list
    readability: dict
    complexity: dict


@app.post("/humanize", response_model=HumanizeResponse)
async def humanize_text(request: TextRequest):
    input_text = request.text

    try:
        # Get or initialize the pipeline (named `humanizer` to avoid shadowing
        # the `pipeline` function imported from transformers)
        humanizer = get_humanize_pipeline()

        # Generate humanized text with basic settings
        result = humanizer(
            input_text,
            max_length=min(500, len(input_text) * 2),  # Limit max length
            do_sample=True
        )
        humanized_text = result[0]['generated_text']

        # Get the differences
        diff = get_diff(input_text, humanized_text)

        # Process both texts with NLP
        nlp_analysis = perform_nlp_analysis(input_text, humanized_text)

        return {
            'original_text': input_text,
            'humanized_text': humanized_text,
            'diff': diff,
            'original_word_count': len(input_text.split()),
            'humanized_word_count': len(humanized_text.split()),
            'nlp_analysis': nlp_analysis
        }
    except Exception as e:
        print(f"Error in humanize endpoint: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Error processing text: {str(e)}")


def get_diff(text1, text2):
    """
    Generate a list of changes between two texts.
    Returns a list of dicts with 'operation' and 'text' keys, where 'operation'
    is '+' for addition, '-' for deletion, or ' ' for unchanged.
    """
    d = difflib.Differ()
    diff = list(d.compare(text1.split(), text2.split()))

    result = []
    for item in diff:
        operation = item[0]
        if operation in ['+', '-', ' ']:
            text = item[2:]
            result.append({'operation': operation, 'text': text})

    return result


def perform_nlp_analysis(original_text, humanized_text):
    """
    Perform comprehensive NLP analysis on both original and humanized text.
    """
    # Process both texts with spaCy
    original_doc = nlp(original_text)
    humanized_doc = nlp(humanized_text)

    # Sentiment analysis
    original_sentiment = sentiment_analyzer.polarity_scores(original_text)
    humanized_sentiment = sentiment_analyzer.polarity_scores(humanized_text)

    # Extract named entities
    original_entities = extract_entities(original_doc)
    humanized_entities = extract_entities(humanized_doc)

    # Extract key phrases using noun chunks
    original_phrases = extract_key_phrases(original_doc)
    humanized_phrases = extract_key_phrases(humanized_doc)

    # Readability metrics
    original_readability = calculate_readability(original_text)
    humanized_readability = calculate_readability(humanized_text)

    # Complexity metrics
    original_complexity = analyze_complexity(original_doc)
    humanized_complexity = analyze_complexity(humanized_doc)

    # Compile all results
    result = {
        'original': {
            'sentiment': original_sentiment,
            'entities': original_entities,
            'key_phrases': original_phrases,
            'readability': original_readability,
            'complexity': original_complexity
        },
        'humanized': {
            'sentiment': humanized_sentiment,
            'entities': humanized_entities,
            'key_phrases': humanized_phrases,
            'readability': humanized_readability,
            'complexity': humanized_complexity
        }
    }

    return result


def extract_entities(doc):
    """Extract and categorize named entities from a spaCy document."""
    entities = {}
    for ent in doc.ents:
        if ent.label_ not in entities:
            entities[ent.label_] = []
        if ent.text not in entities[ent.label_]:
            entities[ent.label_].append(ent.text)
    return entities


def extract_key_phrases(doc):
    """Extract key phrases using noun chunks."""
    return [chunk.text for chunk in doc.noun_chunks][:10]  # Limit to top 10


def calculate_readability(text):
    """Calculate basic readability metrics."""
    # Count sentences
    sentences = len(nltk.sent_tokenize(text))
    if sentences == 0:
        sentences = 1  # Avoid division by zero

    # Count words
    words = len(text.split())
    if words == 0:
        words = 1  # Avoid division by zero

    # Average words per sentence
    avg_words_per_sentence = words / sentences

    # Count syllables (simplified approach)
    syllables = count_syllables(text)

    # Calculate Flesch Reading Ease
    flesch = 206.835 - 1.015 * (words / sentences) - 84.6 * (syllables / words)

    return {
        'sentence_count': sentences,
        'word_count': words,
        'avg_words_per_sentence': round(avg_words_per_sentence, 2),
        'syllable_count': syllables,
        'flesch_reading_ease': round(flesch, 2)
    }


def count_syllables(text):
    """Count syllables in text (simplified approach)."""
    # This is a simplified syllable counter
    text = text.lower()
    text = re.sub(r'[^a-zA-Z]', ' ', text)
    words = text.split()

    count = 0
    for word in words:
        word = word.strip()
        if not word:
            continue

        # Count vowel groups as syllables, dropping a trailing silent 'e'
        if word[-1] == 'e':
            word = word[:-1]

        vowel_count = len(re.findall(r'[aeiouy]+', word))
        if vowel_count == 0:
            vowel_count = 1
        count += vowel_count

    return count


def analyze_complexity(doc):
    """Analyze text complexity using POS tags and dependency parsing."""
    # Count POS tags
    pos_counts = Counter([token.pos_ for token in doc])

    # Calculate lexical diversity
    total_tokens = len(doc)
    unique_tokens = len(set([token.text.lower() for token in doc]))
    lexical_diversity = unique_tokens / total_tokens if total_tokens > 0 else 0

    # Count dependency relationship types
    dep_counts = Counter([token.dep_ for token in doc])

    return {
        'pos_distribution': dict(pos_counts),
        'lexical_diversity': round(lexical_diversity, 4),
        'dependency_types': dict(dep_counts)
    }


@app.post("/analyze", response_model=AnalyzeResponse)
async def analyze_text(request: TextRequest):
    """Endpoint to just analyze text without humanizing it."""
    input_text = request.text

    try:
        # Process text with NLP
        doc = nlp(input_text)

        # Analyze text
        sentiment = sentiment_analyzer.polarity_scores(input_text)
        entities = extract_entities(doc)
        key_phrases = extract_key_phrases(doc)
        readability = calculate_readability(input_text)
        complexity = analyze_complexity(doc)

        return {
            'text': input_text,
            'word_count': len(input_text.split()),
            'sentiment': sentiment,
            'entities': entities,
            'key_phrases': key_phrases,
            'readability': readability,
            'complexity': complexity
        }
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error analyzing text: {str(e)}")


# Add a root endpoint for Hugging Face Spaces health check
@app.get("/")
async def root():
    return {"message": "Text Analysis and Humanization API is running!"}


# For local development
if __name__ == "__main__":
    uvicorn.run("app:app", host="0.0.0.0", port=7860, reload=True)
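

# Example client call for the /humanize endpoint. This is an illustrative sketch
# only: it assumes the server is running locally on port 7860 (as configured
# above) and that the third-party `requests` package is installed; the sample
# input text is made up.
#
#   import requests
#
#   resp = requests.post(
#       "http://localhost:7860/humanize",
#       json={"text": "The quarterly report indicates a substantial increase in revenue."},
#   )
#   data = resp.json()
#   print(data["humanized_text"])      # rewritten text from the model
#   print(data["nlp_analysis"].keys()) # 'original' and 'humanized' analyses
#
# The /analyze endpoint accepts the same JSON body and returns the analysis
# fields (sentiment, entities, key_phrases, readability, complexity) without
# running the humanizer model.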