Update app.py

app.py CHANGED
@@ -1,29 +1,25 @@
 from fastapi import FastAPI, UploadFile, File, Form, HTTPException, Request
 from fastapi.staticfiles import StaticFiles
 from fastapi.responses import RedirectResponse, JSONResponse, HTMLResponse
-from
-from transformers import pipeline, ViltProcessor, ViltForQuestionAnswering
+from transformers import pipeline, ViltProcessor, ViltForQuestionAnswering, M2M100ForConditionalGeneration, M2M100Tokenizer
 from typing import Optional, Dict, Any, List
 import logging
 import time
 import os
 import io
 import json
+import re
 from PIL import Image
 from docx import Document
 import fitz  # PyMuPDF
 import pandas as pd
 from functools import lru_cache
-import re
 import torch
 import numpy as np
 from pydantic import BaseModel
 import asyncio
 import google.generativeai as genai
 
-# Set the TRANSFORMERS_CACHE environment variable to a writable directory
-os.environ["HF_HOME"] = "/tmp/huggingface_cache"
-
 # Configure logging
 logging.basicConfig(
     level=logging.INFO,
@@ -37,15 +33,18 @@ os.makedirs(upload_dir, exist_ok=True)
 
 app = FastAPI(
     title="Cosmic AI Assistant",
-    description="An advanced AI assistant with space-themed interface and
+    description="An advanced AI assistant with space-themed interface and translation features",
     version="2.0.0"
 )
 
 # Mount static files
 app.mount("/static", StaticFiles(directory="static"), name="static")
 
-#
-
+# Mount videos directory
+app.mount("/videos", StaticFiles(directory="videos"), name="videos")
+
+# Mount images directory
+app.mount("/images", StaticFiles(directory="images"), name="images")
 
 # Gemini API Configuration
 API_KEY = "AIzaSyCwmgD8KxzWiuivtySNtcZF_rfTvx9s9sY"  # Replace with your actual API key
@@ -55,14 +54,32 @@ genai.configure(api_key=API_KEY)
 MODELS = {
     "summarization": "sshleifer/distilbart-cnn-12-6",
     "image-to-text": "Salesforce/blip-image-captioning-large",
-    "question-answering": "deepset/roberta-base-squad2",
     "visual-qa": "dandelin/vilt-b32-finetuned-vqa",
-    "
-    "
-
+    "chatbot": "gemini-1.5-pro",  # Handles both chat and text generation
+    "translation": "facebook/m2m100_418M"
+}
+
+# Supported languages for translation
+SUPPORTED_LANGUAGES = {
+    "english": "en",
+    "french": "fr",
+    "german": "de",
+    "spanish": "es",
+    "italian": "it",
+    "russian": "ru",
+    "chinese": "zh",
+    "japanese": "ja",
+    "arabic": "ar",
+    "hindi": "hi",
+    "portuguese": "pt",
+    "korean": "ko"
 }
 
-#
+# Global variables for pre-loaded translation model
+translation_model = None
+translation_tokenizer = None
+
+# Cache for model loading (excluding translation)
 @lru_cache(maxsize=8)
 def load_model(task: str, model_name: str = None):
     """Cached model loader with proper task names and error handling"""
@@ -72,11 +89,8 @@ def load_model(task: str, model_name: str = None):
 
     model_to_load = model_name or MODELS.get(task)
 
-    if task
-
-        model = genai.GenerativeModel(model_to_load)
-        logger.info(f"Gemini model loaded in {time.time() - start_time:.2f}s")
-        return model
+    if task == "chatbot":  # Gemini handles both chat and text generation
+        return genai.GenerativeModel(model_to_load)
 
     if task == "visual-qa":
         processor = ViltProcessor.from_pretrained(model_to_load)
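Note (not part of the commit): the `chatbot` branch above returns a raw `genai.GenerativeModel` handle rather than a Hugging Face pipeline. A minimal sketch of how such a handle is driven, assuming the google-generativeai package and a placeholder key:

```python
import google.generativeai as genai

genai.configure(api_key="YOUR_API_KEY")          # placeholder, not a real key
model = genai.GenerativeModel("gemini-1.5-pro")  # same id as MODELS["chatbot"]
reply = model.generate_content("Say hello from orbit.")
print(reply.text.strip())
```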
@@ -97,48 +111,96 @@ def load_model(task: str, model_name: str = None):
         logger.info(f"VQA raw output: {answer}")
         return answer
 
-        logger.info(f"Visual QA model loaded in {time.time() - start_time:.2f}s")
         return vqa_function
 
-
-        logger.info(f"Pipeline model loaded in {time.time() - start_time:.2f}s")
-        return model
+        return pipeline(task, model=model_to_load)
 
     except Exception as e:
         logger.error(f"Model load failed: {str(e)}")
         raise HTTPException(status_code=500, detail=f"Model loading failed: {task} - {str(e)}")
 
-def
-    """Function to generate response with Gemini"""
+def get_gemini_response(user_input: str, is_generation: bool = False):
+    """Function to generate response with Gemini for both chat and text generation"""
     if not user_input:
-        return "Please
+        return "Please provide some input."
     try:
         chatbot = load_model("chatbot")
-
+        if is_generation:
+            prompt = f"Generate creative text based on this prompt: {user_input}"
+        else:
+            prompt = user_input
+        response = chatbot.generate_content(prompt)
         return response.text.strip()
     except Exception as e:
         return f"Error: {str(e)}"
 
 def translate_text(text: str, target_language: str):
-    """Translate text to any target language using
+    """Translate text to any target language using pre-loaded M2M100 model"""
     if not text:
         return "Please provide text to translate."
 
     try:
-
-
-
-
+        global translation_model, translation_tokenizer
+
+        target_lang = target_language.lower()
+        if target_lang not in SUPPORTED_LANGUAGES:
+            similar = [lang for lang in SUPPORTED_LANGUAGES if target_lang in lang or lang in target_lang]
+            if similar:
+                target_lang = similar[0]
+            else:
+                return f"Language '{target_language}' not supported. Available languages: {', '.join(SUPPORTED_LANGUAGES.keys())}"
+
+        lang_code = SUPPORTED_LANGUAGES[target_lang]
+
+        if translation_model is None or translation_tokenizer is None:
+            raise Exception("Translation model not initialized")
+
+        match = re.search(r'how to say\s+(.+?)\s+in\s+(\w+)', text.lower())
+        if match:
+            text_to_translate = match.group(1)
+        else:
+            content_match = re.search(r'(?:translate|convert).*to\s+[a-zA-Z]+\s*[:\s]*(.+)', text, re.IGNORECASE)
+            text_to_translate = content_match.group(1) if content_match else text
+
+        translation_tokenizer.src_lang = "en"
+        encoded = translation_tokenizer(text_to_translate, return_tensors="pt", padding=True, truncation=True).to(translation_model.device)
+
+        start_time = time.time()
+        generated_tokens = translation_model.generate(
+            **encoded,
+            forced_bos_token_id=translation_tokenizer.get_lang_id(lang_code),
+            max_length=512,
+            num_beams=1,
+            early_stopping=True
+        )
+        translated_text = translation_tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
+        logger.info(f"Translation took {time.time() - start_time:.2f} seconds")
+
+        return translated_text
+
     except Exception as e:
+        logger.error(f"Translation error: {str(e)}", exc_info=True)
         return f"Translation error: {str(e)}"
 
 def detect_intent(text: str = None, file: UploadFile = None) -> tuple[str, str]:
-    """Enhanced intent detection with dynamic translation support
+    """Enhanced intent detection with dynamic translation support"""
     target_language = "English"  # Default
 
     if file:
         content_type = file.content_type.lower() if file.content_type else ""
         filename = file.filename.lower() if file.filename else ""
 
+        # Added: Catch "what’s this" and "does this fly" first for images
+        if content_type.startswith('image/') and text:
+            text_lower = text.lower()
+            if "what’s this" in text_lower:
+                return "visual-qa", target_language
+            if "does this fly" in text_lower:
+                return "visual-qa", target_language
+            # Added: Broaden "fly" questions for VQA
+            if "fly" in text_lower and any(q in text_lower for q in ['does', 'can', 'will']):
+                return "visual-qa", target_language
+
         if content_type.startswith('image/'):
             if text and any(q in text.lower() for q in ['what is', 'what\'s', 'describe', 'tell me about', 'explain', 'how many', 'what color', 'is there', 'are they', 'does the']):
                 return "visual-qa", target_language
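Note (not part of the commit): `translate_text` relies on the module-level `translation_model`/`translation_tokenizer` set up in the startup hunk below. A self-contained sketch of the same M2M100 call path, loading the model inline instead of pre-loading it (assumes English source text and the `facebook/m2m100_418M` weights):

```python
from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer

model = M2M100ForConditionalGeneration.from_pretrained("facebook/m2m100_418M")
tokenizer = M2M100Tokenizer.from_pretrained("facebook/m2m100_418M")

tokenizer.src_lang = "en"  # source language; the app assumes English input
encoded = tokenizer("I want to explore the stars", return_tensors="pt")
tokens = model.generate(
    **encoded,
    forced_bos_token_id=tokenizer.get_lang_id("fr"),  # "fr" as in SUPPORTED_LANGUAGES
    max_length=512,
)
print(tokenizer.batch_decode(tokens, skip_special_tokens=True)[0])
```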
@@ -156,11 +218,22 @@ def detect_intent(text: str = None, file: UploadFile = None) -> tuple[str, str]:
     if any(keyword in text_lower for keyword in ['chat', 'talk', 'converse', 'ask gemini']):
         return "chatbot", target_language
 
-
-
-
-
-
+    translate_patterns = [
+        r'translate.*to\s+\[?([a-zA-Z]+)\]?:?\s*(.*)',
+        r'convert.*to\s+\[?([a-zA-Z]+)\]?:?\s*(.*)',
+        r'how to say.*in\s+\[?([a-zA-Z]+)\]?:?\s*(.*)'
+    ]
+
+    for pattern in translate_patterns:
+        translate_match = re.search(pattern, text_lower)
+        if translate_match:
+            potential_lang = translate_match.group(1).lower()
+            if potential_lang in SUPPORTED_LANGUAGES:
+                target_language = potential_lang.capitalize()
+                return "translate", target_language
+            else:
+                logger.warning(f"Invalid language detected: {potential_lang}")
+                return "chatbot", target_language
 
     vqa_patterns = [
         r'how (many|much)',
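Note (not part of the commit): a quick check of what the first `translate_patterns` regex captures; group 1 feeds the `SUPPORTED_LANGUAGES` lookup above.

```python
import re

pattern = r'translate.*to\s+\[?([a-zA-Z]+)\]?:?\s*(.*)'
m = re.search(pattern, "please translate this to french: good morning")
print(m.group(1))  # -> "french" (checked against SUPPORTED_LANGUAGES)
print(m.group(2))  # -> "good morning" (candidate text to translate)
```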
@@ -184,15 +257,6 @@ def detect_intent(text: str = None, file: UploadFile = None) -> tuple[str, str]:
 
     if any(re.search(pattern, text_lower) for pattern in summarization_patterns):
         return "summarize", target_language
-
-    question_patterns = [
-        r'\b(what|when|where|why|how|who|which)\b',
-        r'\?',
-        r'\b(explain|tell me|describe|define)\b'
-    ]
-
-    if any(re.search(pattern, text_lower) for pattern in question_patterns):
-        return "question-answering", target_language
 
     generation_patterns = [
         r'\b(write|generate|create|compose)\b',
@@ -205,6 +269,12 @@ def detect_intent(text: str = None, file: UploadFile = None) -> tuple[str, str]:
     if len(text) > 100:
         return "summarize", target_language
 
+    if file and file.content_type and file.content_type.startswith('image/'):
+        if text and "what’s this" in text_lower:
+            return "visual-qa", target_language
+        if text and any(q in text_lower for q in ['does this', 'is this', 'can this']):
+            return "visual-qa", target_language
+
     return "chatbot", target_language
 
 class ProcessResponse(BaseModel):
@@ -212,85 +282,10 @@ class ProcessResponse(BaseModel):
     type: str
     additional_data: Optional[Dict[str, Any]] = None
 
-
-@app.get("/chatbot", response_class=HTMLResponse)
+@app.get("/chatbot")
 async def chatbot_interface():
-
-
-    <html lang="en">
-    <head>
-        <meta charset="UTF-8">
-        <meta name="viewport" content="width=device-width, initial-scale=1.0">
-        <title>Cosmic AI Chatbot</title>
-        <style>
-            body { font-family: Arial, sans-serif; background: #282c34; color: white; text-align: center; }
-            .chat-box { width: 50%; margin: 20px auto; background: #444; padding: 20px; border-radius: 10px; }
-            #chat-box { max-height: 400px; overflow-y: auto; text-align: left; }
-            input, button, select { width: 80%; padding: 10px; margin: 5px; border-radius: 5px; }
-            button { background: #0084ff; color: white; cursor: pointer; }
-            .message { margin: 10px 0; padding: 10px; border-radius: 5px; }
-            .user-message { background: #555; }
-            .bot-message { background: #666; }
-        </style>
-    </head>
-    <body>
-        <div class="chat-box">
-            <h1>Cosmic AI Chatbot</h1>
-            <div id="chat-box">
-                <div class="message bot-message">
-                    <b>Bot:</b> Hello! I am your Cosmic AI Assistant. Upload a file or ask a question, and I can:<br>
-                    - Summarize documents<br>
-                    - Describe images<br>
-                    - Answer your questions<br>
-                    - Translate text to any language<br>
-                    - Generate visualization code
-                </div>
-            </div>
-            <input type="text" id="user-input" placeholder="Type your message...">
-            <select id="translate-to">
-                <option value="">No translation</option>
-                <option value="English">English</option>
-                <option value="French">French</option>
-                <option value="German">German</option>
-                <option value="Spanish">Spanish</option>
-                <option value="Italian">Italian</option>
-                <option value="Russian">Russian</option>
-                <option value="Chinese">Chinese</option>
-                <option value="Japanese">Japanese</option>
-            </select>
-            <button onclick="sendMessage()">Send</button>
-        </div>
-        <script>
-            async function sendMessage() {
-                let inputField = document.getElementById("user-input");
-                let translateTo = document.getElementById("translate-to").value;
-                let chatBox = document.getElementById("chat-box");
-                let userMessage = inputField.value.trim();
-                if (!userMessage) return;
-
-                let messageToSend = translateTo ? `Translate this to ${translateTo}: ${userMessage}` : userMessage;
-
-                chatBox.innerHTML += `<div class="message user-message"><b>You:</b> ${userMessage}</div>`;
-                let response = await fetch("/chat", {
-                    method: "POST",
-                    headers: { "Content-Type": "application/json" },
-                    body: JSON.stringify({ message: messageToSend })
-                });
-                let result = await response.json();
-                chatBox.innerHTML += `<div class="message bot-message"><b>Bot:</b> ${result.response}</div>`;
-                inputField.value = "";
-                chatBox.scrollTop = chatBox.scrollHeight;
-            }
-
-            document.getElementById("user-input").addEventListener("keypress", function(e) {
-                if (e.key === "Enter") {
-                    sendMessage();
-                }
-            });
-        </script>
-    </body>
-    </html>
-    """
+    """Redirect to the static index.html file for the chatbot interface"""
+    return RedirectResponse(url="/static/index.html")
 
 @app.post("/chat")
 async def chat_endpoint(data: dict):
@@ -298,7 +293,7 @@ async def chat_endpoint(data: dict):
     if not message:
         raise HTTPException(status_code=400, detail="No message provided")
     try:
-        response =
+        response = get_gemini_response(message)
         return {"response": response}
     except Exception as e:
         raise HTTPException(status_code=500, detail=f"Chat error: {str(e)}")
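Note (not part of the commit): with the inline HTML page removed above, `/chat` is exercised directly. A hedged client sketch, assuming the app runs on the port configured in `uvicorn.run` at the bottom of the file:

```python
import requests

resp = requests.post(
    "http://localhost:7860/chat",                          # port from uvicorn.run below
    json={"message": "Translate this to French: hello"},
    timeout=60,
)
print(resp.json()["response"])
```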
@@ -319,14 +314,30 @@ async def process_input(
 
     try:
         if intent == "chatbot":
-            response =
+            response = get_gemini_response(text)
             return {"response": response, "type": "chat"}
 
         elif intent == "translate":
             content = await extract_text_from_file(file) if file else text
-
-
-
+            if "all languages" in text.lower():
+                translations = {}
+                phrase_to_translate = "I want to explore the stars" if "I want to explore the stars" in text else content
+                for lang, code in SUPPORTED_LANGUAGES.items():
+                    translation_tokenizer.src_lang = "en"
+                    encoded = translation_tokenizer(phrase_to_translate, return_tensors="pt").to(translation_model.device)
+                    generated_tokens = translation_model.generate(
+                        **encoded,
+                        forced_bos_token_id=translation_tokenizer.get_lang_id(code),
+                        max_length=512,
+                        num_beams=1
+                    )
+                    translations[lang] = translation_tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
+                response = "\n".join(f"{lang.capitalize()}: {translations[lang]}" for lang in translations)
+                logger.info(f"Translated to all supported languages: {', '.join(translations.keys())}")
+                return {"response": response, "type": "translation"}
+            else:
+                translated_text = translate_text(content, target_language)
+                return {"response": translated_text, "type": "translation"}
 
         elif intent == "summarize":
             content = await extract_text_from_file(file) if file else text
@@ -364,28 +375,6 @@ async def process_input(
             final_summary = re.sub(r'\s+', ' ', final_summary).strip()
             return {"response": final_summary, "type": "summary"}
 
-        elif intent == "question-answering":
-            context = await extract_text_from_file(file) if file else None
-
-            if not context and not text:
-                raise HTTPException(status_code=400, detail="No context provided")
-
-            qa_pipeline = load_model("question-answering")
-
-            if not context and "?" in text:
-                parts = text.split("?", 1)
-                question = parts[0] + "?"
-                context = parts[1].strip() if len(parts) > 1 and parts[1].strip() else text
-            else:
-                question = text if text else "Summarize this document"
-
-            result = qa_pipeline(
-                question=question,
-                context=context[:2000] if context else text[:2000]
-            )
-
-            return {"response": result["answer"], "type": "answer"}
-
         elif intent == "image-to-text":
             if not file or not file.content_type.startswith('image/'):
                 raise HTTPException(status_code=400, detail="An image file is required")
@@ -423,6 +412,11 @@ async def process_input(
             answer = answer.capitalize()
             if not answer.endswith(('.', '!', '?')):
                 answer += '.'
+            chatbot = load_model("chatbot")
+            if "fly" in question.lower():
+                answer = chatbot.generate_content(f"Make this fun and spacey: {answer}").text.strip()
+            else:
+                answer = chatbot.generate_content(f"Make this cosmic and poetic: {answer}").text.strip()
 
             logger.info(f"Final VQA answer: {answer}")
 
@@ -447,25 +441,19 @@ async def process_input(
             df = pd.read_excel(io.BytesIO(file_content))
 
             code = generate_visualization_code(df, text)
+            stats = df.describe().to_string()
+            response = f"Stats:\n{stats}\n\nChart Code:\n{code}"
 
-            return {"response":
+            return {"response": response, "type": "visualization_code"}
 
         elif intent == "text-generation":
-
-
-
-
-                max_length=200,
-                num_return_sequences=1,
-                temperature=0.8,
-                top_p=0.92,
-                do_sample=True
-            )
-
-            return {"response": generated[0]["generated_text"], "type": "generated_text"}
+            response = get_gemini_response(text, is_generation=True)
+            lines = response.split(". ")
+            formatted_poem = "\n".join(line.strip() + ("." if not line.endswith(".") else "") for line in lines if line)
+            return {"response": formatted_poem, "type": "generated_text"}
 
         else:
-            response =
+            response = get_gemini_response(text or "Hello! How can I assist you?")
             return {"response": response, "type": "chat"}
 
     except Exception as e:
@@ -476,42 +464,51 @@ async def process_input(
         logger.info(f"Request processed in {process_time:.2f} seconds")
 
 async def extract_text_from_file(file: UploadFile) -> str:
-    """Enhanced text extraction with
+    """Enhanced text extraction with multiple fallbacks"""
     if not file:
         return ""
 
     content = await file.read()
     filename = file.filename.lower()
 
     try:
         if filename.endswith('.pdf'):
             try:
                 doc = fitz.open(stream=content, filetype="pdf")
+                if doc.is_encrypted:
+                    return "PDF is encrypted and cannot be read"
                 text = ""
                 for page in doc:
                     text += page.get_text()
                 return text
             except Exception as pdf_error:
-                logger.warning(f"PyMuPDF failed
+                logger.warning(f"PyMuPDF failed: {str(pdf_error)}. Trying pdfminer.six...")
                 from pdfminer.high_level import extract_text
                 from io import BytesIO
                 return extract_text(BytesIO(content))
 
         elif filename.endswith(('.docx', '.doc')):
             doc = Document(io.BytesIO(content))
             return "\n".join(para.text for para in doc.paragraphs)
 
         elif filename.endswith('.txt'):
             return content.decode('utf-8', errors='replace')
 
         elif filename.endswith('.rtf'):
             text = content.decode('utf-8', errors='replace')
             text = re.sub(r'\\[a-z]+', ' ', text)
             text = re.sub(r'\{|\}|\\', '', text)
             return text
 
         else:
             raise HTTPException(status_code=400, detail=f"Unsupported file format: {filename}")
 
     except Exception as e:
         logger.error(f"File extraction error: {str(e)}", exc_info=True)
-        raise HTTPException(
+        raise HTTPException(
+            status_code=500,
+            detail=f"Error extracting text: {str(e)}. Supported formats: PDF, DOCX, TXT, RTF"
+        )
 
 def generate_visualization_code(df: pd.DataFrame, request: str = None) -> str:
     """Generate visualization code based on data analysis"""
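Note (not part of the commit): the PDF branch above tries PyMuPDF first and falls back to pdfminer.six. The same two-step fallback as a standalone helper, a sketch operating on raw bytes:

```python
import fitz  # PyMuPDF

def pdf_bytes_to_text(content: bytes) -> str:
    """Extract text with PyMuPDF, falling back to pdfminer.six on failure."""
    try:
        doc = fitz.open(stream=content, filetype="pdf")
        return "".join(page.get_text() for page in doc)
    except Exception:
        from io import BytesIO
        from pdfminer.high_level import extract_text
        return extract_text(BytesIO(content))
```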
@@ -583,53 +580,6 @@ plt.savefig('distribution_plot.png')
 plt.show()
 print(df['{num_col}'].describe())"""
 
-    elif len(numeric_cols) >= 3 and ("pairplot" in request_lower or "multi" in request_lower):
-        return f"""import pandas as pd
-import matplotlib.pyplot as plt
-import seaborn as sns
-df = pd.read_excel('data.xlsx')
-plt.figure(figsize=(12, 10))
-sns.set(style="ticks")
-plot = sns.pairplot(df[{numeric_cols[:5]}], diag_kind='kde', plot_kws={{'alpha': 0.6}})
-plot.fig.suptitle('Correlation Matrix of Numeric Variables', y=1.02, fontsize=16)
-plt.tight_layout()
-plt.savefig('pairplot.png')
-plt.show()
-correlation_matrix = df[{numeric_cols[:5]}].corr()
-plt.figure(figsize=(10, 8))
-sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', fmt='.2f', linewidths=0.5)
-plt.title('Correlation Matrix')
-plt.tight_layout()
-plt.savefig('correlation_matrix.png')
-plt.show()"""
-
-    elif len(date_cols) >= 1 and len(numeric_cols) >= 1 and ("time" in request_lower or "trend" in request_lower):
-        date_col = date_cols[0]
-        num_col = numeric_cols[0]
-        return f"""import pandas as pd
-import matplotlib.pyplot as plt
-import seaborn as sns
-import matplotlib.dates as mdates
-df = pd.read_excel('data.xlsx')
-df['{date_col}'] = pd.to_datetime(df['{date_col}'])
-df = df.sort_values(by='{date_col}')
-plt.figure(figsize=(12, 6))
-plt.plot(df['{date_col}'], df['{num_col}'], marker='o', linestyle='-', color='#7b2cbf', linewidth=2, markersize=6)
-plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
-plt.gca().xaxis.set_major_locator(mdates.AutoDateLocator())
-plt.title('Trend of {num_col} over time', fontsize=15)
-plt.xlabel('Date', fontsize=12)
-plt.ylabel('{num_col}', fontsize=12)
-plt.grid(True, alpha=0.3)
-plt.xticks(rotation=45)
-plt.tight_layout()
-plt.savefig('time_series.png')
-plt.show()
-from scipy import stats
-x = np.arange(len(df))
-slope, intercept, r_value, p_value, std_err = stats.linregress(x, df['{num_col}'])
-print(f"Trend: {{'Positive' if slope > 0 else 'Negative'}}, Slope: {{slope:.4f}}, R²: {{r_value**2:.4f}}")"""
-
     else:
         return f"""import pandas as pd
 import matplotlib.pyplot as plt
@@ -669,7 +619,7 @@ plt.show()"""
 @app.get("/", include_in_schema=False)
 async def home():
     """Redirect to the static index.html file"""
-    return RedirectResponse(url="/static/
+    return RedirectResponse(url="/static/tito.html")
 
 @app.get("/health", include_in_schema=True)
 async def health_check():
@@ -684,36 +634,35 @@ async def list_models():
 @app.on_event("startup")
 async def startup_event():
     """Pre-load models at startup with timeout"""
+    global translation_model, translation_tokenizer
     logger.info("Starting model pre-loading...")
-
-    # Load Gemini models synchronously
-    for task in ["chatbot", "translation"]:
-        try:
-            load_model(task)  # Synchronous call
-            logger.info(f"Successfully loaded {task} model")
-        except Exception as e:
-            logger.error(f"Error pre-loading {task}: {str(e)}")
-
-    # Load Hugging Face models asynchronously
+
     async def load_model_with_timeout(task):
         try:
-            await asyncio.wait_for(load_model
+            await asyncio.wait_for(asyncio.to_thread(load_model, task), timeout=60.0)
             logger.info(f"Successfully loaded {task} model")
         except asyncio.TimeoutError:
             logger.warning(f"Timeout loading {task} model - will load on demand")
         except Exception as e:
             logger.error(f"Error pre-loading {task}: {str(e)}")
-
+
+    try:
+        model_name = MODELS["translation"]
+        translation_model = M2M100ForConditionalGeneration.from_pretrained(model_name)
+        translation_tokenizer = M2M100Tokenizer.from_pretrained(model_name)
+        device = "cuda" if torch.cuda.is_available() else "cpu"
+        translation_model.to(device)
+        logger.info("Translation model pre-loaded successfully")
+    except Exception as e:
+        logger.error(f"Error pre-loading translation model: {str(e)}")
+
     await asyncio.gather(
         load_model_with_timeout("summarization"),
         load_model_with_timeout("image-to-text"),
-        load_model_with_timeout("visual-qa")
+        load_model_with_timeout("visual-qa"),
+        load_model_with_timeout("chatbot")
     )
 
 if __name__ == "__main__":
     import uvicorn
-    # Ensure the upload_dir is writable
-    logger.info(f"Checking write permissions for {upload_dir}")
-    if not os.access(upload_dir, os.W_OK):
-        logger.error(f"No write permissions for {upload_dir}")
     uvicorn.run("app:app", host="0.0.0.0", port=7860, reload=True)
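Note (not part of the commit): the key startup change is bounding each blocking loader with `asyncio.wait_for(asyncio.to_thread(...))` so a slow model download cannot hang the event loop. The pattern in isolation, as a sketch (Python 3.9+; the one-second sleep stands in for a model download):

```python
import asyncio
import time

def blocking_load(task: str) -> str:
    time.sleep(1)  # stands in for a slow model download
    return f"{task} ready"

async def main():
    try:
        # Run the blocking call in a worker thread, capped at 60 seconds
        result = await asyncio.wait_for(asyncio.to_thread(blocking_load, "summarization"), timeout=60.0)
        print(result)
    except asyncio.TimeoutError:
        print("timeout - will load on demand")

asyncio.run(main())
```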