# crawlgpt/src/crawlgpt/ui/chat_app.py
# Description: Streamlit app for the chat interface of the CrawlGPT system with user authentication
import streamlit as st
import asyncio
import time
from datetime import datetime
import json
from src.crawlgpt.core.LLMBasedCrawler import Model
from src.crawlgpt.core.database import save_chat_message, get_chat_history, delete_user_chat_history, restore_chat_history
from src.crawlgpt.utils.monitoring import MetricsCollector, Metrics
from src.crawlgpt.utils.progress import ProgressTracker
from src.crawlgpt.utils.data_manager import DataManager
from src.crawlgpt.utils.content_validator import ContentValidator
from src.crawlgpt.ui.login import show_login

# Check authentication before any other processing
if 'user' not in st.session_state:
    show_login()
    st.stop()  # Stop execution if not logged in

# Home Page Setup 
st.set_page_config(
    page_title="CrawlGPT πŸš€πŸ€–", 
    page_icon="πŸ€–", 
    layout="centered",
)

# Streamlit app title and description
st.title("CrawlGPT πŸš€πŸ€–")
st.write(
    "This app extracts content from a URL, stores it in a vector database, and generates responses to user queries. "
    "It also summarizes extracted content for efficient retrieval."
)

# Initialize components in session state
# (chat messages are intentionally not initialized here so the saved history below is loaded on first run)
if "model" not in st.session_state:
    st.session_state.model = Model()
    st.session_state.data_manager = DataManager()
    st.session_state.content_validator = ContentValidator()
    st.session_state.url_processed = False

if "use_summary" not in st.session_state:
    st.session_state.use_summary = True

if "metrics" not in st.session_state:
    st.session_state.metrics = MetricsCollector()

# Load chat history from database
if "messages" not in st.session_state:
    st.session_state.messages = []
    # Load user's chat history from database
    history = get_chat_history(st.session_state.user.id)
    st.session_state.messages = [{
        "role": msg.role,
        "content": msg.message,
        "context": msg.context,
        "timestamp": msg.timestamp
    } for msg in history]

model = st.session_state.model

def load_chat_history():
    """Loads chat history and model state from database"""
    try:
        # Clear existing model state
        model.clear()
        
        # Load messages
        st.session_state.messages = restore_chat_history(st.session_state.user.id)
        
        # Rebuild model context from chat history
        context_parts = [
            msg['context'] for msg in st.session_state.messages 
            if msg.get('context')
        ]
        model.context = "\n".join(context_parts)
        
        # Rebuild vector database from context
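        # Note: this re-chunks and re-summarizes the stored context, so restoring a long conversation may take a while.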
        if model.context:
            chunks = model.chunk_text(model.context)
            summaries = [model.summarizer.generate_summary(chunk) for chunk in chunks]
            model.database.add_data(chunks, summaries)
            st.session_state.url_processed = True
            
        # No explicit rerun needed: the restored messages are rendered later in this same script run.
        
    except Exception as e:
        st.error(f"Restoration failed: {str(e)}")

# Sidebar implementation
with st.sidebar:
    st.subheader(f"πŸ‘€ User: {st.session_state.user.username}")
    
    st.subheader("πŸ“Š System Metrics")
    metrics = st.session_state.metrics.metrics.to_dict()
    st.metric("Total Requests", metrics["total_requests"])
    st.metric("Success Rate", f"{(metrics['successful_requests']/max(metrics['total_requests'], 1))*100:.1f}%")
    st.metric("Avg Response Time", f"{metrics['average_response_time']:.2f}s")
    
    # RAG Settings
    st.subheader("πŸ”§ RAG Settings")
    st.session_state.use_summary = st.checkbox("Use Summarized RAG", value=st.session_state.use_summary, help="Disable summarization when working with coding documentation.")
    st.subheader("πŸ€– Normal LLM Settings")
    temperature = st.slider("Temperature", 0.0, 1.0, 0.7, help="Controls the randomness of the generated text. Lower values are more deterministic.")
    max_tokens = st.slider("Max Tokens", 500, 10000, 5000, help="Maximum number of tokens to generate in the response.")
    model_id = st.radio("Model ID", ['llama-3.1-8b-instant', 'llama-3.3-70b-versatile', 'mixtral-8x7b-32768'], help="Choose the model to use for generating responses.")
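    # The selected model ID is passed straight through to model.generate_response below; only these options are exposed in the UI.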
    
    # Export/Import Data
    st.subheader("πŸ’Ύ Data Management")
    if st.button("Export Current State"):
        try:
            export_data = {
                "metrics": metrics,
                "vector_database": model.database.to_dict(),
                "messages": st.session_state.messages
            }
            export_json = json.dumps(export_data)
            st.session_state.export_json = export_json
            st.success("Data exported successfully!")
        except Exception as e:
            st.error(f"Export failed: {e}")

    if "export_json" in st.session_state:
        st.download_button(
            label="Download Backup",
            data=st.session_state.export_json,
            file_name=f"crawlgpt_backup_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json",
            mime="application/json"
        )

    uploaded_file = st.file_uploader("Import Previous State", type=['json'])
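    # The uploaded backup is expected to match the structure produced by "Export Current State" above: metrics, vector_database, and messages.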
    if uploaded_file is not None:
        try:
            imported_data = json.loads(uploaded_file.read())
            
            # Validate imported data structure
            required_keys = ["metrics", "vector_database", "messages"]
            if not all(key in imported_data for key in required_keys):
                raise ValueError("Invalid backup file structure")
                
            # Import data with proper state management
            model.import_state(imported_data)
            
            # Restore chat history and context
            if "messages" in imported_data:
                st.session_state.messages = imported_data["messages"]
                
            # Set URL processed state if there's context
            if model.context:
                st.session_state.url_processed = True
            else:
                st.session_state.url_processed = False
                
            # Update metrics
            if "metrics" in imported_data:
                st.session_state.metrics = MetricsCollector()
                st.session_state.metrics.metrics = Metrics.from_dict(imported_data["metrics"])
                
            st.success("Data imported successfully! You can continue chatting.")
            
        except Exception as e:
            st.error(f"Import failed: {e}")
            st.session_state.url_processed = False
            
    if st.button("♻️ Restore Full Chat State"):
        with st.spinner("Rebuilding AI context..."):
            load_chat_history()
        st.success("Full conversation state restored!")

# URL Processing Section
url_col1, url_col2 = st.columns([3, 1])
with url_col1:
    url = st.text_input("Enter URL:", help="Provide the URL to extract content from.")
with url_col2:
    process_url = st.button("Process URL")

if process_url:
    if not url or not url.strip():
        st.warning("Please enter a valid URL.")
    else:
        progress_bar = st.progress(0)
        status_text = st.empty()
        
        try:
            if not st.session_state.content_validator.is_valid_url(url):
                st.error("Invalid URL format")
            else:
                async def extract_content():
                    start_time = time.time()
                    progress = ProgressTracker(total_steps=4, operation_name="content_extraction")
                    
                    try:
                        status_text.text("Validating URL...")
                        progress_bar.progress(25)
                        
                        status_text.text("Crawling content...")
                        progress_bar.progress(50)
                        success, msg = await model.extract_content_from_url(url)
                        
                        if success:
                            status_text.text("Processing content...")
                            progress_bar.progress(75)
                            
                            status_text.text("Storing in database...")
                            progress_bar.progress(100)
                            
                            st.session_state.metrics.record_request(
                                success=True,
                                response_time=time.time() - start_time,
                                tokens_used=len(model.context.split())
                            )
                            
                            st.session_state.url_processed = True
                            st.session_state.messages.append({
                                "role": "system",
                                "content": f"Content from {url} processed",
                                "context": model.context  # Store full context
                            })
                        else:
                            raise Exception(msg)
                            
                    except Exception as e:
                        st.session_state.metrics.record_request(
                            success=False,
                            response_time=time.time() - start_time,
                            tokens_used=0
                        )
                        raise e
                    finally:
                        status_text.empty()
                        progress_bar.empty()

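                # Drive the async crawler to completion; the Streamlit script blocks here until extraction finishes.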
                asyncio.run(extract_content())
                
        except Exception as e:
            st.error(f"Error processing URL: {e}")

# Chat Interface
st.subheader("πŸ’­ Chat Interface")

# Display chat messages
chat_container = st.container()
with chat_container:
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.write(message["content"])

# Chat input
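# (disabled until content has been processed from a URL or restored/imported from a backup)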
if chat_input := st.chat_input("Ask about the content...", disabled=not st.session_state.url_processed):
    # Display user message
    with st.chat_message("user"):
        st.write(chat_input)
    
    # Add user message to history and database
    st.session_state.messages.append({"role": "user", "content": chat_input})
    save_chat_message(
        st.session_state.user.id,
        chat_input,
        "user",
        model.context  # Store full context
    )
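    # The full crawled context is stored with every message so the vector database can be rebuilt when the chat state is restored.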
    try:
        start_time = time.time()
        
        # Show typing indicator
        with st.chat_message("assistant"):
            with st.spinner("Thinking..."):
                response = model.generate_response(
                    chat_input,
                    temperature,
                    max_tokens,
                    model_id,
                    use_summary=st.session_state.use_summary
                )
                st.write(response)
        
        # Add assistant response to history and database
        st.session_state.messages.append({"role": "assistant", "content": response})
        save_chat_message(
            st.session_state.user.id,
            response,  # Save the assistant's response text
            "assistant",
            model.context
        )
        # Record metrics
        st.session_state.metrics.record_request(
            success=True,
            response_time=time.time() - start_time,
            tokens_used=len(response.split())
        )

    except Exception as e:
        st.session_state.metrics.record_request(
            success=False,
            response_time=time.time() - start_time,
            tokens_used=0
        )
        st.error(f"Error generating response: {e}")

# Debug and Clear Options
col1, col2 = st.columns(2)
with col1:
    if st.button("Clear Chat History"):
        try:
            delete_user_chat_history(st.session_state.user.id)
            st.session_state.messages = []
            st.session_state.url_processed = False
            st.success("Chat history cleared!")
            st.rerun()
        except Exception as e:
            st.error(f"Error clearing history: {e}")

with col2:
    # The confirmation checkbox is rendered outside the button handler so its state survives the rerun triggered by the click.
    confirm_clear = st.checkbox("Confirm Clear")
    if st.button("Clear All Data"):
        if not confirm_clear:
            st.warning("Tick 'Confirm Clear' before clearing all data.")
        else:
            try:
                model.clear()
                st.session_state.messages = []
                delete_user_chat_history(st.session_state.user.id)
                st.session_state.url_processed = False
                st.session_state.metrics = MetricsCollector()
                st.success("All data cleared successfully.")
            except Exception as e:
                st.error(f"Error clearing data: {e}")

# Debug Information
if st.checkbox("Show Debug Info"):
    st.subheader("πŸ” Debug Information")
    col1, col2 = st.columns(2)
    
    with col1:
        st.write("Cache Information:")
        st.write(model.cache)
    
    with col2:
        st.write("Current Metrics:")
        st.write(metrics)
    
    st.write("Current Context Preview:")
    st.write(model.context[:500] if model.context else "No context available")