# Residue from the repository web viewer (not part of the module):
# jatinmehra's picture
# feat: implement user authentication and chat history management with SQLAlchemy
# 5610ef5
# crawlgpt/src/crawlgpt/ui/chat_app.py
# Description: Streamlit app for the chat interface of the CrawlGPT system with user authentication
import streamlit as st
import asyncio
import time
from datetime import datetime
import json
from src.crawlgpt.core.LLMBasedCrawler import Model
from src.crawlgpt.core.database import save_chat_message, get_chat_history, delete_user_chat_history, restore_chat_history
from src.crawlgpt.utils.monitoring import MetricsCollector, Metrics
from src.crawlgpt.utils.progress import ProgressTracker
from src.crawlgpt.utils.data_manager import DataManager
from src.crawlgpt.utils.content_validator import ContentValidator
from src.crawlgpt.ui.login import show_login
# Authentication gate: nothing below may run for an anonymous session.
authenticated = 'user' in st.session_state
if not authenticated:
    # Render the login form, then halt this script run.
    show_login()
    st.stop()
# Home Page Setup
# The emoji were stored as mis-decoded UTF-8 bytes; they are restored here so
# the browser tab title and favicon render as intended.
st.set_page_config(
    page_title="CrawlGPT 🚀🤖",
    page_icon="🤖",
    layout="centered",
)

# Streamlit app title and description
st.title("CrawlGPT 🚀🤖")
st.write(
    "This app extracts content from a URL, stores it in a vector database, and generates responses to user queries. "
    "It also summarizes extracted content for efficient retrieval."
)
# Initialize components in session state (first run of a session only).
if "model" not in st.session_state:
    st.session_state.model = Model()
    st.session_state.data_manager = DataManager()
    st.session_state.content_validator = ContentValidator()
    st.session_state.url_processed = False
    # "messages" is intentionally NOT created here: doing so made the
    # database-backed history load below unreachable.

if "use_summary" not in st.session_state:
    st.session_state.use_summary = True

if "metrics" not in st.session_state:
    st.session_state.metrics = MetricsCollector()

# Load the user's chat history from the database once per session.
if "messages" not in st.session_state:
    try:
        history = get_chat_history(st.session_state.user.id)
    except Exception as e:
        # A storage problem should not prevent the app from starting;
        # fall back to an empty conversation and tell the user.
        st.warning(f"Could not load chat history: {e}")
        history = []

    st.session_state.messages = [
        {
            "role": msg.role,
            "content": msg.message,
            "context": msg.context,
            # Store date-like values as ISO strings so the message list stays
            # JSON-serialisable for "Export Current State".
            "timestamp": (
                msg.timestamp.isoformat()
                if hasattr(msg.timestamp, "isoformat")
                else msg.timestamp
            ),
        }
        for msg in history
    ]

# Module-level shortcut used by the rest of the script.
model = st.session_state.model
def load_chat_history():
    """Restore the user's saved conversation and rebuild the model state.

    Clears the current model, replaces ``st.session_state.messages`` with the
    result of ``restore_chat_history`` for the logged-in user, rebuilds
    ``model.context`` from the contexts stored with those messages, and
    re-populates the model's database with chunks and their summaries.

    Returns:
        ``False`` if restoration failed (the error is shown with
        ``st.error``). On success the script is rerun via ``st.rerun()``,
        so the call does not return normally.
    """
    try:
        # Clear existing model state
        model.clear()

        # Load messages
        st.session_state.messages = restore_chat_history(st.session_state.user.id)

        # Each chat message is saved together with the context that was
        # current at that moment, so consecutive messages usually carry the
        # very same text. Keep every distinct context once, in first-seen
        # order, instead of concatenating the duplicates.
        context_parts = list(dict.fromkeys(
            msg['context'] for msg in st.session_state.messages
            if msg.get('context')
        ))
        model.context = "\n".join(context_parts)

        # Rebuild vector database from context
        if model.context:
            chunks = model.chunk_text(model.context)
            summaries = [model.summarizer.generate_summary(chunk) for chunk in chunks]
            model.database.add_data(chunks, summaries)
            st.session_state.url_processed = True
    except Exception as e:
        st.error(f"Restoration failed: {str(e)}")
        return False

    # Kept outside the try block so the rerun request is never mistaken for a
    # restoration error.
    st.rerun()
# Sidebar implementation: user info, metrics, generation settings and
# export / import / restore of the application state.
with st.sidebar:
    st.subheader(f"👤 User: {st.session_state.user.username}")

    st.subheader("📊 System Metrics")
    # `metrics` is also read by the export below and by the debug panel.
    metrics = st.session_state.metrics.metrics.to_dict()
    st.metric("Total Requests", metrics["total_requests"])
    # max(..., 1) avoids a division by zero before the first request.
    st.metric("Success Rate", f"{(metrics['successful_requests']/max(metrics['total_requests'], 1))*100:.1f}%")
    st.metric("Avg Response Time", f"{metrics['average_response_time']:.2f}s")

    # RAG Settings
    st.subheader("🔧 RAG Settings")
    st.session_state.use_summary = st.checkbox("Use Summarized RAG", value=False, help="Don't use summarization when dealing with Coding Documentation.")

    st.subheader("🤖 Normal LLM Settings")
    temperature = st.slider("Temperature", 0.0, 1.0, 0.7, help="Controls the randomness of the generated text. Lower values are more deterministic.")
    max_tokens = st.slider("Max Tokens", 500, 10000, 5000, help="Maximum number of tokens to generate in the response.")
    model_id = st.radio("Model ID", ['llama-3.1-8b-instant', 'llama-3.3-70b-versatile', 'mixtral-8x7b-32768'], help="Choose the model to use for generating responses.")

    # Export/Import Data
    st.subheader("💾 Data Management")
    if st.button("Export Current State"):
        try:
            export_data = {
                "metrics": metrics,
                "vector_database": model.database.to_dict(),
                "messages": st.session_state.messages
            }
            # Kept in session state so the download button survives reruns.
            st.session_state.export_json = json.dumps(export_data)
            st.success("Data exported successfully!")
        except Exception as e:
            st.error(f"Export failed: {e}")

    if "export_json" in st.session_state:
        st.download_button(
            label="Download Backup",
            data=st.session_state.export_json,
            file_name=f"crawlgpt_backup_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json",
            mime="application/json"
        )

    uploaded_file = st.file_uploader("Import Previous State", type=['json'])
    if uploaded_file is None:
        # Uploader emptied: allow the same backup to be imported again later.
        st.session_state.pop("imported_backup", None)
    else:
        # The uploader keeps returning the file on every rerun. Import each
        # upload only once, otherwise every later interaction would overwrite
        # the live conversation with the backup again.
        # getvalue() is independent of the stream position.
        payload = uploaded_file.getvalue()
        upload_marker = (uploaded_file.name, len(payload), hash(payload))
        if st.session_state.get("imported_backup") != upload_marker:
            # Recorded up front so a broken file is reported once rather than
            # on every rerun.
            st.session_state.imported_backup = upload_marker
            try:
                imported_data = json.loads(payload)

                # Validate imported data structure
                required_keys = ("metrics", "vector_database", "messages")
                if not all(key in imported_data for key in required_keys):
                    raise ValueError("Invalid backup file structure")

                # Restore model state (done once; the keys are validated above).
                model.import_state(imported_data)

                # Restore chat history
                st.session_state.messages = imported_data["messages"]

                # Restore metrics
                st.session_state.metrics = MetricsCollector()
                st.session_state.metrics.metrics = Metrics.from_dict(imported_data["metrics"])

                st.success("Data imported successfully! You can continue chatting.")
                st.session_state.url_processed = True
            except Exception as e:
                st.error(f"Import failed: {e}")
                st.session_state.url_processed = False

    if st.button("♻️ Restore Full Chat State"):
        with st.spinner("Rebuilding AI context..."):
            restored = load_chat_history()
        # Skip the success banner when the helper explicitly returned False.
        if restored is not False:
            st.success("Full conversation state restored!")
# URL Processing Section: a text field plus a button that crawls the given
# URL and feeds the extracted content into the model.
input_col, button_col = st.columns([3, 1])
with input_col:
    url = st.text_input("Enter URL:", help="Provide the URL to extract content from.")
with button_col:
    process_url = st.button("Process URL")

if process_url and url:
    if url.strip():
        progress_bar = st.progress(0)
        status_text = st.empty()

        def show_step(label, percent):
            """Update the status line and the progress bar together."""
            status_text.text(label)
            progress_bar.progress(percent)

        async def extract_content():
            """Crawl `url`, record request metrics and flag the URL as processed."""
            began = time.time()
            # Created for the extraction run; not referenced again in this block.
            progress = ProgressTracker(total_steps=4, operation_name="content_extraction")
            try:
                show_step("Validating URL...", 25)
                show_step("Crawling content...", 50)
                success, msg = await model.extract_content_from_url(url)
                if not success:
                    raise Exception(msg)

                show_step("Processing content...", 75)
                show_step("Storing in database...", 100)
                st.session_state.metrics.record_request(
                    success=True,
                    response_time=time.time() - began,
                    tokens_used=len(model.context.split())
                )
                st.session_state.url_processed = True
                st.session_state.messages.append({
                    "role": "system",
                    "content": f"Content from {url} processed",
                    "context": model.context  # Store full context
                })
            except Exception:
                # Count the failure, then let the caller report it.
                st.session_state.metrics.record_request(
                    success=False,
                    response_time=time.time() - began,
                    tokens_used=0
                )
                raise
            finally:
                # Always remove the transient progress widgets.
                status_text.empty()
                progress_bar.empty()

        try:
            if st.session_state.content_validator.is_valid_url(url):
                asyncio.run(extract_content())
            else:
                st.error("Invalid URL format")
        except Exception as e:
            st.error(f"Error processing URL: {e}")
    else:
        st.warning("Please enter a valid URL.")
# Chat Interface
# The heading emoji was stored as mis-decoded UTF-8; restored here.
st.subheader("💭 Chat Interface")

# Display chat messages: replay the stored conversation on every run, one
# chat bubble per message, labelled with the message's role.
chat_container = st.container()
with chat_container:
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.write(message["content"])
# Chat input: disabled until some content has been processed.
question = st.chat_input("Ask about the content...", disabled=not st.session_state.url_processed)
if question:
    # Echo the question immediately.
    with st.chat_message("user"):
        st.write(question)

    # Record the question in the session and in the database, together with
    # the full context that is current right now.
    st.session_state.messages.append({"role": "user", "content": question})
    save_chat_message(st.session_state.user.id, question, "user", model.context)

    asked_at = time.time()
    try:
        # The spinner acts as a typing indicator inside the assistant bubble.
        with st.chat_message("assistant"), st.spinner("Thinking..."):
            answer = model.generate_response(
                question,
                temperature,
                max_tokens,
                model_id,
                use_summary=st.session_state.use_summary
            )
            st.write(answer)

        # Record the answer in the session and in the database.
        st.session_state.messages.append({"role": "assistant", "content": answer})
        save_chat_message(st.session_state.user.id, answer, "assistant", model.context)

        # Metrics for a successful exchange; tokens approximated by word count.
        st.session_state.metrics.record_request(
            success=True,
            response_time=time.time() - asked_at,
            tokens_used=len(answer.split())
        )
    except Exception as e:
        st.session_state.metrics.record_request(
            success=False,
            response_time=time.time() - asked_at,
            tokens_used=0
        )
        st.error(f"Error generating response: {e}")
# Debug and Clear Options
col1, col2 = st.columns(2)

with col1:
    # Removes the stored conversation but leaves the model state untouched.
    if st.button("Clear Chat History"):
        try:
            delete_user_chat_history(st.session_state.user.id)
            st.session_state.messages = []
            st.session_state.url_processed = False
            st.success("Chat history cleared!")
            st.rerun()
        except Exception as e:
            st.error(f"Error clearing history: {e}")

with col2:
    # A button is only True during the run triggered by its click. A checkbox
    # created inside `if st.button(...)` therefore vanishes on the rerun caused
    # by ticking it, and the clear could never execute. Both widgets are
    # rendered on every run; the checkbox is evaluated when the button fires.
    clear_all_clicked = st.button("Clear All Data")
    confirm_clear = st.checkbox("Confirm Clear")
    if clear_all_clicked:
        if confirm_clear:
            try:
                model.clear()
                st.session_state.messages = []
                delete_user_chat_history(st.session_state.user.id)
                st.session_state.url_processed = False
                st.session_state.metrics = MetricsCollector()
                st.success("All data cleared successfully.")
            except Exception as e:
                st.error(f"Error clearing data: {e}")
        else:
            st.warning("Tick 'Confirm Clear' before clearing all data.")
# Debug Information (optional panel toggled by a checkbox).
if st.checkbox("Show Debug Info"):
    # The heading emoji was stored as mis-decoded UTF-8; restored here.
    st.subheader("🔍 Debug Information")
    col1, col2 = st.columns(2)
    with col1:
        st.write("Cache Information:")
        st.write(model.cache)
    with col2:
        st.write("Current Metrics:")
        # `metrics` is the dict computed in the sidebar section of this script.
        st.write(metrics)
        st.write("Current Context Preview:")
        # Only the first 500 characters, to keep the panel readable.
        st.write(model.context[:500] if model.context else "No context available")