# NOTE(review): removed stray "Spaces / Sleeping" page-scrape artifact here —
# it was residue from a hosting-status page and not valid Python.
# crawlgpt/src/crawlgpt/ui/chat_app.py
# Streamlit chat interface for the CrawlGPT system with user authentication.
import streamlit as st
import asyncio
import time
from datetime import datetime
import json

from src.crawlgpt.core.LLMBasedCrawler import Model
from src.crawlgpt.core.database import save_chat_message, get_chat_history, delete_user_chat_history, restore_chat_history
from src.crawlgpt.utils.monitoring import MetricsCollector, Metrics
from src.crawlgpt.utils.progress import ProgressTracker
from src.crawlgpt.utils.data_manager import DataManager
from src.crawlgpt.utils.content_validator import ContentValidator
from src.crawlgpt.ui.login import show_login

# Gate the whole app behind login: everything below may assume
# st.session_state.user exists.
if 'user' not in st.session_state:
    show_login()
    st.stop()  # Halt script execution for unauthenticated visitors
# Home page setup (first Streamlit UI calls after the auth gate).
# NOTE(review): the garbled title glyphs ("ππ€") look like mojibake of the
# original emoji — preserved byte-for-byte here; restore the intended emoji
# from version control if available.
st.set_page_config(
    page_title="CrawlGPT ππ€",
    page_icon="π€",
    layout="centered",
)

# App title and a short description of what the tool does.
st.title("CrawlGPT ππ€")
st.write(
    "This app extracts content from a URL, stores it in a vector database, and generates responses to user queries. "
    "It also summarizes extracted content for efficient retrieval."
)
# Initialize long-lived components once per session.
if "model" not in st.session_state:
    st.session_state.model = Model()
    st.session_state.data_manager = DataManager()
    st.session_state.content_validator = ContentValidator()
    st.session_state.url_processed = False

if "use_summary" not in st.session_state:
    st.session_state.use_summary = True

if "metrics" not in st.session_state:
    st.session_state.metrics = MetricsCollector()

# Load this user's chat history from the database on first run.
# Fixed: the component-init branch above previously also set
# st.session_state.messages = [], which made this check always False and
# silently skipped history restoration.
if "messages" not in st.session_state:
    history = get_chat_history(st.session_state.user.id)
    st.session_state.messages = [{
        "role": msg.role,
        "content": msg.message,
        "context": msg.context,
        "timestamp": msg.timestamp
    } for msg in history]

# Convenience alias used throughout the rest of the script.
model = st.session_state.model
def load_chat_history():
    """Restore chat history and rebuild the model's RAG state from the database.

    Side effects:
        - Clears the current model state, then reloads the logged-in user's
          messages into st.session_state.messages.
        - Rebuilds model.context by joining the per-message context snippets.
        - Repopulates the vector database (chunks + generated summaries) so
          retrieval works again, and marks the URL as processed.
        - Calls st.rerun() on success; shows an error banner on failure.
    """
    try:
        # Drop any existing context/vector data before restoring.
        model.clear()

        st.session_state.messages = restore_chat_history(st.session_state.user.id)

        # Stitch the model context back together from stored snippets.
        # Assumes restore_chat_history returns dicts with a 'context' key —
        # TODO confirm against core.database.
        context_parts = [
            msg['context'] for msg in st.session_state.messages
            if msg.get('context')
        ]
        model.context = "\n".join(context_parts)

        # Rebuild the vector database from the restored context.
        if model.context:
            chunks = model.chunk_text(model.context)
            summaries = [model.summarizer.generate_summary(chunk) for chunk in chunks]
            model.database.add_data(chunks, summaries)
            st.session_state.url_processed = True

        st.rerun()
    except Exception as e:
        st.error(f"Restoration failed: {str(e)}")
# Sidebar: user info, live metrics, RAG/LLM settings, and backup management.
with st.sidebar:
    st.subheader(f"π€ User: {st.session_state.user.username}")

    st.subheader("π System Metrics")
    metrics = st.session_state.metrics.metrics.to_dict()
    st.metric("Total Requests", metrics["total_requests"])
    # max(..., 1) guards against division by zero before any requests exist.
    st.metric("Success Rate", f"{(metrics['successful_requests']/max(metrics['total_requests'], 1))*100:.1f}%")
    st.metric("Avg Response Time", f"{metrics['average_response_time']:.2f}s")

    # RAG settings. NOTE(review): the checkbox default (False) disagrees with
    # the session-state default (True) set at init — confirm which is intended.
    st.subheader("π§ RAG Settings")
    st.session_state.use_summary = st.checkbox("Use Summarized RAG", value=False, help="Don't use summarization when dealing with Coding Documentation.")

    # LLM generation settings, read by the chat handler below.
    st.subheader("π€ Normal LLM Settings")
    temperature = st.slider("Temperature", 0.0, 1.0, 0.7, help="Controls the randomness of the generated text. Lower values are more deterministic.")
    max_tokens = st.slider("Max Tokens", 500, 10000, 5000, help="Maximum number of tokens to generate in the response.")
    model_id = st.radio("Model ID", ['llama-3.1-8b-instant', 'llama-3.3-70b-versatile', 'mixtral-8x7b-32768'], help="Choose the model to use for generating responses.")

    # Export/import of app state as a JSON backup.
    st.subheader("πΎ Data Management")
    if st.button("Export Current State"):
        try:
            export_data = {
                "metrics": metrics,
                "vector_database": model.database.to_dict(),
                "messages": st.session_state.messages
            }
            st.session_state.export_json = json.dumps(export_data)
            st.success("Data exported successfully!")
        except Exception as e:
            st.error(f"Export failed: {e}")

    if "export_json" in st.session_state:
        st.download_button(
            label="Download Backup",
            data=st.session_state.export_json,
            file_name=f"crawlgpt_backup_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json",
            mime="application/json"
        )

    uploaded_file = st.file_uploader("Import Previous State", type=['json'])
    if uploaded_file is not None:
        try:
            imported_data = json.loads(uploaded_file.read())

            # Validate the backup's top-level structure before importing.
            required_keys = ["metrics", "vector_database", "messages"]
            if not all(key in imported_data for key in required_keys):
                raise ValueError("Invalid backup file structure")

            # Restore model state (context + vector database).
            # Fixed: import_state was previously called twice with the same
            # payload; the redundant second call has been removed.
            model.import_state(imported_data)

            # Restore chat history (guaranteed present by the check above).
            st.session_state.messages = imported_data["messages"]

            # Restore metrics into a fresh collector.
            st.session_state.metrics = MetricsCollector()
            st.session_state.metrics.metrics = Metrics.from_dict(imported_data["metrics"])

            st.success("Data imported successfully! You can continue chatting.")
            # NOTE(review): matches the original's final assignment — chat is
            # enabled even if the imported context is empty; confirm intended.
            st.session_state.url_processed = True
        except Exception as e:
            st.error(f"Import failed: {e}")
            st.session_state.url_processed = False

    if st.button("β»οΈ Restore Full Chat State"):
        with st.spinner("Rebuilding AI context..."):
            load_chat_history()
        # NOTE(review): load_chat_history() calls st.rerun() on success, so
        # this banner is only reached if the rerun did not fire.
        st.success("Full conversation state restored!")
# URL processing section: validate, crawl, and index a URL's content.
url_col1, url_col2 = st.columns([3, 1])
with url_col1:
    url = st.text_input("Enter URL:", help="Provide the URL to extract content from.")
with url_col2:
    process_url = st.button("Process URL")

if process_url and url:
    if not url.strip():
        # Reachable when the input is whitespace-only.
        st.warning("Please enter a valid URL.")
    else:
        progress_bar = st.progress(0)
        status_text = st.empty()
        try:
            if not st.session_state.content_validator.is_valid_url(url):
                st.error("Invalid URL format")
            else:
                async def extract_content():
                    """Crawl the URL, index its content, and record metrics."""
                    start_time = time.time()
                    # NOTE(review): 'progress' is never used afterwards; kept in
                    # case ProgressTracker's constructor has side effects — verify.
                    progress = ProgressTracker(total_steps=4, operation_name="content_extraction")
                    try:
                        status_text.text("Validating URL...")
                        progress_bar.progress(25)
                        status_text.text("Crawling content...")
                        progress_bar.progress(50)

                        success, msg = await model.extract_content_from_url(url)
                        if not success:
                            raise Exception(msg)

                        status_text.text("Processing content...")
                        progress_bar.progress(75)
                        status_text.text("Storing in database...")
                        progress_bar.progress(100)

                        st.session_state.metrics.record_request(
                            success=True,
                            response_time=time.time() - start_time,
                            tokens_used=len(model.context.split())
                        )
                        st.session_state.url_processed = True
                        st.session_state.messages.append({
                            "role": "system",
                            "content": f"Content from {url} processed",
                            "context": model.context  # Store full context
                        })
                    except Exception:
                        # Record the failure, then surface it to the outer handler.
                        st.session_state.metrics.record_request(
                            success=False,
                            response_time=time.time() - start_time,
                            tokens_used=0
                        )
                        raise  # Fixed: bare raise preserves the original traceback
                    finally:
                        status_text.empty()
                        progress_bar.empty()

                asyncio.run(extract_content())
        except Exception as e:
            st.error(f"Error processing URL: {e}")
# Chat interface: render the conversation accumulated in session state.
st.subheader("π Chat Interface")

chat_container = st.container()
with chat_container:
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.write(message["content"])
# Chat input: disabled until a URL has been processed (or state restored).
if chat_input := st.chat_input("Ask about the content...", disabled=not st.session_state.url_processed):
    # Echo the user's message.
    with st.chat_message("user"):
        st.write(chat_input)

    # Persist the user message in session history and the database.
    st.session_state.messages.append({"role": "user", "content": chat_input})
    save_chat_message(
        st.session_state.user.id,
        chat_input,
        "user",
        model.context  # Store full context
    )

    # Hoisted out of the try so the except handler can never see it unbound.
    start_time = time.time()
    try:
        # Generate and show the assistant's response with a typing indicator.
        with st.chat_message("assistant"):
            with st.spinner("Thinking..."):
                response = model.generate_response(
                    chat_input,
                    temperature,
                    max_tokens,
                    model_id,
                    use_summary=st.session_state.use_summary
                )
                st.write(response)

        # Persist the assistant response in session history and the database.
        st.session_state.messages.append({"role": "assistant", "content": response})
        save_chat_message(
            st.session_state.user.id,
            response,
            "assistant",
            model.context
        )

        # Record success metrics (word count used as a token proxy).
        st.session_state.metrics.record_request(
            success=True,
            response_time=time.time() - start_time,
            tokens_used=len(response.split())
        )
    except Exception as e:
        st.session_state.metrics.record_request(
            success=False,
            response_time=time.time() - start_time,
            tokens_used=0
        )
        st.error(f"Error generating response: {e}")
# Debug and clear options.
col1, col2 = st.columns(2)
with col1:
    if st.button("Clear Chat History"):
        try:
            delete_user_chat_history(st.session_state.user.id)
            st.session_state.messages = []
            st.session_state.url_processed = False
            st.success("Chat history cleared!")
            st.rerun()
        except Exception as e:
            st.error(f"Error clearing history: {e}")

with col2:
    if st.button("Clear All Data"):
        # NOTE(review): a checkbox nested inside a button branch cannot be
        # ticked in the same run as the click (the button resets on rerun),
        # so this confirm path is effectively unreachable in Streamlit.
        # Consider a session-state-based two-step confirmation instead.
        if st.checkbox("Confirm Clear"):
            try:
                model.clear()
                st.session_state.messages = []
                delete_user_chat_history(st.session_state.user.id)
                st.session_state.url_processed = False
                st.session_state.metrics = MetricsCollector()
                st.success("All data cleared successfully.")
            except Exception as e:
                st.error(f"Error clearing data: {e}")
# Debug information panel.
if st.checkbox("Show Debug Info"):
    st.subheader("π Debug Information")
    col1, col2 = st.columns(2)
    with col1:
        st.write("Cache Information:")
        st.write(model.cache)
    with col2:
        st.write("Current Metrics:")
        # 'metrics' is the dict built in the sidebar block (module scope —
        # 'with st.sidebar' does not introduce a new scope).
        st.write(metrics)
    st.write("Current Context Preview:")
    # Show at most the first 500 characters of the crawled context.
    st.write(model.context[:500] if model.context else "No context available")