import os
import uuid
from collections import deque

import faiss
import numpy as np
import gradio as gr
import PyPDF2
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, BitsAndBytesConfig
from sentence_transformers import SentenceTransformer
from huggingface_hub import login

# Authentication
login(token=os.getenv("HUGGINGFACEHUB_API_TOKEN"))

# Load AI model (4-bit quantized to fit in GPU memory)
model_name = "Qwen/Qwen2.5-7B-Instruct-1M"
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype="float16",
    bnb_4bit_use_double_quant=True,
)
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=quantization_config,
    device_map="auto",
    max_memory={0: "22GiB", "cpu": "6GiB"},  # Prevent VRAM overflow
)
text_generator = pipeline("text-generation", model=model, tokenizer=tokenizer)

# Sentence embedding model
embedding_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

# FAISS indexes with HNSW for optimized retrieval
embedding_dim = 384  # output dimension of all-MiniLM-L6-v2
doc_index = faiss.IndexHNSWFlat(embedding_dim, 32)  # faster search at scale than IndexFlatL2
chat_index = faiss.IndexHNSWFlat(embedding_dim, 32)
doc_texts = []   # chunk texts, kept parallel to doc_index rows
chat_texts = []  # stored responses, kept parallel to chat_index rows

# Session-based memory
chat_sessions = {}
current_session_id = None
SESSION_HISTORY_LIMIT = 5

# Authentication
SECRET_PASSWORD = os.getenv("APP_SECRET_PASSWORD")
authenticated = False


def verify_password(password):
    global authenticated
    authenticated = password == SECRET_PASSWORD
    return "Access Granted!" if authenticated else "Invalid Password!"


# Chat session management
def start_new_session():
    global current_session_id
    current_session_id = str(uuid.uuid4())
    chat_sessions[current_session_id] = deque(maxlen=SESSION_HISTORY_LIMIT)
    return current_session_id


def get_embedding(text):
    return embedding_model.encode(text, normalize_embeddings=True)


def store_chat_in_session(user_input, response, reference):
    if current_session_id is None:
        start_new_session()
    chat_sessions[current_session_id].append((user_input, response, reference))
    # Keep chat_texts aligned with chat_index so search results map back to text
    chat_index.add(np.array([get_embedding(response)]))
    chat_texts.append(response)


def get_recent_chat_history():
    if current_session_id in chat_sessions:
        return "\n".join(
            f"User: {q}\nAI: {r}\nReference: {ref}"
            for q, r, ref in chat_sessions[current_session_id]
        )
    return ""


# Document processing
def process_pdf(pdf_file):
    if not authenticated:
        return "Access Denied!"
    pdf_reader = PyPDF2.PdfReader(pdf_file)
    # Extract each page once, then flatten line breaks into spaces
    page_texts = (page.extract_text() for page in pdf_reader.pages)
    document_text = " ".join(text.replace("\n", " ") for text in page_texts if text)
    text_chunks = [chunk for chunk in document_text.split(". ") if chunk.strip()]
    if not text_chunks:
        return "No extractable text found."
    embeddings = np.array([get_embedding(chunk) for chunk in text_chunks])
    doc_index.add(embeddings)
    doc_texts.extend(text_chunks)
    return "Doc Processed."


# Retrieve relevant HR policy passages
def retrieve_relevant_passage(query, top_k=3):
    # Callers unpack two values, so always return a (passage, reference) pair
    if not authenticated:
        return "Access Denied!", ""
    if doc_index.ntotal == 0:
        return "No relevant document found.", ""
    query_embedding = get_embedding(query)
    D, I = doc_index.search(np.array([query_embedding]), top_k)
    valid_indices = [i for i in I[0] if 0 <= i < len(doc_texts)]
    if valid_indices:
        passages = [doc_texts[i] for i in valid_indices]
        return "\n".join(f"- {p}" for p in passages), "\n".join(passages)
    return "No relevant document found.", ""


# Retrieve chat context
def retrieve_chat_context(user_input, top_k=3):
    if not authenticated:
        return ""
    retrieved_texts = []
    if chat_index.ntotal > 0:
        query_embedding = get_embedding(user_input)
        D, I = chat_index.search(np.array([query_embedding]), top_k)
        # Look up responses in chat_texts, which mirrors chat_index row-for-row
        # (the per-session deque is capped and cannot be indexed by FAISS ids)
        retrieved_texts = [chat_texts[i] for i in I[0] if 0 <= i < len(chat_texts)]
    return f"{get_recent_chat_history()}\n{''.join(retrieved_texts)}"


# AI chatbot with streaming
def chat_with_pdf(user_input, chat_history=None):
    chat_history = chat_history or []  # avoid a shared mutable default argument
    if not authenticated:
        return iter(["Access Denied!"]), chat_history
    relevant_passage, reference = retrieve_relevant_passage(user_input)
    past_chat_context = get_recent_chat_history()
    prompt = (
        "You are an HR assistant. Provide responses based on company policies. "
        "If unsure, say 'Please contact HR'.\n\n"
        f"Recent Chat:\n{past_chat_context}\n"
        f"HR Policy Context:\n{relevant_passage}\n"
        f"User Inquiry: {user_input}\nAI Response:"
    )

    def response_generator():
        response = text_generator(
            prompt,
            max_new_tokens=1024,
            do_sample=True,
            temperature=0.3,
            top_p=0.85,
            repetition_penalty=1.2,
            return_full_text=False,
        )
        answer = response[0]["generated_text"].split("AI Response:")[-1].strip()
        store_chat_in_session(user_input, answer, reference)
        yield f"{answer}\n\n*Reference:* _{reference}_"

    return response_generator(), chat_history


# Gradio interface
with gr.Blocks() as chat_ui:
    gr.Markdown("# 📄 HR-Talk")

    with gr.Accordion("Authenticator", open=False):
        password_input = gr.Textbox(placeholder="Enter Password", type="password", interactive=True, scale=3, show_label=False)
        verify_button = gr.Button("✅ Verify", variant="primary", scale=1)
        access_status = gr.Label(value="Status", scale=2)
        verify_button.click(verify_password, inputs=[password_input], outputs=[access_status])

    with gr.Accordion("Document Feeder", open=False):
        file_upload = gr.File(label="📂 Upload PDF", file_types=[".pdf"], interactive=True, scale=5)
        upload_btn = gr.Button("📤 Process PDF", variant="primary", scale=2)
        status = gr.Label(value="Waiting for upload...", scale=3)
        upload_btn.click(process_pdf, inputs=[file_upload], outputs=[status])

    chatbot = gr.Chatbot()
    user_input = gr.Textbox(placeholder="Type your message...", show_label=False, scale=8)
    send_btn = gr.Button("Send", scale=2)

    def stream_response(user_input, chat_history):
        response_generator, chat_history = chat_with_pdf(user_input, chat_history)
        full_response = ""
        for chunk in response_generator:
            full_response += chunk
            # Show the in-progress answer without dropping earlier turns
            yield chat_history + [(user_input, full_response)]
        chat_history.append((user_input, full_response))
        yield chat_history

    send_btn.click(stream_response, inputs=[user_input, chatbot], outputs=[chatbot])

if __name__ == "__main__":
    chat_ui.launch()