import streamlit as st
from PyPDF2 import PdfReader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.chains.question_answering import load_qa_chain
from langchain.llms import OpenAI
import os

# SECURITY: API keys must never be hard-coded in source control. The keys that
# were previously embedded here should be treated as compromised and revoked.
# Supply credentials through the environment instead (for example
# `export OPENAI_API_KEY=...` before `streamlit run`, or a secrets manager).
# The LangChain OpenAI classes used in this file are constructed without an
# explicit key, so they depend on OPENAI_API_KEY being set in the environment.
# NOTE(review): SERPAPI_API_KEY was also assigned here, but nothing in this
# file reads it; provide it via the environment too if another module needs it.
if not os.environ.get("OPENAI_API_KEY"):
    st.error(
        "The OPENAI_API_KEY environment variable is not set. "
        "Set it before starting the app."
    )
    # Halt this script run so no OpenAI call is attempted without credentials.
    st.stop()


# Define a function to load PDF and perform processing
def process_pdf(pdf_path):
    """Build a searchable vector index and a QA chain from a PDF.

    Args:
        pdf_path: Whatever ``PdfReader`` accepts as its source; the value is
            handed to it unchanged (the UI in this file passes the uploaded
            file object).

    Returns:
        A ``(document_search, chain)`` tuple: the FAISS vector store built
        from the PDF's text chunks, and a "stuff" question-answering chain
        backed by the ``OpenAI`` LLM.

    Raises:
        ValueError: If no text could be extracted from the PDF (for example a
            scanned, image-only document), because there is nothing to index.
    """
    pdfreader = PdfReader(pdf_path)

    # Pages without extractable text yield a falsy value; skip those and join
    # the rest in one pass instead of growing a string with `+=`.
    page_texts = (page.extract_text() for page in pdfreader.pages)
    raw_text = "".join(text for text in page_texts if text)

    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=800,
        chunk_overlap=100,
        length_function=len,
    )
    texts = text_splitter.split_text(raw_text)

    # Fail early with a clear message rather than sending an empty chunk list
    # to the embedding/indexing step.
    if not texts:
        raise ValueError(
            "No extractable text found in the PDF; nothing to index."
        )

    embeddings = OpenAIEmbeddings()
    document_search = FAISS.from_texts(texts, embeddings)

    chain = load_qa_chain(OpenAI(), chain_type="stuff")

    return document_search, chain

# Function to get yes/no emoji based on answer content
def get_answer_emoji(answer):
    """Map a free-text answer to a yes / no / undetermined emoji.

    The answer is lower-cased and split into alphanumeric words, and only
    whole-word matches count. Plain substring matching would treat "know",
    "not" or "another" as "no", and "eyes" as "yes".

    Args:
        answer: The answer text to classify.

    Returns:
        "✅" if the word "yes" occurs, otherwise "❌" if the word "no" occurs,
        otherwise "🟡". "yes" takes precedence when both are present.
    """
    # Replace every non-alphanumeric character with a space so punctuation
    # attached to a word ("Yes," / "No.") does not prevent a match.
    normalized = "".join(
        ch if ch.isalnum() else " " for ch in answer.lower()
    )
    words = set(normalized.split())

    if "yes" in words:
        return "✅"
    if "no" in words:
        return "❌"
    return "🟡"

# Streamlit UI
st.title("OrangePro AI - PDF and Text Analysis")

# Two upload widgets: the PDF to analyse, and an optional text file whose
# lines are used as the questions.
uploaded_pdf_file = st.file_uploader(
    label="Upload a PDF file for analysis",
    type=["pdf"],
)
uploaded_text_file = st.file_uploader(
    label="Upload a text file with questions (if available)",
    type=["txt"],
)

if uploaded_pdf_file:
    st.subheader("Selected PDF Content")

    # Build the FAISS vector store and the QA chain for the uploaded PDF.
    # NOTE(review): Streamlit re-executes this script on each interaction, so
    # the PDF is re-embedded every time; consider caching this result.
    document_search, qa_chain = process_pdf(uploaded_pdf_file)

    # Display the content of the PDF. The extracted text is shown here; the
    # vector-store object returned above has no useful text representation.
    uploaded_pdf_file.seek(0)
    pdf_text = "".join(
        page.extract_text() or "" for page in PdfReader(uploaded_pdf_file).pages
    )
    st.write("PDF Content:")
    st.text(pdf_text)

    if uploaded_text_file:
        st.warning("Questions will be extracted from the uploaded text file. Disabling question input below.")
        # getvalue() returns the full buffer regardless of the current read
        # position; "utf-8-sig" also drops a leading BOM so it does not end up
        # inside the first question.
        text_content = uploaded_text_file.getvalue().decode('utf-8-sig')
        raw_questions = text_content.splitlines()
    else:
        # Allow the user to enter a list of questions
        raw_questions = st.text_area("Enter a list of questions (one per line):").split('\n')

    # Drop blank lines up front so the loop and the percentage denominator
    # both work from the same list of real questions.
    questions = [line.strip() for line in raw_questions if line.strip()]

    if st.button("Analyze Questions"):
        # Perform question answering for each question
        st.subheader("Answers:")
        answer_summary = []
        yes_count = 0

        for question in questions:
            docs = document_search.similarity_search(question)
            answer = qa_chain.run(input_documents=docs, question=question)

            emoji = get_answer_emoji(answer)
            answer_summary.append([question, answer, emoji])

            if emoji == "✅":
                yes_count += 1

        # Calculate and display the percentage of "yes" answers, counting only
        # the questions that were actually asked.
        total_questions = len(questions)
        if total_questions > 0:
            yes_percentage = (yes_count / total_questions) * 100
        else:
            yes_percentage = 0

        answer_summary.append(["Percentage of 'Yes' Answers", f"{yes_percentage:.2f}%", ""])

        # Display the summary in a table
        st.table(answer_summary)

# About section
# The description is kept as separate paragraphs and joined with a blank line
# when rendered in the sidebar info box.
about_paragraphs = (
    "OrangePro AI is an artificial intelligence testing and benchmarking platform for large language models (LLMs). It scores model performance based on real-world scenarios, allowing corporate clients such as Fortune 500 companies to choose the best model for their specific use cases.",
    "The platform automates scoring, ranking model performance in real-world scenarios and key criteria like hallucinations and safety. OrangePro AI also automatically generates adversarial test suites at a large scale and benchmarks models to help customers identify the best model for specific use cases.",
)
st.sidebar.title("About OrangePro AI")
st.sidebar.info("\n\n".join(about_paragraphs))

# Footer
st.sidebar.text("Powered by Streamlit and Langchain")