"""OrangePro AI Streamlit app: answer a list of questions about an uploaded PDF.

The app extracts the text of an uploaded PDF, indexes it in a FAISS vector
store using OpenAI embeddings, and runs a LangChain "stuff" question-answering
chain for every question supplied by the user (typed in, or uploaded as a text
file with one question per line).

Configuration:
    OPENAI_API_KEY must be set in the environment before the app is started.
    API keys must never be committed to source control. Keys that were
    previously hard-coded in this file should be treated as leaked and rotated.
    If other tooling needs SERPAPI_API_KEY, supply it through the environment
    as well; nothing in this script reads it.
"""

import os
import re

import streamlit as st
from PyPDF2 import PdfReader
from langchain.chains.question_answering import load_qa_chain
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS

# Whole-word matchers: a plain substring test would treat "not", "know" or
# "another" as "no", and "eyes" as "yes".
_YES_PATTERN = re.compile(r"\byes\b", re.IGNORECASE)
_NO_PATTERN = re.compile(r"\bno\b", re.IGNORECASE)


def extract_pdf_text(pdf_path):
    """Return the concatenated text of every page of a PDF.

    Args:
        pdf_path: Anything ``PyPDF2.PdfReader`` accepts (a path or a binary
            file-like object such as a Streamlit upload).

    Returns:
        The page texts joined without a separator. Pages for which no text
        could be extracted are skipped, so the result may be an empty string.
    """
    reader = PdfReader(pdf_path)
    page_texts = (page.extract_text() for page in reader.pages)
    return "".join(text for text in page_texts if text)


def build_qa_resources(raw_text):
    """Build the vector index and QA chain for a body of text.

    Args:
        raw_text: The full document text to index.

    Returns:
        A ``(document_search, chain)`` tuple: the FAISS store built from the
        text chunks and a "stuff" question-answering chain.

    Raises:
        ValueError: If the text yields no chunks to index (for example a
            scanned PDF with no text layer).
    """
    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=800,
        chunk_overlap=100,
        length_function=len,
    )
    texts = text_splitter.split_text(raw_text)
    if not texts:
        # Fail with a clear message instead of handing an empty list to FAISS.
        raise ValueError("No extractable text was found in the PDF.")

    embeddings = OpenAIEmbeddings()
    document_search = FAISS.from_texts(texts, embeddings)
    chain = load_qa_chain(OpenAI(), chain_type="stuff")
    return document_search, chain


def process_pdf(pdf_path):
    """Load a PDF and prepare it for question answering.

    Args:
        pdf_path: A path or binary file-like object readable by ``PdfReader``.

    Returns:
        A ``(document_search, chain)`` tuple, see ``build_qa_resources``.

    Raises:
        ValueError: If no text could be extracted from the PDF.
    """
    return build_qa_resources(extract_pdf_text(pdf_path))


def get_answer_emoji(answer):
    """Map an answer to a yes / no / undetermined emoji.

    The check is case-insensitive and matches whole words only; "yes" takes
    precedence when both words occur.

    Args:
        answer: The answer text produced by the QA chain.

    Returns:
        "✅" if the answer contains the word "yes", "❌" if it contains the
        word "no", otherwise "🟡".
    """
    if _YES_PATTERN.search(answer):
        return "✅"
    if _NO_PATTERN.search(answer):
        return "❌"
    return "🟡"


def _clean_questions(lines):
    """Strip each line and drop the ones that are blank."""
    stripped = (line.strip() for line in lines)
    return [line for line in stripped if line]


def render_sidebar():
    """Render the static "About" section and footer in the sidebar."""
    st.sidebar.title("About OrangePro AI")
    st.sidebar.info(
        "OrangePro AI is an artificial intelligence testing and benchmarking platform for large language models (LLMs). It scores model performance based on real-world scenarios, allowing corporate clients such as Fortune 500 companies to choose the best model for their specific use cases."
        "\n\n"
        "The platform automates scoring, ranking model performance in real-world scenarios and key criteria like hallucinations and safety. OrangePro AI also automatically generates adversarial test suites at a large scale and benchmarks models to help customers identify the best model for specific use cases."
    )
    st.sidebar.text("Powered by Streamlit and Langchain")


def _answer_questions(document_search, qa_chain, questions):
    """Answer every question and render the summary table.

    Args:
        document_search: Vector store exposing ``similarity_search``.
        qa_chain: QA chain exposing ``run(input_documents=..., question=...)``.
        questions: Non-empty list of non-blank question strings.
    """
    st.subheader("Answers:")
    answer_summary = []
    yes_count = 0
    for question in questions:
        docs = document_search.similarity_search(question)
        answer = qa_chain.run(input_documents=docs, question=question)
        emoji = get_answer_emoji(answer)
        answer_summary.append([question, answer, emoji])
        if emoji == "✅":
            yes_count += 1

    # The denominator is the number of questions actually asked; blank lines
    # were filtered out before this point.
    yes_percentage = (yes_count / len(questions)) * 100
    answer_summary.append(
        ["Percentage of 'Yes' Answers", f"{yes_percentage:.2f}%", ""]
    )
    st.table(answer_summary)


def main():
    """Render the page: uploads, PDF preview, question input and answers."""
    st.title("OrangePro AI - PDF and Text Analysis")
    # Rendered first so that the early returns below never hide the sidebar.
    render_sidebar()

    uploaded_pdf_file = st.file_uploader(
        "Upload a PDF file for analysis", type=["pdf"]
    )
    uploaded_text_file = st.file_uploader(
        "Upload a text file with questions (if available)", type=["txt"]
    )

    if not uploaded_pdf_file:
        return

    if not os.environ.get("OPENAI_API_KEY"):
        st.error(
            "OPENAI_API_KEY is not set. Export it in the environment before "
            "starting the app."
        )
        return

    st.subheader("Selected PDF Content")
    raw_text = extract_pdf_text(uploaded_pdf_file)
    try:
        document_search, qa_chain = build_qa_resources(raw_text)
    except ValueError as err:
        st.error(str(err))
        return

    # Show the extracted text itself rather than the vector store object.
    st.write("PDF Content:")
    st.text(raw_text)

    if uploaded_text_file:
        st.warning(
            "Questions will be extracted from the uploaded text file. "
            "Disabling question input below."
        )
        # getvalue() does not depend on the buffer's read position, and
        # "utf-8-sig" drops a leading BOM that would otherwise end up in the
        # first question.
        text_content = uploaded_text_file.getvalue().decode("utf-8-sig")
        raw_questions = text_content.splitlines()
    else:
        raw_questions = st.text_area(
            "Enter a list of questions (one per line):"
        ).split("\n")

    if st.button("Analyze Questions"):
        questions = _clean_questions(raw_questions)
        if not questions:
            st.warning("No questions to analyze.")
            return
        _answer_questions(document_search, qa_chain, questions)


# Streamlit executes the script as ``__main__``; the guard keeps imports of
# this module (e.g. from tests) free of UI side effects.
if __name__ == "__main__":
    main()