import re

import streamlit as st
from anthropic import Anthropic
from PyPDF2 import PdfReader

from prompts import DIFFERENTIATE_PROMPT
# NOTE(review): the tag literals in this function were lost from the source
# (every separator had become the empty string, which makes ``str.split``
# raise ``ValueError``). The names below are reconstructed from the keys of the
# dictionaries this function returns; confirm them against the tags that
# DIFFERENTIATE_PROMPT asks the model to emit.
_DIFFERENCE_FIELDS = ("text1", "text2", "explanation")


def _tag_content(segment, tag):
    """Return the stripped text of the first ``<tag>...</tag>`` span in *segment*, or None."""
    match = re.search(rf"<{tag}>(.*?)</{tag}>", segment, flags=re.DOTALL)
    return match.group(1).strip() if match else None


def extract_differences(input_text):
    """Parse tagged difference entries out of the model's answer.

    Each entry is expected to carry a ``<text1>``, a ``<text2>`` and an
    ``<explanation>`` span. Entries that are missing any of the three spans
    are skipped rather than reported partially.

    Args:
        input_text: The text to scan (typically the body of the differences
            section of the model response).

    Returns:
        A list of dicts with the keys ``'text1'``, ``'text2'`` and
        ``'explanation'``; every value is whitespace-stripped. The list is
        empty when no complete entry is found.
    """
    parsed_data = []
    # Start a new segment at every opening <text1> tag. This keeps the parser
    # independent of whatever wrapper tag surrounds an entry, and prevents the
    # fields of an incomplete entry from being paired with those of the next.
    for segment in re.split(r"(?=<text1>)", input_text.strip()):
        values = [_tag_content(segment, tag) for tag in _DIFFERENCE_FIELDS]
        if all(value is not None for value in values):
            parsed_data.append(dict(zip(_DIFFERENCE_FIELDS, values)))
    return parsed_data
# The original line was the bare statement ``st.cache_data()``, which builds a
# decorator and throws it away, so nothing was cached. Applying it as a
# decorator lets Streamlit reuse the response for an identical prompt when the
# script reruns (e.g. on every widget interaction).
# NOTE(review): st.cache_data serializes the return value; confirm the
# Anthropic message object pickles cleanly with the installed SDK version.
@st.cache_data()
def make_llm_api_call(prompt):
    """Send *prompt* to the Anthropic Messages API as a single user turn.

    Args:
        prompt: The full prompt text to send.

    Returns:
        The message object returned by ``client.messages.create``.
    """
    # No credentials are passed explicitly; the client is constructed with its
    # defaults.
    client = Anthropic()
    message = client.messages.create(
        model="claude-3-haiku-20240307",
        max_tokens=4096,
        temperature=0,  # as deterministic as the API allows
        messages=[{"role": "user", "content": [{"type": "text", "text": prompt}]}],
    )
    return message
def get_llm_response(extractedtext1, extractedtext2):
    """Ask the model to compare two texts and build a displayable report.

    Args:
        extractedtext1: Text of the first document.
        extractedtext2: Text of the second document.

    Returns:
        A ``(display_text, differences_list)`` tuple. ``display_text`` is
        Markdown in which the tagged differences section of the response has
        been replaced by a formatted listing; ``differences_list`` is the list
        of dicts produced by ``extract_differences``. When the differences
        section cannot be located or parsed, the raw response text and an
        empty list are returned instead.
    """
    prompt = DIFFERENTIATE_PROMPT.format(text1=extractedtext1, text2=extractedtext2)
    message = make_llm_api_call(prompt)
    message_text = message.content[0].text

    # NOTE(review): the tag literals were lost from the source (the separators
    # had become empty strings, which makes ``str.split`` raise ValueError
    # before the fallback below could run). ``<differences>`` is reconstructed
    # from the variable names; confirm against DIFFERENTIATE_PROMPT.
    open_tag, close_tag = "<differences>", "</differences>"

    # partition() never raises, so a response without the expected section
    # takes the same fallback path as a section that fails to parse.
    before_differences, found_open, remainder = message_text.partition(open_tag)
    differences_block, found_close, after_differences = remainder.partition(close_tag)
    if not (found_open and found_close):
        return message_text, []

    try:
        differences_list = extract_differences(differences_block.strip())
    except Exception:
        # Deliberate best effort: showing the raw answer is more useful to the
        # user than failing the whole comparison on a parsing problem.
        return message_text, []

    difference_content = "\n\n\n".join(
        f"**Text1:**\n\n{d['text1']}\n\n**Text2:**\n\n{d['text2']}\n\n**Explanation:**\n\n{d['explanation']}\n\n----------------------"
        for d in differences_list
    )
    display_text = f"{before_differences}\n\n{difference_content}\n\n{after_differences}"
    return display_text, differences_list
def extract_text_with_pypdf(pdf_path):
    """Return the text of every page of a PDF, one newline after each page.

    Args:
        pdf_path: Whatever ``PdfReader`` accepts as its source; ``main`` passes
            the objects returned by Streamlit's file uploader.

    Returns:
        A single string made of each page's extracted text followed by
        ``"\\n"``, in page order.
    """
    document = PdfReader(pdf_path)
    page_chunks = [page.extract_text() + "\n" for page in document.pages]
    return "".join(page_chunks)
def _highlight_snippet(text, snippet):
    """Wrap every occurrence of *snippet* in *text* in Streamlit red-text markup."""
    # An empty snippet would match everywhere (and made the original
    # split-based code raise), so leave the text untouched in that case.
    if not snippet:
        return text
    # Streamlit's colored-text syntax is ``:color[text]``; the original
    # ``:red:[text]`` is not recognized and would be shown literally.
    return text.replace(snippet, f":red[{snippet}]")


def main():
    """Run the Streamlit page: upload two PDFs, show their text, compare them.

    The page shows two side-by-side PDF uploaders. Once both files are
    present their extracted text is displayed in expanders, and a
    "Find Differences" button sends both texts to the model, then shows each
    text with the reported differing passages highlighted, followed by the
    model's formatted report.
    """
    st.set_page_config(layout="wide")  # Enable wide layout

    # NOTE(review): the HTML of this title was lost from the source (the
    # string literal had been left unterminated across several lines). A
    # centered heading is a reconstruction; restore the original markup if it
    # is available.
    st.markdown(
        "<h1 style='text-align: center;'>PDF Upload and Compare App</h1>",
        unsafe_allow_html=True,
    )

    # Two equal-width columns, one uploader in each.
    col1, col2 = st.columns([2, 2])
    uploaded_file1 = col1.file_uploader("**Text 1**", type="pdf")
    uploaded_file2 = col2.file_uploader("**Text 2**", type="pdf")

    # Nothing to do until both files are uploaded.
    if not (uploaded_file1 and uploaded_file2):
        return

    filename1 = uploaded_file1.name
    filename2 = uploaded_file2.name

    try:
        extracted_text1 = extract_text_with_pypdf(uploaded_file1)
        extracted_text2 = extract_text_with_pypdf(uploaded_file2)
        with col1.expander(filename1):
            st.write(extracted_text1)
        with col2.expander(filename2):
            st.write(extracted_text2)
        st.success(f"Content of files **{filename1}** and **{filename2}** have been extracted successfully.")
    except Exception as e:
        st.error(f"Error extracting text from files: {e}")
        # Without the extracted texts the comparison below cannot run; stop
        # here instead of failing later on undefined variables.
        return

    if not st.button("Find Differences"):
        return

    try:
        display_text, parsed_data = get_llm_response(extracted_text1, extracted_text2)
        display_text1 = extracted_text1
        display_text2 = extracted_text2
        for diff in parsed_data:
            display_text1 = _highlight_snippet(display_text1, diff['text1'].strip())
            display_text2 = _highlight_snippet(display_text2, diff['text2'].strip())
        with col1.expander(f"{filename1} Difference Highlighted"):
            st.write(display_text1)
        with col2.expander(f"{filename2} Difference Highlighted"):
            st.write(display_text2)
        st.markdown(display_text)
    except Exception as e:
        st.error(f"Error finding differences: {e}")
# Build the page only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()