"""Streamlit app: upload two PDFs, extract their text, and ask an LLM to list the differences."""

import streamlit as st
from PyPDF2 import PdfReader
from anthropic import Anthropic

from prompts import DIFFERENTIATE_PROMPT

# NOTE(review): the tag literals in this file had been stripped to empty
# strings (``str.split('')`` raises ``ValueError: empty separator``), so the
# names below are reconstructed from the surrounding code (dict keys, variable
# names). Confirm them against the tags DIFFERENTIATE_PROMPT asks the model
# to emit.
_DIFFERENCES_OPEN = "<differences>"
_DIFFERENCES_CLOSE = "</differences>"
_DIFFERENCE_CLOSE = "</difference>"
_FIELD_TAGS = {
    "text1": ("<text1>", "</text1>"),
    "text2": ("<text2>", "</text2>"),
    "explanation": ("<explanation>", "</explanation>"),
}


def _between(text, open_tag, close_tag):
    """Return the text between the first ``open_tag`` and the next ``close_tag``, or None."""
    _, found_open, rest = text.partition(open_tag)
    if not found_open:
        return None
    inner, found_close, _ = rest.partition(close_tag)
    return inner if found_close else None


def extract_differences(input_text):
    """Parse tagged difference entries into a list of dicts.

    Args:
        input_text: The tagged body of the model reply (the part between the
            outer differences tags).

    Returns:
        A list of ``{'text1': ..., 'text2': ..., 'explanation': ...}`` dicts
        with whitespace-stripped values. Entries missing any of the three
        fields are skipped.
    """
    parsed_data = []
    # Splitting on the closing tag leaves a trailing fragment with no fields;
    # it is dropped by the completeness check below.
    for chunk in input_text.strip().split(_DIFFERENCE_CLOSE):
        fields = {
            key: _between(chunk, open_tag, close_tag)
            for key, (open_tag, close_tag) in _FIELD_TAGS.items()
        }
        if any(value is None for value in fields.values()):
            continue
        parsed_data.append({key: value.strip() for key, value in fields.items()})
    return parsed_data


# The original had a bare ``st.cache_data()`` statement here, which builds a
# decorator and throws it away; applying it is what actually caches the call.
@st.cache_data
def make_llm_api_call(prompt):
    """Send ``prompt`` as a single user message and return the API response.

    Args:
        prompt: The full prompt text.

    Returns:
        The message object returned by ``client.messages.create``.
    """
    client = Anthropic()
    message = client.messages.create(
        model="claude-3-haiku-20240307",
        max_tokens=4096,
        temperature=0,
        messages=[{"role": "user", "content": [{"type": "text", "text": prompt}]}],
    )
    return message


def get_llm_response(extractedtext1, extractedtext2):
    """Ask the model to compare two texts and format its answer for display.

    Args:
        extractedtext1: Text of the first document.
        extractedtext2: Text of the second document.

    Returns:
        A ``(display_text, differences_list)`` tuple. ``display_text`` is
        markdown; ``differences_list`` is the output of
        ``extract_differences``. If the reply does not contain the outer
        differences tags, the raw reply and an empty list are returned.
    """
    prompt = DIFFERENTIATE_PROMPT.format(text1=extractedtext1, text2=extractedtext2)
    message = make_llm_api_call(prompt)
    message_text = message.content[0].text

    # partition() never raises on a missing tag, unlike split(...)[1].
    before_differences, found_open, rest = message_text.partition(_DIFFERENCES_OPEN)
    tagged_body, found_close, after_differences = rest.partition(_DIFFERENCES_CLOSE)
    if not (found_open and found_close):
        return message_text, []

    differences_list = extract_differences(tagged_body.strip())
    difference_content = "\n\n\n".join(
        f"**Text1:**\n\n{d['text1']}\n\n**Text2:**\n\n{d['text2']}\n\n**Explanation:**\n\n{d['explanation']}\n\n----------------------"
        for d in differences_list
    )
    display_text = f"{before_differences}\n\n{difference_content}\n\n{after_differences}"
    return display_text, differences_list


def extract_text_with_pypdf(pdf_path):
    """Return the text of every page of a PDF, each page followed by a newline.

    Args:
        pdf_path: Passed straight to ``PdfReader``; ``main`` passes Streamlit
            uploaded-file objects.

    Returns:
        The concatenated page texts as one string.
    """
    reader = PdfReader(pdf_path)
    # ``or ''`` keeps a page with no extractable text from breaking the join.
    return "".join(f"{page.extract_text() or ''}\n" for page in reader.pages)


def _highlight_snippets(text, snippets):
    """Wrap every occurrence of each non-empty snippet in Streamlit red-text markup."""
    for snippet in snippets:
        snippet = snippet.strip()
        # An empty snippet is "in" every string and would be an invalid
        # separator, so it is skipped.
        if snippet:
            text = text.replace(snippet, f":red[{snippet}]")
    return text


def main():
    """Render the upload UI, show the extracted texts, and run the comparison on demand."""
    st.set_page_config(layout="wide")  # Enable wide layout
    # NOTE(review): the HTML tags of this title were stripped from the file;
    # the markup below is a reconstruction (centered heading) - confirm.
    st.markdown(
        "<div style='text-align: center;'>"
        "<h1>PDF Upload and Compare App</h1>"
        "</div>",
        unsafe_allow_html=True,
    )

    # Side-by-side uploaders: text 1 on the left, text 2 on the right.
    col1, col2 = st.columns([2, 2])
    uploaded_file1 = col1.file_uploader("**Text 1**", type="pdf")
    uploaded_file2 = col2.file_uploader("**Text 2**", type="pdf")

    # Nothing to compare until both files are present.
    if not (uploaded_file1 and uploaded_file2):
        return

    filename1 = uploaded_file1.name
    filename2 = uploaded_file2.name

    try:
        extracted_text1 = extract_text_with_pypdf(uploaded_file1)
        extracted_text2 = extract_text_with_pypdf(uploaded_file2)
    except Exception as e:
        st.error(f"Error extracting text from files: {str(e)}")
        # Without both texts the comparison below cannot run.
        return

    with col1.expander(filename1):
        st.write(extracted_text1)
    with col2.expander(filename2):
        st.write(extracted_text2)
    st.success(f"Content of files **{filename1}** and **{filename2}** have been extracted successfully.")

    # Button at the bottom to run the comparison.
    if st.button("Find Differences"):
        try:
            display_text, parsed_data = get_llm_response(extracted_text1, extracted_text2)
            display_text1 = _highlight_snippets(
                extracted_text1, [diff["text1"] for diff in parsed_data]
            )
            display_text2 = _highlight_snippets(
                extracted_text2, [diff["text2"] for diff in parsed_data]
            )
            with col1.expander(f"{filename1} Difference Highlighted"):
                st.write(display_text1)
            with col2.expander(f"{filename2} Difference Highlighted"):
                st.write(display_text2)
            st.markdown(display_text)
        except Exception as e:
            st.error(f"Error finding differences: {str(e)}")


if __name__ == "__main__":
    main()