Spaces:

jayash391
/

221b_Baker_Street

Runtime error

App Files Files Community

jayash391 committed on Apr 28, 2024

Commit

ea05e35

verified ·

1 Parent(s): 9fbf0bd

Upload sherlock2.py

Browse files

Files changed (1) hide show

sherlock2.py +328 -0

sherlock2.py ADDED Viewed

	@@ -0,0 +1,328 @@

+import google.generativeai as genai
+import streamlit as st
+from bs4 import BeautifulSoup
+import wikipedia
+import os
+from googleapiclient.discovery import build
+from dotenv import load_dotenv
+import textwrap
+import PIL
+import PyPDF2
+import textract
+# Load environment variables (e.g. from a local .env file) before any os.getenv() lookup below.
+load_dotenv()
+# Configure Gemini API access. The key is read from the GEMINI_API_KEY_PROJECTID environment
+# variable; os.getenv() returns None when it is unset.
+genai.configure(api_key=os.getenv("GEMINI_API_KEY_PROJECTID"))
+# Text model shared by keyword extraction, search-query generation, case analysis and chat.
+model = genai.GenerativeModel('models/gemini-1.5-pro')
+# Multimodal model used by process_images() to describe uploaded case images.
+vision_model = genai.GenerativeModel('models/gemini-pro-vision')
+# Sherlock Holmes's persona and guidelines. Both strings are passed as leading prompt parts
+# to every analysis, report and chat request made with `model`.
+sherlock_persona = """
+You are Sherlock Holmes, the world-renowned consulting detective residing at 221B Baker Street.
+You possess exceptional deductive reasoning, observation skills, and knowledge in various fields
+such as forensic science, chemistry, and criminal psychology.
+You are known for your sharp wit, logical thinking, and ability to solve complex mysteries.
+"""
+sherlock_guidelines = """
+* Respond in a manner consistent with Sherlock Holmes's personality, maintaining a formal and articulate tone.
+* Utilize your extensive knowledge and deductive reasoning skills to analyze case details and form hypotheses.
+* Employ a keen sense of observation and attention to detail when examining evidence.
+* Consider various possibilities and avoid jumping to conclusions without sufficient evidence.
+* Be confident in your deductions but remain open to new information and alternative perspectives.
+"""
+# Generate embeddings for Sherlock Holmes corpus (models/embedding-001)
+embedding_model = genai.EmbeddingModel('models/embedding-001')
+# Function for embedding generation (using models/embedding-001)
+def generate_embeddings_from_documents(extracted_text):
+    """Generates embeddings for a list of extracted text documents using the 'models/embedding-001' model
+    and the appropriate task type."""
+    embeddings = []
+    for text in extracted_text:
+        try:
+            # Determine the appropriate task type (e.g., "RETRIEVAL_DOCUMENT" for search/similarity)
+            task_type = "RETRIEVAL_DOCUMENT"
+            response = embedding_model.embed_text(text, task_type=task_type)
+            embeddings.append(response["embedding"])
+        except Exception as e:
+            st.error(f"Error generating embeddings: {e}")
+    return embeddings
+# Web scraping and Wikipedia search function
+def search_and_scrape_wikipedia(keywords, max_topics_per_query=3, mining_model='gemini-pro'):
+    """
+    Searches and scrapes Wikipedia for information relevant to the provided keywords.
+    Args:
+        keywords (list): A list of keywords to search for on Wikipedia.
+        max_topics_per_query (int, optional): The maximum number of Wikipedia topics to explore for each query. Defaults to 3.
+        mining_model (str, optional): The name of the generative model to use for extracting relevant information.
+                                     Defaults to 'gemini-pro'.
+    Returns:
+        list: A list of dictionaries, where each dictionary represents a relevant piece of information, with keys:
+        - "topic": The Wikipedia topic title.
+        - "summary": A summary of the relevant information extracted from the topic.
+        - "url": The URL of the Wikipedia page.
+        - "additional_sources": (Optional) A list of additional source URLs extracted from citations.
+    """
+    search_history = set()  # Keep track of explored topics to avoid redundancy
+    wikipedia_info = []
+    mining_model = genai.GenerativeModel(mining_model)  # Initialize the generative model
+    for query in keywords:
+        search_terms = wikipedia.search(query)  # Search Wikipedia using the keyword
+        for search_term in search_terms[:max_topics_per_query]:  # Explore top results
+            if search_term in search_history:
+                continue  # Skip if the topic has already been explored
+            search_history.add(search_term)
+            try:
+                page = wikipedia.page(search_term, auto_suggest=False)  # Get the Wikipedia page
+                url = page.url
+                page_content = page.content
+                # Extract Relevant Information using the Generative Model
+                response = mining_model.generate_content(textwrap.dedent(f"""\
+                    Extract relevant information related to the keyword "{query}"
+                    from the following Wikipedia page content:
+                    {page_content}
+                    Note: Do not summarize the entire page. Only extract and return the information relevant to the keyword.
+                """))
+                additional_sources = []
+                if response.candidates[0].citation_metadata:
+                    additional_sources = [source.url for source in response.candidates[0].citation_metadata.citation_sources]
+                wikipedia_info.append({
+                    "topic": search_term,
+                    "summary": response.text,
+                    "url": url,
+                    "additional_sources": additional_sources
+                })
+            except wikipedia.exceptions.DisambiguationError:  # Handle ambiguous search results
+                print(f"Ambiguous results for '{search_term}' (originally for '{query}'), skipping.")
+            except wikipedia.exceptions.PageError:  # Handle cases where no Wikipedia page is found
+                print(f"No Wikipedia page found for '{search_term}', skipping.")
+            except Exception as e:  # Handle other exceptions
+                st.error(f"Error searching Wikipedia: {e}")
+    return wikipedia_info
+def extract_keywords_simple(extracted_text):
+    """Extracts keywords and important information from the given text using Gemini 1.5 Pro."""
+    prompt = """
+    You are an expert detective assistant. Analyze the following text and extract the most important keywords and
+    information that could be relevant to a criminal investigation:
+    """ + extracted_text
+    response = model.generate_content([prompt])
+    keywords = response.text.strip().split("\n")  # Assuming each keyword is on a separate line
+    return keywords
+# Function to extract text from various file types
+def extract_text_from_files(uploaded_files):
+    """Extracts text content from a list of uploaded files, handling various file types."""
+    extracted_text = []
+    for uploaded_file in uploaded_files:
+        file_type = uploaded_file.type
+        if file_type == "text/plain":
+            # Plain Text File
+            raw_text = str(uploaded_file.read(), "utf-8")
+            extracted_text.append(raw_text.strip())
+        elif file_type == "application/pdf":
+            # PDF Document
+            pdf_reader = PyPDF2.PdfReader(uploaded_file)
+            text = ""
+            for page_num in range(len(pdf_reader.pages)):
+                page = pdf_reader.pages[page_num]
+                text += page.extract_text()
+            extracted_text.append(text)
+        else:
+            # Other Document Types (Using Textract)
+            try:
+                text = textract.process(uploaded_file).decode("utf-8")
+                extracted_text.append(text)
+            except Exception as e:
+                st.error(f"Error extracting text from file: {e}")
+    return extracted_text
+# Function to process images using Gemini Pro Vision
+def process_images(uploaded_images):
+    """Processes a list of uploaded images using Gemini Pro Vision to extract relevant information."""
+    image_insights = []
+    for uploaded_image in uploaded_images:
+        try:
+            image = PIL.Image.open(uploaded_image)
+            prompt = """
+            Analyze the provided image and extract any relevant information that could be useful for an investigation.
+            """
+            response = vision_model.generate_content([prompt, image])
+            image_insights.append(response.text)
+        except Exception as e:
+            st.error(f"Error processing image: {e}")
+    return image_insights
+def search_internet(case_text):
+    """Generates search queries using Gemini 1.5 Pro and performs internet searches for case-related information."""
+    prompt = """
+    You are an expert detective assistant. Analyze the following case information and generate a list of search queries
+    to find relevant information on the internet:
+    """ + str(case_text)
+    response = model.generate_content([prompt])
+    search_queries = response.text.strip().split("\n")
+    # Set up Google Custom Search API client
+    api_key = "AIzaSyD-1OMuZ0CxGAek0PaXrzHOmcDWFvZQtm8"
+    cse_id = "73499643bc7bf47ed"
+    service = build("customsearch", "v1", developerKey=api_key)
+    internet_search_results = []
+    for query in search_queries:
+        try:
+            # Perform Google Custom Search API request
+            result = service.cse().list(q=query, cx=cse_id).execute()
+            # Extract relevant information from search results
+            search_results = []
+            if "items" in result:
+                for item in result["items"]:
+                    title = item.get("title", "")
+                    snippet = item.get("snippet", "")
+                    link = item.get("link", "")
+                    search_results.append({"title": title, "snippet": snippet, "url": link})
+            internet_search_results.extend(search_results)  # Accumulate results from each query
+        except Exception as e:
+            st.error(f"Error searching the internet: {e}")
+    return internet_search_results
+def investigate():
+    """Handles the case investigation process, including file upload, text extraction, embedding generation,
+    image processing, information analysis using Gemini models, web/Wikipedia search, and case report generation.
+    """
+    st.header("Case Investigation")
+    # File upload for documents and images
+    uploaded_documents = st.file_uploader("Upload Case Documents", accept_multiple_files=True, type=["txt", "pdf", "docx"])
+    uploaded_images = st.file_uploader("Upload Case Images", accept_multiple_files=True, type=["jpg", "png", "jpeg"])
+    if uploaded_documents and uploaded_images and st.button("Analyze Case"):
+        # Extract text from uploaded documents
+        case_text = extract_text_from_files(uploaded_documents)
+        # Extract keywords and important information from the text
+        keywords = extract_keywords_simple("\n\n".join(case_text))
+        # Generate embeddings for the extracted text
+        case_embeddings = generate_embeddings_from_documents(case_text)
+        # Process images using Gemini Pro Vision
+        image_insights = process_images(uploaded_images)
+        # Combine text, image, and keyword information
+        combined_information = {
+            "case_text": case_text,
+            "case_embeddings": case_embeddings,
+            "image_insights": image_insights,
+            "keywords": keywords
+        }
+        # Analyze combined information using Gemini 1.5 Pro
+        prompt = """
+        You are Sherlock Holmes, the renowned detective. Analyze the following case information and provide insights or
+        suggestions for further investigation:
+        """ + str(combined_information)
+        response = model.generate_content([sherlock_persona, sherlock_guidelines, prompt, *case_embeddings])
+        st.write(response.text)
+        # Search Wikipedia and the web for related information
+        wikipedia_info = search_and_scrape_wikipedia(keywords)
+        web_search_results = search_internet("\n\n".join(case_text))  # Search the web
+        # Generate a case report in Sherlock Holmes's style
+        report_prompt = """
+        You are Sherlock Holmes, the renowned detective. Based on the case information, your analysis, findings from
+        Wikipedia and the web, and the extracted keywords, generate a comprehensive case report in your signature style,
+        including deductions, potential suspects, and conclusions.
+        """
+        final_report = model.generate_content([sherlock_persona, sherlock_guidelines, report_prompt,
+                                               *case_embeddings, str(wikipedia_info), str(web_search_results)])
+        st.header("Case Report")
+        st.write(final_report.text)
+    else:
+        st.info("Please upload both case documents and images to proceed with the investigation.")
+        # Chat with Sherlock Holmes (Gemini 1.5 Pro)
+        st.write("Alternatively, you may engage in a conversation with Sherlock Holmes.")
+        user_query = st.text_input("Ask Sherlock:")
+        if user_query:
+            response = model.generate_content([sherlock_persona, sherlock_guidelines, user_query])
+            st.write(response.text)
+def main():
+    # --- Vintage Sherlock Holmes Theme ---
+    st.set_page_config(page_title="AI Detective Sherlock Holmes", page_icon=":mag_right:")
+    # Custom CSS for Styling
+    vintage_css = """
+    <style>
+    body {
+        background-color: #d2b48c; /* Antique White */
+        color: #332200; /* Dark Brown */
+        font-family: 'Times New Roman', serif;
+    }
+    h1, h2, h3 {
+        color: #8b4513; /* Saddle Brown */
+    }
+    .stTextInput > div > div > input {
+        border: 1px solid #8b4513;
+        border-radius: 5px;
+    }
+    .stButton > button {
+        background-color: #8b4513;
+        color: white;
+        border: none;
+        border-radius: 5px;
+    }
+    </style>
+    """
+    st.markdown(vintage_css, unsafe_allow_html=True)  # Apply custom CSS
+    # Title and Header
+    st.title("AI Detective Sherlock Holmes")
+    st.header("_'Elementary, my dear Watson!'_")
+    # Add a sidebar for navigation
+    st.sidebar.title("Navigation")
+    options = ["Investigate Case", "Chat with Sherlock"]
+    choice = st.sidebar.radio("Choose an option:", options)
+    if choice == "Investigate Case":
+        investigate()
+    else:
+        # Chat with Sherlock Holmes (Gemini 1.5 Pro)
+        st.write("No case files uploaded. Feel free to chat with Sherlock Holmes.")
+        user_query = st.text_input("Ask Sherlock:")
+        if user_query:
+            response = model.generate_content([sherlock_persona, sherlock_guidelines, user_query])
+            st.write(response.text)
+if __name__ == "__main__":
+    main()