jayash391 committed on
Commit
ea05e35
·
verified ·
1 Parent(s): 9fbf0bd

Upload sherlock2.py

Browse files
Files changed (1) hide show
  1. sherlock2.py +328 -0
sherlock2.py ADDED
@@ -0,0 +1,328 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import google.generativeai as genai
2
+ import streamlit as st
3
+ from bs4 import BeautifulSoup
4
+ import wikipedia
5
+ import os
6
+ from googleapiclient.discovery import build
7
+ from dotenv import load_dotenv
8
+ import textwrap
9
+ import PIL
10
+ import PyPDF2
11
+ import textract
12
+
13
# Pull settings from a local .env file into the process environment.
load_dotenv()

# Authenticate the Gemini SDK with the key read from the environment.
genai.configure(api_key=os.environ.get("GEMINI_API_KEY_PROJECTID"))

# Shared Gemini model handles: `model` receives the text prompts and
# `vision_model` receives prompt + image pairs.
model = genai.GenerativeModel("models/gemini-1.5-pro")
vision_model = genai.GenerativeModel("models/gemini-pro-vision")

# Persona text passed to the model ahead of the Sherlock prompts.
sherlock_persona = """
You are Sherlock Holmes, the world-renowned consulting detective residing at 221B Baker Street.
You possess exceptional deductive reasoning, observation skills, and knowledge in various fields
such as forensic science, chemistry, and criminal psychology.
You are known for your sharp wit, logical thinking, and ability to solve complex mysteries.
"""

# Behavioural guidelines passed to the model together with the persona.
sherlock_guidelines = """
* Respond in a manner consistent with Sherlock Holmes's personality, maintaining a formal and articulate tone.
* Utilize your extensive knowledge and deductive reasoning skills to analyze case details and form hypotheses.
* Employ a keen sense of observation and attention to detail when examining evidence.
* Consider various possibilities and avoid jumping to conclusions without sufficient evidence.
* Be confident in your deductions but remain open to new information and alternative perspectives.
"""
37
+
38
# Name of the embedding model used for the case documents.
# The google.generativeai SDK exposes embeddings through the module-level
# ``genai.embed_content(model=<name>, ...)`` function rather than through a
# model class, so only the model name is stored here.
embedding_model = 'models/embedding-001'


def generate_embeddings_from_documents(extracted_text):
    """Generate one embedding per document with ``genai.embed_content``.

    Args:
        extracted_text (list): The text of each document to embed.

    Returns:
        list: The embedding vectors of the documents that were embedded
        successfully. Blank documents and documents whose request failed are
        left out, so the result can be shorter than ``extracted_text``.
    """
    embeddings = []
    for text in extracted_text:
        if not text or not text.strip():
            # There is nothing to embed in a blank document.
            continue
        try:
            # "RETRIEVAL_DOCUMENT" marks the text as a document to be
            # searched/compared against, as opposed to a query.
            response = genai.embed_content(
                model=embedding_model,
                content=text,
                task_type="RETRIEVAL_DOCUMENT",
            )
            embeddings.append(response["embedding"])
        except Exception as e:
            # Best effort: report the failure in the UI and keep going.
            st.error(f"Error generating embeddings: {e}")
    return embeddings
55
+
56
+
57
+ # Web scraping and Wikipedia search function
58
def search_and_scrape_wikipedia(keywords, max_topics_per_query=3, mining_model='gemini-pro'):
    """
    Searches Wikipedia for each keyword and extracts the relevant information.

    Args:
        keywords (list): A list of keywords to search for on Wikipedia.
        max_topics_per_query (int, optional): The maximum number of Wikipedia topics to explore for each query. Defaults to 3.
        mining_model (str, optional): The name of the generative model to use for extracting relevant information.
            Defaults to 'gemini-pro'.

    Returns:
        list: A list of dictionaries, where each dictionary represents a relevant piece of information, with keys:
            - "topic": The Wikipedia topic title.
            - "summary": The information the model extracted from the topic for the keyword.
            - "url": The URL of the Wikipedia page.
            - "additional_sources": A list of source URIs taken from the model's citation metadata (may be empty).
    """
    explored_topics = set()  # Topics already handled, shared across all keywords
    wikipedia_info = []
    # Keep the parameter as the model *name*; the model object gets its own name.
    extractor = genai.GenerativeModel(mining_model)

    for query in keywords:
        if not query or not query.strip():
            continue  # A blank keyword cannot be searched

        try:
            search_terms = wikipedia.search(query)
        except Exception as e:
            # A failed search for one keyword must not abort the remaining ones.
            st.error(f"Error searching Wikipedia: {e}")
            continue

        for search_term in search_terms[:max_topics_per_query]:
            if search_term in explored_topics:
                continue
            explored_topics.add(search_term)

            try:
                page = wikipedia.page(search_term, auto_suggest=False)
                url = page.url
                page_content = page.content

                # Built line by line: dedenting after interpolation has no
                # effect because the page content itself is not indented.
                prompt = (
                    f'Extract relevant information related to the keyword "{query}"\n'
                    "from the following Wikipedia page content:\n"
                    "\n"
                    f"{page_content}\n"
                    "\n"
                    "Note: Do not summarize the entire page. Only extract and return "
                    "the information relevant to the keyword.\n"
                )
                response = extractor.generate_content(prompt)

                # Citation sources carry their link in the `uri` field.
                additional_sources = []
                citation_metadata = response.candidates[0].citation_metadata
                if citation_metadata:
                    additional_sources = [
                        source.uri
                        for source in citation_metadata.citation_sources
                        if source.uri
                    ]

                wikipedia_info.append({
                    "topic": search_term,
                    "summary": response.text,
                    "url": url,
                    "additional_sources": additional_sources,
                })

            except wikipedia.exceptions.DisambiguationError:
                print(f"Ambiguous results for '{search_term}' (originally for '{query}'), skipping.")
            except wikipedia.exceptions.PageError:
                print(f"No Wikipedia page found for '{search_term}', skipping.")
            except Exception as e:
                st.error(f"Error searching Wikipedia: {e}")

    return wikipedia_info
123
+
124
def extract_keywords_simple(extracted_text):
    """Ask the text model for investigation-relevant keywords in the given text.

    Args:
        extracted_text (str): The case text to analyze.

    Returns:
        list: The non-blank lines of the model's reply, each stripped of
        surrounding whitespace. Every line is treated as one keyword.
    """
    prompt = """
    You are an expert detective assistant. Analyze the following text and extract the most important keywords and
    information that could be relevant to a criminal investigation:
    """ + extracted_text

    response = model.generate_content([prompt])
    # One keyword per line is assumed. Blank lines are dropped so that they
    # are not handed on as (empty) search terms.
    return [line.strip() for line in response.text.splitlines() if line.strip()]
134
+
135
+ # Function to extract text from various file types
136
def extract_text_from_files(uploaded_files):
    """Extract the text content of each uploaded file.

    Plain-text files are decoded as UTF-8 and stripped, PDFs are read page by
    page with PyPDF2, and every other type is handed to textract.

    Args:
        uploaded_files (list): Uploaded file objects exposing ``type``,
            ``name``, ``read()`` and ``getvalue()``.

    Returns:
        list: One string per file that could be read. A file that fails is
        reported with ``st.error`` and left out of the result.
    """
    import tempfile

    extracted_text = []
    for uploaded_file in uploaded_files:
        file_type = uploaded_file.type
        try:
            if file_type == "text/plain":
                raw_text = uploaded_file.read().decode("utf-8")
                extracted_text.append(raw_text.strip())
            elif file_type == "application/pdf":
                pdf_reader = PyPDF2.PdfReader(uploaded_file)
                # A page without extractable text contributes nothing.
                text = "".join(page.extract_text() or "" for page in pdf_reader.pages)
                extracted_text.append(text)
            else:
                # textract works on a file path and picks its parser from the
                # extension, so the upload is written to a temporary file
                # that keeps the original suffix.
                suffix = os.path.splitext(uploaded_file.name)[1]
                with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
                    tmp.write(uploaded_file.getvalue())
                    tmp_path = tmp.name
                try:
                    text = textract.process(tmp_path).decode("utf-8")
                finally:
                    os.remove(tmp_path)
                extracted_text.append(text)
        except Exception as e:
            # Best effort: one unreadable file must not stop the others.
            st.error(f"Error extracting text from file: {e}")
    return extracted_text
161
+
162
+ # Function to process images using Gemini Pro Vision
163
def process_images(uploaded_images):
    """Describe each uploaded image with the vision model.

    Args:
        uploaded_images (list): Uploaded image files that PIL can open.

    Returns:
        list: The model's reply text for each image that was processed. An
        image that fails is reported with ``st.error`` and left out.
    """
    # `import PIL` alone does not load the `Image` submodule, so it is
    # imported explicitly instead of relying on another package having
    # loaded it first.
    from PIL import Image

    image_insights = []
    for uploaded_image in uploaded_images:
        try:
            image = Image.open(uploaded_image)
            prompt = """
            Analyze the provided image and extract any relevant information that could be useful for an investigation.
            """
            response = vision_model.generate_content([prompt, image])
            image_insights.append(response.text)
        except Exception as e:
            st.error(f"Error processing image: {e}")
    return image_insights
177
+
178
def search_internet(case_text):
    """Generate search queries with the text model and run them through Google Custom Search.

    The Custom Search credentials are read from the environment variables
    ``GOOGLE_CSE_API_KEY`` and ``GOOGLE_CSE_ID``.

    Args:
        case_text: The case information; it is converted with ``str()``
            before being appended to the prompt.

    Returns:
        list: One dictionary per search hit with the keys "title", "snippet"
        and "url", accumulated over all queries. Empty when the credentials
        are missing.
    """
    # Credentials belong in the environment, not in the source file.
    # NOTE: the key that used to be embedded here was published together with
    # the code and should be revoked and replaced.
    api_key = os.getenv("GOOGLE_CSE_API_KEY")
    cse_id = os.getenv("GOOGLE_CSE_ID")
    if not api_key or not cse_id:
        # Checked first so that no model call is spent on queries that could
        # not be executed anyway.
        st.error("Error searching the internet: GOOGLE_CSE_API_KEY and GOOGLE_CSE_ID must be set.")
        return []

    prompt = """
    You are an expert detective assistant. Analyze the following case information and generate a list of search queries
    to find relevant information on the internet:
    """ + str(case_text)

    response = model.generate_content([prompt])
    # One query per line; blank lines would only produce failing requests.
    search_queries = [line.strip() for line in response.text.splitlines() if line.strip()]

    service = build("customsearch", "v1", developerKey=api_key)

    internet_search_results = []
    for query in search_queries:
        try:
            result = service.cse().list(q=query, cx=cse_id).execute()
            # "items" is only read when present in the response.
            internet_search_results.extend(
                {
                    "title": item.get("title", ""),
                    "snippet": item.get("snippet", ""),
                    "url": item.get("link", ""),
                }
                for item in result.get("items", [])
            )
        except Exception as e:
            # Best effort: a failed query does not stop the remaining ones.
            st.error(f"Error searching the internet: {e}")

    return internet_search_results
213
+
214
def investigate():
    """Run the case-investigation page.

    Shows uploaders for documents and images. Once both are provided and
    "Analyze Case" is pressed, the documents are converted to text, keywords
    are extracted, embeddings are generated, the images are described, an
    initial analysis is written, Wikipedia and the web are searched, and a
    final case report is rendered. Otherwise a hint and a chat box are shown.
    """
    st.header("Case Investigation")

    # File upload for documents and images
    uploaded_documents = st.file_uploader("Upload Case Documents", accept_multiple_files=True, type=["txt", "pdf", "docx"])
    uploaded_images = st.file_uploader("Upload Case Images", accept_multiple_files=True, type=["jpg", "png", "jpeg"])

    if uploaded_documents and uploaded_images and st.button("Analyze Case"):
        case_text = extract_text_from_files(uploaded_documents)
        joined_case_text = "\n\n".join(case_text)

        keywords = extract_keywords_simple(joined_case_text)

        # Embeddings are numeric vectors. They are neither text nor images,
        # so they are generated here but deliberately kept out of every
        # prompt sent to the generative model.
        case_embeddings = generate_embeddings_from_documents(case_text)

        image_insights = process_images(uploaded_images)

        # Everything the model is meant to read, in textual form.
        combined_information = {
            "case_text": case_text,
            "image_insights": image_insights,
            "keywords": keywords
        }

        prompt = """
        You are Sherlock Holmes, the renowned detective. Analyze the following case information and provide insights or
        suggestions for further investigation:
        """ + str(combined_information)

        response = model.generate_content([sherlock_persona, sherlock_guidelines, prompt])
        analysis = response.text
        st.write(analysis)

        # Search Wikipedia and the web for related information
        wikipedia_info = search_and_scrape_wikipedia(keywords)
        web_search_results = search_internet(joined_case_text)

        report_prompt = """
        You are Sherlock Holmes, the renowned detective. Based on the case information, your analysis, findings from
        Wikipedia and the web, and the extracted keywords, generate a comprehensive case report in your signature style,
        including deductions, potential suspects, and conclusions.
        """

        # The report prompt refers to the case information and the analysis,
        # so both are supplied next to the search findings.
        final_report = model.generate_content([
            sherlock_persona,
            sherlock_guidelines,
            report_prompt,
            "Case information:\n" + str(combined_information),
            "Initial analysis:\n" + analysis,
            "Wikipedia findings:\n" + str(wikipedia_info),
            "Web search findings:\n" + str(web_search_results),
        ])
        st.header("Case Report")
        st.write(final_report.text)

    else:
        st.info("Please upload both case documents and images to proceed with the investigation.")

        # Chat with Sherlock Holmes while no case is being analyzed
        st.write("Alternatively, you may engage in a conversation with Sherlock Holmes.")
        user_query = st.text_input("Ask Sherlock:")
        if user_query:
            response = model.generate_content([sherlock_persona, sherlock_guidelines, user_query])
            st.write(response.text)
279
def main():
    """Configure the page, apply the vintage styling and dispatch to the chosen view."""
    st.set_page_config(page_title="AI Detective Sherlock Holmes", page_icon=":mag_right:")

    # Stylesheet injected into the page as raw HTML.
    vintage_css = """
    <style>
    body {
    background-color: #d2b48c; /* Antique White */
    color: #332200; /* Dark Brown */
    font-family: 'Times New Roman', serif;
    }
    h1, h2, h3 {
    color: #8b4513; /* Saddle Brown */
    }
    .stTextInput > div > div > input {
    border: 1px solid #8b4513;
    border-radius: 5px;
    }
    .stButton > button {
    background-color: #8b4513;
    color: white;
    border: none;
    border-radius: 5px;
    }
    </style>
    """
    st.markdown(vintage_css, unsafe_allow_html=True)

    # Page title and tagline
    st.title("AI Detective Sherlock Holmes")
    st.header("_'Elementary, my dear Watson!'_")

    # Sidebar navigation between the two views
    st.sidebar.title("Navigation")
    selected_view = st.sidebar.radio(
        "Choose an option:",
        ["Investigate Case", "Chat with Sherlock"],
    )

    if selected_view == "Investigate Case":
        investigate()
        return

    # Any other selection is the plain chat view.
    st.write("No case files uploaded. Feel free to chat with Sherlock Holmes.")
    question = st.text_input("Ask Sherlock:")
    if question:
        reply = model.generate_content([sherlock_persona, sherlock_guidelines, question])
        st.write(reply.text)


# Start the app only when the file is executed as a script.
if __name__ == "__main__":
    main()