Update app.py
app.py
CHANGED
@@ -148,6 +148,8 @@ def google_search(term, num_results=5, lang="en", timeout=5, safe="active", ssl_
     all_results = []
     max_chars_per_page = 8000  # Limit the number of characters from each webpage to stay under the token limit
 
+    print(f"Starting Google search for term: '{term}'")
+
     with requests.Session() as session:
         while start < num_results:
             try:
@@ -169,17 +171,23 @@ def google_search(term, num_results=5, lang="en", timeout=5, safe="active", ssl_
                     verify=ssl_verify,
                 )
                 resp.raise_for_status()
+                print(f"Successfully retrieved search results page (start={start})")
             except requests.exceptions.RequestException as e:
+                print(f"Error retrieving search results: {e}")
                 break
 
             soup = BeautifulSoup(resp.text, "html.parser")
             result_block = soup.find_all("div", attrs={"class": "g"})
             if not result_block:
+                print("No results found on this page")
                 break
+
+            print(f"Found {len(result_block)} results on this page")
             for result in result_block:
                 link = result.find("a", href=True)
                 if link:
                     link = link["href"]
+                    print(f"Processing link: {link}")
                     try:
                         webpage = session.get(link, headers=headers, timeout=timeout)
                         webpage.raise_for_status()
@@ -187,11 +195,26 @@ def google_search(term, num_results=5, lang="en", timeout=5, safe="active", ssl_
                         if len(visible_text) > max_chars_per_page:
                             visible_text = visible_text[:max_chars_per_page] + "..."
                         all_results.append({"link": link, "text": visible_text})
+                        print(f"Successfully extracted text from {link}")
                     except requests.exceptions.RequestException as e:
+                        print(f"Error retrieving webpage content: {e}")
                         all_results.append({"link": link, "text": None})
                 else:
+                    print("No link found for this result")
                     all_results.append({"link": None, "text": None})
             start += len(result_block)
+
+    print(f"Search completed. Total results: {len(all_results)}")
+    print("Search results:")
+    for i, result in enumerate(all_results, 1):
+        print(f"Result {i}:")
+        print(f"  Link: {result['link']}")
+        if result['text']:
+            print(f"  Text: {result['text'][:100]}...")  # Print first 100 characters
+        else:
+            print("  Text: None")
+    print("End of search results")
+
     return all_results
 
 def ask_question(question, temperature, top_p, repetition_penalty, web_search):
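For reference, a minimal sketch of how the patched google_search might be exercised. This assumes app.py is importable and that the module-level names the hunks reference (headers, the initial start value, and the code that builds visible_text) are defined elsewhere in the file; the caller below is illustrative, not part of the commit.

    # Hypothetical caller (assumption: app.py resolves `headers`, `start`, and
    # `visible_text` internally; only the signature from the hunk header is used here).
    from app import google_search

    results = google_search("open source LLMs", num_results=3)
    for i, result in enumerate(results, 1):
        snippet = result["text"][:80] if result["text"] else "None"
        print(f"{i}. {result['link']} -> {snippet}")

Each element is a {"link": ..., "text": ...} dict, with None values marking results whose page could not be fetched. In a Space, stdout goes to the container logs, so plain print statements are a reasonable quick debug pass, though a later revision could swap them for the logging module.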