jonathanjordan21 committed on
Commit
5d14fc1
·
verified ·
1 Parent(s): d76a753

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -1
app.py CHANGED
@@ -246,7 +246,7 @@ async def fb_post_detail(username: Optional[str] = None, post_id: Optional[str]
246
 
247
 
248
  @app.get("/google_search")
249
- async def google_search(q: Optional[str] = None, delimiter: str = "\n---\n", sites: Annotated[list[str] | None, Query()] = None):
250
  print(sites)
251
  print(type(sites))
252
  url = f"https://www.google.com/search?q={q} "
@@ -269,6 +269,43 @@ async def google_search(q: Optional[str] = None, delimiter: str = "\n---\n", sit
269
  return {"results":texts}
270
 
271
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
272
  @app.get("/tiktok_video_details")
273
  async def tiktok_video_details(username: Optional[str] = None, video_id:Optional[str] = None, url: Optional[str] = None):
274
  if not url:
 
246
 
247
 
248
  @app.get("/google_search")
249
+ async def google_search(q: str, delimiter: str = "\n---\n", sites: Annotated[list[str] | None, Query()] = None):
250
  print(sites)
251
  print(type(sites))
252
  url = f"https://www.google.com/search?q={q} "
 
269
  return {"results":texts}
270
 
271
 
272
@app.get("/google_search_urls")
async def google_search_url(q: str, sites: Annotated[list[str] | None, Query()] = None):
    """Return the outbound result URLs found on a Google search page.

    Args:
        q: Search text.
        sites: Optional domains; when given, the search text is extended
            with ``site:<domain>`` terms joined by ``OR``.

    Returns:
        ``{"results": [...]}`` where each item is the decoded target of a
        ``/url?q=...`` redirect link whose target starts with ``h`` and
        does not mention ``google.com``.
    """
    from urllib.parse import unquote

    query = q
    if sites:
        query = f"{q} " + " OR ".join(f"site:{site}" for site in sites)

    # Pass the search text through ``params`` so characters such as ``&`` or
    # ``#`` in ``q`` are percent-encoded instead of corrupting the query string.
    # NOTE(review): ``requests.get`` is a blocking call inside an ``async``
    # handler; consider running it in a worker thread if latency matters.
    res = requests.get(
        "https://www.google.com/search",
        params={"q": query},
        headers={
            "user-agent": "Googlebot",
            "accept-language": "en-US",
        },
        timeout=(10, 27),
    )

    soup = BeautifulSoup(res.content, "html.parser")
    redirect_prefix = "/url?q="

    docs = []
    for tag in soup.find_all(True):
        # Only tags with exactly two ancestors are used as search roots
        # (presumably the top-level children of <html> -- verify).
        if len(tag.find_parents()) != 2:
            continue
        # ``href=True`` skips anchors without an href attribute, which would
        # otherwise yield ``None`` and fail on slicing.
        for a in tag.find_all("a", href=True):
            href = a["href"]
            # Keep only redirect links whose target starts with "h" (http/https).
            if not href.startswith(redirect_prefix + "h"):
                continue
            target = href[len(redirect_prefix):]
            if "google.com" in target:
                continue
            # Drop the tracking parameters that follow the target, then decode
            # every percent-escape (not just %3F and %3D).
            docs.append(unquote(target.split("&", 1)[0]))
    return {"results": docs}
307
+
308
+
309
  @app.get("/tiktok_video_details")
310
  async def tiktok_video_details(username: Optional[str] = None, video_id:Optional[str] = None, url: Optional[str] = None):
311
  if not url: