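"""Small FastAPI scraping service: /google_search returns Google results as
Markdown links, and /tiktok_details extracts engagement stats and metadata
from a public TikTok video page."""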
import re
from typing import Annotated
from urllib.parse import quote_plus

import html2text
import requests
from bs4 import BeautifulSoup
from fastapi import FastAPI, Header, Query
from fastapi.middleware.cors import CORSMiddleware

app = FastAPI()
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
@app.get("/google_search")
async def google_search(q: str, sites: list):
url = f"https://www.google.com/search?q={q}"
if sites:
url += "&" + " OR ".join(["site:"+site for site in sites])
texts = ""
soup = BeautifulSoup(requests.get(url).content, "html.parser")
for div in soup.find_all("div")[24:]:
if len(div.find_parents("div")) == 8: # Depth 4 means 3 parent divs (0-indexed)
# print(div.get_text().strip())
href = div.find(href=True, recursive=True)
text = div.find(text=True, recursive=False)
if href and text:
print(text)
text = f'[{text}]({href["href"].split("/url?q=")[-1]})'
if text != None and text.strip():
texts += text + "\n---\n"
return {"results":texts}
@app.get("/tiktok_details")
async def read_item(username: str, video_id:str):
# user_agent = "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Googlebot/2.1; +http://www.google.com/bot.html) Chrome/W.X.Y.Z Safari/537.36"
user_agent = "Googlebot/2.1"
# if "https:" in link_detail:
# url = link_detail
# elif link_detail[0] == "/":
# url = "https://tiktok.com" + link_detail
# else:
# url = "https://tiktok.com/"+link_detail
url = f"https://tiktok.com/@{username}/video/{video_id}"
res = requests.get(url, headers={"user-agent":user_agent})
text_maker = html2text.HTML2Text()
text_maker.ignore_links = True
text_maker.ignore_images = True
text_maker.bypass_tables = False
print("RESPONSE DETAIlL", res.content.decode("utf-8"))
docs = text_maker.handle(res.content.decode("utf-8"))
print("DOCS", docs)
content_detail = docs.split("###")[5]
likes, comments, bookmarks, shares = re.findall(r'\*\*([\w.]+)\*\*', content_detail)
profile = [x.strip() for x in content_detail.split("\n\nSpeed\n\n", 1)[1].split("\n", 6) if x.strip()]
username = profile[0]
date = profile[1].rsplit(" · ", 1)[-1]
desc = profile[-1].replace("**", "")
return {
"insights":{
"likeCount":likes,
"commentCount":comments,
"bookmarkCount":bookmarks,
"shareCount":shares
},
"username":username,
"date":date,
"description":desc
} |
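# Minimal local-usage sketch (assumes the file is named app.py, uvicorn is installed,
# and port 8000 is free; the site, username, and video_id values below are placeholder
# examples, not from the original file):
#
#   uvicorn app:app --port 8000
#   curl "http://localhost:8000/google_search?q=fastapi&sites=fastapi.tiangolo.com"
#   curl "http://localhost:8000/tiktok_details?username=someuser&video_id=1234567890"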