web_scrape / app.py
jonathanjordan21's picture
Update app.py
673e694 verified
raw
history blame
1.45 kB
import asyncio
import re
from typing import Annotated

import html2text
import requests
from fastapi import FastAPI, Header, HTTPException
# Module-level FastAPI application; route handlers below register on it.
app = FastAPI()
# User agent sent with the outbound request (identifies as Googlebot).
_USER_AGENT = (
    "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; "
    "Googlebot/2.1; +http://www.google.com/bot.html) "
    "Chrome/W.X.Y.Z Safari/537.36"
)
_TIKTOK_BASE = "https://tiktok.com"
# Seconds to wait for the remote server before giving up.
_REQUEST_TIMEOUT = 15
# Matches bold markdown tokens such as ``**12.3K**``.
_BOLD_TOKEN = re.compile(r"\*\*([\w.]+)\*\*")
# Marker that precedes the profile lines in the converted page text.
_PROFILE_MARKER = "\n\nSpeed\n\n"


def _resolve_url(link_detail: str) -> str:
    """Return an absolute URL for *link_detail* (absolute URL or site path)."""
    # Only a leading scheme marks an absolute URL; a bare substring test would
    # misclassify relative paths that merely embed a URL in their query.
    if link_detail.startswith(("https://", "http://")):
        return link_detail
    if link_detail.startswith("/"):
        return _TIKTOK_BASE + link_detail
    return _TIKTOK_BASE + "/" + link_detail


def _parse_details(markdown: str) -> dict:
    """Extract counts, username, date and description from the page text.

    Raises:
        ValueError: if the text does not have the layout this parser expects.
    """
    sections = markdown.split("###")
    if len(sections) <= 5:
        raise ValueError("expected at least six '###' sections")
    # NOTE(review): index 5 is a hard-coded position in the converted page;
    # presumably it is the video-detail section -- confirm against a live page.
    content_detail = sections[5]

    counts = _BOLD_TOKEN.findall(content_detail)
    if len(counts) != 4:
        raise ValueError(f"expected 4 bold counters, found {len(counts)}")
    likes, comments, bookmarks, shares = counts

    _, marker, after_marker = content_detail.partition(_PROFILE_MARKER)
    if not marker:
        raise ValueError("profile marker not found")
    profile = [
        line.strip() for line in after_marker.split("\n", 6) if line.strip()
    ]
    if len(profile) < 2:
        raise ValueError("profile section is incomplete")

    return {
        "insights": {
            "likeCount": likes,
            "commentCount": comments,
            "bookmarkCount": bookmarks,
            "shareCount": shares,
        },
        "username": profile[0],
        # The date is whatever follows the last " · " on the second line.
        "date": profile[1].rsplit(" · ", 1)[-1],
        "description": profile[-1].replace("**", ""),
    }


@app.get("/tiktok_details/")
async def read_item(link_detail: str):
    """Fetch a page and return the details parsed from its text rendering.

    Args:
        link_detail: An absolute ``http(s)://`` URL, or a path that is
            appended to ``https://tiktok.com``.

    Returns:
        A dict with ``insights`` (``likeCount``, ``commentCount``,
        ``bookmarkCount``, ``shareCount``), ``username``, ``date`` and
        ``description``; all values are strings taken from the page text.

    Raises:
        HTTPException: 400 when ``link_detail`` is empty; 502 when the page
            cannot be fetched or does not have the expected layout.
    """
    link = link_detail.strip()
    if not link:
        raise HTTPException(
            status_code=400, detail="link_detail must not be empty"
        )

    # NOTE(review): an absolute URL is fetched as given, so callers can make
    # this service request arbitrary hosts (SSRF). Consider restricting the
    # host to tiktok.com if that is the only intended target.
    url = _resolve_url(link)

    try:
        # requests is blocking; run it in a worker thread so the event loop
        # keeps serving other requests while the page downloads.
        res = await asyncio.to_thread(
            requests.get,
            url,
            headers={"user-agent": _USER_AGENT},
            timeout=_REQUEST_TIMEOUT,
        )
    except requests.RequestException as err:
        raise HTTPException(
            status_code=502, detail=f"Failed to fetch {url}: {err}"
        ) from err

    text_maker = html2text.HTML2Text()
    text_maker.ignore_links = True
    text_maker.ignore_images = True
    text_maker.bypass_tables = False
    # Replace undecodable bytes rather than failing the whole request.
    docs = text_maker.handle(res.content.decode("utf-8", errors="replace"))

    try:
        return _parse_details(docs)
    except ValueError as err:
        raise HTTPException(
            status_code=502,
            detail=(
                f"Could not parse page at {url} "
                f"(HTTP {res.status_code}): {err}"
            ),
        ) from err