import re

import html2text
import requests
from bs4 import BeautifulSoup
from fastapi import FastAPI, Query
from fastapi.middleware.cors import CORSMiddleware


app = FastAPI()

# Allow cross-origin requests from any origin so the endpoints can be called
# directly from browser-based clients.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"], 
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)


@app.get("/google_search")
async def google_search(q: str, sites: list):
    url = f"https://www.google.com/search?q={q}"
    if sites:
        url += "&" + " OR ".join(["site:"+site for site in sites]) 
        
    texts = ""
    soup = BeautifulSoup(requests.get(url).content, "html.parser")
  
    for div in soup.find_all("div")[24:]:
      if len(div.find_parents("div")) == 8:  # Depth 4 means 3 parent divs (0-indexed)
          # print(div.get_text().strip())
          href = div.find(href=True, recursive=True)
          text = div.find(text=True, recursive=False)
          if href and text:
            print(text)
            text = f'[{text}]({href["href"].split("/url?q=")[-1]})'
          if text != None and text.strip():
            texts += text + "\n---\n"
    return {"results":texts}


@app.get("/tiktok_details")
async def read_item(username: str, video_id:str):
    # user_agent = "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Googlebot/2.1; +http://www.google.com/bot.html) Chrome/W.X.Y.Z Safari/537.36"
    user_agent = "Googlebot/2.1"
    # if "https:" in link_detail:
    #     url = link_detail
    # elif link_detail[0] == "/":
    #     url = "https://tiktok.com" + link_detail
    # else:
    #     url = "https://tiktok.com/"+link_detail

    url = f"https://tiktok.com/@{username}/video/{video_id}"

    res = requests.get(url, headers={"user-agent":user_agent})
    text_maker = html2text.HTML2Text()
    text_maker.ignore_links = True
    text_maker.ignore_images = True
    text_maker.bypass_tables = False

    print("RESPONSE DETAIlL", res.content.decode("utf-8"))
    
    docs = text_maker.handle(res.content.decode("utf-8"))

    print("DOCS", docs)

    content_detail = docs.split("###")[5]

    likes, comments, bookmarks, shares = re.findall(r'\*\*([\w.]+)\*\*', content_detail)
    
    
    profile = [x.strip() for x in content_detail.split("\n\nSpeed\n\n", 1)[1].split("\n", 6) if x.strip()]
    username = profile[0]
    date = profile[1].rsplit(" · ", 1)[-1]
    desc = profile[-1].replace("**", "")
    
    return {
        "insights":{
            "likeCount":likes,
            "commentCount":comments,
            "bookmarkCount":bookmarks,
            "shareCount":shares
        },
        "username":username,
        "date":date,
        "description":desc
    }
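
# Usage sketch (an assumption, not part of the original file): run the service with
# uvicorn and call the TikTok endpoint; the module name, username, and video_id below
# are placeholders.
#
#   uvicorn main:app --reload
#   curl "http://127.0.0.1:8000/tiktok_details?username=someuser&video_id=1234567890123456789"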