Spaces:

lintasmediadanawa
/

web_scrape

Sleeping

App Files Community

jonathanjordan21 committed on Sep 5, 2024

Commit

cce7a99

verified ·

1 Parent(s): 90df98b

Update app.py

Browse files

Files changed (1) hide show

app.py +36 -36

app.py CHANGED Viewed

@@ -90,51 +90,51 @@ async def tiktok_video_details(username: str, video_id:str):
     user_agent = "Mozilla/5.0 (compatible; YandexBot/3.0; +http://yandex.com/bots)"
     res = requests.get(url, headers={"user-agent": user_agent})
-    soup = BeautifulSoup(res.content, "html.parser")
-    insights = soup.find("meta", {"property": "og:description"}).get("content")
-    likes = insights.split(" ", 1)[0]
-    desc = insights.rsplit(" comments. “", 1)[-1][:-1]
-    comments = insights.split(", ", 1)[-1].split(" ", 1)[0]
-    name = soup.find("meta", {"property": "og:title"}).get("content")[9:]
-    return {
-        "insights": {"likeCount": likes, "commentCount": comments, "shareCount":None, "viewCount":None},
-        "description": desc,
-        "username": username,
-        "name": name,
-    }
-    # text_maker = html2text.HTML2Text()
-    # text_maker.ignore_links = True
-    # text_maker.ignore_images = True
-    # text_maker.bypass_tables = False
-    # print("RESPONSE DETAIlL", res.content.decode("utf-8"))
-    # docs = text_maker.handle(res.content.decode("utf-8"))
-    # print("DOCS", docs)
-    # content_detail = docs.split("###")[5]
-    # likes, comments, bookmarks, shares = re.findall(r'\*\*([\w.]+)\*\*', content_detail)
-    # profile = [x.strip() for x in content_detail.split("\n\nSpeed\n\n", 1)[1].split("\n", 6) if x.strip()]
-    # username = profile[0]
-    # date = profile[1].rsplit(" · ", 1)[-1]
-    # desc = profile[-1].replace("**", "")
-    # return {
-    #     "insights":{
-    #         "likeCount":likes,
-    #         "commentCount":comments,
-    #         "bookmarkCount":bookmarks,
-    #         "shareCount":shares
-    #     },
-    #     "username":username,
-    #     "date":date,
-    #     "description":desc
-    # }

     user_agent = "Mozilla/5.0 (compatible; YandexBot/3.0; +http://yandex.com/bots)"
     res = requests.get(url, headers={"user-agent": user_agent})
+    # soup = BeautifulSoup(res.content, "html.parser")
+    # insights = soup.find("meta", {"property": "og:description"}).get("content")
+    # likes = insights.split(" ", 1)[0]
+    # desc = insights.rsplit(" comments. “", 1)[-1][:-1]
+    # comments = insights.split(", ", 1)[-1].split(" ", 1)[0]
+    # name = soup.find("meta", {"property": "og:title"}).get("content")[9:]
+    # return {
+    #     "insights": {"likeCount": likes, "commentCount": comments, "shareCount":None, "viewCount":None},
+    #     "description": desc,
+    #     "username": username,
+    #     "name": name,
+    # }
+    text_maker = html2text.HTML2Text()
+    text_maker.ignore_links = True
+    text_maker.ignore_images = True
+    text_maker.bypass_tables = False
+    print("RESPONSE DETAIlL", res.content.decode("utf-8"))
+    docs = text_maker.handle(res.content.decode("utf-8"))
+    print("DOCS", docs)
+    content_detail = docs.split("###")[5]
+    likes, comments, bookmarks, shares = re.findall(r'\*\*([\w.]+)\*\*', content_detail)
+    profile = [x.strip() for x in content_detail.split("\n\nSpeed\n\n", 1)[1].split("\n", 6) if x.strip()]
+    username = profile[0]
+    date = profile[1].rsplit(" · ", 1)[-1]
+    desc = profile[-1].replace("**", "")
+    return {
+        "insights":{
+            "likeCount":likes,
+            "commentCount":comments,
+            "bookmarkCount":bookmarks,
+            "shareCount":shares
+        },
+        "username":username,
+        "date":date,
+        "description":desc
+    }