Spaces:

lintasmediadanawa
/

web_scrape

Sleeping

App Files Files Community

jonathanjordan21 committed on Sep 5, 2024

Commit

1ecee5c

verified ·

1 Parent(s): 4d674ce

Update app.py

Browse files

Files changed (1) hide show

app.py +38 -1

app.py CHANGED Viewed

@@ -21,6 +21,43 @@ app.add_middleware(
     allow_headers=["*"],
 )
 @app.get("/google_search")
 async def google_search(q: str, delimiter: str = "\n---\n", sites: Annotated[list[str] | None, Query()] = None):
@@ -46,7 +83,7 @@ async def google_search(q: str, delimiter: str = "\n---\n", sites: Annotated[lis
     return {"results":texts}
-@app.get("/tiktok_details")
 async def read_item(username: str, video_id:str):
     user_agent = "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Googlebot/2.1; +http://www.google.com/bot.html) Chrome/W.X.Y.Z Safari/537.36"
     # user_agent = "Googlebot/2.1"

     allow_headers=["*"],
 )
+@app.get("/linkedin_post_details")
+async def linkedin_post_details(post_id: str):
+    url = "https://www.linkedin.com/posts/"+post_id
+    res = requests.get(url)
+    text_maker = html2text.HTML2Text()
+    text_maker.ignore_links = True
+    text_maker.ignore_images = True
+    text_maker.bypass_tables = False
+    docs = text_maker.handle(res.content.decode("utf-8"))
+    chunks = docs.split("\n\n#")
+    linkedin_content = chunks[1]
+    user = linkedin_content.split("\n\n", 5)
+    full_name = user[1]
+    bio = user[2]
+    try:
+        date, edited = user[3].split("  ")
+        edited = True
+    except:
+        date = user[3].strip()
+        edited = False
+    content = "\n\n".join(user[5:])
+    insights = chunks[3].split("\n\n")[2]
+    likes = insights.split(" ", 1)[0].strip()
+    comments = insights.rsplit(" ", 2)[1].strip()
+    return {
+        "user": {"name": full_name, "bio": bio},
+        "content": content,
+        "date": date,
+        "is_edited": edited,
+        "insights": {"likeCount": likes, "commentCount": comments, "shareCount": None},
+    }
 @app.get("/google_search")
 async def google_search(q: str, delimiter: str = "\n---\n", sites: Annotated[list[str] | None, Query()] = None):
     return {"results":texts}
+@app.get("/tiktok_video_details")
 async def read_item(username: str, video_id:str):
     user_agent = "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Googlebot/2.1; +http://www.google.com/bot.html) Chrome/W.X.Y.Z Safari/537.36"
     # user_agent = "Googlebot/2.1"