jonathanjordan21 committed on
Commit
2e4e6d4
·
verified ·
1 Parent(s): 1b686c4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +70 -28
app.py CHANGED
@@ -28,40 +28,82 @@ async def linkedin_post_details(post_id: str):
28
  url = "https://www.linkedin.com/posts/"+post_id
29
  res = requests.get(url, headers={"user-agent":"Googlebot", "accept-language": "en-US"})
30
 
31
- text_maker = html2text.HTML2Text()
32
- text_maker.ignore_links = True
33
- text_maker.ignore_images = True
34
- text_maker.bypass_tables = False
35
 
36
- docs = text_maker.handle(res.content.decode("utf-8"))
37
 
38
- chunks = docs.split("\n\n#")
39
- linkedin_content = chunks[1]
40
- user = linkedin_content.split("\n\n", 5)
41
- full_name = user[1]
42
- bio = user[2]
43
- try:
44
- date, edited = user[3].split(" ")
45
- edited = True
46
- except:
47
- date = user[3].strip()
48
- edited = False
49
- content = "\n\n".join(user[5:])
50
-
51
- insights = chunks[3].split("\n\n")[2]
52
- likes = insights.split(" ", 1)[0].strip()
53
- comments = insights.rsplit(" ", 2)[1].strip()
 
 
 
54
 
55
- username = url.rsplit("/",1)[-1].split("_")[0]
56
-
57
  return {
58
- "userDetails": {"full_name": full_name, "username":username,"bio": bio},
59
- "content": content,
 
 
 
 
 
 
 
 
 
60
  "date": date,
61
- "is_edited": edited,
62
- "insights": {"likeCount": likes, "commentCount": comments, "shareCount": None, "viewCount":None},
63
- "username":username
64
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65
 
66
 
67
  @app.get("/facebook_post_detail")
 
28
  url = "https://www.linkedin.com/posts/"+post_id
29
  res = requests.get(url, headers={"user-agent":"Googlebot", "accept-language": "en-US"})
30
 
31
+ soup = BeautifulSoup(res.content, "html.parser")
 
 
 
32
 
33
+ script_tags = soup.find_all("script")
34
 
35
+ for script_tag in script_tags:
36
+ try:
37
+ script_tag = json.loads(script_tag.string)
38
+ if script_tag.get("articleBody"):
39
+ desc = script_tag.get("articleBody")
40
+ author = script_tag.get("author")
41
+ full_name = author.get("name")
42
+ username = author.get("url").rsplit("/", 1)[-1]
43
+ user_type = author.get("@type").lower()
44
+ date = script_tag.get("datePublished")
45
+ except Exception as e:
46
+ continue
47
+
48
+ spans = soup.find_all("span", {"data-test-id": "social-actions__reaction-count"})
49
+ reactions = spans[0].text.strip()
50
+ shares = spans[-1].text.strip()
51
+ comments = soup.find("a", {"data-test-id": "social-actions__comments"}).get(
52
+ "data-num-comments"
53
+ )
54
 
 
 
55
  return {
56
+ "insights": {
57
+ "likeCount": None,
58
+ "commentCount": comments,
59
+ "shareCount": shares,
60
+ "reactionCount": reactions,
61
+ "reactions": [],
62
+ },
63
+ "description": desc,
64
+ "username": username,
65
+ "name": full_name,
66
+ "userType": user_type,
67
  "date": date,
 
 
 
68
  }
69
+ # async def linkedin_post_details(post_id: str):
70
+ # url = "https://www.linkedin.com/posts/"+post_id
71
+ # res = requests.get(url, headers={"user-agent":"Googlebot", "accept-language": "en-US"})
72
+
73
+ # text_maker = html2text.HTML2Text()
74
+ # text_maker.ignore_links = True
75
+ # text_maker.ignore_images = True
76
+ # text_maker.bypass_tables = False
77
+
78
+ # docs = text_maker.handle(res.content.decode("utf-8"))
79
+
80
+ # chunks = docs.split("\n\n#")
81
+ # linkedin_content = chunks[1]
82
+ # user = linkedin_content.split("\n\n", 5)
83
+ # full_name = user[1]
84
+ # bio = user[2]
85
+ # try:
86
+ # date, edited = user[3].split(" ")
87
+ # edited = True
88
+ # except:
89
+ # date = user[3].strip()
90
+ # edited = False
91
+ # content = "\n\n".join(user[5:])
92
+
93
+ # insights = chunks[3].split("\n\n")[2]
94
+ # likes = insights.split(" ", 1)[0].strip()
95
+ # comments = insights.rsplit(" ", 2)[1].strip()
96
+
97
+ # username = url.rsplit("/",1)[-1].split("_")[0]
98
+
99
+ # return {
100
+ # "userDetails": {"full_name": full_name, "username":username,"bio": bio},
101
+ # "content": content,
102
+ # "date": date,
103
+ # "is_edited": edited,
104
+ # "insights": {"likeCount": likes, "commentCount": comments, "shareCount": None, "viewCount":None},
105
+ # "username":username
106
+ # }
107
 
108
 
109
  @app.get("/facebook_post_detail")