Rauhan committed on
Commit
937797f
1 Parent(s): fce68f1

UPDATE: urls

Browse files
Files changed (1) hide show
  1. functions.py +5 -1
functions.py CHANGED
@@ -287,7 +287,11 @@ def getLinks(url: str, timeout = 30):
287
  break
288
  else:
289
  uniqueLinks = uniqueLinks.union(set(getLinksFromPage(link)))
290
- return list(set([x[:len(x) - 1] if x[-1] == "/" else x for x in uniqueLinks]))
 
 
 
 
291
 
292
 
293
  def getTextLength(text: str):
 
287
  break
288
  else:
289
  uniqueLinks = uniqueLinks.union(set(getLinksFromPage(link)))
290
+ allLinks = {}
291
+ foundLinks = list(set([x[:len(x) - 1] if x[-1] == "/" else x for x in uniqueLinks]))
292
+ for link in foundLinks:
293
+ allLinks[link] = len(BeautifulSoup(requests.get(link).text, "lxml").body.get_text(" ", strip = True))
294
+ return allLinks
295
 
296
 
297
  def getTextLength(text: str):