Spaces:
Sleeping
Sleeping
UPDATE: urls
Browse files- functions.py +5 -1
functions.py
CHANGED
@@ -287,7 +287,11 @@ def getLinks(url: str, timeout = 30):
|
|
287 |
break
|
288 |
else:
|
289 |
uniqueLinks = uniqueLinks.union(set(getLinksFromPage(link)))
|
290 |
-
|
|
|
|
|
|
|
|
|
291 |
|
292 |
|
293 |
def getTextLength(text: str):
|
|
|
287 |
break
|
288 |
else:
|
289 |
uniqueLinks = uniqueLinks.union(set(getLinksFromPage(link)))
|
290 |
+
allLinks = {}
|
291 |
+
foundLinks = list(set([x[:len(x) - 1] if x[-1] == "/" else x for x in uniqueLinks]))
|
292 |
+
for link in foundLinks:
|
293 |
+
allLinks[link] = len(BeautifulSoup(requests.get(link).text, "lxml").body.get_text(" ", strip = True))
|
294 |
+
return allLinks
|
295 |
|
296 |
|
297 |
def getTextLength(text: str):
|