Update routers/getnews.py
Browse files- routers/getnews.py +17 -2
routers/getnews.py
CHANGED
@@ -39,10 +39,25 @@ query GetNews($first: Int!) {
|
|
39 |
"""
|
40 |
|
41 |
def clean_html(raw_html: str) -> str:
    """Return the text content of an HTML fragment with whitespace collapsed.

    A falsy ``raw_html`` is parsed as the empty string. The markup is parsed
    with BeautifulSoup's ``html.parser`` backend, the text pieces are joined
    with single spaces, and every run of whitespace is reduced to one space.
    """
    soup = BeautifulSoup(raw_html or "", "html.parser")
    plain = soup.get_text(separator=" ", strip=True)
    # Collapse tabs, newlines and repeated spaces into a single space.
    collapsed = re.sub(r"\s+", " ", plain)
    return collapsed.strip()
|
47 |
|
48 |
@router.get("/news")
|
|
|
39 |
"""
|
40 |
|
41 |
def clean_html(raw_html: str) -> str:
    """Convert an HTML fragment to plain text with tidied spacing.

    A falsy ``raw_html`` is parsed as the empty string. After the text is
    extracted with BeautifulSoup, a fixed sequence of regex passes collapses
    whitespace and removes the spaces found before punctuation marks and
    just inside parentheses, square brackets and curly braces.
    """
    soup = BeautifulSoup(raw_html or "", "html.parser")
    cleaned = soup.get_text(separator=" ", strip=True)

    # Ordered (pattern, replacement) passes, applied top to bottom.
    passes = (
        # Collapse runs of spaces, tabs, line breaks, etc. into one space.
        (r"\s+", " "),
        # Drop whitespace that precedes a punctuation mark.
        (r"\s+([.,;:!?])", r"\1"),
        # Drop whitespace after an opening / before a closing parenthesis.
        (r"\(\s+", "("),
        (r"\s+\)", ")"),
        # Same treatment for square brackets and curly braces.
        (r"\[\s+", "["),
        (r"\s+\]", "]"),
        (r"\{\s+", "{"),
        (r"\s+\}", "}"),
    )
    for pattern, replacement in passes:
        cleaned = re.sub(pattern, replacement, cleaned)

    return cleaned.strip()
|
62 |
|
63 |
@router.get("/news")
|