# routers/getnews.py
import re
from typing import List, Dict

import httpx
from bs4 import BeautifulSoup
from fastapi import APIRouter, HTTPException

router = APIRouter()

GRAPHQL_URL = "https://api.graphql.imdb.com"
HEADERS = {"Content-Type": "application/json"}
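
# Single GraphQL query that fetches the latest MOVIE and TV news in one request.
# `plaidHtml` carries the article body as HTML, which clean_html() below reduces to plain text.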
QUERY = """
query GetNews($first: Int!) {
  movieNews: news(first: $first, category: MOVIE) {
    edges {
      node {
        id
        articleTitle { plainText }
        externalUrl
        date
        text { plaidHtml }
        image { url }
      }
    }
  }
  tvNews: news(first: $first, category: TV) {
    edges {
      node {
        id
        articleTitle { plainText }
        externalUrl
        date
        text { plaidHtml }
        image { url }
      }
    }
  }
}
"""
def clean_html(raw_html: str) -> str:
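    """Strip HTML tags and normalize whitespace and punctuation spacing.

    Illustrative example:
        clean_html("<p>Hello ,  world ( test )</p>") -> "Hello, world (test)"
    """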
    # Strip HTML tags, keeping only the text content
    text = BeautifulSoup(raw_html or "", "html.parser").get_text(separator=" ", strip=True)
    # Collapse runs of whitespace (spaces, tabs, line breaks, etc.) into a single space
    text = re.sub(r"\s+", " ", text)
    # Remove spaces before punctuation
    text = re.sub(r"\s+([.,;:!?])", r"\1", text)
    # Remove spaces after opening and before closing parentheses
    text = re.sub(r"\(\s+", "(", text)
    text = re.sub(r"\s+\)", ")", text)
    # Same for square brackets and curly braces
    text = re.sub(r"\[\s+", "[", text)
    text = re.sub(r"\s+\]", "]", text)
    text = re.sub(r"\{\s+", "{", text)
    text = re.sub(r"\s+\}", "}", text)
    return text.strip()


@router.get("/news")
async def get_news(first: int = 15) -> List[Dict]:
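    """Fetch the latest MOVIE and TV news from IMDb's GraphQL API, merged into one list sorted newest first."""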
    payload = {
        "query": QUERY,
        "variables": {"first": first}
    }
    async with httpx.AsyncClient(timeout=10.0) as client:
        response = await client.post(GRAPHQL_URL, headers=HEADERS, json=payload)

    if response.status_code != 200:
        raise HTTPException(status_code=502, detail="Error reaching the IMDb API")

    data = response.json().get("data")
    if not data:
        raise HTTPException(status_code=500, detail="Invalid response from the IMDb API")
    combined = []
    for category_key in ["movieNews", "tvNews"]:
        for edge in data.get(category_key, {}).get("edges", []):
            node = edge.get("node", {})
            # `or {}` guards against fields that come back as explicit nulls
            combined.append({
                "id": node.get("id"),
                "title": (node.get("articleTitle") or {}).get("plainText"),
                "url": node.get("externalUrl"),
                "date": node.get("date"),
                "text": clean_html((node.get("text") or {}).get("plaidHtml")),
                "image": (node.get("image") or {}).get("url"),
                "category": category_key.replace("News", "").upper()
            })
    # Newest first; fall back to an empty string so a missing date cannot break the sort
    combined.sort(key=lambda x: x.get("date") or "", reverse=True)
    return combined
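
# Minimal usage sketch (assumes this module lives in a `routers` package next to a
# top-level FastAPI app; file and variable names below are illustrative):
#
#     from fastapi import FastAPI
#     from routers.getnews import router as news_router
#
#     app = FastAPI()
#     app.include_router(news_router)
#
# GET /news?first=10 then returns up to 20 items (10 per category), newest first.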