import asyncio
import logging
from datetime import datetime, timedelta
from typing import Dict, List, Union
from urllib.parse import urlparse, urlunparse

import aiohttp
import feedparser

from config import load_feeds, load_api_keys, SETTINGS

logger = logging.getLogger(__name__)

class NewsFetcher:

    def __init__(self):
        self.feeds = load_feeds()
        self.api_keys = load_api_keys()
        self.session = None

    async def __aenter__(self):
        self.session = aiohttp.ClientSession(
            timeout=aiohttp.ClientTimeout(total=SETTINGS["request_timeout"]),
            headers={"User-Agent": "NewsBot/1.0"}
        )
        return self

    async def __aexit__(self, *exc):
        await self.session.close()
        self.session = None

    async def fetch(self, url: str, source_type: str = "rss") -> Union[str, Dict, None]:
        retries = 0
        while retries < SETTINGS["max_retries"]:
            try:
                async with self.session.get(url) as response:
                    if response.status == 200:
                        return await response.json() if source_type == "newsapi" else await response.text()
                    logger.warning(f"Error fetching {url}: status {response.status}")
            except Exception as e:
                logger.error(f"Connection error: {str(e)}")
            # Count the attempt for both non-200 responses and exceptions,
            # then back off briefly before retrying.
            retries += 1
            await asyncio.sleep(5)
        return None

    async def process_rss(self, feed_config: Dict, processed_links: set) -> List[Dict]:
        content = await self.fetch(feed_config["url"])
        if not content:
            return []

        feed = feedparser.parse(content)
        articles = []
        cutoff_time = datetime.now() - timedelta(hours=24)

        for entry in feed.entries[:SETTINGS["max_articles"]]:
            try:
                # feedparser may omit published_parsed or set it to None.
                if getattr(entry, "published_parsed", None):
                    pub_date = datetime(*entry.published_parsed[:6])
                else:
                    pub_date = datetime.now()
                if pub_date < cutoff_time:
                    continue

                article = {
                    "title": entry.title,
                    "link": self.normalize_url(entry.link),
                    "source": feed_config["name"],
                    "description": entry.get("summary", "")[:500],
                    "published": pub_date,
                    "category": feed_config.get("category", "general")
                }

                if article["link"] not in processed_links:
                    articles.append(article)
                    processed_links.add(article["link"])
            except Exception as e:
                logger.error(f"Error processing RSS entry: {str(e)}")
        return articles

    async def get_news_api(self, feed_config: Dict, processed_links: set) -> List[Dict]:
        api_key = self.api_keys.get("newsapi")
        if not api_key:
            logger.error("NewsAPI key is missing!")
            return []

        url = (
            f"https://newsapi.org/v2/everything?q={feed_config['query']}"
            f"&pageSize={SETTINGS['newsapi_page_size']}&apiKey={api_key}"
        )
        data = await self.fetch(url, "newsapi")
        if not data:
            return []

        articles = []
        cutoff_time = datetime.now() - timedelta(hours=24)

        for article in data.get("articles", []):
            try:
                # NewsAPI timestamps look like "2024-01-01T12:00:00Z"; strip the
                # trailing "Z" so datetime.fromisoformat() accepts them.
                pub_date = datetime.fromisoformat(article["publishedAt"].rstrip("Z"))
                if pub_date < cutoff_time:
                    continue

                entry = {
                    "title": article["title"],
                    "link": self.normalize_url(article["url"]),
                    "source": feed_config["name"],
                    "description": (article.get("description") or "")[:500],
                    "published": pub_date,
                    "category": feed_config.get("category", "general")
                }

                if entry["link"] not in processed_links:
                    articles.append(entry)
                    processed_links.add(entry["link"])
            except Exception as e:
                logger.error(f"Error processing NewsAPI article: {str(e)}")
        return articles

    def normalize_url(self, url: str) -> str:
        # Drop query string and fragment so the same article URL is deduplicated
        # regardless of tracking parameters.
        parsed = urlparse(url)
        return urlunparse((parsed.scheme, parsed.netloc, parsed.path, "", "", ""))
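

# Usage sketch (an assumption, not part of the original module): it illustrates
# the intended flow with the async context manager above. The feed dicts are
# assumed to carry the keys the methods access ("url" or "query", "name",
# "category"); the "type" key used to choose the fetch path is a hypothetical
# convention, not confirmed by config.py.
async def main():
    processed_links: set = set()
    async with NewsFetcher() as fetcher:
        for feed in fetcher.feeds:
            if feed.get("type") == "newsapi":
                articles = await fetcher.get_news_api(feed, processed_links)
            else:
                articles = await fetcher.process_rss(feed, processed_links)
            for article in articles:
                logger.info(f"{article['source']}: {article['title']}")


if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    asyncio.run(main())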