m-check / news_checker.py
Ozgur Unlu
changed the grammar checking model
b00d113
raw
history blame
3.19 kB
import os
from newsapi import NewsApiClient
from dotenv import load_dotenv
import pandas as pd
from datetime import datetime, timedelta
# Load settings from a .env file (if one is present) at import time, before
# NewsChecker reads NEWS_API_KEY via os.getenv.
load_dotenv()
class NewsChecker:
    """Compare marketing copy against recent news headlines from NewsAPI.

    The API key is read from the ``NEWS_API_KEY`` environment variable.
    Every failure path (missing key, client error, bad response) is reported
    with ``print`` and degrades to an empty result instead of raising.
    """

    def __init__(self):
        """Read the API key from the environment and build the NewsAPI client."""
        self.api_key = os.getenv('NEWS_API_KEY')
        # Define the attribute up front so it exists even when client
        # construction below fails.
        self.newsapi = None

        if not self.api_key:
            print("WARNING: NEWS_API_KEY not found in environment variables")
        else:
            print("NEWS_API_KEY found in environment variables")

        try:
            self.newsapi = NewsApiClient(api_key=self.api_key)
        except Exception as e:
            # Best-effort: keep the object usable; get_recent_news() will
            # report that no client is available.
            print(f"Error initializing NewsAPI client: {str(e)}")

    def get_recent_news(self):
        """Fetch English-language articles from the last 7 days.

        Returns:
            pandas.DataFrame: one row per article that has a non-empty
            description, with columns ``title`` and ``description``. An empty
            DataFrame is returned when no API key or client is available,
            when the response status is not ``'ok'``, or when any exception
            occurs while fetching.
        """
        if not self.api_key:
            print("Cannot fetch news: No API key configured")
            return pd.DataFrame()

        client = getattr(self, 'newsapi', None)
        if client is None:
            print("Cannot fetch news: NewsAPI client is not initialized")
            return pd.DataFrame()

        try:
            # Get news from the last 7 days
            week_ago = (datetime.now() - timedelta(days=7)).strftime('%Y-%m-%d')
            # NOTE(review): the query string is empty; confirm that the
            # "everything" endpoint accepts a request without search terms.
            response = client.get_everything(
                q='',
                from_param=week_ago,
                language='en',
                sort_by='relevancy',
                page_size=100
            )

            if response['status'] != 'ok':
                print(f"NewsAPI response status was not 'ok': {response.get('status')}")
                return pd.DataFrame()

            # Extract titles and descriptions; .get() keeps one malformed
            # article from discarding the whole batch.
            news_data = [
                {
                    'title': article.get('title'),
                    'description': article.get('description')
                }
                for article in response['articles']
                if article.get('description')
            ]
            print(f"Successfully fetched {len(news_data)} articles")
            return pd.DataFrame(news_data)
        except Exception as e:
            print(f"Error fetching news: {str(e)}")
            return pd.DataFrame()

    def check_content_against_news(self, marketing_text):
        """Flag recent headlines that share several words with the marketing text.

        Args:
            marketing_text: the marketing copy to check; it is lower-cased and
                split on whitespace.

        Returns:
            dict: keys ``status`` and ``message``. ``status`` is ``'warning'``
            when no news could be fetched or when at least one headline shares
            three or more words with the text, otherwise ``'pass'``.
        """
        news_df = self.get_recent_news()

        if news_df.empty:
            return {
                'status': 'warning',
                'message': 'Unable to check against current news context. Proceed with caution.'
            }

        # Simple keyword matching for demo purposes
        marketing_words = set(marketing_text.lower().split())
        potential_conflicts = []

        for title in news_df['title']:
            # Articles are filtered on description only, so a title can be
            # missing (None/NaN); skip those instead of crashing on .lower().
            if not isinstance(title, str):
                continue
            title_words = set(title.lower().split())
            # Check for significant word overlap
            if len(marketing_words & title_words) >= 3:
                potential_conflicts.append(title)

        if potential_conflicts:
            return {
                'status': 'warning',
                'message': 'Potential conflicts found with current news:\n- ' + '\n- '.join(potential_conflicts)
            }

        return {
            'status': 'pass',
            'message': 'No significant conflicts with current news found.'
        }