"""Tweet recommendation demo.

Scores tweets on three axes -- credibility (a fake-news classifier),
sentiment (TextBlob polarity), and popularity (likes + retweets) -- and
serves weighted, ranked results through a Gradio interface.
"""

import logging
from dataclasses import dataclass
from pathlib import Path
from typing import Dict

import gradio as gr
import pandas as pd
import torch
from textblob import TextBlob
from transformers import AutoTokenizer, AutoModelForSequenceClassification

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


@dataclass
class RecommendationWeights:
    visibility: float
    sentiment: float
    popularity: float


class TweetPreprocessor:
    def __init__(self, data_path: Path):
        self.data = self._load_data(data_path)
        self.model_name = "hamzab/roberta-fake-news-classification"
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model, self.tokenizer = self._load_model()

    def _load_model(self):
        tokenizer = AutoTokenizer.from_pretrained(self.model_name)
        model = AutoModelForSequenceClassification.from_pretrained(self.model_name).to(self.device)
        model.eval()  # inference only; disables dropout etc.
        return model, tokenizer

    @staticmethod
    def _load_data(data_path: Path) -> pd.DataFrame:
        try:
            data = pd.read_csv(data_path)
            required_columns = {'Text', 'Retweets', 'Likes'}
            if not required_columns.issubset(data.columns):
                raise ValueError(f"Missing required columns: {required_columns - set(data.columns)}")
            return data
        except Exception as e:
            logger.error(f"Error loading data: {e}")
            raise

    def calculate_metrics(self) -> pd.DataFrame:
        # Sentiment: TextBlob polarity in [-1, 1].
        self.data['Sentiment'] = self.data['Text'].apply(lambda x: TextBlob(x).sentiment.polarity)

        # Popularity: z-score of (retweets + likes), rescaled into [-1, 1].
        self.data['Popularity'] = self.data['Retweets'] + self.data['Likes']
        self.data['Popularity'] = (self.data['Popularity'] - self.data['Popularity'].mean()) / self.data['Popularity'].std()
        self.data['Popularity'] = self.data['Popularity'] / self.data['Popularity'].abs().max()

        # Credibility: batched inference with the fake-news classifier.
        # Label index 1 is treated as "real"; anything else is penalized.
        batch_size = 100
        predictions = []
        for i in range(0, len(self.data), batch_size):
            batch = self.data['Text'].iloc[i:i + batch_size].tolist()
            inputs = self.tokenizer(batch, return_tensors="pt", padding=True,
                                    truncation=True, max_length=128)
            inputs = {key: val.to(self.device) for key, val in inputs.items()}
            with torch.no_grad():
                outputs = self.model(**inputs)
            predictions.extend(outputs.logits.argmax(dim=1).cpu().numpy())
        self.data['Credibility'] = [1 if pred == 1 else -1 for pred in predictions]
        return self.data


class RecommendationSystem:
    def __init__(self, data_path: Path):
        self.preprocessor = TweetPreprocessor(data_path)
        self.data = None
        self.setup_system()

    def setup_system(self):
        self.data = self.preprocessor.calculate_metrics()

    def get_recommendations(self, weights: RecommendationWeights, num_recommendations: int = 10) -> Dict:
        if not self._validate_weights(weights):
            return {"error": "Invalid weights provided"}

        normalized_weights = self._normalize_weights(weights)
        self.data['Final_Score'] = (
            self.data['Credibility'] * normalized_weights.visibility +
            self.data['Sentiment'] * normalized_weights.sentiment +
            self.data['Popularity'] * normalized_weights.popularity
        )
        # Sample from the top 100 rather than returning them verbatim, so
        # repeated queries surface some variety. Guard against pools smaller
        # than the requested count.
        candidate_pool = self.data.nlargest(100, 'Final_Score')
        top_recommendations = candidate_pool.sample(min(num_recommendations, len(candidate_pool)))
        return self._format_recommendations(top_recommendations)

    def _format_recommendations(self, recommendations: pd.DataFrame) -> Dict:
        formatted_results = []
        for _, row in recommendations.iterrows():
            score_details = {
                "score": f"{row['Final_Score']:.2f}",
                "credibility": "Reliable" if row['Credibility'] > 0 else "Uncertain",
                "sentiment": self._get_sentiment_label(row['Sentiment']),
                "popularity": f"{row['Popularity']:.2f}",
                "engagement": f"Likes {row['Likes']} · Retweets {row['Retweets']}",
{row['Retweets']}" } formatted_results.append({ "text": row['Text'], "scores": score_details }) return { "recommendations": formatted_results, "score_explanation": self._get_score_explanation() } @staticmethod def _get_sentiment_label(sentiment_score: float) -> str: if sentiment_score > 0.3: return "Positive" elif sentiment_score < -0.3: return "Negative" return "Neutral" @staticmethod def _validate_weights(weights: RecommendationWeights) -> bool: return all(getattr(weights, field) >= 0 for field in weights.__dataclass_fields__) @staticmethod def _normalize_weights(weights: RecommendationWeights) -> RecommendationWeights: total = weights.visibility + weights.sentiment + weights.popularity if total == 0: return RecommendationWeights(1/3, 1/3, 1/3) return RecommendationWeights( visibility=weights.visibility / total, sentiment=weights.sentiment / total, popularity=weights.popularity / total ) @staticmethod def _get_score_explanation() -> Dict[str, str]: return { "Credibility": "Content reliability assessment", "Sentiment": "Text emotional analysis result", "Popularity": "Score based on likes and retweets" } def create_gradio_interface(recommendation_system: RecommendationSystem) -> gr.Interface: with gr.Blocks(theme=gr.themes.Soft()) as interface: gr.Markdown(""" # Tweet Recommendation System Adjust weights to get personalized recommendations Note: To protect user privacy, some tweet content has been redacted or anonymized. """) with gr.Row(): with gr.Column(scale=1): visibility_weight = gr.Slider(0, 1, 0.5, label="Credibility Weight", info="Adjust importance of content credibility") sentiment_weight = gr.Slider(0, 1, 0.3, label="Sentiment Weight", info="Adjust importance of emotional tone") popularity_weight = gr.Slider(0, 1, 0.2, label="Popularity Weight", info="Adjust importance of engagement metrics") submit_btn = gr.Button("Get Recommendations", variant="primary") with gr.Column(scale=2): output_html = gr.HTML() def format_recommendations(raw_recommendations): html = '