Spaces:

YixuanWang
/

Interactive-Recommendation-System

Sleeping

App Files Files Community

YixuanWang committed on Nov 15, 2024

Commit

28fe915

verified ·

1 Parent(s): 7154a46

Update app.py

Browse files

Files changed (1) hide show

app.py +182 -66

app.py CHANGED Viewed

@@ -4,71 +4,187 @@ import numpy as np
 import torch
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
 from textblob import TextBlob
-# Load the dataset from the local file
-data = pd.read_csv('twitter_dataset.csv')
-# Calculate sentiment polarity and popularity
-data['Sentiment'] = data['Text'].apply(lambda x: TextBlob(x).sentiment.polarity)
-data['Popularity'] = data['Retweets'] + data['Likes']
-data['Popularity'] = (data['Popularity'] - data['Popularity'].mean()) / data['Popularity'].std()
-data['Popularity'] = data['Popularity'] / data['Popularity'].abs().max()
-# Load the fake news classification model
-model_name = "hamzab/roberta-fake-news-classification"
-tokenizer = AutoTokenizer.from_pretrained(model_name)
-model = AutoModelForSequenceClassification.from_pretrained(model_name)
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-model = model.to(device)
-# Process tweets in batches to avoid memory issues
-batch_size = 100
-predictions = []
-for i in range(0, len(data), batch_size):
-    batch = data['Text'][i:i + batch_size].tolist()
-    inputs = tokenizer(batch, return_tensors="pt", padding=True, truncation=True, max_length=128)
-    inputs = {key: val.to(device) for key, val in inputs.items()}
-    with torch.no_grad():
-        outputs = model(**inputs)
-    predictions.extend(outputs.logits.argmax(dim=1).cpu().numpy())
-data['Fake_News_Prediction'] = predictions
-data['Credibility'] = data['Fake_News_Prediction'].apply(lambda x: 1 if x == 1 else -1)
-# Define the prediction and recommendation function
-def predict_and_recommend(visibility_weight, sentiment_weight, popularity_weight):
-    # Adjust weights and calculate the final score
-    total_weight = visibility_weight + sentiment_weight + popularity_weight
-    visibility_weight /= total_weight
-    sentiment_weight /= total_weight
-    popularity_weight /= total_weight
-    # Update final visibility score with user-defined weights
-    data['User_Final_Visibility_Score'] = (
-        data['Credibility'] * visibility_weight +
-        data['Sentiment'] * sentiment_weight +
-        data['Popularity'] * popularity_weight
     )
-    # Sort and randomly sample 10 recommendations
-    top_100_data = data.nlargest(100, 'User_Final_Visibility_Score')
-    recommended_data = top_100_data.sample(10)
-    # Format output with empty lines between tweets
-    output = "\n\n".join(f"**Tweet**: {row['Text']}\n**Score**: {row['User_Final_Visibility_Score']:.2f}"
-                         for _, row in recommended_data.iterrows())
-    return output
-# Set up Gradio interface
-iface = gr.Interface(
-    fn=predict_and_recommend,
-    inputs=[
-        gr.Slider(0, 1, 0.5, label="Visibility Weight"),
-        gr.Slider(0, 1, 0.3, label="Sentiment Weight"),
-        gr.Slider(0, 1, 0.2, label="Popularity Weight")
-    ],
-    outputs="markdown",
-    title="Customizable Fake News Recommendation System",
-    description="Adjust weights to receive customized tweet recommendations based on visibility, sentiment, and popularity."
-)
-iface.launch()

 import torch
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
 from textblob import TextBlob
+from typing import List, Dict, Tuple
+from dataclasses import dataclass
+from pathlib import Path
+import logging
+# Configure the root logger at INFO level and create this module's logger.
+# The loader, classifier and main() below report their errors through `logger`.
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+@dataclass
+class RecommendationWeights:
+    """Weights applied to the three per-tweet components of the final score."""
+    # Multiplies the 'Credibility' column (+1 / -1 derived from the classifier output).
+    visibility: float
+    # Multiplies the 'Sentiment' column (TextBlob polarity of the tweet text).
+    sentiment: float
+    # Multiplies the 'Popularity' column (normalized Retweets + Likes).
+    popularity: float
+class TweetPreprocessor:
+    """Load the tweet dataset and derive per-tweet sentiment and popularity columns."""
+    def __init__(self, data_path: Path):
+        """Load and validate the CSV at ``data_path``; the frame is kept on ``self.data``."""
+        self.data = self._load_data(data_path)
+    @staticmethod
+    def _load_data(data_path: Path) -> pd.DataFrame:
+        """Read the CSV and check that the columns used downstream are present.
+        Raises:
+            ValueError: if any of 'Text', 'Retweets' or 'Likes' is missing.
+            Exception: any error from ``pd.read_csv`` is logged and re-raised unchanged.
+        """
+        try:
+            data = pd.read_csv(data_path)
+            required_columns = {'Text', 'Retweets', 'Likes'}
+            missing = required_columns - set(data.columns)
+            if missing:
+                raise ValueError(f"Missing required columns: {missing}")
+            return data
+        except Exception as e:
+            logger.error(f"Error loading data: {e}")
+            raise
+    def calculate_metrics(self) -> pd.DataFrame:
+        """Add 'Sentiment' and 'Popularity' columns to ``self.data`` in place and return it."""
+        self.data['Sentiment'] = self.data['Text'].apply(self._get_sentiment)
+        self.data['Popularity'] = self._normalize_popularity()
+        return self.data
+    @staticmethod
+    def _get_sentiment(text: str) -> float:
+        """Return the TextBlob polarity of ``text``, or 0.0 if it cannot be computed."""
+        try:
+            # str() lets non-string cells (e.g. NaN from an empty CSV field) be scored.
+            return TextBlob(str(text)).sentiment.polarity
+        except Exception as e:
+            logger.warning(f"Error calculating sentiment: {e}")
+            return 0.0
+    def _normalize_popularity(self) -> pd.Series:
+        """Return Retweets + Likes as a z-score rescaled into [-1, 1].
+        The z-score is divided by its largest absolute value so popularity sits on
+        the same [-1, 1] scale as the credibility (+1 / -1) and sentiment terms;
+        an unbounded z-score would let one viral tweet outweigh both regardless of
+        the user's weights. When the counts carry no spread (a single row, or all
+        rows equal) every tweet gets 0.0.
+        """
+        popularity = self.data['Retweets'] + self.data['Likes']
+        spread = popularity.std()
+        # std() is NaN for fewer than two rows and 0 when all counts are equal;
+        # `not spread > 0` catches both (NaN compares False) and avoids NaN scores.
+        if not spread > 0:
+            return pd.Series(0.0, index=popularity.index)
+        standardized = (popularity - popularity.mean()) / spread
+        # spread > 0 guarantees a non-zero deviation exists, so the divisor is > 0.
+        return standardized / standardized.abs().max()
+class FakeNewsClassifier:
+    """Wrap a Hugging Face sequence-classification model for labelling texts."""
+    def __init__(self, model_name: str):
+        """Select CUDA when available (otherwise CPU) and load ``model_name`` onto it."""
+        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        self.model_name = model_name
+        self.model, self.tokenizer = self._load_model()
+    def _load_model(self) -> Tuple[AutoModelForSequenceClassification, AutoTokenizer]:
+        """Return ``(model, tokenizer)`` for ``self.model_name``; log and re-raise on failure."""
+        try:
+            tokenizer = AutoTokenizer.from_pretrained(self.model_name)
+            model = AutoModelForSequenceClassification.from_pretrained(self.model_name)
+            return model.to(self.device), tokenizer
+        except Exception as e:
+            logger.error(f"Error loading model: {e}")
+            raise
+    @torch.no_grad()
+    def predict_batch(self, texts: List[str], batch_size: int = 100) -> np.ndarray:
+        """Return the argmax class index for each text, running ``batch_size`` texts per forward pass."""
+        labels = []
+        for start in range(0, len(texts), batch_size):
+            chunk = texts[start:start + batch_size]
+            # Pad to the longest text in the chunk and cut anything beyond 128 tokens.
+            encoded = self.tokenizer(chunk, return_tensors="pt", padding=True, truncation=True, max_length=128)
+            encoded = encoded.to(self.device)
+            logits = self.model(**encoded).logits
+            labels.extend(logits.argmax(dim=1).cpu().numpy())
+        return np.array(labels)
+class RecommendationSystem:
+    """Score tweets by weighted credibility, sentiment and popularity and recommend a random sample of the best."""
+    # Recommendations are drawn at random from this many top-scoring tweets.
+    CANDIDATE_POOL_SIZE = 100
+    def __init__(self, data_path: Path, model_name: str):
+        """Load the dataset and classifier, then precompute every per-tweet feature."""
+        self.preprocessor = TweetPreprocessor(data_path)
+        self.classifier = FakeNewsClassifier(model_name)
+        self.data = None
+        self.setup_system()
+    def setup_system(self):
+        """Compute the feature columns and classify every tweet once, up front."""
+        self.data = self.preprocessor.calculate_metrics()
+        # Coerce to str so non-string cells (e.g. NaN) do not break the tokenizer,
+        # matching the str() coercion used when computing sentiment.
+        predictions = self.classifier.predict_batch(self.data['Text'].astype(str).tolist())
+        # Predicted class 1 scores +1; any other class scores -1.
+        self.data['Credibility'] = [1 if pred == 1 else -1 for pred in predictions]
+    def get_recommendations(self, weights: RecommendationWeights, num_recommendations: int = 10) -> str:
+        """Return markdown for a random sample of the highest-scoring tweets.
+        Args:
+            weights: non-negative component weights; they are normalized to sum to 1.
+            num_recommendations: how many tweets to return; capped at the number of
+                candidates available so a small dataset does not raise.
+        Returns:
+            The formatted tweets, or an error message if any weight is negative.
+        """
+        if not self._validate_weights(weights):
+            return "Error: Invalid weights provided"
+        normalized_weights = self._normalize_weights(weights)
+        final_score = (
+            self.data['Credibility'] * normalized_weights.visibility +
+            self.data['Sentiment'] * normalized_weights.sentiment +
+            self.data['Popularity'] * normalized_weights.popularity
+        )
+        # Score a per-request copy instead of writing into self.data, so overlapping
+        # requests with different weights cannot overwrite each other's scores.
+        scored = self.data.assign(Final_Score=final_score)
+        candidates = scored.nlargest(self.CANDIDATE_POOL_SIZE, 'Final_Score')
+        # DataFrame.sample raises ValueError when asked for more rows than exist.
+        sample_size = min(num_recommendations, len(candidates))
+        return self._format_recommendations(candidates.sample(sample_size))
+    @staticmethod
+    def _validate_weights(weights: RecommendationWeights) -> bool:
+        """Return True when every weight field is >= 0."""
+        return all(getattr(weights, field) >= 0 for field in weights.__dataclass_fields__)
+    @staticmethod
+    def _normalize_weights(weights: RecommendationWeights) -> RecommendationWeights:
+        """Return a copy of ``weights`` scaled to sum to 1 (equal thirds if all are 0)."""
+        total = weights.visibility + weights.sentiment + weights.popularity
+        if total == 0:
+            # Nothing to scale by; fall back to weighting each component equally.
+            return RecommendationWeights(1/3, 1/3, 1/3)
+        return RecommendationWeights(
+            visibility=weights.visibility / total,
+            sentiment=weights.sentiment / total,
+            popularity=weights.popularity / total
+        )
+    @staticmethod
+    def _format_recommendations(recommendations: pd.DataFrame) -> str:
+        """Render each row's 'Text' and 'Final_Score' as markdown, one blank line between tweets."""
+        return "\n\n".join(
+            f"**Tweet**: {row['Text']}\n**Score**: {row['Final_Score']:.2f}"
+            for _, row in recommendations.iterrows()
+        )
+def create_gradio_interface(recommendation_system: RecommendationSystem) -> gr.Interface:
+    """Build the Gradio UI: three weight sliders in, markdown recommendations out."""
+    def predict_and_recommend(visibility_weight, sentiment_weight, popularity_weight):
+        # Arguments arrive in the same order as the sliders declared below.
+        return recommendation_system.get_recommendations(
+            RecommendationWeights(visibility_weight, sentiment_weight, popularity_weight)
+        )
+    weight_sliders = [
+        gr.Slider(0, 1, 0.5, label="Visibility Weight"),
+        gr.Slider(0, 1, 0.3, label="Sentiment Weight"),
+        gr.Slider(0, 1, 0.2, label="Popularity Weight")
+    ]
+    return gr.Interface(
+        fn=predict_and_recommend,
+        inputs=weight_sliders,
+        outputs="markdown",
+        title="Enhanced Fake News Recommendation System",
+        description="Adjust weights to receive customized tweet recommendations based on visibility, sentiment, and popularity.",
+        theme="default"
     )
+def main():
+    """Build the recommendation system and launch the Gradio app; log and re-raise any failure."""
+    try:
+        system = RecommendationSystem(
+            data_path=Path('twitter_dataset.csv'),
+            model_name="hamzab/roberta-fake-news-classification",
+        )
+        create_gradio_interface(system).launch()
+    except Exception as e:
+        logger.error(f"Application failed to start: {e}")
+        raise
+# Start the app only when this file is run as a script, not when it is imported.
+if __name__ == "__main__":
+    main()