Spaces:

YixuanWang
/

Interactive-Recommendation-System

Sleeping

App Files Files Community

YixuanWang committed on Nov 15, 2024

Commit

37190a8

verified ·

1 Parent(s): 596f852

Update app.py

Browse files

Files changed (1) hide show

app.py +35 -55

app.py CHANGED Viewed

@@ -1,13 +1,13 @@
 import gradio as gr
 import pandas as pd
 import numpy as np
 from textblob import TextBlob
 from typing import List, Dict, Tuple
 from dataclasses import dataclass
 from pathlib import Path
 import logging
-import re
-from datetime import datetime
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
@@ -20,12 +20,18 @@ class RecommendationWeights:
 class TweetPreprocessor:
     def __init__(self, data_path: Path):
-        """Initialize the preprocessor with data path."""
         self.data = self._load_data(data_path)
     @staticmethod
     def _load_data(data_path: Path) -> pd.DataFrame:
-        """Load and validate the dataset."""
         try:
             data = pd.read_csv(data_path)
             required_columns = {'Text', 'Retweets', 'Likes'}
@@ -36,39 +42,28 @@ class TweetPreprocessor:
             logger.error(f"Error loading data: {e}")
             raise
-    def _clean_text(self, text: str) -> str:
-        """Clean text content."""
-        if pd.isna(text) or len(str(text).strip()) < 10:
-            return ""
-        text = re.sub(r'http\S+|www.\S+', '', str(text))
-        text = re.sub(r'[^\w\s]', '', text)
-        text = ' '.join(text.split())
-        return text
     def calculate_metrics(self) -> pd.DataFrame:
-        """Calculate all metrics for tweets."""
-        self.data['Clean_Text'] = self.data['Text'].apply(self._clean_text)
-        self.data = self.data[self.data['Clean_Text'].str.len() > 0]
-        self.data['Sentiment'] = self.data['Clean_Text'].apply(self._get_sentiment)
-        self.data['Popularity'] = self._normalize_popularity()
         return self.data
-    @staticmethod
-    def _get_sentiment(text: str) -> float:
-        """Calculate sentiment polarity for a text."""
-        try:
-            return TextBlob(str(text)).sentiment.polarity
-        except Exception as e:
-            logger.warning(f"Error calculating sentiment: {e}")
-            return 0.0
-    def _normalize_popularity(self) -> pd.Series:
-        """Normalize popularity scores."""
-        popularity = self.data['Retweets'] + self.data['Likes']
-        return (popularity - popularity.min()) / (popularity.max() - popularity.min() + 1e-6)
 class RecommendationSystem:
     def __init__(self, data_path: Path):
@@ -77,36 +72,28 @@ class RecommendationSystem:
         self.setup_system()
     def setup_system(self):
-        """Initialize the system with preprocessed data."""
         self.data = self.preprocessor.calculate_metrics()
-    def recalculate_scores(self, weights: RecommendationWeights):
-        """Recalculate scores based on new weights."""
         normalized_weights = self._normalize_weights(weights)
-        self.data['Credibility'] = np.random.choice([0, 1], size=len(self.data), p=[0.3, 0.7])
         self.data['Final_Score'] = (
             self.data['Credibility'] * normalized_weights.visibility +
             self.data['Sentiment'] * normalized_weights.sentiment +
             self.data['Popularity'] * normalized_weights.popularity
         )
-    def get_recommendations(self, weights: RecommendationWeights, num_recommendations: int = 10) -> Dict:
-        """Get tweet recommendations based on weights."""
-        if not self._validate_weights(weights):
-            return {"error": "Invalid weights provided"}
-        self.recalculate_scores(weights)
         top_recommendations = (
-            self.data.nlargest(num_recommendations, 'Final_Score')
         )
         return self._format_recommendations(top_recommendations)
     def _format_recommendations(self, recommendations: pd.DataFrame) -> Dict:
-        """Format recommendations for display."""
         formatted_results = []
         for _, row in recommendations.iterrows():
             score_details = {
@@ -118,7 +105,7 @@ class RecommendationSystem:
             }
             formatted_results.append({
-                "text": row['Clean_Text'],
                 "scores": score_details
             })
@@ -129,7 +116,6 @@ class RecommendationSystem:
     @staticmethod
     def _get_sentiment_label(sentiment_score: float) -> str:
-        """Convert sentiment score to label."""
         if sentiment_score > 0.3:
             return "Positive"
         elif sentiment_score < -0.3:
@@ -138,12 +124,10 @@ class RecommendationSystem:
     @staticmethod
     def _validate_weights(weights: RecommendationWeights) -> bool:
-        """Validate that weights are non-negative."""
         return all(getattr(weights, field) >= 0 for field in weights.__dataclass_fields__)
     @staticmethod
     def _normalize_weights(weights: RecommendationWeights) -> RecommendationWeights:
-        """Normalize weights to sum to 1."""
         total = weights.visibility + weights.sentiment + weights.popularity
         if total == 0:
             return RecommendationWeights(1/3, 1/3, 1/3)
@@ -155,7 +139,6 @@ class RecommendationSystem:
     @staticmethod
     def _get_score_explanation() -> Dict[str, str]:
-        """Provide explanation for different score components."""
         return {
             "Credibility": "Content reliability assessment",
             "Sentiment": "Text emotional analysis result",
@@ -163,7 +146,6 @@ class RecommendationSystem:
         }
 def create_gradio_interface(recommendation_system: RecommendationSystem) -> gr.Interface:
-    """Create and configure the Gradio interface."""
     with gr.Blocks(theme=gr.themes.Soft()) as interface:
         gr.Markdown("""
         # Tweet Recommendation System
@@ -224,7 +206,6 @@ def create_gradio_interface(recommendation_system: RecommendationSystem) -> gr.I
             return html
         def get_recommendations_with_weights(v, s, p):
-            """Get recommendations with current weights."""
             weights = RecommendationWeights(v, s, p)
             return format_recommendations(recommendation_system.get_recommendations(weights))
@@ -237,7 +218,6 @@ def create_gradio_interface(recommendation_system: RecommendationSystem) -> gr.I
     return interface
 def main():
-    """Main function to run the application."""
     try:
         recommendation_system = RecommendationSystem(
             data_path=Path('twitter_dataset.csv')

 import gradio as gr
 import pandas as pd
 import numpy as np
+import torch
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
 from textblob import TextBlob
 from typing import List, Dict, Tuple
 from dataclasses import dataclass
 from pathlib import Path
 import logging
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 class TweetPreprocessor:
     def __init__(self, data_path: Path):
         self.data = self._load_data(data_path)
+        self.model_name = "hamzab/roberta-fake-news-classification"
+        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        self.model, self.tokenizer = self._load_model()
+    def _load_model(self):
+        tokenizer = AutoTokenizer.from_pretrained(self.model_name)
+        model = AutoModelForSequenceClassification.from_pretrained(self.model_name).to(self.device)
+        return model, tokenizer
     @staticmethod
     def _load_data(data_path: Path) -> pd.DataFrame:
         try:
             data = pd.read_csv(data_path)
             required_columns = {'Text', 'Retweets', 'Likes'}
             logger.error(f"Error loading data: {e}")
             raise
     def calculate_metrics(self) -> pd.DataFrame:
+        # Calculate sentiment
+        self.data['Sentiment'] = self.data['Text'].apply(lambda x: TextBlob(x).sentiment.polarity)
+        # Calculate popularity
+        self.data['Popularity'] = self.data['Retweets'] + self.data['Likes']
+        self.data['Popularity'] = (self.data['Popularity'] - self.data['Popularity'].mean()) / self.data['Popularity'].std()
+        self.data['Popularity'] = self.data['Popularity'] / self.data['Popularity'].abs().max()
+        # Calculate credibility using fake news model
+        batch_size = 100
+        predictions = []
+        for i in range(0, len(self.data), batch_size):
+            batch = self.data['Text'][i:i + batch_size].tolist()
+            inputs = self.tokenizer(batch, return_tensors="pt", padding=True, truncation=True, max_length=128)
+            inputs = {key: val.to(self.device) for key, val in inputs.items()}
+            with torch.no_grad():
+                outputs = self.model(**inputs)
+                predictions.extend(outputs.logits.argmax(dim=1).cpu().numpy())
+        self.data['Credibility'] = [1 if pred == 1 else -1 for pred in predictions]
         return self.data
 class RecommendationSystem:
     def __init__(self, data_path: Path):
         self.setup_system()
     def setup_system(self):
         self.data = self.preprocessor.calculate_metrics()
+    def get_recommendations(self, weights: RecommendationWeights, num_recommendations: int = 10) -> Dict:
+        if not self._validate_weights(weights):
+            return {"error": "Invalid weights provided"}
         normalized_weights = self._normalize_weights(weights)
         self.data['Final_Score'] = (
             self.data['Credibility'] * normalized_weights.visibility +
             self.data['Sentiment'] * normalized_weights.sentiment +
             self.data['Popularity'] * normalized_weights.popularity
         )
         top_recommendations = (
+            self.data.nlargest(100, 'Final_Score')
+            .sample(num_recommendations)
         )
         return self._format_recommendations(top_recommendations)
     def _format_recommendations(self, recommendations: pd.DataFrame) -> Dict:
         formatted_results = []
         for _, row in recommendations.iterrows():
             score_details = {
             }
             formatted_results.append({
+                "text": row['Text'],
                 "scores": score_details
             })
     @staticmethod
     def _get_sentiment_label(sentiment_score: float) -> str:
         if sentiment_score > 0.3:
             return "Positive"
         elif sentiment_score < -0.3:
     @staticmethod
     def _validate_weights(weights: RecommendationWeights) -> bool:
         return all(getattr(weights, field) >= 0 for field in weights.__dataclass_fields__)
     @staticmethod
     def _normalize_weights(weights: RecommendationWeights) -> RecommendationWeights:
         total = weights.visibility + weights.sentiment + weights.popularity
         if total == 0:
             return RecommendationWeights(1/3, 1/3, 1/3)
     @staticmethod
     def _get_score_explanation() -> Dict[str, str]:
         return {
             "Credibility": "Content reliability assessment",
             "Sentiment": "Text emotional analysis result",
         }
 def create_gradio_interface(recommendation_system: RecommendationSystem) -> gr.Interface:
     with gr.Blocks(theme=gr.themes.Soft()) as interface:
         gr.Markdown("""
         # Tweet Recommendation System
             return html
         def get_recommendations_with_weights(v, s, p):
             weights = RecommendationWeights(v, s, p)
             return format_recommendations(recommendation_system.get_recommendations(weights))
     return interface
 def main():
     try:
         recommendation_system = RecommendationSystem(
             data_path=Path('twitter_dataset.csv')