from flask import Flask, render_template, request, send_file, redirect import json import requests from urllib.request import urlopen from bs4 import BeautifulSoup import re import joblib from nltk.sentiment import SentimentIntensityAnalyzer import matplotlib.pyplot as plt from matplotlib import font_manager from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas plt.switch_backend('Agg') import numpy as np from io import BytesIO import base64 from dotenv import load_dotenv import os import threading import nltk nltk.download('vader_lexicon') def load_classifier_and_vectorizer(): global classifier, vectorizer classifier = joblib.load('./data/sentiment_classifier.pkl') vectorizer = joblib.load('./data/sentiment_vectorizer.pkl') # Start loading in a separate thread loading_thread = threading.Thread(target=load_classifier_and_vectorizer) loading_thread.start() app = Flask(__name__) load_dotenv() # Function to get Google search links def search_links(query): base_url = "https://www.googleapis.com/customsearch/v1" api_key = os.environ['api_key'] cx = os.environ['cx'] params = { 'q': query, 'key': api_key, 'cx': cx } Err = "" response = requests.get(base_url, params=params) if response.status_code == 429 or response.status_code == 403: Err = "Daily API limit reached" print("API limit reached.") return [] results = response.json().get('items', []) # Extract title and link from the results and create a list of dictionaries links_with_titles = [[{item.get('title', ''): item.get('link', '')} for item in results], Err] return links_with_titles # Function to fetch reviews def get_reviews(movie_url, review_type): page = urlopen(movie_url) html_bytes = page.read() html = html_bytes.decode("utf-8") emsId = re.search(r'"emsId":\s*"([^"]+)"', html, re.IGNORECASE).group(1) title = re.search(r'"titleName":\s*"([^"]+)"', html, re.IGNORECASE).group(1) title_type = re.search(r'"titleType":\s*"([^"]+)"', html, re.IGNORECASE).group(1) vanity = re.search(r'"vanity":\s*"([^"]+)"', html, re.IGNORECASE).group(1) s_no = "" if title_type == "Tv": s_no = re.search(r'"tvSeason":\s*"([^"]+)"', html, re.IGNORECASE).group(1) poster_page_url = f'https://www.rottentomatoes.com/{"tv" if title_type == "Tv" else "m"}/{vanity}/{f"{s_no}/" if s_no else ""}reviews' poster_page = urlopen(poster_page_url) soup = BeautifulSoup(poster_page, 'html.parser') poster_link = soup.find('img', {'data-qa': 'sidebar-poster-img'}) if poster_link: poster_link = poster_link.get('src') else: poster_link = '/data/favicon.ico' hasNextPage = True reviews = [] after = "" while hasNextPage and len(reviews) < 100: url = f'https://www.rottentomatoes.com/napi/{"season" if title_type == "Tv" else "movie"}/{emsId}/reviews/{review_type}?after={after}' response = urlopen(url).read().decode('utf-8') response_object = json.loads(response) responseArray = response_object['reviews'] for review in responseArray: reviews.append(review['quote']) hasNextPage = response_object['pageInfo']['hasNextPage'] if hasNextPage: after = response_object['pageInfo']['endCursor'] global show_info show_info = [title, poster_link] return reviews # Function to clean text def clean(input_string): cleaned_string = re.sub(r'\s+', ' ', input_string) return cleaned_string.strip() def analyze_sentiment(reviews): loading_thread.join() # Clean and analyze reviews cleaned_reviews = [clean(phrase) for phrase in reviews] new_vector = vectorizer.transform(cleaned_reviews) predictions = classifier.predict(new_vector) # Return predictions return predictions # Function to get polarity scores using NLTK def get_polarity_scores(reviews): cleaned_reviews = [clean(phrase) for phrase in reviews] sia = SentimentIntensityAnalyzer() scores_list = [] for entry in reviews: scores = sia.polarity_scores(entry) scores_list.append(scores) return scores_list #Load Font custom_font_path = 'data/HPSimplified.ttf' font_manager.fontManager.addfont(custom_font_path) # Function to plot a bar chart def plot_bar(data, title): # Extracting values for each sentiment neg_values = [entry['neg'] for entry in data] neu_values = [entry['neu'] for entry in data] pos_values = [entry['pos'] for entry in data] compound_values = [entry['compound'] for entry in data] # Creating positions for bars positions = range(len(data)) # Plotting the stacked bars fig, ax = plt.subplots(figsize=(20,10)) width = 0.7 # Bottom bar (red) ax.bar(positions, neg_values, width=width, color='#961e1e', label='Negative') # Middle bar (gray) ax.bar(positions, neu_values, width=width, bottom=neg_values, color='#999', label='Neutral') # Top bar (green) ax.bar(positions, pos_values, width=width, bottom=np.array(neg_values) + np.array(neu_values), color='#015501', label='Positive') # Adding labels and title plt.xlabel('Reviews', fontdict={'fontname': 'HP Simplified', 'fontsize': 30, 'weight':'bold', 'color':'#fff'}, labelpad=20) plt.ylabel('Polarity Scores', fontdict={'fontname': 'HP Simplified', 'fontsize': 30, 'weight':'bold', 'color':'#fff'}, labelpad=20) plt.title(title, fontdict={'fontname': 'HP Simplified', 'fontsize': 40, 'weight':'bold', 'color':'#fff'}, pad=20) plt.yticks(fontname='HP Simplified', fontsize=24, color="#fff") # Remove X-axis labels ax.set_xticks([]) ax.set_facecolor('#000') # Adding legend legend = plt.legend(loc='upper right', bbox_to_anchor=(1.25, 1), prop={'family': 'HP Simplified', 'size': 32}) average = np.mean(compound_values) text = f'Average Compound Score' avg_text = f'\n{average:.2f}' plt.text(0.91, 0.66, text, fontsize=20, fontname='HP Simplified', weight="bold", color="white", ha='center', va='center', transform=fig.transFigure) plt.text(0.91, 0.64, avg_text, fontsize=36, fontname='HP Simplified', weight="bold", color="white", ha='center', va='center', transform=fig.transFigure) # Set the background color fig.set_facecolor('#1e1e1e') plt.tight_layout() # Convert the Matplotlib figure to a Flask response output = BytesIO() FigureCanvas(fig).print_png(output) plt.close() return base64.b64encode(output.getvalue()).decode('utf-8') def plot_pie(data, title): unique, counts = np.unique(data, return_counts=True) explode = () if len(unique) > 1: explode = (0, 0.1) else: explode = (0,) #add colors colors = [] if unique[0] == "Negative": colors = ['#961e1e','#024d0f'] elif unique[0] == "Positive": colors = ['#024d0f','#961e1e'] fig, ax = plt.subplots() fig.patch.set_facecolor('#1e1e1e') ax.pie(counts, explode=explode, labels=unique, colors=colors, autopct='%1.1f%%', shadow=True, startangle=90) # Set custom fonts for title, labels, and autopct ax.set_title(title, fontdict={'family': 'HP Simplified', 'color': 'White', 'weight': 'bold', 'size': 28}) for text in ax.texts: text.set_fontfamily('HP Simplified') text.set_fontsize('20') text.set_fontweight('bold') text.set_color('White') # Equal aspect ratio ensures that pie is drawn as a circle ax.axis('equal') plt.tight_layout() output = BytesIO() FigureCanvas(fig).print_png(output) plt.close() return base64.b64encode(output.getvalue()).decode('utf-8') @app.route('/data/bg.png') def bg(): return send_file('data/bg.png') @app.route('/data/search-icon.svg') def search_icon(): return send_file('data/search-icon.svg') @app.route('/data/favicon.ico') def favicon(): return send_file('data/favicon.ico') @app.route('/data/HPSimplified.ttf') def font(): return send_file('data/HPSimplified.ttf') @app.route('/data/search.gif') def searchload(): return send_file('./data/search.gif') @app.route('/') def home(): return render_template('index.html') @app.route('/search') def search(): query = request.args.get('query') if query: search_data = search_links(query) if len(search_data[0]) == 0: search_data[1] = "No links found for your query" return render_template('search.html', links=search_data[0], Err=search_data[1]) else: return redirect('/') @app.route('/review') def review(): url = request.args.get('url') if url: url = url user = get_reviews(url, 'user') critic = get_reviews(url, 'all') user_pie_img, user_bar_img, critic_pie_img, critic_bar_img = '', '', '', '' if len(user) > 0: user_polarity = get_polarity_scores(user) user_reviews = analyze_sentiment(user) user_bar = plot_bar(user_polarity, "User Sentiments") user_pie = plot_pie(user_reviews, "User Reviews") user_bar_img = f'User Reviews Bar Plot' user_pie_img = f'User Reviews Pie Plot' else: user_bar_img = "No User Reviews Found" user_pie_img = "No User Reviews Found" if len(critic) > 0: critic_polarity = get_polarity_scores(critic) critic_reviews = analyze_sentiment(critic) critic_bar = plot_bar(critic_polarity, "Critic Sentiments") critic_pie = plot_pie(critic_reviews, "Critic Reviews") critic_bar_img = f'Critic Reviews Bar Plot' critic_pie_img = f'Critic Reviews Pie Plot' else: critic_bar_img = "No Critic Reviews Found" critic_pie_img = "No Critic Reviews Found" return render_template('review.html', user_bar=user_bar_img, user_pie=user_pie_img, critic_bar=critic_bar_img, critic_pie=critic_pie_img, title=show_info[0], url=url, poster=show_info[1]) else: return redirect('/') @app.errorhandler(404) def not_found_error(error): return render_template('404.html'), 404 if __name__ == '__main__': app.run(host='0.0.0.0', port=3000, debug=True)