"""Streamlit app that predicts whether a YouTube video will trend.

The user can paste a video URL to pre-fill the form from fetched metadata,
or type the title, duration and category by hand. Pressing "Predict" feeds
those values to a pre-trained model loaded from ``85pct.pkl``.
"""

import pickle

import joblib
import pandas as pd
import streamlit as st

from apiSearch import get_metadata
from preprocessText import preprocess

# Load the model.
# NOTE: joblib.load unpickles the file, which can execute arbitrary code;
# only ever point this at a trusted model file.
model = joblib.load('85pct.pkl')

# Define the categories (display name -> category ID).
categories = {
    'Film & Animation': 1,
    'Autos & Vehicles': 2,
    'Music': 10,
    'Pets & Animals': 15,
    'Sports': 17,
    'Short Movies': 18,
    'Travel & Events': 19,
    'Gaming': 20,
    'Videoblogging': 21,
    'People & Blogs': 22,
    'Comedy': 23,
    'Entertainment': 24,
    'News & Politics': 25,
    'Howto & Style': 26,
    'Education': 27,
    'Science & Technology': 28,
    'Nonprofits & Activism': 29,
}

# Category pre-selected when no URL is given or the fetched ID is unknown.
_DEFAULT_CATEGORY_ID = 1


def _metadata_defaults(metadata):
    """Return ``(title, duration, category_id)`` from the first metadata row.

    ``metadata`` must support ``metadata[column].iloc[0]`` for the columns
    ``title``, ``duration`` and ``category_id``.

    Raises:
        KeyError, IndexError, TypeError, ValueError: if a column is missing,
            there is no first row, or a value cannot be converted.
    """
    title = str(metadata['title'].iloc[0])
    # The duration widget uses float bounds with min_value=0.0, so hand it a
    # plain non-negative float.
    duration = max(0.0, float(metadata['duration'].iloc[0]))
    category_id = int(metadata['category_id'].iloc[0])
    return title, duration, category_id


# Create the Streamlit web application
def main():
    """Render the input form and show the prediction when requested."""
    st.title("YouTube Trend Prediction")
    st.write("Enter the video details below:")

    default_title = ""
    default_duration = 0.0
    default_category_id = _DEFAULT_CATEGORY_ID

    # Input fields
    url = st.text_input("URL")
    if url:
        metadata = get_metadata(url)
        try:
            default_title, default_duration, default_category_id = (
                _metadata_defaults(metadata)
            )
        except (KeyError, IndexError, TypeError, ValueError):
            # Keep the form usable with manual input instead of crashing.
            st.warning(
                "Could not read the video details from this URL; "
                "please enter them manually."
            )

    category_ids = list(categories.values())
    if default_category_id not in category_ids:
        # The fetched ID is not one of the known categories; list.index would
        # raise ValueError, so fall back to the default entry.
        default_category_id = _DEFAULT_CATEGORY_ID

    title = st.text_input("Title", value=default_title)
    duration = st.number_input(
        "Duration (in minutes)", min_value=0.0, value=default_duration
    )
    category = st.selectbox(
        "Category",
        list(categories),
        index=category_ids.index(default_category_id),
    )

    # Convert category to category ID
    category_id = categories[category]

    # Predict button
    if st.button("Predict"):
        # Perform prediction
        prediction = predict_trend(title, duration, category_id)
        if prediction[0] == 1:
            st.success("This video is predicted to be a trend!")
        else:
            st.info("This video is predicted not to be a trend.")


# Function to make predictions
def predict_trend(title, duration, category_id):
    """Run the loaded model on a single video and return its prediction.

    Args:
        title: Raw video title; it is passed through ``preprocess`` and its
            original length is used as the ``titleLength`` feature.
        duration: Video duration; stored as a float64 column.
        category_id: Category ID; stored as a string-valued categorical.

    Returns:
        Whatever ``model.predict`` returns for the one-row frame; the caller
        reads element ``0``.
    """
    # Join the preprocessed words back into a string
    clean_title = ' '.join(preprocess(title))

    # Prepare the input data
    data = pd.DataFrame({
        'cleanTitle': [clean_title],
        'titleLength': [len(title)],
        'categoryId': [str(category_id)],
        'duration': [float(duration)],
    })
    data['categoryId'] = data['categoryId'].astype('category')
    data['duration'] = data['duration'].astype('float64')

    # Make the prediction (class probabilities are echoed to stdout)
    print(model.predict_proba(data))
    return model.predict(data)


if __name__ == "__main__":
    main()