import streamlit as st
import pickle
import pandas as pd
import joblib
from preprocessText import preprocess
from apiSearch import get_metadata


# Load the model
# NOTE: joblib.load deserializes with pickle, which can execute arbitrary code
# while loading; only point this at a model file from a trusted source.
# The relative path is resolved against the process's working directory.
# NOTE(review): this runs at module level, so it executes every time the
# script is (re)run -- consider caching the loaded model; confirm.
model = joblib.load('85pct.pkl')

# Category display name -> numeric category id.
# Insertion order matters: main() lists the keys in this order in the
# "Category" select box and looks up the default entry by position.
categories = {
    'Film & Animation': 1,
    'Autos & Vehicles': 2,
    'Music': 10,
    'Pets & Animals': 15,
    'Sports': 17,
    'Short Movies': 18,
    'Travel & Events': 19,
    'Gaming': 20,
    'Videoblogging': 21,
    'People & Blogs': 22,
    'Comedy': 23,
    'Entertainment': 24,
    'News & Politics': 25,
    'Howto & Style': 26,
    'Education': 27,
    'Science & Technology': 28,
    'Nonprofits & Activism': 29,
}

# Create the Streamlit web application
def main():
    """Render the trend-prediction form and display the model's verdict.

    When a URL is entered it is looked up with ``get_metadata`` and the
    returned title, duration and category id pre-fill the form. If those
    fields cannot be read, or the category id is not one of ``categories``,
    a warning is shown and the corresponding defaults are kept so the user
    can fill the form in manually. Pressing "Predict" passes the current
    form values to ``predict_trend`` and reports the result.
    """
    st.title("YouTube Trend Prediction")
    st.write("Enter the video details below:")

    category_names = list(categories)
    category_ids = list(categories.values())

    # Form defaults, used when no URL is given or its metadata is unusable.
    getTitle = ""
    getDuration = 0.0
    getCategory = category_ids[0]

    # Input fields
    url = st.text_input("URL")
    if url:
        metadata = get_metadata(url)
        try:
            fetched_title = metadata['title'].iloc[0]
            # number_input requires value and min_value to share a numeric
            # type, so coerce whatever the lookup returned to a plain float.
            fetched_duration = float(metadata['duration'].iloc[0])
            fetched_category = int(metadata['category_id'].iloc[0])
        except (KeyError, IndexError, TypeError, ValueError):
            # Missing column, empty result, or a non-numeric field.
            st.warning(
                "Could not read the video details for this URL; "
                "please enter them manually."
            )
        else:
            getTitle = fetched_title
            # Keep the default at or above the widget's min_value of 0.0.
            getDuration = max(0.0, fetched_duration)
            if fetched_category in category_ids:
                getCategory = fetched_category
            else:
                # An unknown id would make .index() below raise ValueError.
                st.warning(
                    f"Category id {fetched_category} is not in the supported "
                    "list; please choose a category manually."
                )

    title = st.text_input("Title", value=getTitle)
    duration = st.number_input(
        "Duration (in minutes)", min_value=0.0, value=getDuration
    )
    category = st.selectbox(
        "Category", category_names, index=category_ids.index(getCategory)
    )

    # Convert category to category ID
    categoryId = categories[category]

    # Predict button
    if st.button("Predict"):
        # Perform prediction
        prediction = predict_trend(title, duration, categoryId)

        if prediction[0] == 1:
            st.success("This video is predicted to be a trend!")
        else:
            st.info("This video is predicted not to be a trend.")

# Function to make predictions
def predict_trend(title, duration, category_id):
    """Run the loaded model on the features of a single video.

    A one-row DataFrame is built with the columns ``cleanTitle`` (the
    output of ``preprocess(title)`` joined with spaces), ``titleLength``
    (``len(title)``), ``categoryId`` (the id as a string, cast to the
    pandas ``category`` dtype) and ``duration`` (cast to ``float64``).

    The result of ``model.predict_proba`` for that row is printed to
    stdout, and the result of ``model.predict`` is returned.
    """
    cleaned_title = ' '.join(preprocess(title))

    # Both values are stringified first and then cast by pandas.
    features = pd.DataFrame({
        'cleanTitle': [cleaned_title],
        'titleLength': [len(title)],
        'categoryId': [str(category_id)],
        'duration': [str(duration)],
    }).astype({'categoryId': 'category', 'duration': 'float64'})

    print(model.predict_proba(features))
    return model.predict(features)

# Build the page only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()