# NOTE(review): removed non-code residue ("Spaces: / Sleeping / Sleeping") —
# this looks like scraped Hugging Face Spaces page chrome, not part of the script.
import ast
import os
import pickle
import random
from datetime import datetime, timedelta

import gradio as gr
import pandas as pd
from langchain_core.messages import HumanMessage, SystemMessage
from langchain_google_genai import ChatGoogleGenerativeAI
from mlxtend.preprocessing import TransactionEncoder
from pytrends.request import TrendReq
def convert_keywords_to_list(keywords_str):
    """Parse a stringified Python list of keywords; malformed input yields []."""
    parsed = []
    try:
        parsed = ast.literal_eval(keywords_str)
    except (SyntaxError, ValueError):
        # Keep the empty-list fallback for unparseable CSV cells.
        pass
    return parsed
def convert_scores_to_list(scores_float):
    """Parse a stringified list of trend scores; malformed input yields []."""
    try:
        parsed = ast.literal_eval(scores_float)
    except (SyntaxError, ValueError):
        # Unparseable CSV cell -> empty score list.
        return []
    return parsed
# ---- One-time data loading & model setup (module level) ----
video_df = pd.read_csv('video_df_complete.csv')
video_df['keywords'] = video_df['keywords'].apply(convert_keywords_to_list)
video_df['trend_scores'] = video_df['trend_scores'].apply(convert_scores_to_list)
# Mean trend score per video; empty score lists map to 0 instead of dividing by zero.
video_df['total_score'] = video_df['trend_scores'].apply(lambda x: sum(x) / len(x) if len(x) > 0 else 0)

# One transaction (list of keywords) per video.
# .tolist() replaces the original row-by-row iterrows/append loop.
transactions = video_df['keywords'].tolist()

# One-hot encode the keyword transactions for the feature matrix.
te = TransactionEncoder()
te_ary = te.fit(transactions).transform(transactions)
df = pd.DataFrame(te_ary, columns=te.columns_)
merged_df = pd.concat([df, video_df['total_score'], video_df['engagement_rate']], axis=1)

rules = pd.read_csv('association_rules.csv')
# NOTE(review): eval() on file contents is dangerous if the CSV is ever
# user-supplied. It is kept here because the columns store frozenset reprs,
# which ast.literal_eval cannot parse — consider serializing plain lists instead.
rules['antecedents'] = rules['antecedents'].apply(lambda x: list(eval(x)))
rules['consequents'] = rules['consequents'].apply(lambda x: list(eval(x)))

model_filename = os.path.join('regression_model_final.pkl')
# NOTE(review): pickle.load executes arbitrary code from the file — acceptable
# only because the model ships with the app; never load untrusted pickles.
with open(model_filename, 'rb') as file:
    model = pickle.load(file)

# Gemini client used by generate_title().
llm = ChatGoogleGenerativeAI(model="gemini-1.5-pro", convert_system_message_to_human=True)
def custom_predict(keywords, total_score):
    """
    Custom prediction function using the trained linear regression model.

    Args:
        keywords: A list of keywords.
        total_score: The total trend score.

    Returns:
        The predicted engagement rate.
    """
    # Start from an all-zero feature row whose columns mirror the training frame.
    features = {col: 0 for col in merged_df.columns}
    # One-hot the requested keywords; keywords unseen in training are ignored.
    for kw in keywords:
        if kw in features:
            features[kw] = 1
    features['total_score'] = total_score
    row = pd.DataFrame([features])
    # engagement_rate is the target column, so it is not a model input.
    row = row.drop('engagement_rate', axis=1)
    # Model output is 2-D (n_samples x 1); unwrap the single scalar.
    return model.predict(row)[0][0]
def generate_keyword_scores(keywords, base_rate=4.5):
    """
    Generate pseudo-random fallback trend scores, one per keyword.

    Used when the Google Trends lookup fails: scores are drawn uniformly
    around a scaled base rate and capped at 100.

    Args:
        keywords: Iterable of keywords; one score is produced per keyword.
        base_rate: Base rate scaled by 10 onto the 0-100 trends range.
            Default 4.5 preserves the original hard-coded behavior.

    Returns:
        List of floats rounded to 2 decimals, each within
        [scaled_rate * 0.7, min(100, scaled_rate * 1.2)].
    """
    scaled_rate = min(100, base_rate * 10)
    return [
        round(random.uniform(scaled_rate * 0.7, min(100, scaled_rate * 1.2)), 2)
        for _ in keywords
    ]
def get_google_trends_score(keywords, end_date, days_back=7):
    """
    Fetch Google Trends scores (YouTube searches, Indonesia) for keywords.

    Parameters:
        keywords (list): Keywords to analyse; must be non-empty.
        end_date (datetime): End date of the trend window.
        days_back (int): Days before end_date defining the window (default: 7).

    Returns:
        pd.DataFrame: Interest-over-time data with one column per keyword.
            On any failure a single-row fallback frame of synthetic scores is
            returned — still keyed by keyword so callers can look columns up.
    """
    try:
        if not keywords:
            raise ValueError("Daftar kata kunci tidak boleh kosong.")
        pytrends = TrendReq()
        start_date = end_date - timedelta(days=days_back)
        timeframe = f"{start_date.strftime('%Y-%m-%d')} {end_date.strftime('%Y-%m-%d')}"
        pytrends.build_payload(keywords, timeframe=timeframe, geo='ID', gprop='youtube')
        trends_df = pytrends.interest_over_time()
        if 'isPartial' in trends_df.columns:
            trends_df = trends_df.drop(columns=['isPartial'])
        return trends_df
    except Exception:
        # Bug fix: the fallback used to build a DataFrame straight from a list
        # of floats, yielding a single unnamed column — so callers checking
        # `keyword in trends_df.columns` always missed and scored everything 0.
        # Key the synthetic scores by keyword instead.
        return pd.DataFrame([dict(zip(keywords, generate_keyword_scores(keywords)))])
def generate_title(keyword, category):
    """Generate a YouTube video title for a keyword via the Gemini LLM."""
    # Only the Gaming category is backed by the association rules / model.
    if category != 'Gaming':
        return "Category belum supported."

    recommendation = recommend_keyword(keyword)
    if not recommendation:
        return "No recommendations found."

    system_msg = SystemMessage(
        content=f"Kamu adalah seorang penulis judul video youtube"
                f"Kamu akan diberikan beberapa buah keyword yang wajib digunakan untuk judul"
                f"Buat judul yang semenarik mungkin untuk memberikan viewer rasa suka"
                f"Cukup keluarkan satu judul saja dalam satu kalimat"
                f"Jangan gunnakan formatting seperti '\n' atau hal lainnya. Gunakan saja raw string"
                f"Boleh pake emoji"
    )
    human_msg = HumanMessage(
        content=f"keyword yang digunakan adalah sebagai berikut: {recommendation}"
                f"Total jumlah keyword adalah: {len(recommendation)}"
                f"Video memiliki kategori: {category}"
    )
    result = llm([system_msg, human_msg])
    return result.content
def recommend_keyword(keyword):
    """
    Recommend the keyword combination with the highest predicted engagement.

    Looks the seed keyword up in the association rules, builds candidate
    combinations from the top-5 rules by lift, scores each via Google Trends
    plus the regression model, and returns the best one.

    Args:
        keyword: Seed keyword to match against rule antecedents/consequents.

    Returns:
        The best-scoring list of keywords, or [] when no rule matches.
    """
    # Rules where the seed keyword appears on either side.
    mask = (rules['antecedents'].astype(str).str.contains(keyword)
            | rules['consequents'].astype(str).str.contains(keyword))
    top_5_rules = rules[mask].sort_values(by='lift', ascending=False).head(5)

    # Candidate combinations: seed keyword plus each rule's consequents.
    # (The original also computed an `antecedents` local that was never used.)
    recommendation = [[keyword] + list(row['consequents'])
                      for _, row in top_5_rules.iterrows()]
    if not recommendation:
        return []

    engages = []
    for rec in recommendation:
        trends_df = get_google_trends_score(rec, datetime.now())
        # Mean trend score over the combination; keywords missing from the
        # trends frame count as 0. (`kw` no longer shadows the parameter.)
        batch_scores = [
            round(trends_df[kw].mean(), 2) if kw in trends_df.columns else 0
            for kw in rec
        ]
        avg_score = sum(batch_scores) / len(batch_scores)
        engages.append(custom_predict(rec, avg_score))

    # Combination with the highest predicted engagement rate.
    return recommendation[engages.index(max(engages))]
# NOTE(review): 'catergory' presumably matches a misspelled column in
# video_df_complete.csv — confirm against the CSV before renaming it.
distinct_categories = video_df['catergory'].unique()

keyword_input = gr.Textbox(label="Enter a keyword")
category_input = gr.Dropdown(label="Select a category", choices=list(distinct_categories))

iface = gr.Interface(
    fn=generate_title,
    inputs=[keyword_input, category_input],
    outputs=gr.Textbox(label="Recommendations"),
    title="Title Recommendation",
    description="Do'akan saya langgeng sm Ei"
)
iface.launch()