testFIle / app.py
kim368882's picture
Update app.py
8c7a8b8 verified
import base64
import json
import uuid
import pandas as pd
import openpyxl
from openpyxl.chart import BarChart, Reference, PieChart
from openpyxl.chart.label import DataLabelList
from openpyxl.utils.dataframe import dataframe_to_rows
from datetime import datetime
import matplotlib.pyplot as plt
import gradio as gr
import tempfile
from huggingface_hub import InferenceClient, hf_hub_url
import os
import matplotlib
matplotlib.use('Agg')
# Read excel data for review analysis
def read_excel_data(file):
df = pd.read_excel(file, usecols="A, B, C, D, E", skiprows=1,
names=["ID", "Review Date", "Option", "Review", "ReviewScore"], engine='openpyxl')
df['Review Date'] = pd.to_datetime(df['Review Date']).dt.tz_localize(None).dt.date
df['Year-Month'] = df['Review Date'].astype(str).str.slice(0, 7)
df['Year'] = df['Review Date'].astype(str).str.slice(0, 4)
df['Month'] = df['Review Date'].astype(str).str.slice(5, 7)
df['Day'] = df['Review Date'].astype(str).str.slice(8, 10)
df['Option'] = df['Option'].astype(str) # Ensure Option column is treated as string
df['Option1'] = df['Option'].str.split(" / ").str[0] # 1์ฐจ ์˜ต์…˜๋งŒ ์ถ”์ถœ
df['Review Length'] = df['Review'].str.len() # ์ถ”๊ฐ€๋œ ๋ถ€๋ถ„: ๋ฆฌ๋ทฐ ๊ธธ์ด ๊ณ„์‚ฐ
return df
# Analyze review data
def analyze_data(df):
monthly_data = df.groupby('Year-Month').size().reset_index(name='Counts')
yearly_data = df.groupby('Year').size().reset_index(name='Counts')
return monthly_data, yearly_data
def analyze_monthly_data_for_year(df, selected_year):
monthly_data_for_year = df[df['Year'] == selected_year].groupby('Month').size().reset_index(name='Counts')
all_months = pd.DataFrame([f"{m:02d}" for m in range(1, 13)], columns=['Month'])
monthly_trend_for_year = pd.merge(all_months, monthly_data_for_year, on='Month', how='left')
monthly_trend_for_year['Counts'] = monthly_trend_for_year['Counts'].fillna(0).astype(int)
return monthly_trend_for_year
def analyze_daily_data(df, selected_year):
start_date = datetime.strptime(f"{selected_year}-01-01", "%Y-%m-%d").date()
end_date = datetime.strptime(f"{selected_year}-12-31", "%Y-%m-%d").date()
date_range = pd.date_range(start=start_date, end=end_date).date
daily_data = df[df['Year'] == selected_year].groupby('Review Date').size().reset_index(name='Counts')
daily_data['Review Date'] = pd.to_datetime(daily_data['Review Date']).dt.date
all_dates_df = pd.DataFrame(date_range, columns=['Review Date'])
all_dates_df['Review Date'] = pd.to_datetime(all_dates_df['Review Date']).dt.date
merged_data = pd.merge(all_dates_df, daily_data, on='Review Date', how='left')
merged_data['Counts'] = merged_data['Counts'].fillna(0).astype(int)
return merged_data
def analyze_option_data(df):
data_counts = df['Option1'].value_counts().reset_index()
data_counts.columns = ['Option', 'Counts']
total_counts = data_counts['Counts'].sum()
data_counts['Percentage'] = (data_counts['Counts'] / total_counts * 100).round(2)
data_counts.sort_values(by='Counts', ascending=False, inplace=True)
return data_counts
def analyze_option_review_data(df):
df["Option1"] = df["Option"].apply(lambda x: x.split(" / ")[0] if isinstance(x, str) else x)
df["Option2"] = df["Option"].apply(
lambda x: x.split(" / ")[1] if isinstance(x, str) and len(x.split(" / ")) > 1 else "")
review_counts = df.groupby(["Option1", "Option2"])["ReviewScore"].value_counts().unstack(fill_value=0)
review_counts["Total"] = review_counts.sum(axis=1)
option1_counts = df.groupby("Option1")["Option"].count()
option2_counts = df.groupby(["Option1", "Option2"])["Option"].count()
review_counts["์˜ต์…˜๋ช…(1์ฐจ)๊ฑด์ˆ˜"] = review_counts.index.get_level_values("Option1").map(option1_counts)
review_counts["์˜ต์…˜๋ช…(2์ฐจ)๊ฑด์ˆ˜"] = option2_counts
review_counts.sort_values(by=["์˜ต์…˜๋ช…(1์ฐจ)๊ฑด์ˆ˜", "์˜ต์…˜๋ช…(2์ฐจ)๊ฑด์ˆ˜"], ascending=[False, False], inplace=True)
return review_counts
def analyze_option_data_for_year(df, selected_year):
df_year = df[df['Year'] == selected_year]
data_counts = df_year['Option1'].value_counts().reset_index()
data_counts.columns = ['Option', 'Counts']
total_counts = data_counts['Counts'].sum()
data_counts['Percentage'] = (data_counts['Counts'] / total_counts * 100).round(2)
data_counts.sort_values(by='Counts', ascending=False, inplace=True)
return data_counts
def analyze_option_review_data_for_year(df, selected_year):
df_year = df[df['Year'] == selected_year].copy()
df_year.loc[:, "Option1"] = df_year["Option"].apply(lambda x: x.split(" / ")[0] if isinstance(x, str) else x)
df_year.loc[:, "Option2"] = df_year["Option"].apply(
lambda x: x.split(" / ")[1] if isinstance(x, str) and len(x.split(" / ")) > 1 else "")
review_counts = df_year.groupby(["Option1", "Option2"])["ReviewScore"].value_counts().unstack(fill_value=0)
review_counts["Total"] = review_counts.sum(axis=1)
option1_counts = df_year.groupby("Option1")["Option"].count()
option2_counts = df_year.groupby(["Option1", "Option2"])["Option"].count()
review_counts["์˜ต์…˜๋ช…(1์ฐจ)๊ฑด์ˆ˜"] = review_counts.index.get_level_values("Option1").map(option1_counts)
review_counts["์˜ต์…˜๋ช…(2์ฐจ)๊ฑด์ˆ˜"] = option2_counts
review_counts.sort_values(by=["์˜ต์…˜๋ช…(1์ฐจ)๊ฑด์ˆ˜", "์˜ต์…˜๋ช…(2์ฐจ)๊ฑด์ˆ˜"], ascending=[False, False], inplace=True)
return review_counts
def extract_longest_reviews(df):
longest_reviews = df.groupby('ReviewScore').apply(
lambda x: x.nlargest(100, 'Review Length', keep='all')).reset_index(drop=True)
return longest_reviews.drop(
columns=['Review Length', 'Year-Month', 'Year', 'Month', 'Day', 'Option1', 'Option2']) # ์‚ญ์ œ๋œ ์—ด๋“ค
def save_to_excel(original_data, monthly_counts, yearly_counts, monthly_trend, daily_counts, option_counts,
review_counts, selected_option_counts, selected_review_counts, longest_reviews):
wb = openpyxl.Workbook()
# ์›๋ณธ ๋ฆฌ๋ทฐ ๋ฐ์ดํ„ฐ ์‹œํŠธ ์ถ”๊ฐ€ ๋ฐ ์ด๋ฆ„ ๋ณ€๊ฒฝ
ws_original = wb.active
ws_original.title = "์›๋ณธ๋ฆฌ๋ทฐ๋ฐ์ดํ„ฐ"
for r in dataframe_to_rows(original_data, index=False, header=True):
ws_original.append(r)
ws_original.sheet_properties.tabColor = "000000" # ๊ฒ€์€์ƒ‰
# ๋ฆฌ๋ทฐ๋ถ„์„ ์ถ”์ด ์‹œํŠธ ์ถ”๊ฐ€
ws1 = wb.create_sheet(title="์ „์ฒด์›”๋ณ„์ถ”์ด(๋ฆฌ๋ทฐ๋ถ„์„)")
for r in dataframe_to_rows(monthly_counts, index=False, header=True):
ws1.append(r)
chart1 = BarChart()
chart1.type = "col"
chart1.style = 10
chart1.title = "Monthly Review Trends"
chart1.y_axis.title = 'Review Counts'
chart1.x_axis.title = 'Year-Month'
data1 = Reference(ws1, min_col=2, min_row=1, max_row=ws1.max_row, max_col=2)
cats1 = Reference(ws1, min_col=1, min_row=2, max_row=ws1.max_row)
chart1.add_data(data1, titles_from_data=True)
chart1.set_categories(cats1)
chart1.width = 30
chart1.height = 15
ws1.add_chart(chart1, "C2")
ws1.sheet_properties.tabColor = "FFA500" # ์ฃผํ™ฉ์ƒ‰
# ๋…„๋„๋ณ„ ๋ฆฌ๋ทฐ ๋ถ„์„ ์‹œํŠธ ์ถ”๊ฐ€
ws2 = wb.create_sheet(title="๋…„๋„๋ณ„์ถ”์ด(๋ฆฌ๋ทฐ๋ถ„์„)")
for r in dataframe_to_rows(yearly_counts, index=False, header=True):
ws2.append(r)
chart2 = BarChart()
chart2.type = "col"
chart2.style = 10
chart2.title = "Yearly Review Trends"
chart2.y_axis.title = 'Review Counts'
chart2.x_axis.title = 'Year'
data2 = Reference(ws2, min_col=2, min_row=1, max_row=ws2.max_row, max_col=2)
cats2 = Reference(ws2, min_col=1, min_row=2, max_row=ws2.max_row)
chart2.add_data(data2, titles_from_data=True)
chart2.set_categories(cats2)
chart2.width = 30
chart2.height = 15
ws2.add_chart(chart2, "C2")
ws2.sheet_properties.tabColor = "FFA500" # ์ฃผํ™ฉ์ƒ‰
# ์›”๋ณ„ ๋ฆฌ๋ทฐ ๋ถ„์„ ์‹œํŠธ ์ถ”๊ฐ€
ws3 = wb.create_sheet(title="์„ ํƒํ•œ ๋…„๋„ ์›”๋ณ„์ถ”์ด(๋ฆฌ๋ทฐ๋ถ„์„)")
for r in dataframe_to_rows(monthly_trend, index=False, header=True):
ws3.append(r)
chart3 = BarChart()
chart3.type = "col"
chart3.style = 10
chart3.title = "Monthly Trends for Selected Year"
chart3.y_axis.title = 'Review Counts'
chart3.x_axis.title = 'Month'
data3 = Reference(ws3, min_col=2, min_row=1, max_row=ws3.max_row, max_col=2)
cats3 = Reference(ws3, min_col=1, min_row=2, max_row=ws3.max_row)
chart3.add_data(data3, titles_from_data=True)
chart3.set_categories(cats3)
chart3.width = 30
chart3.height = 15
ws3.add_chart(chart3, "C2")
ws3.sheet_properties.tabColor = "FFA500" # ์ฃผํ™ฉ์ƒ‰
# ์ผ๋ณ„ ๋ฆฌ๋ทฐ ๋ถ„์„ ์‹œํŠธ ์ถ”๊ฐ€
ws4 = wb.create_sheet(title="์„ ํƒํ•œ ๋…„๋„ ์ผ๋ณ„์ถ”์ด(๋ฆฌ๋ทฐ๋ถ„์„)")
for r in dataframe_to_rows(daily_counts, index=False, header=True):
ws4.append(r)
chart4 = BarChart()
chart4.type = "col"
chart4.style = 10
chart4.title = "Daily Trends for Selected Year"
chart4.y_axis.title = 'Review Counts'
chart4.x_axis.title = 'Date'
data4 = Reference(ws4, min_col=2, min_row=2, max_row=ws4.max_row + 1, max_col=2)
cats4 = Reference(ws4, min_col=1, min_row=2, max_row=ws4.max_row + 1)
chart4.add_data(data4, titles_from_data=True)
chart4.set_categories(cats4)
chart4.width = 50
chart4.height = 15
ws4.add_chart(chart4, "C2")
ws4.sheet_properties.tabColor = "FFA500" # ์ฃผํ™ฉ์ƒ‰
# ์˜ต์…˜๋ถ„์„ ๊ฒฐ๊ณผ ์‹œํŠธ ์ถ”๊ฐ€
ws5 = wb.create_sheet(title="์˜ต์…˜๋ถ„์„ ๊ฒฐ๊ณผ(์˜ต์…˜๋ถ„์„)")
for r in dataframe_to_rows(option_counts, index=False, header=True):
ws5.append(r)
bar_chart = BarChart()
data = Reference(ws5, min_col=2, min_row=2, max_row=ws5.max_row, max_col=2)
cats = Reference(ws5, min_col=1, min_row=2, max_row=ws5.max_row, max_col=1)
bar_chart.add_data(data, titles_from_data=False)
bar_chart.set_categories(cats)
bar_chart.title = "Option Analysis (Counts)"
bar_chart.width = 40
bar_chart.height = 20
ws5.add_chart(bar_chart, "G2")
ws5.sheet_properties.tabColor = "0000FF" # ํŒŒ๋ž‘์ƒ‰
# Create pie chart
top_10 = option_counts.head(10)
for idx, row in enumerate(top_10.itertuples(), 1):
ws5.cell(row=idx + 1, column=5, value=row.Option)
ws5.cell(row=idx + 1, column=6, value=row.Counts)
others_sum = option_counts['Counts'][10:].sum()
ws5.cell(row=12, column=5, value='Others')
ws5.cell(row=12, column=6, value=others_sum)
ws5.cell(row=1, column=5, value='Option')
ws5.cell(row=1, column=6, value='Counts')
pie_chart = PieChart()
data = Reference(ws5, min_col=6, min_row=2, max_row=12)
categories = Reference(ws5, min_col=5, min_row=2, max_row=12)
pie_chart.add_data(data, titles_from_data=False)
pie_chart.set_categories(categories)
pie_chart.title = "Top 10 Options (Share)"
pie_chart.dataLabels = DataLabelList()
pie_chart.dataLabels.showPercent = True
pie_chart.width = 30
pie_chart.height = 20
ws5.add_chart(pie_chart, "G40")
# ์˜ต์…˜๋ณ„ํ‰์ ๋ถ„์„ ์‹œํŠธ ์ถ”๊ฐ€
ws6 = wb.create_sheet(title="์˜ต์…˜๋ณ„ํ‰์ ๋ถ„์„(์˜ต์…˜๋ถ„์„)")
ws6.append(
["Option1", "Option2", "Total Counts", "Score 5", "Score 4", "Score 3", "Score 2", "Score 1", "Option1 Counts",
"Option2 Counts"])
for r in dataframe_to_rows(review_counts, index=True, header=False):
ws6.append(r)
ws6.sheet_properties.tabColor = "0000FF" # ํŒŒ๋ž‘์ƒ‰
# ์„ ํƒํ•œ ๋…„๋„ ์˜ต์…˜๋ถ„์„ ๊ฒฐ๊ณผ ์‹œํŠธ ์ถ”๊ฐ€
ws7 = wb.create_sheet(title="์„ ํƒํ•œ ๋…„๋„ ์˜ต์…˜๋ถ„์„ ๊ฒฐ๊ณผ(์˜ต์…˜๋ถ„์„)")
for r in dataframe_to_rows(selected_option_counts, index=False, header=True):
ws7.append(r)
bar_chart_selected = BarChart()
data_selected = Reference(ws7, min_col=2, min_row=2, max_row=ws7.max_row, max_col=2)
cats_selected = Reference(ws7, min_col=1, min_row=2, max_row=ws7.max_row, max_col=1)
bar_chart_selected.add_data(data_selected, titles_from_data=False)
bar_chart_selected.set_categories(cats_selected)
bar_chart_selected.title = "Option Analysis for Selected Year (Counts)"
bar_chart_selected.width = 40
bar_chart_selected.height = 20
ws7.add_chart(bar_chart_selected, "G2")
ws7.sheet_properties.tabColor = "0000FF" # ํŒŒ๋ž‘์ƒ‰
# Create pie chart for selected year
top_10_selected = selected_option_counts.head(10)
for idx, row in enumerate(top_10_selected.itertuples(), 1):
ws7.cell(row=idx + 1, column=5, value=row.Option)
ws7.cell(row=idx + 1, column=6, value=row.Counts)
others_sum_selected = selected_option_counts['Counts'][10:].sum()
ws7.cell(row=12, column=5, value='Others')
ws7.cell(row=12, column=6, value=others_sum_selected)
ws7.cell(row=1, column=5, value='Option')
ws7.cell(row=1, column=6, value='Counts')
pie_chart_selected = PieChart()
data_selected_pie = Reference(ws7, min_col=6, min_row=2, max_row=12)
categories_selected_pie = Reference(ws7, min_col=5, min_row=2, max_row=12)
pie_chart_selected.add_data(data_selected_pie, titles_from_data=False)
pie_chart_selected.set_categories(categories_selected_pie)
pie_chart_selected.title = "Top 10 Options for Selected Year (Share)"
pie_chart_selected.dataLabels = DataLabelList()
pie_chart_selected.dataLabels.showPercent = True
pie_chart_selected.width = 30
pie_chart_selected.height = 20
ws7.add_chart(pie_chart_selected, "G40")
# ์„ ํƒํ•œ ๋…„๋„ ์˜ต์…˜๋ณ„ํ‰์ ๋ถ„์„ ์‹œํŠธ ์ถ”๊ฐ€
ws8 = wb.create_sheet(title="์„ ํƒํ•œ ๋…„๋„ ์˜ต์…˜๋ณ„ํ‰์ ๋ถ„์„(์˜ต์…˜๋ถ„์„)")
ws8.append(
["Option1", "Option2", "Total Counts", "Score 5", "Score 4", "Score 3", "Score 2", "Score 1", "Option1 Counts",
"Option2 Counts"])
for r in dataframe_to_rows(selected_review_counts, index=True, header=False):
ws8.append(r)
ws8.sheet_properties.tabColor = "0000FF" # ํŒŒ๋ž‘์ƒ‰
# ๋ฆฌ๋ทฐ ๋‚ด์šฉ์ด ๊ธด ๋ฆฌ๋ทฐ ์‹œํŠธ ์ถ”๊ฐ€
ws9 = wb.create_sheet(title="๊ธด ๋ฆฌ๋ทฐ ๋‚ด์šฉ")
for r in dataframe_to_rows(longest_reviews, index=False, header=True):
ws9.append(r)
ws9.sheet_properties.tabColor = "00FF00" # ์ดˆ๋ก์ƒ‰
file_path = "๋ฆฌ๋ทฐ๋ถ„์„ ๋‹ค์šด๋กœ๋“œ.xlsx"
wb.save(file_path)
return file_path
def generate_plots(df, year):
# ์ตœ๊ทผ 3๋…„์˜ ๋ฐ์ดํ„ฐ๋งŒ ์‚ฌ์šฉ
start_year = datetime.now().year - 2
recent_data = df[df['Year'].astype(int) >= start_year]
monthly_counts, yearly_counts = analyze_data(df) # Use all data for yearly counts
recent_monthly_counts, _ = analyze_data(recent_data) # Use recent data for monthly counts
monthly_trend = analyze_monthly_data_for_year(recent_data, year)
daily_counts = analyze_daily_data(recent_data, year)
option_counts = analyze_option_data(recent_data)
plot_files = []
# ์›”๋ณ„ ๋ฆฌ๋ทฐ ์ถ”์ด ๊ทธ๋ž˜ํ”„ ์ƒ์„ฑ
fig1, ax1 = plt.subplots()
ax1.plot(recent_monthly_counts['Year-Month'], recent_monthly_counts['Counts'], marker='o')
ax1.set_title('Monthly Review Trends (Recent 3 Years)', fontsize=16) # ์ œ๋ชฉ ํฐํŠธ ํฌ๊ธฐ ์„ค์ •
ax1.set_ylabel('Review Counts', fontsize=14) # y์ถ• ๋ ˆ์ด๋ธ” ํฐํŠธ ํฌ๊ธฐ ์„ค์ •
# x์ถ• ๋ ˆ์ด๋ธ”์„ 90๋„ ํšŒ์ „ํ•˜์—ฌ ํ‘œ์‹œํ•˜๊ณ  ํฐํŠธ ํฌ๊ธฐ ์ค„์ž„
ax1.tick_params(axis='x', rotation=90, labelsize=6)
tmp_file1 = tempfile.NamedTemporaryFile(delete=False, suffix=".png")
fig1.savefig(tmp_file1.name)
plot_files.append(tmp_file1.name)
fig2, ax2 = plt.subplots()
ax2.bar(yearly_counts['Year'], yearly_counts['Counts'])
ax2.set_title('Yearly Review Trends')
ax2.set_xlabel('Year')
ax2.set_ylabel('Review Counts')
tmp_file2 = tempfile.NamedTemporaryFile(delete=False, suffix=".png")
fig2.savefig(tmp_file2.name)
plot_files.append(tmp_file2.name)
fig3, ax3 = plt.subplots()
ax3.bar(monthly_trend['Month'], monthly_trend['Counts'])
ax3.set_title('Monthly Trends for Selected Year')
ax3.set_xlabel('Month')
ax3.set_ylabel('Review Counts')
tmp_file3 = tempfile.NamedTemporaryFile(delete=False, suffix=".png")
fig3.savefig(tmp_file3.name)
plot_files.append(tmp_file3.name)
fig4, ax4 = plt.subplots()
ax4.bar(daily_counts['Review Date'], daily_counts['Counts'])
ax4.set_title('Daily Trends for Selected Year')
ax4.set_xlabel('Date')
ax4.set_ylabel('Review Counts')
tmp_file4 = tempfile.NamedTemporaryFile(delete=False, suffix=".png")
fig4.savefig(tmp_file4.name)
plot_files.append(tmp_file4.name)
return plot_files
def process_file(file, year):
df = read_excel_data(file)
monthly_counts, yearly_counts = analyze_data(df)
monthly_trend = analyze_monthly_data_for_year(df, year)
daily_counts = analyze_daily_data(df, year)
option_counts = analyze_option_data(df)
review_counts = analyze_option_review_data(df)
selected_option_counts = analyze_option_data_for_year(df, year)
selected_review_counts = analyze_option_review_data_for_year(df, year)
longest_reviews = extract_longest_reviews(df)
original_data = pd.read_excel(file, sheet_name=0, engine='openpyxl') # ์ฒซ ๋ฒˆ์งธ ์‹œํŠธ๋งŒ ๋กœ๋“œ
result_file = save_to_excel(original_data, monthly_counts, yearly_counts, monthly_trend, daily_counts,
option_counts, review_counts, selected_option_counts, selected_review_counts,
longest_reviews)
return result_file
# ํŒŒ์ผ์„ ์ €์žฅํ•˜๊ณ  ๋‹ค์šด๋กœ๋“œ URL์„ ์ƒ์„ฑํ•˜๋Š” ํ•จ์ˆ˜
def generate_download_links(plots):
download_links = []
for i, plot in enumerate(plots):
if os.path.exists(plot):
with open(plot, "rb") as image_file:
encoded_string = base64.b64encode(image_file.read()).decode()
data_url = f"image/png;base64,{encoded_string}"
download_links.append(data_url)
return download_links
def get_model_info(filenames):
download_links = []
for f in filenames:
if os.path.exists(f):
url = hf_hub_url(repo_id="", filename=f)
download_links.append(url)
print(download_links)
return download_links
def process_file_with_plots(file, year):
df = read_excel_data(file)
result_file = process_file(file, year)
plots = generate_plots(df, year)
print(result_file)
print(plots)
return [result_file] + plots
def process_file_with_plots2(file, year):
df = read_excel_data(file)
result_file = process_file(file, year)
plots = generate_plots(df, year)
download_links = get_model_info(plots)
return_values = [result_file] + download_links
return return_values
years = [str(year) for year in range(datetime.now().year, datetime.now().year - 10, -1)]
def predict(file, year):
return process_file_with_plots(file, year)
def predict_api(file, year):
return process_file_with_plots2(file, year)
# ๊ธ์ •์ ์ธ ๋ฆฌ๋ทฐ๋ฅผ ๋ฐ˜ํ™˜ํ•˜๋Š” ํ•จ์ˆ˜
def get_positive_reviews(df, years, option_analysis):
df = df[df['Year'].isin(years)]
if option_analysis != "์ „์ฒด์˜ต์…˜๋ถ„์„":
top_n = int(option_analysis.split("(")[1].split("๊ฐœ")[0])
top_options = df['Option1'].value_counts().head(top_n).index.tolist()
df = df[df['Option1'].isin(top_options)]
positive_reviews = df[(df['ReviewScore'] == 5) & (df['Review Length'] <= 500)].sort_values(by='Review Length',
ascending=False)
if len(positive_reviews) < 20:
additional_reviews = df[(df['ReviewScore'] == 4) & (df['Review Length'] <= 500)].sort_values(by='Review Length',
ascending=False)
positive_reviews = pd.concat([positive_reviews, additional_reviews])
positive_reviews = positive_reviews.head(20)
positive_reviews.reset_index(drop=True, inplace=True)
positive_reviews.index += 1
positive_reviews['์ˆœ๋ฒˆ'] = positive_reviews.index
return "\n\n".join(positive_reviews.apply(
lambda x: f"{x['์ˆœ๋ฒˆ']}. **{x['Review Date']} / {x['ID']} / {x['Option']}**\n\n{x['Review']}", axis=1))
# ๋ถ€์ •์ ์ธ ๋ฆฌ๋ทฐ๋ฅผ ๋ฐ˜ํ™˜ํ•˜๋Š” ํ•จ์ˆ˜
def get_negative_reviews(df, years, option_analysis):
df = df[df['Year'].isin(years)]
if option_analysis != "์ „์ฒด์˜ต์…˜๋ถ„์„":
top_n = int(option_analysis.split("(")[1].split("๊ฐœ")[0])
top_options = df['Option1'].value_counts().head(top_n).index.tolist()
df = df[df['Option1'].isin(top_options)]
negative_reviews = df[(df['ReviewScore'] == 1) & (df['Review Length'] <= 500)].sort_values(by='Review Length',
ascending=False)
if len(negative_reviews) < 30:
additional_reviews = df[(df['ReviewScore'] == 2) & (df['Review Length'] <= 500)].sort_values(by='Review Length',
ascending=False)
negative_reviews = pd.concat([negative_reviews, additional_reviews])
negative_reviews = negative_reviews.head(30)
negative_reviews.reset_index(drop=True, inplace=True)
negative_reviews.index += 1
negative_reviews['์ˆœ๋ฒˆ'] = negative_reviews.index
return "\n\n".join(negative_reviews.apply(
lambda x: f"{x['์ˆœ๋ฒˆ']}. **{x['Review Date']} / {x['ID']} / {x['Option']}**\n\n{x['Review']}", axis=1))
# ๋ฆฌ๋ทฐ ์—…๋ฐ์ดํŠธ ๋ฐ ๋ถ„์„ ํ”„๋กฌํ”„ํŠธ ์ƒ์„ฑ ํ•จ์ˆ˜
def update_reviews(file, years, option_analysis):
df = read_excel_data(file)
positive_reviews = get_positive_reviews(df, years, option_analysis)
negative_reviews = get_negative_reviews(df, years, option_analysis)
positive_prompt = f"{positive_reviews}\n\n{prompts['๊ธ์ •์ ์ธ ๋ฆฌ๋ทฐ๋ถ„์„']}"
negative_prompt = f"{negative_reviews}\n\n{prompts['๋ถ€์ •์ ์ธ ๋ฆฌ๋ทฐ๋ถ„์„']}"
return positive_reviews, negative_reviews, positive_prompt, negative_prompt
# ๋ฆฌ๋ทฐ ๋ถ„์„ ํ•จ์ˆ˜
def analyze_all(positive_prompt, negative_prompt):
positive_analysis, _ = generate_section(
review_output=positive_prompt,
system_message=prompts["๊ธ์ •์ ์ธ ๋ฆฌ๋ทฐ๋ถ„์„"],
max_tokens=15000,
temperature=0.3,
top_p=0.95,
)
negative_analysis, _ = generate_section(
review_output=negative_prompt,
system_message=prompts["๋ถ€์ •์ ์ธ ๋ฆฌ๋ทฐ๋ถ„์„"],
max_tokens=15000,
temperature=0.4,
top_p=0.95,
)
return positive_analysis, negative_analysis
# Create a new client for CohereForAI/c4ai-command-r-plus model
def create_client(model_name):
return InferenceClient(model_name, token=os.getenv("HF_TOKEN"))
client = create_client("CohereForAI/c4ai-command-r-plus")
# Function to generate analysis for each review type
def generate_section(review_output, system_message, max_tokens, temperature, top_p):
prompt = f"{review_output}\n\n{system_message}"
response = call_api(prompt, max_tokens, temperature, top_p)
return response, prompt
# Function to call the API
def call_api(content, max_tokens, temperature, top_p):
messages = [{"role": "system", "content": ""}, {"role": "user", "content": content}]
response = client.chat_completion(messages, max_tokens=max_tokens, temperature=temperature, top_p=top_p)
return response.choices[0].message['content']
prompts = {
"๊ธ์ •์ ์ธ ๋ฆฌ๋ทฐ๋ถ„์„": """[์ค‘์š” ๊ทœ์น™]
1. ๋ฐ˜๋“œ์‹œ ํ•œ๊ธ€(ํ•œ๊ตญ์–ด)๋กœ ์ถœ๋ ฅํ•˜๋ผ.
2. ๋„ˆ๋Š” ๋ฆฌ๋ทฐ ๋ฐ์ดํ„ฐ๋ฅผ ๋ถ„์„ํ•˜๋Š” ๋น…๋ฐ์ดํ„ฐ ๋ถ„์„๊ฐ€์ด๋‹ค.
3. ๊ณ ๊ฐ์˜ ๋ฆฌ๋ทฐ ๋ฐ์ดํ„ฐ๋ฅผ ๋ฐ”ํƒ•์œผ๋กœ ๊ธ์ •์ ์ธ ์˜๊ฒฌ์˜ ๋ฐ์ดํ„ฐ๋งŒ ๋ถ„์„ํ•˜๋ผ.
4. ๋ฐ˜๋“œ์‹œ ์ œ๊ณต๋œ ๋ฆฌ๋ทฐ ๋ฐ์ดํ„ฐ์—์„œ๋งŒ ๋ถ„์„ํ•˜๋ผ.
5. ๋„ˆ์˜ ์ƒ๊ฐ์„ ํฌํ•จํ•˜์ง€ ๋ง ๊ฒƒ.
[๋ถ„์„ ์กฐ๊ฑด]
1. ์ด 20๊ฐœ์˜ ๋ฆฌ๋ทฐ๋ฐ์ดํ„ฐ๋ฅผ ์ œ๊ณตํ•œ๋‹ค.
2. ๊ฐ ๋ฆฌ๋ทฐ ๋ฐ์ดํ„ฐ์˜ ๋‘˜์งธ์ค„ ๋ถ€ํ„ฐ์˜ ์‹ค์ œ ๊ณ ๊ฐ๋ฆฌ๋ทฐ๋ฅผ ๋ฐ˜์˜ํ•˜๋ผ.
3. ๋ฐ˜๋“œ์‹œ ๊ธ์ •์ ์ธ ์˜๊ฒฌ๋งŒ์„ ๋ถ„์„ํ•˜๋ผ. ๋ถ€์ •์ ์ธ ์˜๊ฒฌ์€ ์ œ์™ธํ•˜๋ผ.
4. ๊ธฐ๋Šฅ๊ณผ ์„ฑ๋Šฅ์˜ ๋ถ€๋ถ„, ๊ฐ์„ฑ์ ์ธ ๋ถ€๋ถ„, ์‹ค์ œ ์‚ฌ์šฉ ์ธก๋ฉด์˜ ๋ถ€๋ถ„, ๋ฐฐ์†ก์˜ ๋ถ€๋ถ„, ํƒ€๊ฒŸ๋ณ„ ๋ถ€๋ถ„์˜ ๊ด€์ ์œผ๋กœ ๋ถ„์„ํ•˜๋ผ.
5. 4๋ฒˆ์˜ ์กฐ๊ฑด์— ํฌํ•จ๋˜์ง€ ์•Š๋Š” ๊ธ์ •์ ์ธ ๋ฆฌ๋ทฐ๋ฅผ ๋ณ„๋„๋กœ ์ถœ๋ ฅํ•˜๋ผ.
6. ๋งˆ์ผ€ํŒ…์ ์ธ ์š”์†Œ๋กœ ์‚ฌ์šฉํ•  ์ˆ˜ ์žˆ๋Š” ๊ณ ๊ฐ์˜ ์‹ค์ œ ๋ฆฌ๋ทฐ๋ฅผ ๋ฐ˜์˜ํ•˜๋ผ.
[์ถœ๋ ฅ ํ˜•ํƒœ ์กฐ๊ฑด]
1. ๊ฐ๊ฐ์˜ ์ œ๋ชฉ ์•ž์— '๐Ÿ“'์ด๋ชจ์ง€๋ฅผ ์ถœ๋ ฅํ•˜๋ผ,'#', '##'์€ ์ถœ๋ ฅํ•˜์ง€ ๋ง๊ฒƒ.
2. ๊ฐ€์žฅ ๋งˆ์ง€๋ง‰์— ์ข…ํ•ฉ ์˜๊ฒฌ์„ ์ž‘์„ฑํ•˜๋ผ, "๐Ÿ†์ข…ํ•ฉ์˜๊ฒฌ"์˜ ์ œ๋ชฉํ˜•ํƒœ๋ฅผ ์‚ฌ์šฉํ•˜๋ผ.
[์ข…ํ•ฉ์˜๊ฒฌ์˜ ์ถœ๋ ฅ ์กฐ๊ฑด ์‹œ์ž‘]
('์ข…ํ•ฉ์˜๊ฒฌ'์ด ์•„๋‹Œ ๋‹ค๋ฅธ ๋ถ€๋ถ„์— ์ด ์ถœ๋ ฅ ์กฐ๊ฑด์„ ๋ฐ˜์˜ํ•˜์ง€ ๋ง ๊ฒƒ.
- ํ•ญ๋ชฉ๋ณ„ ์ œ๋ชฉ์„ ์ œ์™ธํ•˜๋ผ.
- ์ข…ํ•ฉ์˜๊ฒฌ์—๋Š” ํ•ญ๋ชฉ๋ณ„ ์ œ๋ชฉ์„ ์ œ์™ธํ•˜๊ณ  ์„œ์ˆ ์‹ ๋ฌธ์žฅ์œผ๋กœ ์ž‘์„ฑํ•˜๋ผ.
- ๋งค์ถœ์„ ๊ทน๋Œ€ํ™” ํ•  ์ˆ˜ ์žˆ๋Š” ๊ณ ๊ฐ์˜ ์‹ค์ œ ๋ฆฌ๋ทฐ ํฌ์ธํŠธ๋ฅผ ์ œ์‹œํ•˜๋ผ.
[SWOT๋ถ„์„ ์กฐ๊ฑด]
1. '์ข…ํ•ฉ์˜๊ฒฌ' ๋‹ค์Œ ๋‚ด์šฉ์œผ๋กœ SWOT๋ถ„์„ ์˜๊ฒฌ์„ ์ถœ๋ ฅํ•˜๋ผ.
2. SWOT๋ถ„์„ ์ค‘ '๊ฐ•์ '์˜๊ฒฌ๊ณผ '๊ธฐํšŒ'์˜ ์˜๊ฒฌ์„ ์ถœ๋ ฅํ•˜๋ผ.
3. ๋ฐ˜๋“œ์‹œ '์ข…ํ•ฉ์˜๊ฒฌ'์˜ ๋‚ด์šฉ์„ ๊ธฐ๋ฐ˜์œผ๋กœ ์ž‘์„ฑํ•˜๋ผ.
4. ์ œ๋ชฉ์€ '๐Ÿน ๊ฐ•์ ', '๐Ÿน ๊ธฐํšŒ'์œผ๋กœ ์ถœ๋ ฅํ•˜๋ผ.
[์ข…ํ•ฉ์˜๊ฒฌ์˜ ์ถœ๋ ฅ ์กฐ๊ฑด ๋]
3. ์‹ค์ œ ๊ณ ๊ฐ์˜ ๋ฆฌ๋ทฐ ๋ฐ์ดํ„ฐ์—์„œ ์‚ฌ์šฉ๋œ ๋‹จ์–ด๋ฅผ ํฌํ•จํ•˜๋ผ.
4. ๋„ˆ์˜ ์ƒ๊ฐ์„ ์ž„์˜๋กœ ๋„ฃ์ง€ ๋ง ๊ฒƒ.
""",
"๋ถ€์ •์ ์ธ ๋ฆฌ๋ทฐ๋ถ„์„": """[์ค‘์š” ๊ทœ์น™]
1. ๋ฐ˜๋“œ์‹œ ํ•œ๊ธ€(ํ•œ๊ตญ์–ด)๋กœ ์ถœ๋ ฅํ•˜๋ผ.
2. ๋„ˆ๋Š” ๋ฆฌ๋ทฐ ๋ฐ์ดํ„ฐ๋ฅผ ๋ถ„์„ํ•˜๋Š” ๋น…๋ฐ์ดํ„ฐ ๋ถ„์„๊ฐ€์ด๋‹ค.
3. ๊ณ ๊ฐ์˜ ๋ฆฌ๋ทฐ ๋ฐ์ดํ„ฐ๋ฅผ ๋ฐ”ํƒ•์œผ๋กœ ๋ถ€์ •์ ์ธ ์˜๊ฒฌ์˜ ๋ฐ์ดํ„ฐ๋งŒ ๋ถ„์„ํ•˜๋ผ.
4. ๋ฐ˜๋“œ์‹œ ์ œ๊ณต๋œ ๋ฆฌ๋ทฐ ๋ฐ์ดํ„ฐ์—์„œ๋งŒ ๋ถ„์„ํ•˜๋ผ.
5. ๋„ˆ์˜ ์ƒ๊ฐ์„ ํฌํ•จํ•˜์ง€ ๋ง ๊ฒƒ.
[๋ถ„์„ ์กฐ๊ฑด]
1. ์ด 30๊ฐœ์˜ ๋ฆฌ๋ทฐ๋ฐ์ดํ„ฐ๋ฅผ ์ œ๊ณตํ•œ๋‹ค.
2. ๊ฐ ๋ฆฌ๋ทฐ ๋ฐ์ดํ„ฐ์˜ ๋‘˜์งธ์ค„ ๋ถ€ํ„ฐ์˜ ์‹ค์ œ ๊ณ ๊ฐ๋ฆฌ๋ทฐ๋ฅผ ๋ฐ˜์˜ํ•˜๋ผ.
3. ๋ถ€์ •์ ์ธ ์˜๊ฒฌ๋งŒ์„ ๋ถ„์„ํ•˜๋ผ.
4. ๊ธฐ๋Šฅ๊ณผ ์„ฑ๋Šฅ์˜ ๋ถ€๋ถ„, ๊ฐ์„ฑ์ ์ธ ๋ถ€๋ถ„, ์‹ค์ œ ์‚ฌ์šฉ ์ธก๋ฉด์˜ ๋ถ€๋ถ„, ๋ฐฐ์†ก์˜ ๋ถ€๋ถ„, ๊ณ ๊ฐ์˜ ๋ถ„๋…ธ ๋ถ€๋ถ„์˜ ๊ด€์ ์œผ๋กœ ๋ถ„์„ํ•˜๋ผ.
5. 4๋ฒˆ์˜ ์กฐ๊ฑด์— ํฌํ•จ๋˜์ง€ ์•Š๋Š” ๋ถ€์ •์ ์ธ ๋ฆฌ๋ทฐ๋ฅผ ๋ณ„๋„๋กœ ์ถœ๋ ฅํ•˜๋ผ.
6. ๋ถ€์ •์ ์ธ ๋ฆฌ๋ทฐ ๋ถ„์„ ๊ฒฐ๊ณผ๋ฅผ ๋ฐ”ํƒ•์œผ๋กœ '๊ฐœ์„ ํ•  ์ '์„ ์ถœ๋ ฅํ•˜๋ผ.
[์ถœ๋ ฅ ํ˜•ํƒœ ์กฐ๊ฑด]
1. ๊ฐ๊ฐ์˜ ์ œ๋ชฉ ์•ž์— '๐Ÿ“'์ด๋ชจ์ง€๋ฅผ ์ถœ๋ ฅํ•˜๋ผ,'#', '##'์€ ์ถœ๋ ฅํ•˜์ง€ ๋ง๊ฒƒ.
2. ๊ฐ€์žฅ ๋งˆ์ง€๋ง‰์— '๊ฐœ์„ ํ•  ์ '์„ ์ถœ๋ ฅํ•˜๋ผ("๐Ÿ“ข๊ฐœ์„ ํ•  ์ "์˜ ์ œ๋ชฉํ˜•ํƒœ๋ฅผ ์‚ฌ์šฉํ•˜๋ผ.)
[๊ฐœ์„ ํ•  ์ ์˜ ์ถœ๋ ฅ ์กฐ๊ฑด ์‹œ์ž‘]
('๊ฐœ์„ ํ•  ์ '์ด ์•„๋‹Œ ๋‹ค๋ฅธ ๋ถ€๋ถ„์— ์ด ์ถœ๋ ฅ ์กฐ๊ฑด์„ ๋ฐ˜์˜ํ•˜์ง€ ๋ง ๊ฒƒ.
- ํ•ญ๋ชฉ๋ณ„ ์ œ๋ชฉ์„ ์ œ์™ธํ•˜๋ผ.
- ์ฃผ์š” ํ•ญ๋ชฉ๋ณ„๋กœ ๊ฐœ์„ ํ•  ์ ์„ ์ถœ๋ ฅํ•˜๋ผ.
- ์ „๋ฌธ์ ์ด๊ณ , ๋ถ„์„์ ์ด๋ฉฐ, ์ œ์•ˆํ•˜๋Š” ํ˜•ํƒœ์˜ ๊ณต์†ํ•œ ์–ดํˆฌ๋ฅผ ์‚ฌ์šฉํ•˜๋ผ.(๋‹จ๋‹ตํ˜• ํ‘œํ˜„ ๊ธˆ์ง€)
[SWOT๋ถ„์„ ์กฐ๊ฑด]
1. '์ข…ํ•ฉ์˜๊ฒฌ' ๋‹ค์Œ ๋‚ด์šฉ์œผ๋กœ SWOT๋ถ„์„ ์˜๊ฒฌ์„ ์ถœ๋ ฅํ•˜๋ผ.
2. SWOT๋ถ„์„ ์ค‘ '์•ฝ์ '์˜๊ฒฌ๊ณผ '์œ„ํ˜‘'์˜ ์˜๊ฒฌ์„ ์ถœ๋ ฅํ•˜๋ผ.
3. ๋ฐ˜๋“œ์‹œ '๊ฐœ์„ ํ•  ์ '์˜ ๋‚ด์šฉ์„ ๊ธฐ๋ฐ˜์œผ๋กœ ์ž‘์„ฑํ•˜๋ผ.
4. ์ œ๋ชฉ์€ '๐Ÿ’‰ ์•ฝ์ ', '๐Ÿ’‰ ์œ„ํ˜‘'์œผ๋กœ ์ถœ๋ ฅํ•˜๋ผ.
[๊ฐœ์„ ํ•  ์ ์˜ ์ถœ๋ ฅ ์กฐ๊ฑด ๋]
3. ์‹ค์ œ ๊ณ ๊ฐ์˜ ๋ฆฌ๋ทฐ ๋ฐ์ดํ„ฐ์—์„œ ์‚ฌ์šฉ๋œ ๋‹จ์–ด๋ฅผ ํฌํ•จํ•˜๋ผ.
4. ๋„ˆ์˜ ์ƒ๊ฐ์„ ์ž„์˜๋กœ ๋„ฃ์ง€ ๋ง ๊ฒƒ.
"""
}
def select_all_years():
current_year = datetime.now().year
return [str(year) for year in range(current_year, current_year - 5, -1)]
def deselect_all_years():
return []
with gr.Blocks() as ๋ฆฌ๋ทฐ์ถ”์ด_๋ถ„์„:
gr.Markdown("### ์—‘์…€ ํŒŒ์ผ ์—…๋กœ๋“œ")
file_input = gr.File(label="", file_types=["xlsx"])
year_selection = gr.Radio(years, label="๋ถ„์„๋…„๋„ ์„ ํƒ", value=str(datetime.now().year))
analyze_button = gr.Button("๋ถ„์„ ์‹คํ–‰")
outputs = [
gr.File(label="์„ธ๋ถ€๋ถ„์„ ์ž๋ฃŒ๋ฅผ ๋‹ค์šด๋ฐ›์œผ์„ธ์š”(ExcelํŒŒ์ผ)"),
gr.File(label="์ตœ๊ทผ3๋…„๊ฐ„ ์›”๋ณ„ ๋ฆฌ๋ทฐ์ถ”์ด"),
gr.File(label="์ตœ๊ทผ ๋…„๋„๋ณ„ ๋ฆฌ๋ทฐ์ถ”์ด"),
gr.File(label="์„ ํƒ๋…„๋„ ์›” ๋ฆฌ๋ทฐ์ถ”์ด"),
gr.File(label="์„ ํƒ๋…„๋„ ์ผ์ผ ๋ฆฌ๋ทฐ์ถ”์ด"),
]
analyze_button.click(predict, inputs=[file_input, year_selection], outputs=outputs)
with gr.Blocks() as ๋ฆฌ๋ทฐ๋ถ„์„:
year_selection_review = gr.CheckboxGroup(
choices=[str(year) for year in select_all_years()],
label="์—ฐ๋„ ์„ ํƒ",
value=[str(year) for year in select_all_years()]
)
option_selection = gr.Radio(
choices=["์ „์ฒด์˜ต์…˜๋ถ„์„", "์ฃผ์š”์˜ต์…˜๋ถ„์„(1๊ฐœ)", "์ฃผ์š”์˜ต์…˜๋ถ„์„(3๊ฐœ)", "์ฃผ์š”์˜ต์…˜๋ถ„์„(5๊ฐœ)"],
label="์˜ต์…˜๋ณ„ ๋ฆฌ๋ทฐ๋ถ„์„ ์„ ํƒ",
value="์ „์ฒด์˜ต์…˜๋ถ„์„"
)
analyze_button_review = gr.Button("๋ฆฌ๋ทฐ ๊ฐ€์ ธ์˜ค๊ธฐ")
analyze_all_button = gr.Button("๋ฆฌ๋ทฐ ๋ถ„์„ํ•˜๊ธฐ")
with gr.Column():
gr.Markdown("### ๋ฆฌ๋ทฐ ๊ฒฐ๊ณผ")
positive_reviews_output_review = gr.Textbox(label="๊ธ์ •์ ์ธ ์ฃผ์š” ๋ฆฌ๋ทฐ(20๊ฐœ)", interactive=False, lines=12)
negative_reviews_output_review = gr.Textbox(label="๋ถ€์ •์ ์ธ ์ฃผ์š” ๋ฆฌ๋ทฐ(30๊ฐœ)", interactive=False, lines=12)
gr.Markdown("### ์ถœ๋ ฅ")
positive_analysis_output_review = gr.Textbox(label="๊ธ์ •์ ์ธ ๋ฆฌ๋ทฐ๋ถ„์„", interactive=False, lines=12)
negative_analysis_output_review = gr.Textbox(label="๋ถ€์ •์ ์ธ ๋ฆฌ๋ทฐ๋ถ„์„", interactive=False, lines=12)
analyze_button_review.click(update_reviews, inputs=[file_input, year_selection_review, option_selection],
outputs=[positive_reviews_output_review, negative_reviews_output_review])
analyze_all_button.click(
fn=analyze_all,
inputs=[positive_reviews_output_review, negative_reviews_output_review],
outputs=[positive_analysis_output_review, negative_analysis_output_review]
)
with gr.Row():
with gr.Column():
positive_analysis_output_review
with gr.Column():
negative_analysis_output_review
with gr.Blocks() as tabs:
with gr.Tab("๋ฆฌ๋ทฐ์ถ”์ด ๋ถ„์„"):
๋ฆฌ๋ทฐ์ถ”์ด_๋ถ„์„.render()
with gr.Tab("๋ฆฌ๋ทฐ๋ถ„์„"):
๋ฆฌ๋ทฐ๋ถ„์„.render()
if __name__ == "__main__":
tabs.launch()