import pickle

import pandas as pd
import shap
from shap.plots._force_matplotlib import draw_additive_plot
import gradio as gr
import numpy as np
import matplotlib.pyplot as plt
import xgboost as xgb
from scipy import stats
from gradio import Interface
from gradio.components import Markdown, Row, Column, Slider, Dropdown, CheckboxGroup, Button, Textbox, Dataframe
from category_encoders import TargetEncoder


def _load_pickle(path):
    """Unpickle and return the object stored at *path*, closing the file afterwards.

    NOTE: unpickling executes code embedded in the file, so this must only
    be pointed at artifacts shipped with the app, never at user uploads.
    """
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Load DataFrames
bb_df = pd.read_csv('beer_brewery_imputed.csv')
bb_df_percentile = pd.read_csv('bb_df_testing.csv')
aslin_example_df_full = pd.read_csv('App_Example_Aslin_Update.csv')
aslin_example_df = aslin_example_df_full.drop(['Number of Ratings Beer'], axis=1)
# The example CSV's ABV is multiplied by 100 here; main_func divides the
# user-supplied ABV by 100 again before encoding.
aslin_example_df['ABV'] = aslin_example_df['ABV']*100

# Load pickle files
with open('unique_brewery_file.pickle', 'rb') as file:
    unique_breweries_list = pickle.load(file)
loaded_model_regressor = _load_pickle("XGB_Untappd_regressor_FlavorBreakout.pkl")
loaded_model = _load_pickle("XGB_Untappd_4_classifier_FlavorBreakout.pkl")
loaded_enc_regressor = _load_pickle("target_encoder_regressor_flavorbreakout.pkl")
loaded_enc_classification = _load_pickle("target_encoder_classification_flavorbreakout.pkl")

# Define choices
region_choices = [
    'Far West', 'Great Lakes', 'Mideast', 'Non-Con', 'Northeast', 'OTHER',
    'Plains', 'Rocky Mountain', 'Southeast', 'Southwest',
]
style_choices = [
    'Altbier', 'Barleywine - American', 'Belgian Blonde', 'Blonde Ale',
    'Bock - Doppelbock', 'Brown Ale - American', 'Cream Ale', 'Dark Ale',
    'IPA - American', 'Lager - American', 'Pilsner - German',
    'Stout - American', 'Wheat Beer - American Pale Wheat',
]
brewery_style_choices = [
    'Brew Pub', 'Cidery', 'Contract Brewery', 'Macro Brewery',
    'Micro Brewery', 'Nano Brewery', 'OTHER', 'Regional Brewery',
]
state_choices = [
    'AK', 'AL', 'AR', 'AZ', 'CA', 'CO', 'CT', 'DC', 'DE', 'FL', 'GA', 'HI',
    'IA', 'ID', 'IL', 'IN', 'KS', 'KY', 'LA', 'MA', 'MD', 'ME', 'MI',
    'MISSING', 'MN', 'MO', 'MS', 'MT', 'NC', 'ND', 'NE', 'NH', 'NJ', 'NM',
    'NV', 'NY', 'OH', 'OK', 'OR', 'PA', 'RI', 'SC', 'SD', 'TN', 'TX', 'UT',
    'VA', 'VT', 'WA', 'WI', 'WV', 'WY',
]

# Define flavors and hops
flavors_list = [
    'Apple', 'Apricot', 'Berry', 'Bitter', 'Caramel', 'Chocolate', 'Citrus',
    'Clove', 'Coffee', 'Dry', 'Earthy', 'Fig', 'Floral', 'Fruity', 'Funky',
    'Grapefruit', 'Hazelnut', 'Herbal', 'Malt', 'Nutmeg', 'Nutty', 'Peach',
    'Pear', 'Peat', 'Pepper', 'Pine', 'Plum', 'Resin', 'Salty', 'Smoky',
    'Sour', 'Spicy', 'Strawberry', 'Sweet', 'Tart', 'Toast', 'Toffee',
    'Tropical', 'Vanilla',
]
hops_list = [
    'Amarillo', 'Cascade', 'Centennial', 'Chinook', 'Citra', 'Columbus',
    'Crystal', 'Fuggle', 'Galaxy', 'Golding', 'Hallertau', 'Magnum',
    'Mosaic', 'Noble', 'Nugget', 'Saaz', 'Simcoe', 'Tettnang', 'Warrior',
    'Willamette',
]

# Setup SHAP
explainer = shap.Explainer(loaded_model_regressor)


# Function to extract selected items
def extract_selected_items(row, items_list, prefix):
    """Return the entries of *items_list* whose ``prefix + item`` value in *row* equals 1.

    Args:
        row: Mapping-like object indexed by ``prefix + item``.
        items_list: Candidate item names, checked in order.
        prefix: String prepended to each item name to form the lookup key.

    Returns:
        A list of the selected item names, in ``items_list`` order.
    """
    return [item for item in items_list if row[prefix + item] == 1]


# Function to filter beers
def filter_beers(style, state):
    """Return up to five of the most-rated beers in ``bb_df`` for a style and state.

    Rows of ``bb_df`` whose 'Style' and 'State' equal the arguments are
    ranked by 'Number of Ratings Beer' (descending) and the top five are
    formatted for display.

    Args:
        style: Value compared against the 'Style' column.
        state: Value compared against the 'State' column.

    Returns:
        A DataFrame with columns 'Brewery', 'Beer Name', 'Avg Rating',
        '# Ratings', 'Style', 'ABV', 'IBU', 'State'. 'ABV' is multiplied by
        100, rounded to 2 places and rendered as a string ending in '%';
        'Avg Rating' is rounded to 2 places; 'IBU' is cast to int;
        '# Ratings' is a string with thousands separators.
    """
    matches = bb_df[(bb_df['Style'] == style) & (bb_df['State'] == state)]
    # NOTE(review): an ABV-window step used to live here, but it clamped every
    # distance to its 0.03 tolerance before filtering on it, so its only
    # effect was dropping rows with a missing ABV. That effect is kept
    # explicitly. A real window around the user's beer would need that
    # beer's ABV passed in.
    matches = matches[matches['ABV'].notna()]

    top_rated = matches.sort_values(by='Number of Ratings Beer', ascending=False).head(5)
    limited_df = top_rated[
        ['Brewery', 'Beer Name', 'Average Rating Beer', 'Number of Ratings Beer',
         'Style', 'ABV', 'IBU', 'State']
    ].rename(columns={'Average Rating Beer': 'Avg Rating',
                      'Number of Ratings Beer': '# Ratings'})

    # Display formatting only; the underlying bb_df is not modified.
    limited_df['ABV'] = (limited_df['ABV'] * 100).round(2).astype(str) + '%'
    limited_df['Avg Rating'] = limited_df['Avg Rating'].round(2)
    limited_df['IBU'] = limited_df['IBU'].astype(int)
    limited_df['# Ratings'] = limited_df['# Ratings'].apply(lambda x: '{:,}'.format(x))
    return limited_df


# Main function
def main_func(BeerName, ABV, IBU, Style, BreweryStyle, Region, State, Flavor_Group, Hop_Group): flavors_selected = [flavor for flavor in flavors_list if flavor in Flavor_Group] hops_selected = [hop for hop in hops_list if hop in Hop_Group] new_row = pd.DataFrame(columns=['ABV', 'IBU', 'Style', 'Brewery Style', 'Region', 'State'] + flavors_list + hops_list) new_row.loc[0] = [float(ABV), float(IBU), Style, BreweryStyle, Region, State] + [1 if flavor in Flavor_Group else 0 for flavor in flavors_list] + [1 if hop in Hop_Group else 0 for hop in hops_list] new_row[['ABV', 'IBU']] = new_row[['ABV', 'IBU']].astype(float) new_row['ABV'] = new_row['ABV']/100 new_row_class = new_row.copy() new_row_regress = new_row.copy() new_row_encoded_class = loaded_enc_classification.transform(new_row_class) new_row_encoded_regressor = loaded_enc_regressor.transform(new_row_regress) prob = loaded_model.predict_proba(new_row_encoded_class) score_predict = loaded_model_regressor.predict(new_row_encoded_regressor)[0] score_predict = round(score_predict, 2) score_predict_str = str(score_predict) shap_values = explainer(new_row_encoded_regressor) plot = shap.plots.bar(shap_values[0], max_display=7, order=shap.Explanation.abs, show_data='auto', show=False) plt.tight_layout() local_plot = plt.gcf() plt.close() similar_beers = filter_beers(Style, State) nr_state_p = new_row['State'][0] nr_style_p = new_row['Style'][0] overall_df = bb_df_percentile state_df = bb_df_percentile[bb_df_percentile['State'] == nr_state_p] style_overall_df = bb_df_percentile[bb_df_percentile['Style'] == nr_style_p] style_state_df = bb_df_percentile[(bb_df_percentile['Style'] == nr_style_p) & (bb_df_percentile['State'] == nr_state_p)] percent_100 = 1/1 percentile_overall = stats.percentileofscore(overall_df['Average Rating Beer'], score_predict).round(1)/100 percentile_state = stats.percentileofscore(state_df['Average Rating Beer'], score_predict).round(1)/100 percentile_style_overall = 
stats.percentileofscore(style_overall_df['Average Rating Beer'], score_predict).round(1)/100 percentile_style_state = stats.percentileofscore(style_state_df['Average Rating Beer'], score_predict).round(1)/100 percentile_dict0 = { #"Percentile Scale": [percent_100], "Overall in USA": [percentile_overall], f" Overall in {nr_state_p}": [percentile_state], f"{nr_style_p}s in USA": [percentile_style_overall], f"{nr_style_p}s in {nr_state_p}": [percentile_style_state] } title_text = f"