"""Gradio front end for Gazer 1.0 (ad attention prediction).

The user uploads one image containing an ad and its surrounding context,
draws bounding boxes around the brand / pictorial / text elements and around
the ad itself, and the app reports a predicted gaze time plus a heatmap.
"""

import cv2 as cv
import gradio as gr
import numpy as np
import torch
from gradio_image_prompter import ImagePrompter
from PIL import Image

import Predict
import XGBoost_utils

# Product category name -> index of its slot in the one-hot product-group vector.
GENERAL_CATEGORY = {
    'Potatoes / Vegetables / Fruit': 0,
    'Chemical products': 1,
    'Photo / Film / Optical items': 2,
    'Catering industry': 3,
    'Industrial products other': 4,
    'Media': 5,
    'Real estate': 6,
    'Government': 7,
    'Personnel advertisements': 8,
    'Cars / Commercial vehicles': 9,
    'Cleaning products': 10,
    'Retail': 11,
    'Fragrances': 12,
    'Footwear / Leather goods': 13,
    'Software / Automation': 14,
    'Telecommunication equipment': 15,
    'Tourism': 16,
    'Transport/Communication companies': 17,
    'Transport services': 18,
    'Insurances': 19,
    'Meat / Fish / Poultry': 20,
    'Detergents': 21,
    'Foods General': 22,
    'Other services': 23,
    'Banks and Financial Services': 24,
    'Office Products': 25,
    'Household Items': 26,
    'Non-alcoholic beverages': 27,
    'Hair, Oral and Personal Care': 28,
    'Fashion and Clothing': 29,
    'Other products and Services': 30,
    'Paper products': 31,
    'Alcohol and Other Stimulants': 32,
    'Medicines': 33,
    'Recreation and Leisure': 34,
    'Electronics': 35,
    'Home Furnishings': 36,
    'Products for Business Use': 37,
}
CATEGORIES = sorted(GENERAL_CATEGORY)
LOCATIONS = ['Left', 'Right', 'Full']
GAZE_TYPE = ['Ad', 'Brand']

# Lower-cased dropdown label -> location code handed to the gaze predictor.
# Any other label (e.g. 'Full') maps to None.
_AD_LOCATION_CODES = {'left': 0, 'right': 1}

# When both horizontal margins around the ad are at most this many pixels,
# the context is taken from above/below the ad instead of beside it.
_SIDE_MARGIN_PX = 100


def _as_count(value, label):
    """Coerce a ``gr.Number`` value (int, float or None) to a non-negative int."""
    if value is None:
        return 0
    count = int(value)
    if count < 0:
        raise ValueError(f"Number of {label} bounding boxes cannot be negative.")
    return count


def _box_area(box):
    """Return the pixel area of one prompt box.

    Entries 0/1 and 3/4 of ``box`` are read as two opposite corners
    (presumably ImagePrompter's ``[x1, y1, type, x2, y2, type]`` layout --
    confirm against the component's documentation).
    """
    return np.abs((box[0] - box[3]) * (box[1] - box[4]))


def _total_box_area(boxes):
    """Sum the pixel areas of ``boxes``; an empty sequence yields 0."""
    return sum(_box_area(box) for box in boxes)


def calculate_areas(prompts, brand_num, pictorial_num, text_num):
    """Measure element surfaces and split the upload into ad and context crops.

    Boxes in ``prompts["points"]`` are consumed in drawing order: first
    ``brand_num`` brand boxes, then ``pictorial_num`` pictorial boxes, then
    ``text_num`` text boxes. The LAST box is always taken as the ad itself.

    Args:
        prompts: Mapping with ``"image"`` (a PIL image) and ``"points"``
            (a sequence of boxes, see ``_box_area``).
        brand_num: Number of brand boxes drawn (int, float or None).
        pictorial_num: Number of pictorial boxes drawn (int, float or None).
        text_num: Number of text boxes drawn (int, float or None).

    Returns:
        A 6-tuple ``(brand_pct, pictorial_pct, text_pct, ad_pct, ad_image,
        context_image)``. The percentages are relative to the whole uploaded
        image; the two images are RGB numpy arrays sliced from the upload.

    Raises:
        ValueError: If no image was supplied, a count is negative, or fewer
            boxes were drawn than the counts (plus the ad box) require.
    """
    if not prompts or prompts.get("image") is None:
        raise ValueError("Please upload an image containing the ad and its context.")

    brand_count = _as_count(brand_num, "brand")
    pictorial_count = _as_count(pictorial_num, "pictorial")
    text_count = _as_count(text_num, "text")

    image_entire = prompts["image"]
    w, h = image_entire.size
    image_entire = np.array(image_entire.convert('RGB'))

    points_all = prompts.get("points") or []
    element_total = brand_count + pictorial_count + text_count
    if len(points_all) < element_total + 1:
        raise ValueError(
            f"Expected at least {element_total + 1} bounding boxes "
            f"({element_total} element box(es) plus the ad box) "
            f"but only {len(points_all)} were drawn."
        )

    pictorial_start = brand_count
    text_start = pictorial_start + pictorial_count
    brand_surf = _total_box_area(points_all[:pictorial_start])
    pictorial_surf = _total_box_area(points_all[pictorial_start:text_start])
    text_surf = _total_box_area(points_all[text_start:element_total])

    ad_box = points_all[-1]
    ad_size = _box_area(ad_box)

    # Order the corners and clamp them to the image so that a box drawn
    # right-to-left / bottom-to-top or slightly outside the canvas still
    # yields a valid crop instead of an empty or wrapped-around slice.
    left, right = sorted((ad_box[0], ad_box[3]))
    top, bottom = sorted((ad_box[1], ad_box[4]))
    left, right = min(max(left, 0), w), min(max(right, 0), w)
    top, bottom = min(max(top, 0), h), min(max(bottom, 0), h)

    ad_image = image_entire[int(top):int(bottom), int(left):int(right), :]

    left_margin = left
    right_margin = w - right
    if left_margin <= _SIDE_MARGIN_PX and right_margin <= _SIDE_MARGIN_PX:
        # The ad spans (almost) the full width: take the larger of the
        # strips above and below it as context.
        upper_margin = top
        lower_margin = h - bottom
        if upper_margin >= lower_margin:
            context_image = image_entire[:int(top), :, :]
        else:
            context_image = image_entire[int(bottom):, :, :]
    elif left_margin >= right_margin:
        context_image = image_entire[:, :int(left), :]
    else:
        context_image = image_entire[:, int(right):, :]

    whole_size = w * h
    return (
        brand_surf / whole_size * 100,
        pictorial_surf / whole_size * 100,
        text_surf / whole_size * 100,
        ad_size / whole_size * 100,
        ad_image,
        context_image,
    )


def attention(notes, download1, download2, whole_display_prompt, brand_num,
              pictorial_num, text_num, category, ad_location, gaze_type):
    """Gradio callback: predict gaze time and a heatmap for the annotated ad.

    ``notes``, ``download1`` and ``download2`` are the values of the
    instruction Markdown and the two download buttons; they are accepted only
    because those components sit in the Interface's input list, and are
    otherwise unused.

    Args:
        whole_display_prompt: ImagePrompter value (image plus drawn boxes).
        brand_num, pictorial_num, text_num: Box counts per element type.
        category: One of the keys of ``GENERAL_CATEGORY``.
        ad_location: One of ``LOCATIONS`` (matched case-insensitively).
        gaze_type: Forwarded to ``Predict.Ad_Gaze_Prediction`` as
            ``Gaze_Time_Type``.

    Returns:
        A 5-tuple matching the Interface outputs: three no-op updates for the
        static Markdown headings, the gaze prediction rounded to 2 decimals,
        and the heatmap as a PIL image.

    Raises:
        gr.Error: On missing/invalid user input, so the UI shows a readable
            message instead of a stack trace.
    """
    text_detection_model_path = 'EAST-Text-Detection/frozen_east_text_detection.pb'
    LDA_model_pth = 'LDA_Model_trained/lda_model_best_tot.model'
    training_ad_text_dictionary_path = 'LDA_Model_trained/object_word_dictionary'
    training_lang_preposition_path = 'LDA_Model_trained/dutch_preposition'

    if category not in GENERAL_CATEGORY:
        raise gr.Error("Please select a product category.")
    prod_group = np.zeros(len(GENERAL_CATEGORY))
    prod_group[GENERAL_CATEGORY[category]] = 1

    # The dropdown labels are capitalised ('Left' / 'Right' / 'Full'), so the
    # lookup has to be case-insensitive; anything else maps to None.
    ad_loc = _AD_LOCATION_CODES.get(str(ad_location).lower())

    try:
        (brand_percent, visual_percent, text_percent, adv_size_percent,
         ad_image, context_image) = calculate_areas(
            whole_display_prompt, brand_num, pictorial_num, text_num)
    except ValueError as err:
        raise gr.Error(str(err)) from err

    if ad_image.size == 0:
        raise gr.Error("The ad bounding box is empty; please redraw it.")
    if context_image.size == 0:
        raise gr.Error(
            "No context is left outside the ad bounding box; the image must "
            "contain both the ad and its context."
        )

    # NOTE(review): the ad-size entry is rescaled by 10/100 while the other
    # three stay in percent -- presumably matches the model's training
    # features; confirm before changing.
    surfaces = [brand_percent, visual_percent, text_percent, adv_size_percent*10/100]

    # LLM-generated topic embeddings are disabled in this version; fixed-seed
    # random vectors are used as placeholders instead. Intended pipeline:
    # caption_ad = XGBoost_utils.Caption_Generation(Image.fromarray(np.uint8(ad_image)))
    # caption_context = XGBoost_utils.Caption_Generation(Image.fromarray(np.uint8(context_image)))
    # ad_topic = XGBoost_utils.Topic_emb(caption_ad)
    # ctpg_topic = XGBoost_utils.Topic_emb(caption_context)
    np.random.seed(42)
    ad_topic = np.random.randn(1, 768)
    ctpg_topic = np.random.randn(1, 768)

    ad = cv.resize(ad_image, (640, 832))
    print('ad shape: ', ad.shape)
    context = cv.resize(context_image, (640, 832))

    # HWC arrays -> 1xCxHxW tensors.
    adv_imgs = torch.permute(torch.tensor(ad), (2, 0, 1)).unsqueeze(0)
    ctpg_imgs = torch.permute(torch.tensor(context), (2, 0, 1)).unsqueeze(0)
    # NOTE(review): this location vector and Gaze_Type='AG' are hard-coded and
    # ignore the user's `ad_location` / `gaze_type` choices -- confirm intent.
    ad_locations = torch.tensor([1, 0]).unsqueeze(0)
    heatmap = Predict.HeatMap_CNN(adv_imgs, ctpg_imgs, ad_locations, Gaze_Type='AG')

    Gaze = Predict.Ad_Gaze_Prediction(
        input_ad_path=ad,
        input_ctpg_path=context,
        ad_location=ad_loc,
        text_detection_model_path=text_detection_model_path,
        LDA_model_pth=LDA_model_pth,
        training_ad_text_dictionary_path=training_ad_text_dictionary_path,
        training_lang_preposition_path=training_lang_preposition_path,
        training_language='dutch',
        ad_embeddings=ad_topic,
        ctpg_embeddings=ctpg_topic,
        surface_sizes=surfaces,
        Product_Group=prod_group,
        obj_detection_model_pth=None,
        num_topic=20,
        Gaze_Time_Type=gaze_type,
    )

    # gr.update() leaves the three static Markdown headings untouched;
    # returning `notes` here would overwrite them with the instruction text.
    # The heatmap's channel axis is reversed before display.
    return (
        gr.update(),
        gr.update(),
        np.round(Gaze, 2),
        gr.update(),
        Image.fromarray(np.flip(heatmap, axis=2)),
    )


def greet(name, intensity):
    """Return "Hello " repeated ``intensity`` times, then ``name`` and "!"."""
    return "Hello " * intensity + name + "!"


with gr.Blocks() as demo:
    gr.Interface(
        fn=attention,
        inputs=[
            # Text is kept verbatim; it is split across literals only for
            # source readability.
            gr.Markdown(
                " Instruction: 1. Click to upload or drag the entire image that "
                "contains BOTH ad and its context; "
                "2. Draw bounding boxes in the order of: (each element can have "
                "more than 1 boxes; remember the number of boxes for each "
                "element you draw)    (a) Brand element(s) (skip if N.A.)    "
                "(b) Pictorial element(s), e.g. Objects, Person etc (skip if N.A.)    "
                "(c) Text element(s) (skip if N.A.)    "
                "(d) The advertisement. \n"
                "Two example ads are avialable for download: "
            ),
            gr.DownloadButton(label="Download Example Image 1 of Ad and Context",
                              value='Demo/Ad_Example1.jpg'),
            gr.DownloadButton(label="Download Example Image 2 of Ad and Context",
                              value='Demo/Ad_Example2.jpg'),
            ImagePrompter(label="Upload Entire (Ad+Context) Image, and Draw Bounding Boxes",
                          sources=['upload'], type="pil"),
            # Default to 0 and integer precision so an untouched field never
            # reaches the callback as None or as a fractional count.
            gr.Number(label="Number of brand bounding boxes drawn", value=0, precision=0),
            gr.Number(label="Number of pictorial bounding boxes drawn", value=0, precision=0),
            gr.Number(label="Number of text bounding boxes drawn", value=0, precision=0),
            gr.Dropdown(CATEGORIES, label="Product Category"),
            gr.Dropdown(LOCATIONS, label='Ad Location'),
            gr.Dropdown(GAZE_TYPE, label='Gaze Type'),
        ],
        outputs=[
            gr.Markdown("""Note: Outputs could take around 70-80 seconds under current CPU environment (due to ResNet model)."""),
            gr.Markdown("""### Predicted Gaze (sec) """),
            gr.Number(show_label=False),
            gr.Markdown("""### Heatmap by ResNet50 (Hotter/Redder regions show more contribution)"""),
            gr.Image(show_label=False),
        ],
        title="Gazer 1.0: Ad Attention Prediction",
        description=(
            'This app accompanies: "Contextual Advertising with Theory-Informed '
            'Machine Learning", manuscript submitted to the Journal of Marketing. '
            'App Version: 1.0, Date: 10/24/2024. '
            'Note: Gazer 1.0 does not yet include LLM generated ad topics. '
            'Future updates will include this in a GPU environment.'
        ),
        theme=gr.themes.Soft(),
    )
    gr.Markdown(
        """

Copyright © 2024 Manuscript Authors. All Rights Reserved.

"""
    )

demo.launch(share=True)