# -*- coding: utf-8 -*-
"""mp_art_classification.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1mCMy50B9xHW2WdGNlxTq-wObAe-eMsQ5
"""

import os
import shutil
import math
import glob
import json
import pickle
import requests
import time
import re
import string
from datetime import datetime

import pandas as pd
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import tensorflow as tf
import gradio as gr
from transformers import (
    CLIPTokenizer,
    CLIPImageProcessor,
    TFCLIPTextModel,
    TFCLIPVisionModel,
)

# ---------------------------------------------------------------------------
# Paths
# ---------------------------------------------------------------------------

# When launched from inside the local "semantic_search" workspace the project
# lives in a sub-folder; otherwise everything is resolved against the CWD.
if 'workspace/semantic_search' in os.getcwd():
    ROOT_FOLDER = os.path.join("./hf", "mp_art_classification")
else:
    ROOT_FOLDER = './'

PRE_TRAINED_MODELS_FOLDER = os.path.join(ROOT_FOLDER, "pre_trained_models")
TRAINED_WEIGHTS_FOLDER = os.path.join(ROOT_FOLDER, "trained_weights")


def clean_directories():
    """Delete the pre-trained model cache folder, ignoring any errors."""
    shutil.rmtree(PRE_TRAINED_MODELS_FOLDER, ignore_errors=True)


# clean_directories()


def create_directories():
    """Create the pre-trained model cache folder if it does not exist yet."""
    # makedirs(exist_ok=True) avoids the check-then-create race and also
    # creates missing parent folders.
    os.makedirs(PRE_TRAINED_MODELS_FOLDER, exist_ok=True)


create_directories()

# ---------------------------------------------------------------------------
# CLIP backbone
# ---------------------------------------------------------------------------

clip_model_id = "openai/clip-vit-large-patch14"
vision_model = TFCLIPVisionModel.from_pretrained(
    clip_model_id, cache_dir=PRE_TRAINED_MODELS_FOLDER)
vision_processor = CLIPImageProcessor.from_pretrained(clip_model_id)

# ---------------------------------------------------------------------------
# Genre labels
# ---------------------------------------------------------------------------

genre_classes_path = os.path.join(ROOT_FOLDER, 'genre_class.txt')
# TSV headers [id, class]
# NOTE(review): the comment above says TSV, but the separator passed to
# read_csv is a single space -- confirm against genre_class.txt.
genre_classes_df = pd.read_csv(genre_classes_path, sep=' ', header=None)
# print(genre_train_df.iloc[:,1])
# The second column holds the class names, in file order.
genre_classes = genre_classes_df.iloc[:, 1].tolist()
# print(genre_classes)
classes_count = len(genre_classes)

base_learning_rate = 0.0001
steps_per_execution = 200


def create_classification_model():
    """Build and compile the genre classifier on top of the CLIP vision model.

    The network takes a batch of ``(3, 224, 224)`` images, rescales pixel
    values by ``1/255``, runs them through the (non-trainable) CLIP vision
    model and feeds the pooled output through a dropout + dense head that
    ends in a softmax over ``classes_count`` classes.

    Returns:
        A compiled ``tf.keras.Model``.
    """
    # Preprocess images
    inputs = tf.keras.Input(shape=(3, 224, 224))
    rescaling_layer = tf.keras.layers.Rescaling(1.0 / 255, offset=0.0)
    rescaled_input = rescaling_layer(inputs)
    # processed_inputs = vision_processor(images=[inputs], return_tensors="tf")
    # print(inputs)

    # Only the classification head is trained; the backbone stays frozen.
    vision_model.trainable = False
    base_model_output = vision_model(rescaled_input)
    current_layer = base_model_output.pooler_output

    hidden_layers_nodes = [1024]
    for node_count in hidden_layers_nodes:
        hidden_layer = tf.keras.layers.Dense(node_count, activation='relu')
        dropout_layer = tf.keras.layers.Dropout(.2, input_shape=(2,))
        current_layer = hidden_layer(dropout_layer(current_layer))

    prediction_layer = tf.keras.layers.Dense(
        classes_count, activation='softmax')
    outputs = prediction_layer(current_layer)

    model = tf.keras.Model(inputs, outputs)
    model.compile(
        # Used legacy optimizer due to tf 2.11 release issues with MACOS
        # optimizer=tf.keras.optimizers.Adam(learning_rate=base_learning_rate),
        optimizer=tf.keras.optimizers.legacy.Adam(
            learning_rate=base_learning_rate),
        loss=tf.keras.losses.SparseCategoricalCrossentropy(),
        metrics=['accuracy']
        # steps_per_execution=steps_per_execution
    )
    return model


model = create_classification_model()
model.summary()

latest_weights = tf.train.latest_checkpoint(TRAINED_WEIGHTS_FOLDER)
# latest_checkpoint returns None when the folder holds no checkpoint; fail
# with a clear message instead of an opaque error from load_weights(None).
if latest_weights is None:
    raise FileNotFoundError(
        f"No checkpoint found in {TRAINED_WEIGHTS_FOLDER!r}")
model.load_weights(latest_weights)

# ---------------------------------------------------------------------------
# Scratch experiments (disabled), kept for reference.
# ---------------------------------------------------------------------------

# image_path = tf.constant('./hf/mp_image_search/examples/e1.jpeg')
# image = tf.io.read_file("/Users/skoneru/workspace/semantic_search/hf/mp_image_search/examples/e1.jpeg")
# print(image)
# decoded_image = tf.io.decode_image(
#     contents = image,
#     channels = 3,
#     expand_animations = False
# )
# print(decoded_image.shape)
# resized_image = tf.image.resize_with_pad(
#     image = decoded_image,
#     target_height = 224,
#     target_width = 224,
# )
# print(resized_image.shape)
# # constant_new = tf.constant(
# #     resized_image, dtype=tf.float32, shape=(224,224,3), name='input_image'
# # )
# transposed_image = tf.transpose(
#     resized_image)
# print(transposed_image.shape)
# # constant = tf.constant(
# #     transposed_image, value_index=(3,224,224)
# # )
# # constant_new = tf.constant(
# #     transposed_image, dtype=tf.float32, shape=(3,224,224), name='input_image'
# # )
# ndarray = tf.make_ndarray(
#     tf.Variable(transposed_image, shape=(3,224,224))
# )
# # variable = tf.Variable(constant_new)
# # print(constant_new)
# # print(inputs)

# image_path = './hf/mp_image_search/examples/e1.jpeg'
# img = Image.open(image_path).convert('RGB')
# desired_size =224
# old_size = img.size  # old_size[0] is in (width, height) format
# ratio = float(desired_size)/max(old_size)
# new_size = tuple([int(x*ratio) for x in old_size])
# img.thumbnail((desired_size, desired_size), Image.ANTIALIAS)
# new_im = Image.new("RGB", (desired_size, desired_size))
# new_im.paste(img, ((desired_size-new_size[0])//2,
#                    (desired_size-new_size[1])//2))
# # new_im.show()
# np_array = np.array(img)
# print(np_array.shape)
# transposed_np_array = np.transpose(np_array)
# print(transposed_np_array.shape)
# images_list = []
# images_list.append(transposed_np_array)
# np_input = np.asarray(images_list)
# print(np_input.shape)
# result = model.predict(np_input)
# print(result.flatten())

# ---------------------------------------------------------------------------
# Gradio app
# ---------------------------------------------------------------------------


def process_image(input_image):
    """Run the classifier on a single PIL image.

    The image is shrunk to fit inside a 224x224 square (aspect ratio kept),
    centred on a black 224x224 RGB canvas, converted to a channel-first
    array and passed to ``model.predict`` as a batch of one.

    Args:
        input_image: A ``PIL.Image.Image``. It is not modified.

    Returns:
        A flat NumPy array with the model's output for the image.
    """
    desired_size = 224

    # thumbnail() resizes in place, so work on a copy to leave the caller's
    # image untouched. Image.LANCZOS is the same filter as the ANTIALIAS
    # alias, which was removed in Pillow 10.
    resized = input_image.copy()
    resized.thumbnail((desired_size, desired_size), Image.LANCZOS)

    # Centre using the size thumbnail() actually produced: it never upscales
    # and does its own rounding, so a hand-computed size can be off.
    resized_width, resized_height = resized.size
    new_im = Image.new("RGB", (desired_size, desired_size))
    new_im.paste(resized, ((desired_size - resized_width) // 2,
                           (desired_size - resized_height) // 2))

    # Use the padded canvas, not the bare thumbnail, so the array is always
    # 224x224x3 as the model input requires -- also for non-square images.
    np_array = np.array(new_im)

    # np.transpose reverses all axes: (H, W, C) -> (C, W, H).
    # NOTE(review): this also swaps height and width; it is kept because the
    # disabled experiments above prepare inputs the same way, so presumably
    # the trained weights expect it -- confirm against the training pipeline.
    transposed_np_array = np.transpose(np_array)

    # Add the leading batch dimension expected by model.predict.
    np_input = np.asarray([transposed_np_array])
    return model.predict(np_input).flatten()


def predict(input_image):
    """Gradio callback: classify an image given as a NumPy array.

    Args:
        input_image: Image pixels as a NumPy array accepted by
            ``PIL.Image.fromarray``.

    Returns:
        A dict mapping each genre name to its predicted score as a float.
    """
    pil_image_object = Image.fromarray(input_image)
    probs = process_image(pil_image_object)
    return {genre: float(prob) for genre, prob in zip(genre_classes, probs)}


image_path_prefx = os.path.join(ROOT_FOLDER, 'examples')
examples = [f"{image_path_prefx}/e{n}.jpeg" for n in range(4)]
interpretation = 'shap'
title = "MP Art Classifier"
description = "Classifies Art into 10 Genres"
theme = 'grass'

gr.Interface(
    fn=predict,
    inputs=gr.inputs.Image(shape=((512, 512))),
    outputs=gr.outputs.Label(num_top_classes=5),
    title=title,
    examples=examples,
    theme=theme,
    interpretation=interpretation,
    description=description
).launch(debug=True)

# Runs once the (blocking) debug server has stopped: drop the model cache.
clean_directories()