# -*- coding: utf-8 -*-
"""mp_art_classification.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1mCMy50B9xHW2WdGNlxTq-wObAe-eMsQ5
"""
import os
import shutil

import numpy as np
import pandas as pd
import tensorflow as tf
from PIL import Image
if 'workspace/semantic_search' in os.getcwd():
    ROOT_FOLDER = os.path.join("./hf", "mp_art_classification")
else:
    ROOT_FOLDER = './'

PRE_TRAINED_MODELS_FOLDER = os.path.join(ROOT_FOLDER, "pre_trained_models")
TRAINED_WEIGHTS_FOLDER = os.path.join(ROOT_FOLDER, "trained_weights")
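# PRE_TRAINED_MODELS_FOLDER caches the downloaded CLIP weights;
# TRAINED_WEIGHTS_FOLDER holds the TensorFlow checkpoint for the
# classification head, which this script only loads, never trains.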
def clean_directories():
    shutil.rmtree(PRE_TRAINED_MODELS_FOLDER, ignore_errors=True)

# clean_directories()

def create_directories():
    if not os.path.exists(PRE_TRAINED_MODELS_FOLDER):
        os.mkdir(PRE_TRAINED_MODELS_FOLDER)

create_directories()
from transformers import CLIPImageProcessor, TFCLIPVisionModel

clip_model_id = "openai/clip-vit-large-patch14"
vision_model = TFCLIPVisionModel.from_pretrained(
    clip_model_id,
    cache_dir=PRE_TRAINED_MODELS_FOLDER)
vision_processor = CLIPImageProcessor.from_pretrained(clip_model_id)
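# Note: vision_processor is loaded for reference only; the app resizes,
# pads, and rescales images by hand (see process_image below) instead of
# calling the processor, so CLIP's usual mean/std normalization is skipped.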
genre_classes_path = os.path.join(ROOT_FOLDER, 'genre_class.txt')
# Space-separated file with columns [id, class] and no header row.
genre_classes_df = pd.read_csv(genre_classes_path, sep=' ', header=None)
genre_classes = genre_classes_df[1].tolist()
classes_count = len(genre_classes)
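# Illustrative genre_class.txt contents (the genre names below are made
# up; the real file ships alongside this script):
#     0 abstract
#     1 landscape
#     ...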
base_learning_rate = 0.0001
steps_per_execution = 200
def create_classification_model():
    # Inputs are channels-first raw-pixel images; rescale to [0, 1].
    inputs = tf.keras.Input(shape=(3, 224, 224))
    rescaling_layer = tf.keras.layers.Rescaling(1.0 / 255, offset=0.0)
    rescaled_input = rescaling_layer(inputs)
    # processed_inputs = vision_processor(images=[inputs], return_tensors="tf")

    # Freeze the CLIP encoder; only the head below is trainable.
    vision_model.trainable = False
    base_model_output = vision_model(rescaled_input)
    current_layer = base_model_output.pooler_output

    hidden_layers_nodes = [1024]
    for node_count in hidden_layers_nodes:
        hidden_layer = tf.keras.layers.Dense(node_count, activation='relu')
        dropout_layer = tf.keras.layers.Dropout(.2)
        current_layer = hidden_layer(dropout_layer(current_layer))

    prediction_layer = tf.keras.layers.Dense(
        classes_count, activation='softmax')
    outputs = prediction_layer(current_layer)

    model = tf.keras.Model(inputs, outputs)
    model.compile(
        # Use the legacy optimizer due to TF 2.11 release issues on macOS.
        # optimizer=tf.keras.optimizers.Adam(learning_rate=base_learning_rate),
        optimizer=tf.keras.optimizers.legacy.Adam(
            learning_rate=base_learning_rate),
        loss=tf.keras.losses.SparseCategoricalCrossentropy(),
        metrics=['accuracy']
        # steps_per_execution=steps_per_execution
    )
    return model
model = create_classification_model()
model.summary()
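# Illustrative shape check (kept commented out so the app starts fast):
# dummy = np.zeros((1, 3, 224, 224), dtype=np.float32)
# assert model.predict(dummy).shape == (1, classes_count)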
latest_weights = tf.train.latest_checkpoint(TRAINED_WEIGHTS_FOLDER)
if latest_weights is None:
    raise FileNotFoundError(
        f"No checkpoint found in {TRAINED_WEIGHTS_FOLDER}")
model.load_weights(latest_weights)
import gradio as gr

def process_image(input_image):
    # Resize with preserved aspect ratio, then pad to a 224x224 square.
    desired_size = 224
    old_size = input_image.size  # (width, height)
    ratio = float(desired_size) / max(old_size)
    new_size = tuple([int(x * ratio) for x in old_size])
    # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
    input_image.thumbnail((desired_size, desired_size), Image.LANCZOS)
    new_im = Image.new("RGB", (desired_size, desired_size))
    new_im.paste(input_image, ((desired_size - new_size[0]) // 2,
                               (desired_size - new_size[1]) // 2))
    # Use the padded image (not the bare thumbnail) so the array is always
    # 224x224; transpose to the channels-first layout the model expects.
    np_array = np.array(new_im)
    transposed_np_array = np.transpose(np_array)
    np_input = np.asarray([transposed_np_array])
    return model.predict(np_input).flatten()
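# Illustrative local usage (the path below is hypothetical):
# probs = process_image(Image.open("examples/e0.jpeg").convert("RGB"))
# print(sorted(zip(genre_classes, probs), key=lambda p: -p[1])[:3])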
def predict(input_image):
    # Gradio hands the image over as a numpy array; convert to PIL first.
    pil_image_object = Image.fromarray(input_image)
    probs = process_image(pil_image_object)
    return {genre_classes[i]: float(probs[i]) for i in range(len(genre_classes))}
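# The returned dict maps every genre to its softmax probability;
# gr.outputs.Label sorts it and shows the top num_top_classes entries.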
image_path_prefix = os.path.join(ROOT_FOLDER, 'examples')
examples = [f"{image_path_prefix}/e{n}.jpeg" for n in range(4)]
interpretation = 'shap'
title = "MP Art Classifier"
description = "<b>Classifies Art into 10 Genres</b>"
theme = 'grass'
gr.Interface(
    fn=predict,
    inputs=gr.inputs.Image(shape=(512, 512)),
    outputs=gr.outputs.Label(num_top_classes=5),
    title=title,
    examples=examples,
    theme=theme,
    interpretation=interpretation,
    description=description
).launch(debug=True)
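# launch(debug=True) blocks until the server stops, so the cache cleanup
# below only runs once the app shuts down.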
clean_directories()