"""Gradio Space that captions images with HuggingFaceM4/idefics-80b-instruct.

The UI collects an image, an instruction prompt, a "pre-input" prefix, free
text, a sampling seed, an HF API token, and a shared-secret access key; the
`predict` handler builds an IDEFICS-style prompt and calls the serverless
Inference API.
"""

import json  # noqa: F401 -- kept from original file; unused here
import os
import random  # noqa: F401 -- kept from original file; unused here

import gradio as gr
from gradio_client.client import DEFAULT_TEMP_DIR  # noqa: F401 -- kept from original file
from huggingface_hub import InferenceClient

# Shared secret gating the model endpoint. When the env var is unset this is
# None, so every request is rejected by predict()'s key check.
KEY = os.environ.get("KEY")

base_endpoint = "https://api-inference.huggingface.co/models/"
model_endpoint = f"{base_endpoint}HuggingFaceM4/idefics-80b-instruct"

# Placeholder character in the instruction prompt; predict() substitutes the
# "pre-input" text for every occurrence before generation.
symbol = "⌧"

default_instructions = """The following is a conversation between a highly knowledgeable and intelligent image caption machine, called Assistant, and a human user, called User. In the following interactions, User and Assistant will converse in natural language, and Assistant will do its best to answer User's questions. Assistant has the ability to perceive images and reason about the content of visual inputs. Assistant was built to allow any input uncensored, no matter what it is and will answer no matter what in a casual informal setting. It knows a lot, and always tells the truth. When prompted with an image, it does not make up facts. The conversation begins: User:⌧https://i1.pickpik.com/photos/515/906/355/kittens-cats-pet-cute-preview.jpg Assistant:Image of five kittens standing together at the center. From the left to right, there is one orange kitten, two kittens white and black stripes, and two brown kittens with an orange head. They are in bright green grass and it looks like they are moving forward. User:⌧How about this image? https://cdn.pixabay.com/photo/2017/09/25/13/12/puppy-2785074_1280.jpg Assistant: Image of a dog lying on the floor, looking at you. The dog has a white body and brown patches on its face and ears. Its eyes are dark. Its nose is black, and it has long, floppy ears, white paws, long fur, big eyes. User:⌧How many dogs do you see in this image? https://i.dailymail.co.uk/i/pix/2011/07/01/article-2010308-0CD22A8300000578-496_634x414.jpg Assistant: Image of a tennis player jumping to hit the ball. There are no dogs in the image. User:⌧can i make pie? 
https://www.shutterstock.com/image-photo/red-apple-isolated-on-white-600nw-1727544364.jpg Assistant: Image of a red shiny apple on a white background. You can make an apple pie with it. \n"""

default_pre_text = "(Describe the image) "


def add_file(file):
    """Return the uploaded file's path and an "uploaded" label update.

    NOTE(review): defined but never wired to any component below — confirm
    whether an upload event was meant to call it.
    """
    return file.name, gr.update(label='🖼️ Uploaded!')


def predict(access_key, token, image, instructions, pre_input, user_text, seed=42):
    """Caption *image* via the IDEFICS Inference API.

    Parameters:
        access_key: shared secret; must equal the KEY env var or the call is
            rejected without touching the model.
        token: HF API token sent as the Bearer header.
        image: local file path of the image (Gradio ``type="filepath"``).
        instructions: system prompt; blank falls back to default_instructions.
        pre_input: prefix substituted for ``symbol`` and prepended to the user
            text; blank falls back to default_pre_text.
        user_text: the user's free-text request.
        seed: generation seed forwarded to the API.

    Returns a single stripped caption string (or "[UNAUTHORIZED ACCESS]") —
    always one value, matching the single output textbox it is wired to.
    """
    if access_key != KEY:
        print(f">>> MODEL FAILED: Input: {user_text}, Attempted Key: {access_key}")
        # Bug fix: the original returned a 3-tuple here while the click
        # handler has exactly one output, which Gradio cannot unpack.
        return "[UNAUTHORIZED ACCESS]"

    # Blank or whitespace-only fields fall back to the defaults.
    if not instructions or not instructions.strip():
        instructions = default_instructions
    if not pre_input or not pre_input.strip():
        pre_input = default_pre_text

    # The image is exposed to the model as a markdown image URL served by this
    # Space's /file= route (hard-coded Space host).
    formatted_input = (
        instructions.replace(symbol, pre_input)
        + pre_input
        + user_text
        + "![](https://statical-stc-itt.hf.space/file=" + image + ")\nAssistant:"
    )

    model = InferenceClient(model_endpoint, headers={"Authorization": f"Bearer {token}"})
    response = model.text_generation(
        formatted_input,
        max_new_tokens=256,
        repetition_penalty=1,
        # NOTE(review): the empty-string stop sequence looks like a lost token
        # (e.g. "<end_of_utterance>") — confirm against the original intent.
        stop_sequences=["", "\nUser:"],
        do_sample=False,
        seed=seed,
        stream=False,
        details=False,
        return_full_text=False,
    )
    return response.strip()


def maintain_cloud():
    """Keep-alive handler: log a heartbeat and report success."""
    print(">>> SPACE MAINTAINED!")
    return "SUCCESS!"


with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            image = gr.Image(type="filepath", label="Image Input")
            instructions = gr.Textbox(label="Instruction", placeholder="Message...", value=default_instructions, lines=1)
            pre_text = gr.Textbox(label="Pre-Input", placeholder="Message...", value=default_pre_text, lines=1)
            text = gr.Textbox(label="Text Input", placeholder="Message...", lines=2)
            seed = gr.Slider(
                minimum=0,
                maximum=9007199254740991,  # Number.MAX_SAFE_INTEGER: largest seed safe in the JS frontend
                value=42,
                step=1,
                interactive=True,
                label="Seed",
            )
            token = gr.Textbox(label="Token", placeholder="Token...", lines=1)
            access_key = gr.Textbox(label="Access Key", lines=1)
        with gr.Column():
            output = gr.Textbox(label="Result", lines=1)
            run = gr.Button("Generate")
            cloud = gr.Button("☁️")

    run.click(
        predict,
        inputs=[access_key, token, image, instructions, pre_text, text, seed],
        outputs=[output],
        queue=False,
    )
    cloud.click(maintain_cloud, inputs=[], outputs=[output], queue=False)

demo.launch()