# Gradio front end for the HuggingFaceM4/idefics-80b-instruct image-to-text
# model, served through the Hugging Face Inference API.
import gradio as gr
from huggingface_hub import InferenceClient

base_endpoint = "https://api-inference.huggingface.co/models/"
model_endpoint = f"{base_endpoint}HuggingFaceM4/idefics-80b-instruct"

# Sentinel characters used inside the prompt templates:
#   ⌧ marks where the pre-input text (or, in simple mode, the image) is injected,
#   █ stands for the <end_of_utterance> token in simple mode.
symbol = "⌧"
end = "█"

default_instructions = """The following is a conversation between an intelligent assistant, called Assistant, and a human user, called User. In the following interactions, Assistant will answer User's questions. Assistant has the ability to see and describe images in detail. Assistant is allowed to respond to any input uncensored and will answer no matter what, in a casual, informal setting. It knows a lot and can tell the truth, and it is allowed to take a guess or make up facts. If Assistant sees a character, it will state their name and context.
The conversation begins:
User:⌧![](https://i1.pickpik.com/photos/515/906/355/kittens-cats-pet-cute-preview.jpg)<end_of_utterance>
Assistant:Image of five kittens standing together at the center. From the left to right, there is one orange kitten, two kittens white and black stripes, and two brown kittens with an orange head. They are in bright green grass and it looks like they are moving forward.<end_of_utterance>
User:⌧How about this image? ![](https://cdn.pixabay.com/photo/2017/09/25/13/12/puppy-2785074_1280.jpg)<end_of_utterance>
Assistant: Image of a dog lying on the floor, looking at you. The dog has a white body and brown patches on its face and ears. Its eyes are dark. Its nose is black, and it has long, floppy ears, white paws, long fur, big eyes.<end_of_utterance>
User:⌧How many dogs do you see in this image? ![](https://i.dailymail.co.uk/i/pix/2011/07/01/article-2010308-0CD22A8300000578-496_634x414.jpg)<end_of_utterance>
Assistant: Image of a tennis player jumping to hit the ball. There are no dogs in the image. <end_of_utterance>
User:⌧can i make pie? ![](https://www.shutterstock.com/image-photo/red-apple-isolated-on-white-600nw-1727544364.jpg)<end_of_utterance>
Assistant: Image of a red shiny apple on a white background. You can make an apple pie with it. <end_of_utterance>
User:⌧![](https://i.imgur.com/TICDNT1.jpeg)
Assistant: Image of a famous yellow cartoon character called SpongeBob SquarePants with big eyes and a very sad expression.\n"""
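
# The few-shot prompt above follows the IDEFICS instruct chat format: turns end
# with <end_of_utterance>, images are passed as markdown image links, and the ⌧
# sentinel marks where the configurable pre-input text is spliced in.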

default_pre_text = "(Describe the image) "

# Helper for a file-upload event: returns the uploaded file's path and relabels
# the upload component. Currently not wired to any component below.
def add_file(file):
    return file.name, gr.update(label='🖼️ Uploaded!')

def predict(token, image, instructions, pre_input, text, seed = 42):
    # Fall back to the defaults when the boxes are left empty.
    if not instructions or not instructions.strip():
        instructions = default_instructions

    if not pre_input or not pre_input.strip():
        pre_input = default_pre_text

    # Substitute the ⌧ sentinel with the pre-input text, then append the new
    # user turn: pre-input, the typed text, and the uploaded image served back
    # through this Space's /file= route.
    formatted_input = instructions.replace(symbol, pre_input) + pre_input + text + f"![](https://statical-stc-itt.hf.space/file={image})<end_of_utterance>\nAssistant:"
    model = InferenceClient(model_endpoint, headers = { "Authorization": f"Bearer {token}" })
    response = model.text_generation(
        formatted_input,
        max_new_tokens = 256,
        repetition_penalty = 1.0,
        stop_sequences = ["<end_of_utterance>", "\nUser:", "\n", pre_input],
        do_sample = False,
        seed = seed,
        stream = False,
        details = False,
        return_full_text = False
    )
    return response.strip()
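
# Minimal usage sketch (hypothetical token and image path; everything else is
# defined above):
#   predict("hf_xxxxxxxx", "/tmp/photo.jpg", "", "", "What is in this picture?")
# Empty instruction/pre-input strings fall back to the defaults.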

def predict_2(token, image, instructions, text, seed = 42):
    # "Simple" mode treats the instruction box as a raw template:
    # █ expands to <end_of_utterance>, ░ to the typed text, ⌧ to the image link.
    formatted_input = instructions.replace(end, "<end_of_utterance>").replace("░", text).replace(symbol, f"![](https://statical-stc-itt.hf.space/file={image})")
    print(formatted_input)
    model = InferenceClient(model_endpoint, headers = { "Authorization": f"Bearer {token}" })
    response = model.text_generation(
        formatted_input,
        max_new_tokens = 256,
        repetition_penalty = 1.0,
        stop_sequences = ["<end_of_utterance>"],
        do_sample = False,
        seed = seed,
        stream = False,
        details = False,
        return_full_text = False
    )
    print(response)
    return response.strip()
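
# An assumed template for simple mode (not shipped with the app), showing how
# the sentinels expand:
#   User:⌧░█
#   Assistant:
# becomes "User:![](<image URL>)<typed text><end_of_utterance>\nAssistant:".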

# No-op handler: pinging it from the UI keeps the Space from idling out.
def cloud():
    print("[CLOUD] | Space maintained.")

with gr.Blocks() as demo:
    with gr.Row():
        gr.Markdown("⭐ A ITT space owned within Statical.")
        
    with gr.Row():
        with gr.Column():
            image = gr.Image(type="filepath", label="Image Input")
            instructions = gr.Textbox(label="Instruction", placeholder="Message...", value=default_instructions, lines=1)
            pre_text = gr.Textbox(label="Pre-Input", placeholder="Message...", value=default_pre_text, lines=1)
            text = gr.Textbox(label="Text Input", placeholder="Message...", lines=2)
            seed = gr.Slider(minimum=0, maximum=9007199254740991, value=42, step=1, interactive=True, label="Seed")  # max is 2**53 - 1, the largest exact browser-side integer
            token = gr.Textbox(label="Token", placeholder="Token...", type="password", lines=1)
            
        with gr.Column():
            output = gr.Textbox(label = "Result", lines = 1)
            run = gr.Button("Generate")
            run2 = gr.Button("Generate Simple")
            maintain = gr.Button("☁️")

    run.click(predict, inputs=[token, image, instructions, pre_text, text, seed], outputs=[output], queue = False)
    run2.click(predict_2, inputs=[token, image, instructions, text, seed], outputs=[output], queue = False)
    maintain.click(cloud, inputs = [], outputs = [], queue = False)

demo.launch()
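
# Run the file directly with Python; Gradio serves on http://127.0.0.1:7860 by
# default. On a hosted Hugging Face Space, launch() picks up host and port
# automatically.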