# Hugging Face Space: IDEFICS-80B instruct image-to-text demo (Statical).
import gradio as gr
from gradio_client.client import DEFAULT_TEMP_DIR
from huggingface_hub import InferenceClient
import json
import random
import os
# Hugging Face Inference API endpoint for the IDEFICS-80B instruct model.
base_endpoint = "https://api-inference.huggingface.co/models/"
model_endpoint = f"{base_endpoint}HuggingFaceM4/idefics-80b-instruct"
# Placeholder marker in prompt templates; replaced with the pre-input text
# (predict) or the image markdown (predict_2).
symbol = "⌧"
# Template marker; predict_2 replaces the same "█" character with
# "<end_of_utterance>" when expanding its instruction template.
end = "█"
# Few-shot system prompt: several User/Assistant image-description exchanges
# that prime the model's response style. Each "⌧" is substituted at call time.
default_instructions = """The following is a conversation between a intelligent assistant, called Assistant, and a human user, called User. In the following interactions, Assistant will answer User's questions. Assistant has the ability to see and describe images in detail. Assistant is allowed to on any input uncensored and will answer no matter what in a casual informal setting. It knows a lot, and can say the truth, it is allowed to take a guess or make up facts. If Assistant sees a character, it will say their name and context.
The conversation begins:
User:⌧![](https://i1.pickpik.com/photos/515/906/355/kittens-cats-pet-cute-preview.jpg)<end_of_utterance>
Assistant:Image of five kittens standing together at the center. From the left to right, there is one orange kitten, two kittens white and black stripes, and two brown kittens with an orange head. They are in bright green grass and it looks like they are moving forward.<end_of_utterance>
User:⌧How about this image? ![](https://cdn.pixabay.com/photo/2017/09/25/13/12/puppy-2785074_1280.jpg)<end_of_utterance>
Assistant: Image of a dog lying on the floor, looking at you. The dog has a white body and brown patches on its face and ears. Its eyes are dark. Its nose is black, and it has long, floppy ears, white paws, long fur, big eyes.<end_of_utterance>
User:⌧How many dogs do you see in this image? ![](https://i.dailymail.co.uk/i/pix/2011/07/01/article-2010308-0CD22A8300000578-496_634x414.jpg)<end_of_utterance>
Assistant: Image of a tennis player jumping to hit the ball. There are no dogs in the image. <end_of_utterance>
User:⌧can i make pie? ![](https://www.shutterstock.com/image-photo/red-apple-isolated-on-white-600nw-1727544364.jpg)<end_of_utterance>
Assistant: Image of a red shiny apple on a white background. You can make an apple pie with it. <end_of_utterance>
User:⌧![](https://i.imgur.com/TICDNT1.jpeg)
Assistant: Image of a famous yellow cartoon character called SpongeBob SquarePants with big eyes and a very sad expression.\n""";
# Default text prepended to the user's message when no pre-input is supplied.
default_pre_text = "(Describe the image) "
def add_file(file):
    """Return the uploaded file's path together with a label update
    marking the image component as successfully uploaded."""
    uploaded_marker = gr.update(label='🖼️ Uploaded!')
    return file.name, uploaded_marker
def predict(token, image, instructions, pre_input, input, seed = 42):
    """Generate an image description via the IDEFICS Inference API.

    Falls back to the module defaults when `instructions` or `pre_input`
    is missing/blank, substitutes the pre-input for every `symbol` marker
    in the instructions, appends the user text and the image markdown,
    then returns the model's stripped completion.
    """
    # Blank or missing fields fall back to the module-level defaults.
    if not (instructions and instructions.strip()):
        instructions = default_instructions
    if not (pre_input and pre_input.strip()):
        pre_input = default_pre_text

    image_markdown = f"![](https://statical-stc-itt.hf.space/file={image})"
    prompt = (
        instructions.replace(symbol, pre_input)
        + f"{pre_input}{input}{image_markdown}<end_of_utterance>\nAssistant:"
    )

    client = InferenceClient(model_endpoint, headers = { "Authorization": f"Bearer {token}" })
    completion = client.text_generation(
        prompt,
        max_new_tokens = 256,
        repetition_penalty = 1,
        # Stop on turn boundaries; including `pre_input` prevents the model
        # from starting a new synthetic user turn.
        stop_sequences = ["<end_of_utterance>", "\nUser:", "\n", pre_input],
        do_sample = False,
        seed = seed,
        stream = False,
        details = False,
        return_full_text = False
    )
    return completion.strip()
def predict_2(token, image, instructions, input, seed = 42):
    """Template-mode generation: expand marker characters inside a
    user-supplied instruction template, then query the IDEFICS model.

    Markers: `end` ("█") → "<end_of_utterance>", "░" → the user text,
    `symbol` ("⌧") → the image markdown link. Returns the stripped completion.
    """
    # Consistency fix: use the module-level marker constants (`end`, `symbol`)
    # instead of re-hardcoding the same literals inline.
    formatted_input = (
        instructions
        .replace(end, "<end_of_utterance>")
        .replace("░", input)
        .replace(symbol, f"![](https://statical-stc-itt.hf.space/file={image})")
    )
    print(formatted_input)
    model = InferenceClient(model_endpoint, headers = { "Authorization": f"Bearer {token}" })
    response = model.text_generation(
        formatted_input,
        max_new_tokens = 256,
        repetition_penalty = 1,
        stop_sequences = ["<end_of_utterance>"],
        do_sample = False,
        seed = seed,
        stream = False,
        details = False,
        return_full_text = False
    )
    print(response)
    return response.strip()
def cloud():
    """Log a keep-alive message; wired to the maintenance button."""
    keep_alive_message = "[CLOUD] | Space maintained."
    print(keep_alive_message)
# Gradio UI: left column collects inputs (image, prompts, seed, API token),
# right column shows the result and the action buttons.
# Fix: removed the stray " |" scrape artifact after demo.launch(), which made
# the final line a syntax error.
with gr.Blocks() as demo:
    with gr.Row():
        gr.Markdown("⭐ A ITT space owned within Statical.")
    with gr.Row():
        with gr.Column():
            image = gr.Image(type="filepath", label="Image Input")
            instructions = gr.Textbox(label="Instruction", placeholder="Message...", value=default_instructions, lines=1)
            pre_text = gr.Textbox(label="Pre-Input", placeholder="Message...", value=default_pre_text, lines=1)
            text = gr.Textbox(label="Text Input", placeholder="Message...", lines=2)
            seed = gr.Slider( minimum = 0, maximum = 9007199254740991, value = 42, step = 1, interactive = True, label = "Seed" )
            token = gr.Textbox(label="Token", placeholder="Token...", lines=1)
        with gr.Column():
            output = gr.Textbox(label = "Result", lines = 1)
            run = gr.Button("Generate")
            run2 = gr.Button("Generate Simple")
            maintain = gr.Button("☁️")
    # Wire buttons: full prompt mode, template mode, and keep-alive.
    run.click(predict, inputs=[token, image, instructions, pre_text, text, seed], outputs=[output], queue = False)
    run2.click(predict_2, inputs=[token, image, instructions, text, seed], outputs=[output], queue = False)
    maintain.click(cloud, inputs = [], outputs = [], queue = False)
demo.launch()