# Hugging Face Space status: Running on Zero
import base64 | |
from io import BytesIO | |
import gradio as gr | |
import spaces | |
from llama_cpp import Llama | |
from llama_cpp.llama_chat_format import NanoLlavaChatHandler | |
# Both model artifacts are fetched from the same Hugging Face repository.
_MODEL_REPO_ID = "abetlen/nanollava-gguf"

# Chat handler built from the repository file matching "*mmproj*"; it is
# handed to the Llama instance below so chat requests can include images.
chat_handler = NanoLlavaChatHandler.from_pretrained(
    repo_id=_MODEL_REPO_ID,
    filename="*mmproj*",
)

# Text model loaded from the repository file matching "*text-model*".
llm = Llama.from_pretrained(
    repo_id=_MODEL_REPO_ID,
    filename="*text-model*",
    chat_handler=chat_handler,
    # The context window must be large enough to hold the image embedding
    # in addition to the text prompt.
    n_ctx=2048,
    # NOTE(review): -1 is documented by llama-cpp-python as "offload all
    # layers to the GPU" - confirm against the installed version.
    n_gpu_layers=-1,
    flash_attn=True,
)
def _image_to_data_url(img):
    """Encode a PIL-style image as a base64 JPEG ``data:`` URL.

    JPEG cannot store an alpha channel or a palette, so images in modes
    other than RGB / greyscale (e.g. RGBA or P from a transparent PNG) are
    converted to RGB first; saving them directly raises an ``OSError``.
    """
    if img.mode not in ("RGB", "L"):
        img = img.convert("RGB")
    buffer = BytesIO()
    img.save(buffer, format="JPEG")
    encoded = base64.b64encode(buffer.getvalue()).decode()
    return "data:image/jpeg;base64," + encoded


def answer_question(img, prompt):
    """Stream the model's answer to ``prompt`` about ``img``.

    Args:
        img: Image to ask about (an object with PIL's ``mode``, ``convert``
            and ``save`` interface), or ``None`` when no image was supplied.
        prompt: The user's question or instruction as text.

    Yields:
        The answer text accumulated so far. Each yielded value is the full
        partial answer rather than a single token, because a Gradio
        generator handler replaces the output component's value on every
        yield instead of appending to it.
    """
    # Without an image there is nothing to send to the model; return a
    # readable hint instead of failing on ``None.save``.
    if img is None:
        yield "Please upload an image first."
        return

    data_url = _image_to_data_url(img)
    response = llm.create_chat_completion(
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt},
                    {"type": "image_url", "image_url": data_url},
                ],
            }
        ],
        stream=True,
    )

    answer = ""
    for chunk in response:
        # Not every streamed delta carries text (some have no "content"
        # key); skip those so the UI is only updated when the answer grows.
        piece = chunk["choices"][0]["delta"].get("content")
        if piece:
            answer += piece
            yield answer
with gr.Blocks() as demo:
    # Page heading.
    gr.Markdown("\n# NanoLLaVA\n")

    # First row: the prompt box next to the submit button.
    with gr.Row():
        prompt = gr.Textbox(label="Input", value="Describe this image.", scale=4)
        submit = gr.Button("Submit")

    # Second row: the image upload next to the response area.
    with gr.Row():
        img = gr.Image(type="pil", label="Upload an Image")
        output = gr.TextArea(label="Response")

    # Clicking the button and pressing Enter in the prompt box both run the
    # same handler with the same inputs and output.
    for _trigger in (submit.click, prompt.submit):
        _trigger(fn=answer_question, inputs=[img, prompt], outputs=output)

# Enable the request queue, then start the app.
demo.queue().launch()