# Hugging Face file-viewer header (page residue, kept as comments so the file parses):
#   Author: gokaygokay
#   Commit: "Create app.py" (4e70ef0, verified)
#   File size: 3.11 kB
import os
import re
import subprocess
import sys

import gradio as gr
import spaces
from PIL import Image
from transformers import AutoProcessor, AutoModelForCausalLM
# Install flash-attn at start-up, before the model is loaded.
# The child environment is the current environment plus the extra flag, so
# PATH, HOME and friends remain visible to pip (passing a bare dict as ``env``
# would replace the whole environment). An argument list with the running
# interpreter avoids going through a shell.
# NOTE(review): FLASH_ATTENTION_SKIP_CUDA_BUILD presumably tells the flash-attn
# build to skip compiling CUDA kernels — confirm against the flash-attn docs.
# check=False keeps the install best-effort: a failure does not abort start-up.
subprocess.run(
    [sys.executable, "-m", "pip", "install", "flash-attn", "--no-build-isolation"],
    env={**os.environ, "FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
    check=False,
)

# Captioning model and its processor, loaded once at import time.
# trust_remote_code=True executes model code shipped in the model repository.
model = AutoModelForCausalLM.from_pretrained('gokaygokay/Florence-2-SD3-Captioner', trust_remote_code=True).to("cuda").eval()
processor = AutoProcessor.from_pretrained('gokaygokay/Florence-2-SD3-Captioner', trust_remote_code=True)

# Markdown heading rendered at the top of the demo page.
TITLE = "# [Florence-2 SD3 Long Captioner](https://huggingface.co/gokaygokay/Florence-2-SD3-Captioner/)"
def modify_caption(caption: str) -> str:
    """
    Remove the first "captured from " / "captured at " phrase from a caption.

    Matching is case-insensitive and is not anchored to the start of the
    string; only the first occurrence found is replaced.

    Args:
        caption (str): A string containing a caption.

    Returns:
        str: The caption with the first matching phrase removed, or the
        caption unchanged when no phrase is present.
    """
    # Phrases to strip, each paired with its replacement text.
    prefix_substrings = [
        ('captured from ', ''),
        ('captured at ', ''),
    ]

    # One capturing group per phrase, so the replacement can be selected by
    # group index rather than by the matched text.
    pattern = '|'.join(
        f'({re.escape(opening)})' for opening, _ in prefix_substrings
    )

    def replace_fn(match):
        # With re.IGNORECASE the matched text can differ in case from the
        # table entry (e.g. "Captured from "), so a lookup keyed on
        # match.group(0) would raise KeyError. lastindex identifies which
        # alternative matched regardless of case.
        return prefix_substrings[match.lastindex - 1][1]

    # Apply the regex to the caption, replacing at most one occurrence.
    return re.sub(pattern, replace_fn, caption, count=1, flags=re.IGNORECASE)
@spaces.GPU
def run_example(image):
    """
    Generate a detailed caption for an image with the Florence-2 captioner.

    Args:
        image: Pixel data accepted by ``PIL.Image.fromarray`` (presumably the
            NumPy array delivered by the Gradio image input — confirm).

    Returns:
        str: The generated description after ``modify_caption`` has been
        applied to it.

    Raises:
        gr.Error: If no image was supplied.
    """
    if image is None:
        # Surface a readable message in the UI instead of an opaque traceback.
        raise gr.Error("Please provide an image to caption.")

    # The task token is used three times: as the prompt prefix, as the task
    # for post-processing, and as the key of the parsed result.
    task_prompt = "<DESCRIPTION>"
    prompt = task_prompt + "Describe this image in great detail."

    image = Image.fromarray(image)
    # Ensure the image is in RGB mode
    if image.mode != "RGB":
        image = image.convert("RGB")

    # Move the inputs to whichever device the model lives on.
    inputs = processor(text=prompt, images=image, return_tensors="pt").to(model.device)
    generated_ids = model.generate(
        input_ids=inputs["input_ids"],
        pixel_values=inputs["pixel_values"],
        max_new_tokens=1024,
        num_beams=3,
    )
    # Special tokens are kept in the decoded text that is handed to
    # post_process_generation.
    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
    parsed_answer = processor.post_process_generation(
        generated_text,
        task=task_prompt,
        image_size=(image.width, image.height),
    )
    return modify_caption(parsed_answer[task_prompt])
# Custom CSS passed to gr.Blocks below: styles the element whose id is
# "output" as a fixed-height, scrollable box with a light border.
# NOTE(review): no component visible in this file sets elem_id="output", so
# this rule may currently match nothing — confirm.
css = """
#output {
height: 500px;
overflow: auto;
border: 1px solid #ccc;
}
"""
# Build the demo page: image input and submit button on the left, caption
# text box on the right, with example images underneath.
with gr.Blocks(css=css) as demo:
    gr.Markdown(TITLE)
    with gr.Tab(label="Florence-2 SD3 Prompts"):
        with gr.Row():
            with gr.Column():
                input_img = gr.Image(label="Input Picture")
                submit_btn = gr.Button(value="Submit")
            with gr.Column():
                output_text = gr.Textbox(label="Output Text")

        # Example images; the paths are given relative to the app.
        # outputs must reference the text box defined above (the name
        # ``output`` does not exist and raised NameError at start-up).
        gr.Examples(
            [["image1.jpg"], ["image2.jpg"], ["image3.png"], ["image4.jpg"], ["image5.jpg"], ["image6.PNG"]],
            inputs=[input_img],
            outputs=[output_text],
            fn=run_example,
            label='Try captioning on examples',
        )

        # Caption the uploaded image when the button is pressed.
        submit_btn.click(run_example, [input_img], [output_text])

demo.launch(debug=True)