Spaces:

K00B404
/

Image_to_role

Runtime error

App Files Files Community

K00B404 committed on Feb 23

Commit

670b76a

verified ·

1 Parent(s): 26522e0

Update app.py

Browse files

Files changed (1) hide show

app.py +83 -38

app.py CHANGED Viewed

@@ -1,45 +1,83 @@
 import gradio as gr
-from transformers import AutoProcessor, AutoModelForVision2Seq
 import torch
 from PIL import Image
-# Load NanoLLaVA model and processor
-model_name = "facebook/nano-llava"
-processor = AutoProcessor.from_pretrained(model_name)
-model = AutoModelForVision2Seq.from_pretrained(model_name)
-def generate_caption(image):
-    # Process the image
-    inputs = processor(images=image, text="Describe this image in detail", return_tensors="pt")
-    # Generate caption
-    outputs = model.generate(
-        **inputs,
-        max_length=100,
-        num_beams=4,
-        temperature=0.8
     )
-    # Decode the caption
-    caption = processor.decode(outputs[0], skip_special_tokens=True)
     return caption
 def create_persona(caption):
-    # Template for transforming caption into a persona
-    persona_prompt = f"""You are a character based on this description: {caption}
 Role: An entity exactly as described in the image
 Background: Your appearance and characteristics match the image description
 Personality: Reflect the mood, style, and elements captured in the image
 Goal: Interact authentically based on your visual characteristics
-Please stay in character and respond as this entity would, incorporating visual elements from your description into your responses."""
     return persona_prompt
-def process_image_to_persona(image):
     # Generate caption from image
-    caption = generate_caption(image)
     # Transform caption into persona
     persona = create_persona(caption)
@@ -47,26 +85,33 @@ def process_image_to_persona(image):
     return caption, persona
 # Create Gradio interface
-with gr.Blocks() as app:
-    gr.Markdown("# Image to Chatbot Persona Generator")
-    gr.Markdown("Upload an image of a character to generate a persona for a chatbot based on the image.")
-    with gr.Row():
-        image_input = gr.Image(type="pil", label="Upload Character Image")
-    with gr.Row():
-        generate_button = gr.Button("Generate Persona")
-    with gr.Row():
-        caption_output = gr.Textbox(label="Generated Caption", lines=3)
-        persona_output = gr.Textbox(label="Chatbot Persona", lines=10)
-    generate_button.click(
-        fn=process_image_to_persona,
-        inputs=[image_input],
-        outputs=[caption_output, persona_output]
-    )
 # Launch the app
 if __name__ == "__main__":
     app.launch(share=True)

 import gradio as gr
 import torch
+import transformers
+from transformers import AutoModelForCausalLM, AutoTokenizer
 from PIL import Image
+import warnings
+# Disable warnings and progress bars: keep only error-level transformers
+# logs, turn off its progress bars, and ignore every Python warning
+# process-wide (this also hides warnings raised by other libraries).
+transformers.logging.set_verbosity_error()
+transformers.logging.disable_progress_bar()
+warnings.filterwarnings('ignore')
+# Initialize model and tokenizer
+def load_model(device='cpu'):
+    model = AutoModelForCausalLM.from_pretrained(
+        'qnguyen3/nanoLLaVA',
+        torch_dtype=torch.float16,
+        device_map='auto',
+        trust_remote_code=True
+    )
+    tokenizer = AutoTokenizer.from_pretrained(
+        'qnguyen3/nanoLLaVA',
+        trust_remote_code=True
+    )
+    return model, tokenizer
+def generate_caption(image, model, tokenizer):
+    # Prepare the prompt
+    prompt = "Describe this image in detail"
+    messages = [
+        {"role": "system", "content": "Answer the question"},
+        {"role": "user", "content": f'<image>\n{prompt}'}
+    ]
+    # Apply chat template
+    text = tokenizer.apply_chat_template(
+        messages,
+        tokenize=False,
+        add_generation_prompt=True
     )
+    # Process text and image
+    text_chunks = [tokenizer(chunk).input_ids for chunk in text.split('<image>')]
+    input_ids = torch.tensor(text_chunks[0] + [-200] + text_chunks[1], dtype=torch.long).unsqueeze(0)
+    image_tensor = model.process_images([image], model.config).to(dtype=model.dtype)
+    # Generate caption
+    output_ids = model.generate(
+        input_ids,
+        images=image_tensor,
+        max_new_tokens=2048,
+        use_cache=True
+    )[0]
+    # Decode the output
+    caption = tokenizer.decode(output_ids[input_ids.shape[1]:], skip_special_tokens=True).strip()
     return caption
 def create_persona(caption):
+    """Wrap an image caption in a system prompt describing a persona.
+
+    Args:
+        caption: Text description of the image; interpolated verbatim.
+
+    Returns:
+        A string delimited by ``<|im_start|>system`` and ``<|im_end|>``
+        markers that tells a chatbot to stay in character as the
+        described entity.
+    """
+    persona_prompt = f"""<|im_start|>system
+You are a character based on this description: {caption}
 Role: An entity exactly as described in the image
 Background: Your appearance and characteristics match the image description
 Personality: Reflect the mood, style, and elements captured in the image
 Goal: Interact authentically based on your visual characteristics
+Please stay in character and respond as this entity would, incorporating visual elements from your description into your responses.<|im_end|>"""
     return persona_prompt
+def process_image_to_persona(image, model, tokenizer):
+    """Turn an uploaded image into a caption and a chatbot persona prompt.
+
+    Args:
+        image: The uploaded image. A PIL image is used as-is; anything
+            else is passed to ``Image.fromarray`` (so presumably a NumPy
+            array -- TODO confirm against the Gradio input type).
+        model: Captioning model forwarded to ``generate_caption``.
+        tokenizer: Tokenizer forwarded to ``generate_caption``.
+
+    Returns:
+        A ``(caption, persona)`` tuple of strings. When ``image`` is
+        ``None`` the tuple is ``("Please upload an image.", "")``.
+    """
+    # Nothing uploaded: show a hint in the caption box, leave persona empty
+    if image is None:
+        return "Please upload an image.", ""
+    # Convert to PIL Image if needed
+    if not isinstance(image, Image.Image):
+        image = Image.fromarray(image)
     # Generate caption from image
+    caption = generate_caption(image, model, tokenizer)
     # Transform caption into persona
     persona = create_persona(caption)
     return caption, persona
 # Create Gradio interface
+def create_interface():
+    # Load model and tokenizer
+    model, tokenizer = load_model()
+    with gr.Blocks() as app:
+        gr.Markdown("# Image to Chatbot Persona Generator")
+        gr.Markdown("Upload an image of a character to generate a persona for a chatbot based on the image.")
+        with gr.Row():
+            image_input = gr.Image(type="pil", label="Upload Character Image")
+        with gr.Row():
+            generate_button = gr.Button("Generate Persona")
+        with gr.Row():
+            caption_output = gr.Textbox(label="Generated Caption", lines=3)
+            persona_output = gr.Textbox(label="Chatbot Persona", lines=10)
+        generate_button.click(
+            fn=lambda img: process_image_to_persona(img, model, tokenizer),
+            inputs=[image_input],
+            outputs=[caption_output, persona_output]
+        )
+    return app
 # Launch the app
 if __name__ == "__main__":
+    # Build the UI (which also loads the model) only when run as a script
+    app = create_interface()
+    # NOTE(review): share=True requests a public share link; presumably
+    # unnecessary when the app is hosted as a Space -- confirm.
     app.launch(share=True)