Update app.py
app.py
CHANGED
@@ -184,9 +184,17 @@ def generate_caption(text_model, tokenizer, image_features, prompt_str: str, max
 
     return tokenizer.batch_decode(generate_ids, skip_special_tokens=False, clean_up_tokenization_spaces=False)[0].strip()
 
+# Add a dropdown menu for model selection
+model_selection = gr.Dropdown(
+    choices=["llama", "Qwen/Qwen2.5-7B-Instruct"],
+    label="Model Selection",
+    value="llama",
+)
+
+# Update the stream_chat function to accept the selected model
 @spaces.GPU()
 @torch.no_grad()
-def stream_chat(input_image: Image.Image, caption_type: str, caption_tone: str, caption_length: str | int, lens_type: str = "", film_stock: str = "", composition_style: str = "", lighting_aspect: str = "", special_technique: str = "", color_effect: str = "") -> str:
+def stream_chat(input_image: Image.Image, caption_type: str, caption_tone: str, caption_length: str | int, model_selection: str, lens_type: str = "", film_stock: str = "", composition_style: str = "", lighting_aspect: str = "", special_technique: str = "", color_effect: str = "") -> str:
     """
     Generate a caption or style prompt based on the input image and parameters.
     """
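Note: the new model_selection dropdown is constructed at module level, while the page layout is built further down inside `with gr.Blocks(theme="Hev832/Applio", css=css) as demo:`. In Gradio, a component created outside a Blocks context is not attached to any page; one way to wire it in (a minimal sketch against the existing layout, not the only option) is to render it inside that block:

    # Sketch: attach the module-level dropdown to the existing UI.
    # `demo`, `css`, and the theme come from app.py; .render() places a
    # component that was constructed outside the Blocks context.
    with gr.Blocks(theme="Hev832/Applio", css=css) as demo:
        model_selection.render()
        # ... rest of the existing layout ...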
@@ -210,13 +218,14 @@ def stream_chat(input_image: Image.Image, caption_type: str, caption_tone: str,
 
     if caption_type == "style_prompt":
         prompt_str += f" Lens type: {lens_type} ({lens_types_info[lens_type]}). "
-        prompt_str += f"Film stock: {film_stock} ({film_stocks_info[film_stock]}). "
-        prompt_str += f"Composition style: {composition_style} ({composition_styles_info[composition_style]}). "
-        prompt_str += f"Lighting aspect: {lighting_aspect} ({lighting_aspects_info[lighting_aspect]}). "
-        prompt_str += f"Special technique: {special_technique} ({special_techniques_info[special_technique]}). "
-        prompt_str += f"Color effect: {color_effect} ({color_effects_info[color_effect]}). "
 
-
+        prompt_str += f"Film stock: {film_stocks_info[film_stock]}). "
+        prompt_str += f"Composition style: {composition_styles_info[composition_style]}). "
+        prompt_str += f"Lighting aspect: {lighting_aspects_info[lighting_aspect]}). "
+        prompt_str += f"Special technique: {special_techniques_info[special_technique]}). "
+        prompt_str += f"Color effect: {color_effects_info[color_effect]})."
 
+    # Debugging: Print the constructed prompt string
+    print(f"Constructed Prompt: {prompt_str}")
 
     pixel_values = preprocess_image(input_image)
 
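Note: the replacement f-strings keep the closing `). ` from the old pattern but drop the selected value and the opening parenthesis, so the prompt renders as e.g. `Film stock: <description>). `. If the `value (description)` pattern from the lens-type line above is what's intended, the lines would look like this (a sketch assuming the *_info dicts map each option to its description string):

    # Sketch: mirror the lens-type line's "value (description)" pattern.
    prompt_str += f"Film stock: {film_stock} ({film_stocks_info[film_stock]}). "
    prompt_str += f"Composition style: {composition_style} ({composition_styles_info[composition_style]}). "
    prompt_str += f"Lighting aspect: {lighting_aspect} ({lighting_aspects_info[lighting_aspect]}). "
    prompt_str += f"Special technique: {special_technique} ({special_techniques_info[special_technique]}). "
    prompt_str += f"Color effect: {color_effect} ({color_effects_info[color_effect]})."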
@@ -226,6 +235,17 @@ def stream_chat(input_image: Image.Image, caption_type: str, caption_tone: str,
         embedded_images = image_adapter(image_features)
         embedded_images = embedded_images.to('cuda')
 
+    # Load the selected model
+    if model_selection == "llama":
+        text_model = AutoModelForCausalLM.from_pretrained(MODEL_PATH, device_map="auto", torch_dtype=torch.bfloat16)
+    else:
+        text_model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2.5-7B-Instruct", device_map="auto", torch_dtype=torch.bfloat16)
+
+    text_model.eval()
+
+    # Debugging: Print the prompt string before passing to generate_caption
+    print(f"Prompt passed to generate_caption: {prompt_str}")
+
     caption = generate_caption(text_model, tokenizer, embedded_images, prompt_str)
 
     return caption
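Note: stream_chat now calls AutoModelForCausalLM.from_pretrained on every invocation, so each button click reloads a 7B checkpoint from disk (or re-downloads it on a fresh Space). A minimal sketch of caching the loaded models instead — get_text_model and _loaded_models are hypothetical helpers, and MODEL_PATH is the constant already used in app.py:

    import torch
    from transformers import AutoModelForCausalLM

    _loaded_models = {}  # hypothetical cache: checkpoint name -> loaded model

    def get_text_model(model_selection: str):
        # Resolve the dropdown choice to a checkpoint, loading it at most once.
        name = MODEL_PATH if model_selection == "llama" else "Qwen/Qwen2.5-7B-Instruct"
        if name not in _loaded_models:
            model = AutoModelForCausalLM.from_pretrained(name, device_map="auto", torch_dtype=torch.bfloat16)
            model.eval()
            _loaded_models[name] = model
        return _loaded_models[name]

stream_chat would then call text_model = get_text_model(model_selection) in place of the if/else block above.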
@@ -492,7 +512,7 @@ with gr.Blocks(theme="Hev832/Applio", css=css) as demo:
 
     caption_type.change(update_style_options, inputs=[caption_type], outputs=[lens_type, film_stock, composition_style, lighting_aspect, special_technique, color_effect])
 
-    run_button.click(fn=stream_chat, inputs=[input_image, caption_type, caption_tone, caption_length, lens_type, film_stock, composition_style, lighting_aspect, special_technique, color_effect], outputs=[output_caption])
+    run_button.click(fn=stream_chat, inputs=[input_image, caption_type, caption_tone, caption_length, model_selection, lens_type, film_stock, composition_style, lighting_aspect, special_technique, color_effect], outputs=[output_caption])
 
 
 if __name__ == "__main__":
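Note: Gradio passes the components listed in `inputs` to `fn` positionally, so model_selection correctly sits fifth in the click wiring to match its position in the new stream_chat signature; the two lists must stay in sync if further parameters are added.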