Spaces:

ChandraP12330
/

image-caption

Runtime error

App Files Community

ChandraP12330 committed on Apr 27, 2024

Commit

abbb301

verified ·

1 Parent(s): dff681a

Update app.py

Browse files

Files changed (1) hide show

app.py +40 -0

app.py CHANGED Viewed

@@ -38,6 +38,40 @@ model = CLIPModel.from_pretrained(model_id)
 from transformers import pipeline
 image_to_text = pipeline("image-to-text", model="Salesforce/blip-image-captioning-large")
 # Title
 st.title("Image Caption Surveillance")
@@ -65,9 +99,15 @@ if image_url:
         caption = image_to_text(image_url, max_new_tokens=200)
         initial_caption= caption[0]['generated_text']
         ##Output
         st.write("context: ", context)
         st.write("initial_caption: ", initial_caption)
     except Exception as e:
         st.error(f"Error: {e}")
 else:

 from transformers import pipeline
 image_to_text = pipeline("image-to-text", model="Salesforce/blip-image-captioning-large")
+## LLM setup: Gemini chat model plus the prompt template used to refine the initial caption
+GOOGLE_API_KEY = st.text_input("Please enter your GOOGLE GEMINI API KEY", type="password")  # key comes from a password-type text input in the UI
+os.environ['GOOGLE_API_KEY'] = GOOGLE_API_KEY  # also exported through the environment; NOTE(review): `os` is not imported in the visible lines - confirm it is imported earlier in app.py
+from langchain_google_genai import ChatGoogleGenerativeAI  # chat model wrapper instantiated below as `llm`
+from langchain.prompts import PromptTemplate  # used below to build `prompt_template`
+from google.generativeai.types.safety_types import HarmBlockThreshold, HarmCategory  # enums used as keys/values of safety_settings
+llm = ChatGoogleGenerativeAI(model="gemini-1.0-pro-latest", google_api_key=GOOGLE_API_KEY, temperature=0.2, top_p=1, top_k=1,
+                             safety_settings={  # every category listed here is set to BLOCK_NONE
+        HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE,
+        HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
+        HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE,
+        HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE,
+    },
+      )  # NOTE(review): built at script top level with whatever the input currently holds - presumably an empty string until a key is typed; confirm the constructor tolerates that
+template="""You are an advanced image captioning AI assistant for surveillance related images.
+Your task is to refine and improve an initial image caption using relevant contextual information provided.
+You will receive two inputs:
+Input 1: {initial_caption} - This is the initial caption for the image, most likely grammatically incorrect
+and incomplete sentence, generated by a separate not so good image captioning model.
+Input 2: {context} - This is the contextual information that provides more details about the background
+Your goal is to take the initial caption and the additional context, and produce a new, refined caption that
+incorporates the contextual details.
+Please do not speculate things which are not provided. The final caption should be grammatically correct.
+Please output only the final caption."""
+prompt_template = PromptTemplate(  # wraps the template text above
+    template=template,
+    input_variables=["initial_caption", "context"],  # names match the {initial_caption} and {context} placeholders in the template text
+)
 # Title
 st.title("Image Caption Surveillance")
         caption = image_to_text(image_url, max_new_tokens=200)
         initial_caption= caption[0]['generated_text']
+        ##LLM
+        prompt=prompt_template.format(initial_caption=initial_caption, context=context)
+        response = llm.invoke(prompt)
+        final_caption = response.content
         ##Output
         st.write("context: ", context)
         st.write("initial_caption: ", initial_caption)
+        st.write("final_caption: ", final_caption)
     except Exception as e:
         st.error(f"Error: {e}")
 else: