Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -38,6 +38,40 @@ model = CLIPModel.from_pretrained(model_id)
|
|
38 |
from transformers import pipeline
|
39 |
image_to_text = pipeline("image-to-text", model="Salesforce/blip-image-captioning-large")
|
40 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
41 |
|
42 |
# Title
|
43 |
st.title("Image Caption Surveillance")
|
@@ -65,9 +99,15 @@ if image_url:
|
|
65 |
caption = image_to_text(image_url, max_new_tokens=200)
|
66 |
initial_caption= caption[0]['generated_text']
|
67 |
|
|
|
|
|
|
|
|
|
|
|
68 |
##Output
|
69 |
st.write("context: ", context)
|
70 |
st.write("initial_caption: ", initial_caption)
|
|
|
71 |
except Exception as e:
|
72 |
st.error(f"Error: {e}")
|
73 |
else:
|
|
|
38 |
from transformers import pipeline
|
39 |
image_to_text = pipeline("image-to-text", model="Salesforce/blip-image-captioning-large")
|
40 |
|
41 |
+
##LLM
|
42 |
+
GOOGLE_API_KEY = st.text_input("Please enter your GOOGLE GEMINI API KEY", type="password")
|
43 |
+
os.environ['GOOGLE_API_KEY'] = GOOGLE_API_KEY
|
44 |
+
|
45 |
+
from langchain_google_genai import ChatGoogleGenerativeAI
|
46 |
+
from langchain.prompts import PromptTemplate
|
47 |
+
from google.generativeai.types.safety_types import HarmBlockThreshold, HarmCategory
|
48 |
+
llm = ChatGoogleGenerativeAI(model="gemini-1.0-pro-latest", google_api_key=GOOGLE_API_KEY, temperature=0.2, top_p=1, top_k=1,
|
49 |
+
safety_settings={
|
50 |
+
HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE,
|
51 |
+
HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
|
52 |
+
HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE,
|
53 |
+
HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE,
|
54 |
+
|
55 |
+
},
|
56 |
+
)
|
57 |
+
template="""You are an advanced image captioning AI assistant for surveillance related images.
|
58 |
+
Your task is to refine and improve an initial image caption using relevant contextual information provided.
|
59 |
+
You will receive two inputs:
|
60 |
+
Input 1: {initial_caption} - This is the initial caption for the image, most likely grammatically incorrect
|
61 |
+
and incomplete sentence, generated by a separate not so good image captioning model.
|
62 |
+
Input 2: {context} - This is the contextual information that provides more details about the background
|
63 |
+
Your goal is to take the initial caption and the additional context, and produce a new, refined caption that
|
64 |
+
incorporates the contextual details.
|
65 |
+
Please do not speculate things which are not provided. The final caption should be grammatically correct.
|
66 |
+
Please output only the final caption."""
|
67 |
+
|
68 |
+
prompt_template = PromptTemplate(
|
69 |
+
template=template,
|
70 |
+
input_variables=["initial_caption", "context"],
|
71 |
+
)
|
72 |
+
|
73 |
+
|
74 |
+
|
75 |
|
76 |
# Title
|
77 |
st.title("Image Caption Surveillance")
|
|
|
99 |
caption = image_to_text(image_url, max_new_tokens=200)
|
100 |
initial_caption= caption[0]['generated_text']
|
101 |
|
102 |
+
##LLM
|
103 |
+
prompt=prompt_template.format(initial_caption=initial_caption, context=context)
|
104 |
+
response = llm.invoke(prompt)
|
105 |
+
final_caption = response.content
|
106 |
+
|
107 |
##Output
|
108 |
st.write("context: ", context)
|
109 |
st.write("initial_caption: ", initial_caption)
|
110 |
+
st.write("final_caption: ", final_caption)
|
111 |
except Exception as e:
|
112 |
st.error(f"Error: {e}")
|
113 |
else:
|