ChandraP12330 committed on
Commit
abbb301
·
verified ·
1 Parent(s): dff681a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -0
app.py CHANGED
@@ -38,6 +38,40 @@ model = CLIPModel.from_pretrained(model_id)
38
  from transformers import pipeline
39
  image_to_text = pipeline("image-to-text", model="Salesforce/blip-image-captioning-large")
40
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
 
42
  # Title
43
  st.title("Image Caption Surveillance")
@@ -65,9 +99,15 @@ if image_url:
65
  caption = image_to_text(image_url, max_new_tokens=200)
66
  initial_caption= caption[0]['generated_text']
67
 
 
 
 
 
 
68
  ##Output
69
  st.write("context: ", context)
70
  st.write("initial_caption: ", initial_caption)
 
71
  except Exception as e:
72
  st.error(f"Error: {e}")
73
  else:
 
38
  from transformers import pipeline
39
  image_to_text = pipeline("image-to-text", model="Salesforce/blip-image-captioning-large")
40
 
41
+ ##LLM
42
+ GOOGLE_API_KEY = st.text_input("Please enter your GOOGLE GEMINI API KEY", type="password")
43
+ os.environ['GOOGLE_API_KEY'] = GOOGLE_API_KEY
44
+
45
+ from langchain_google_genai import ChatGoogleGenerativeAI
46
+ from langchain.prompts import PromptTemplate
47
+ from google.generativeai.types.safety_types import HarmBlockThreshold, HarmCategory
48
+ llm = ChatGoogleGenerativeAI(model="gemini-1.0-pro-latest", google_api_key=GOOGLE_API_KEY, temperature=0.2, top_p=1, top_k=1,
49
+ safety_settings={
50
+ HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE,
51
+ HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
52
+ HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE,
53
+ HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE,
54
+
55
+ },
56
+ )
57
+ template="""You are an advanced image captioning AI assistant for surveillance related images.
58
+ Your task is to refine and improve an initial image caption using relevant contextual information provided.
59
+ You will receive two inputs:
60
+ Input 1: {initial_caption} - This is the initial caption for the image, most likely grammatically incorrect
61
+ and incomplete sentence, generated by a separate not so good image captioning model.
62
+ Input 2: {context} - This is the contextual information that provides more details about the background
63
+ Your goal is to take the initial caption and the additional context, and produce a new, refined caption that
64
+ incorporates the contextual details.
65
+ Please do not speculate things which are not provided. The final caption should be grammatically correct.
66
+ Please output only the final caption."""
67
+
68
+ prompt_template = PromptTemplate(
69
+ template=template,
70
+ input_variables=["initial_caption", "context"],
71
+ )
72
+
73
+
74
+
75
 
76
  # Title
77
  st.title("Image Caption Surveillance")
 
99
  caption = image_to_text(image_url, max_new_tokens=200)
100
  initial_caption= caption[0]['generated_text']
101
 
102
+ ##LLM
103
+ prompt=prompt_template.format(initial_caption=initial_caption, context=context)
104
+ response = llm.invoke(prompt)
105
+ final_caption = response.content
106
+
107
  ##Output
108
  st.write("context: ", context)
109
  st.write("initial_caption: ", initial_caption)
110
+ st.write("final_caption: ", final_caption)
111
  except Exception as e:
112
  st.error(f"Error: {e}")
113
  else: