nursulu committed on
Commit 77e0511
1 Parent(s): 6893fa1
app.py CHANGED
@@ -11,6 +11,7 @@ import requests
 import json
 import os
 import re
+
 import torch
 from peft import PeftModel, PeftConfig
 from transformers import AutoModelForCausalLM, AutoTokenizer
@@ -41,9 +42,11 @@ def load_models():
 
 # x = st.slider('Select a value')
 # st.write(x, 'squared is', x * x)
-def generate_meme_from_image(img_path, base_model, tokenizer, hf_token, output_dir, device='cuda'):
+
+def generate_meme_from_image(img_path, base_model, tokenizer, hf_token, device='cuda'):
     caption = get_model_caption(img_path, base_model, tokenizer, hf_token)
-    image = overlay_caption(caption, img_path, output_dir)
+    print(caption)
+    image = overlay_caption(caption, img_path)
     return image, caption
 
 st.title("Image Upload and Processing App")
@@ -58,7 +61,7 @@ def main():
     uploaded_image = st.file_uploader("Upload an Image", type=["jpg", "png", "jpeg"])
 
     # Input widget to add Hugging Face token
-    hf_token = st.text_input("Enter your Hugging Face Token", type="password")
+    hf_token = st.text_input("Enter your Hugging Face Token", type='default')
 
     # Dropdown to select mood
     # mood = st.selectbox("Select Mood", options=["happy", "angry"])
@@ -78,16 +81,16 @@ def main():
         # Display the output
        st.image(image, caption=f"Generated Meme: {caption}")
 
-        # Optionally allow downloading the meme
-        buf = io.BytesIO()
-        image.save(buf, format="PNG")
-        byte_im = buf.getvalue()
+        # # Optionally allow downloading the meme
+        # buf = io.BytesIO()
+        # image.save(buf, format="PNG")
+        # byte_im = buf.getvalue()
 
        st.download_button(
-            label="Download Meme",
-            data=byte_im,
-            file_name="generated_meme.png",
-            mime="image/png"
+            label="Download Image with Caption",
+            data=image,
+            file_name="captioned_image.jpg",
+            mime="image/jpeg"
        )
 
 if __name__ == '__main__':
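
Note: the download button now passes image straight through as data. That only works because overlay_caption (see the utils/image_utils.py hunk below) now returns raw JPEG bytes rather than a PIL Image; st.download_button accepts str, bytes, or a file-like object for data. A minimal sketch of the bytes round-trip this relies on, assuming Streamlit and Pillow; the variable names are illustrative, not from the commit:

import io

import streamlit as st
from PIL import Image

# Stand-in for the captioned meme the pipeline produces.
img = Image.new("RGB", (200, 100), "white")

# Serialize in memory: no temp file, and the result is plain bytes.
buf = io.BytesIO()
img.save(buf, format="JPEG")
jpeg_bytes = buf.getvalue()

st.download_button(
    label="Download Image with Caption",
    data=jpeg_bytes,  # bytes are accepted directly
    file_name="captioned_image.jpg",
    mime="image/jpeg",
)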
fonts/Anton/Anton-Regular.ttf ADDED
Binary file (162 kB).
 
fonts/Anton/OFL.txt ADDED
@@ -0,0 +1,93 @@
+Copyright 2020 The Anton Project Authors (https://github.com/googlefonts/AntonFont.git)
+
+This Font Software is licensed under the SIL Open Font License, Version 1.1.
+This license is copied below, and is also available with a FAQ at:
+https://openfontlicense.org
+
+
+-----------------------------------------------------------
+SIL OPEN FONT LICENSE Version 1.1 - 26 February 2007
+-----------------------------------------------------------
+
+PREAMBLE
+The goals of the Open Font License (OFL) are to stimulate worldwide
+development of collaborative font projects, to support the font creation
+efforts of academic and linguistic communities, and to provide a free and
+open framework in which fonts may be shared and improved in partnership
+with others.
+
+The OFL allows the licensed fonts to be used, studied, modified and
+redistributed freely as long as they are not sold by themselves. The
+fonts, including any derivative works, can be bundled, embedded,
+redistributed and/or sold with any software provided that any reserved
+names are not used by derivative works. The fonts and derivatives,
+however, cannot be released under any other type of license. The
+requirement for fonts to remain under this license does not apply
+to any document created using the fonts or their derivatives.
+
+DEFINITIONS
+"Font Software" refers to the set of files released by the Copyright
+Holder(s) under this license and clearly marked as such. This may
+include source files, build scripts and documentation.
+
+"Reserved Font Name" refers to any names specified as such after the
+copyright statement(s).
+
+"Original Version" refers to the collection of Font Software components as
+distributed by the Copyright Holder(s).
+
+"Modified Version" refers to any derivative made by adding to, deleting,
+or substituting -- in part or in whole -- any of the components of the
+Original Version, by changing formats or by porting the Font Software to a
+new environment.
+
+"Author" refers to any designer, engineer, programmer, technical
+writer or other person who contributed to the Font Software.
+
+PERMISSION & CONDITIONS
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of the Font Software, to use, study, copy, merge, embed, modify,
+redistribute, and sell modified and unmodified copies of the Font
+Software, subject to the following conditions:
+
+1) Neither the Font Software nor any of its individual components,
+in Original or Modified Versions, may be sold by itself.
+
+2) Original or Modified Versions of the Font Software may be bundled,
+redistributed and/or sold with any software, provided that each copy
+contains the above copyright notice and this license. These can be
+included either as stand-alone text files, human-readable headers or
+in the appropriate machine-readable metadata fields within text or
+binary files as long as those fields can be easily viewed by the user.
+
+3) No Modified Version of the Font Software may use the Reserved Font
+Name(s) unless explicit written permission is granted by the corresponding
+Copyright Holder. This restriction only applies to the primary font name as
+presented to the users.
+
+4) The name(s) of the Copyright Holder(s) or the Author(s) of the Font
+Software shall not be used to promote, endorse or advertise any
+Modified Version, except to acknowledge the contribution(s) of the
+Copyright Holder(s) and the Author(s) or with their explicit written
+permission.
+
+5) The Font Software, modified or unmodified, in part or in whole,
+must be distributed entirely under this license, and must not be
+distributed under any other license. The requirement for fonts to
+remain under this license does not apply to any document created
+using the Font Software.
+
+TERMINATION
+This license becomes null and void if any of the above conditions are
+not met.
+
+DISCLAIMER
+THE FONT SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO ANY WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
+OF COPYRIGHT, PATENT, TRADEMARK, OR OTHER RIGHT. IN NO EVENT SHALL THE
+COPYRIGHT HOLDER BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+INCLUDING ANY GENERAL, SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL
+DAMAGES, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF THE USE OR INABILITY TO USE THE FONT SOFTWARE OR FROM
+OTHER DEALINGS IN THE FONT SOFTWARE.
utils/__pycache__/image_utils.cpython-311.pyc CHANGED
Binary files a/utils/__pycache__/image_utils.cpython-311.pyc and b/utils/__pycache__/image_utils.cpython-311.pyc differ
 
utils/__pycache__/model_utils.cpython-311.pyc CHANGED
Binary files a/utils/__pycache__/model_utils.cpython-311.pyc and b/utils/__pycache__/model_utils.cpython-311.pyc differ
 
utils/image_utils.py CHANGED
@@ -2,30 +2,7 @@ import os
 import re
 from PIL import Image, ImageDraw, ImageFont
 import textwrap
-
-
-def get_unique_filename(filename):
-    """
-    Generate a unique filename by appending a number if a file with the same name already exists.
-    """
-    if not os.path.exists(filename):
-        return filename
-
-    base, ext = os.path.splitext(filename)
-    counter = 1
-    new_filename = f"{base}_{counter}{ext}"
-
-    while os.path.exists(new_filename):
-        counter += 1
-        new_filename = f"{base}_{counter}{ext}"
-
-    return new_filename
-
-
-def save_image_with_unique_name(image, path):
-    unique_path = get_unique_filename(path)
-    image.save(unique_path)
-    print(f"Image saved as: {unique_path}")
+import io
 
 def find_text_in_answer(text):
     print("Full caption:", text)
@@ -97,8 +74,8 @@ def calculate_text_height(caption, font, max_width):
     draw = ImageDraw.Draw(image)
     return draw_text(draw, caption, (0, 0), font, max_width)
 
-def add_caption(image_path, caption, output_path, top_margin=10, bottom_margin=10, max_caption_length=10, min_distance_from_bottom_mm=10):
-    image = Image.open(image_path)
+def add_caption(image_path, caption, top_margin=10, bottom_margin=10, max_caption_length=10, min_distance_from_bottom_mm=10):
+    image = image_path
     draw = ImageDraw.Draw(image)
     width, height = image.size
 
@@ -136,13 +113,14 @@ def add_caption(image_path, caption, output_path, top_margin=10, bottom_margin=1
     bottom_caption_position = (width // 10, height - min_distance_from_bottom_px - bottom_caption_height)
     draw_text(draw, bottom_caption, bottom_caption_position, font, width - 2 * (width // 10))
 
-    save_image_with_unique_name(image, output_path)
+    buffered = io.BytesIO()
+    image.save(buffered, format="JPEG")
+    return buffered.getvalue()
     return image
 
 
-def overlay_caption(text, img_path, output_dir):
-    img_name = img_path.split("/")[-1]
+def overlay_caption(text, img_path):
     text = find_text_in_answer(text)
     text = text.strip(".")
-    image = add_caption(img_path, text, output_dir+"/"+img_name)
+    image = add_caption(img_path, text)
     return image
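
Note: after this change add_caption receives an in-memory PIL Image even though the parameter is still named image_path (image = image_path merely rebinds it), and it returns JPEG bytes, which makes the trailing return image unreachable dead code. A hedged sketch of the in-memory flow, assuming Pillow; caption_to_jpeg_bytes and the default font are illustrative stand-ins (the commit ships fonts/Anton/Anton-Regular.ttf for the real rendering):

import io

from PIL import ImageDraw, ImageFont

def caption_to_jpeg_bytes(img, text):
    # Hypothetical helper mirroring the new add_caption flow.
    img = img.convert("RGB")         # JPEG cannot encode an alpha channel
    draw = ImageDraw.Draw(img)
    font = ImageFont.load_default()  # stand-in for Anton-Regular.ttf
    draw.text((10, 10), text, font=font, fill="white")
    buf = io.BytesIO()               # serialize without touching disk
    img.save(buf, format="JPEG")
    return buf.getvalue()            # bytes, ready for st.download_button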
utils/model_utils.py CHANGED
@@ -6,26 +6,37 @@ import os
 from tqdm import tqdm
 import re
 import torch
+import io
+from PIL import Image
 
+def image_to_bytes(image):
+    """Convert PIL Image to bytes."""
+    buffer = io.BytesIO()
+    image.save(buffer, format="JPEG")  # Adjust format if necessary
+    return buffer.getvalue()
 
 
 def query_clip(data, hf_token):
     API_URL = "https://api-inference.huggingface.co/models/openai/clip-vit-base-patch32"
     headers = {"Authorization": f"Bearer {hf_token}"}
-    with open(data["image_path"], "rb") as f:
-        img = f.read()
+    img = data['image']
+    img_bytes = image_to_bytes(img)
+    image = Image.open(io.BytesIO(img_bytes))
+
+    encoded_img = base64.b64encode(img_bytes).decode("utf-8")
+
     payload={
         "parameters": data["parameters"],
-        "inputs": base64.b64encode(img).decode("utf-8")
+        "inputs": encoded_img
     }
     response = requests.post(API_URL, headers=headers, json=payload)
     return response.json()
 
 
-def get_sentiment(img_path, hf_token):
+def get_sentiment(img, hf_token):
     print("Getting the sentiment of the image...")
     output = query_clip({
-        "image_path": img_path,
+        "image": img,
         "parameters": {"candidate_labels": ["angry", "happy"]},
     }, hf_token)
     try:
@@ -36,18 +47,22 @@ def get_sentiment(img_path, hf_token):
     print("If the model is loading, try again in a minute. If you've reached a query limit (300 per hour), try within the next hour.")
 
 
-def query_blip(filename, hf_token):
+def query_blip(img, hf_token):
     API_URL = "https://api-inference.huggingface.co/models/Salesforce/blip-image-captioning-large"
     headers = {"Authorization": f"Bearer {hf_token}"}
-    with open(filename, "rb") as f:
-        file = f.read()
-    response = requests.post(API_URL, headers=headers, data=file)
+
+    img_bytes = image_to_bytes(img)
+
+    files = {
+        'file': ('image.jpg', img_bytes, 'image/jpeg')
+    }
+    response = requests.post(API_URL, headers=headers, data=files)
     return response.json()
 
 
-def get_description(img_path, hf_token):
+def get_description(img, hf_token):
     print("Getting the context of the image...")
-    output = query_blip(img_path, hf_token)
+    output = query_blip(img, hf_token)
 
     try:
         print("Context:", output[0]['generated_text'])
@@ -72,6 +87,7 @@ def get_model_caption(img_path, base_model, tokenizer, hf_token, device='cuda'):
     print("Generating captions...")
     encodeds = tokenizer(prompt, return_tensors="pt", add_special_tokens=True)
     model_inputs = encodeds.to(device)
+    print("sentiment", sentiment)
     base_model.set_adapter(sentiment)
     base_model.to(device)
     generated_ids = base_model.generate(**model_inputs, max_new_tokens=20, do_sample=True, pad_token_id=tokenizer.eos_token_id)
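
Note: query_clip now base64-encodes the in-memory image into the JSON payload (the Image.open(io.BytesIO(img_bytes)) line is unused), while query_blip builds a files-style dict but sends it via data=, which requests will form-encode rather than post as a raw body; the old code posted the raw image bytes, so this is worth verifying against the BLIP endpoint. A minimal sketch of the CLIP zero-shot request as the new code constructs it, assuming a valid HF token and an in-memory PIL image; classify_sentiment is an illustrative name, not from the commit:

import base64
import io

import requests

API_URL = "https://api-inference.huggingface.co/models/openai/clip-vit-base-patch32"

def classify_sentiment(img, hf_token, labels=("angry", "happy")):
    # img is a PIL image; serialize it in memory, mirroring image_to_bytes().
    buf = io.BytesIO()
    img.save(buf, format="JPEG")
    payload = {
        # Base64-encoded image as "inputs", as the commit's query_clip sends it.
        "inputs": base64.b64encode(buf.getvalue()).decode("utf-8"),
        "parameters": {"candidate_labels": list(labels)},
    }
    resp = requests.post(API_URL, headers={"Authorization": f"Bearer {hf_token}"}, json=payload)
    return resp.json()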