nursulu committed on
Commit 77e0511
1 Parent(s): 6893fa1
app.py CHANGED
@@ -11,6 +11,7 @@ import requests
 import json
 import os
 import re
+
 import torch
 from peft import PeftModel, PeftConfig
 from transformers import AutoModelForCausalLM, AutoTokenizer
@@ -41,9 +42,11 @@ def load_models():
 
 # x = st.slider('Select a value')
 # st.write(x, 'squared is', x * x)
-def generate_meme_from_image(img_path, base_model, tokenizer, hf_token, output_dir, device='cuda'):
+
+def generate_meme_from_image(img_path, base_model, tokenizer, hf_token, device='cuda'):
     caption = get_model_caption(img_path, base_model, tokenizer, hf_token)
-    image = overlay_caption(caption, img_path, output_dir)
+    print(caption)
+    image = overlay_caption(caption, img_path)
     return image, caption
 
 st.title("Image Upload and Processing App")
@@ -58,7 +61,7 @@ def main():
     uploaded_image = st.file_uploader("Upload an Image", type=["jpg", "png", "jpeg"])
 
     # Input widget to add Hugging Face token
-    hf_token = st.text_input("Enter your Hugging Face Token", type="password")
+    hf_token = st.text_input("Enter your Hugging Face Token", type='default')
 
     # Dropdown to select mood
     # mood = st.selectbox("Select Mood", options=["happy", "angry"])
@@ -78,16 +81,16 @@ def main():
         # Display the output
        st.image(image, caption=f"Generated Meme: {caption}")
 
-        # Optionally allow downloading the meme
-        buf = io.BytesIO()
-        image.save(buf, format="PNG")
-        byte_im = buf.getvalue()
+        # # Optionally allow downloading the meme
+        # buf = io.BytesIO()
+        # image.save(buf, format="PNG")
+        # byte_im = buf.getvalue()
 
        st.download_button(
-            label="Download Meme",
-            data=byte_im,
-            file_name="generated_meme.png",
-            mime="image/png"
+            label="Download Image with Caption",
+            data=image,
+            file_name="captioned_image.jpg",
+            mime="image/jpeg"
        )
 
 if __name__ == '__main__':
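
Note: the download button now passes image straight through as data. That only works because overlay_caption (see the utils/image_utils.py hunk below) now returns raw JPEG bytes rather than a PIL Image; st.download_button accepts str, bytes, or a file-like object for data. A minimal sketch of the bytes round-trip this relies on, assuming Streamlit and Pillow; the variable names are illustrative, not from the commit:

import io

import streamlit as st
from PIL import Image

# Stand-in for the captioned meme the pipeline produces.
img = Image.new("RGB", (200, 100), "white")

# Serialize in memory: no temp file, and the result is plain bytes.
buf = io.BytesIO()
img.save(buf, format="JPEG")
jpeg_bytes = buf.getvalue()

st.download_button(
    label="Download Image with Caption",
    data=jpeg_bytes,  # bytes are accepted directly
    file_name="captioned_image.jpg",
    mime="image/jpeg",
)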
fonts/Anton/Anton-Regular.ttf ADDED
Binary file (162 kB).
 
fonts/Anton/OFL.txt ADDED
@@ -0,0 +1,93 @@
+Copyright 2020 The Anton Project Authors (https://github.com/googlefonts/AntonFont.git)
+
+This Font Software is licensed under the SIL Open Font License, Version 1.1.
+This license is copied below, and is also available with a FAQ at:
+https://openfontlicense.org
+
+
+-----------------------------------------------------------
+SIL OPEN FONT LICENSE Version 1.1 - 26 February 2007
+-----------------------------------------------------------
+
+PREAMBLE
+The goals of the Open Font License (OFL) are to stimulate worldwide
+development of collaborative font projects, to support the font creation
+efforts of academic and linguistic communities, and to provide a free and
+open framework in which fonts may be shared and improved in partnership
+with others.
+
+The OFL allows the licensed fonts to be used, studied, modified and
+redistributed freely as long as they are not sold by themselves. The
+fonts, including any derivative works, can be bundled, embedded,
+redistributed and/or sold with any software provided that any reserved
+names are not used by derivative works. The fonts and derivatives,
+however, cannot be released under any other type of license. The
+requirement for fonts to remain under this license does not apply
+to any document created using the fonts or their derivatives.
+
+DEFINITIONS
+"Font Software" refers to the set of files released by the Copyright
+Holder(s) under this license and clearly marked as such. This may
+include source files, build scripts and documentation.
+
+"Reserved Font Name" refers to any names specified as such after the
+copyright statement(s).
+
+"Original Version" refers to the collection of Font Software components as
+distributed by the Copyright Holder(s).
+
+"Modified Version" refers to any derivative made by adding to, deleting,
+or substituting -- in part or in whole -- any of the components of the
+Original Version, by changing formats or by porting the Font Software to a
+new environment.
+
+"Author" refers to any designer, engineer, programmer, technical
+writer or other person who contributed to the Font Software.
+
+PERMISSION & CONDITIONS
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of the Font Software, to use, study, copy, merge, embed, modify,
+redistribute, and sell modified and unmodified copies of the Font
+Software, subject to the following conditions:
+
+1) Neither the Font Software nor any of its individual components,
+in Original or Modified Versions, may be sold by itself.
+
+2) Original or Modified Versions of the Font Software may be bundled,
+redistributed and/or sold with any software, provided that each copy
+contains the above copyright notice and this license. These can be
+included either as stand-alone text files, human-readable headers or
+in the appropriate machine-readable metadata fields within text or
+binary files as long as those fields can be easily viewed by the user.
+
+3) No Modified Version of the Font Software may use the Reserved Font
+Name(s) unless explicit written permission is granted by the corresponding
+Copyright Holder. This restriction only applies to the primary font name as
+presented to the users.
+
+4) The name(s) of the Copyright Holder(s) or the Author(s) of the Font
+Software shall not be used to promote, endorse or advertise any
+Modified Version, except to acknowledge the contribution(s) of the
+Copyright Holder(s) and the Author(s) or with their explicit written
+permission.
+
+5) The Font Software, modified or unmodified, in part or in whole,
+must be distributed entirely under this license, and must not be
+distributed under any other license. The requirement for fonts to
+remain under this license does not apply to any document created
+using the Font Software.
+
+TERMINATION
+This license becomes null and void if any of the above conditions are
+not met.
+
+DISCLAIMER
+THE FONT SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO ANY WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
+OF COPYRIGHT, PATENT, TRADEMARK, OR OTHER RIGHT. IN NO EVENT SHALL THE
+COPYRIGHT HOLDER BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+INCLUDING ANY GENERAL, SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL
+DAMAGES, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF THE USE OR INABILITY TO USE THE FONT SOFTWARE OR FROM
+OTHER DEALINGS IN THE FONT SOFTWARE.
utils/__pycache__/image_utils.cpython-311.pyc CHANGED
Binary files a/utils/__pycache__/image_utils.cpython-311.pyc and b/utils/__pycache__/image_utils.cpython-311.pyc differ
 
utils/__pycache__/model_utils.cpython-311.pyc CHANGED
Binary files a/utils/__pycache__/model_utils.cpython-311.pyc and b/utils/__pycache__/model_utils.cpython-311.pyc differ
 
utils/image_utils.py CHANGED
@@ -2,30 +2,7 @@ import os
 import re
 from PIL import Image, ImageDraw, ImageFont
 import textwrap
-
-
-def get_unique_filename(filename):
-    """
-    Generate a unique filename by appending a number if a file with the same name already exists.
-    """
-    if not os.path.exists(filename):
-        return filename
-
-    base, ext = os.path.splitext(filename)
-    counter = 1
-    new_filename = f"{base}_{counter}{ext}"
-
-    while os.path.exists(new_filename):
-        counter += 1
-        new_filename = f"{base}_{counter}{ext}"
-
-    return new_filename
-
-
-def save_image_with_unique_name(image, path):
-    unique_path = get_unique_filename(path)
-    image.save(unique_path)
-    print(f"Image saved as: {unique_path}")
+import io
 
 def find_text_in_answer(text):
     print("Full caption:", text)
@@ -97,8 +74,8 @@ def calculate_text_height(caption, font, max_width):
     draw = ImageDraw.Draw(image)
     return draw_text(draw, caption, (0, 0), font, max_width)
 
-def add_caption(image_path, caption, output_path, top_margin=10, bottom_margin=10, max_caption_length=10, min_distance_from_bottom_mm=10):
-    image = Image.open(image_path)
+def add_caption(image_path, caption, top_margin=10, bottom_margin=10, max_caption_length=10, min_distance_from_bottom_mm=10):
+    image = image_path
     draw = ImageDraw.Draw(image)
     width, height = image.size
 
@@ -136,13 +113,14 @@ def add_caption(image_path, caption, output_path, top_margin=10, bottom_margin=1
     bottom_caption_position = (width // 10, height - min_distance_from_bottom_px - bottom_caption_height)
     draw_text(draw, bottom_caption, bottom_caption_position, font, width - 2 * (width // 10))
 
-    save_image_with_unique_name(image, output_path)
+    buffered = io.BytesIO()
+    image.save(buffered, format="JPEG")
+    return buffered.getvalue()
     return image
 
 
-def overlay_caption(text, img_path, output_dir):
-    img_name = img_path.split("/")[-1]
+def overlay_caption(text, img_path):
     text = find_text_in_answer(text)
     text = text.strip(".")
-    image = add_caption(img_path, text, output_dir+"/"+img_name)
+    image = add_caption(img_path, text)
     return image
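
Note: after this change add_caption receives an in-memory PIL Image even though the parameter is still named image_path (image = image_path merely rebinds it), and it returns JPEG bytes, which makes the trailing return image unreachable dead code. A hedged sketch of the in-memory flow, assuming Pillow; caption_to_jpeg_bytes and the default font are illustrative stand-ins (the commit ships fonts/Anton/Anton-Regular.ttf for the real rendering):

import io

from PIL import ImageDraw, ImageFont

def caption_to_jpeg_bytes(img, text):
    # Hypothetical helper mirroring the new add_caption flow.
    img = img.convert("RGB")         # JPEG cannot encode an alpha channel
    draw = ImageDraw.Draw(img)
    font = ImageFont.load_default()  # stand-in for Anton-Regular.ttf
    draw.text((10, 10), text, font=font, fill="white")
    buf = io.BytesIO()               # serialize without touching disk
    img.save(buf, format="JPEG")
    return buf.getvalue()            # bytes, ready for st.download_button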
utils/model_utils.py CHANGED
@@ -6,26 +6,37 @@ import os
 from tqdm import tqdm
 import re
 import torch
+import io
+from PIL import Image
 
+def image_to_bytes(image):
+    """Convert PIL Image to bytes."""
+    buffer = io.BytesIO()
+    image.save(buffer, format="JPEG")  # Adjust format if necessary
+    return buffer.getvalue()
 
 
 def query_clip(data, hf_token):
     API_URL = "https://api-inference.huggingface.co/models/openai/clip-vit-base-patch32"
     headers = {"Authorization": f"Bearer {hf_token}"}
-    with open(data["image_path"], "rb") as f:
-        img = f.read()
+    img = data['image']
+    img_bytes = image_to_bytes(img)
+    image = Image.open(io.BytesIO(img_bytes))
+
+    encoded_img = base64.b64encode(img_bytes).decode("utf-8")
+
     payload={
         "parameters": data["parameters"],
-        "inputs": base64.b64encode(img).decode("utf-8")
+        "inputs": encoded_img
     }
     response = requests.post(API_URL, headers=headers, json=payload)
     return response.json()
 
 
-def get_sentiment(img_path, hf_token):
+def get_sentiment(img, hf_token):
     print("Getting the sentiment of the image...")
     output = query_clip({
-        "image_path": img_path,
+        "image": img,
         "parameters": {"candidate_labels": ["angry", "happy"]},
     }, hf_token)
     try:
@@ -36,18 +47,22 @@ def get_sentiment(img_path, hf_token):
     print("If the model is loading, try again in a minute. If you've reached a query limit (300 per hour), try within the next hour.")
 
 
-def query_blip(filename, hf_token):
+def query_blip(img, hf_token):
     API_URL = "https://api-inference.huggingface.co/models/Salesforce/blip-image-captioning-large"
     headers = {"Authorization": f"Bearer {hf_token}"}
-    with open(filename, "rb") as f:
-        file = f.read()
-    response = requests.post(API_URL, headers=headers, data=file)
+
+    img_bytes = image_to_bytes(img)
+
+    files = {
+        'file': ('image.jpg', img_bytes, 'image/jpeg')
+    }
+    response = requests.post(API_URL, headers=headers, data=files)
     return response.json()
 
 
-def get_description(img_path, hf_token):
+def get_description(img, hf_token):
     print("Getting the context of the image...")
-    output = query_blip(img_path, hf_token)
+    output = query_blip(img, hf_token)
 
     try:
         print("Context:", output[0]['generated_text'])
@@ -72,6 +87,7 @@ def get_model_caption(img_path, base_model, tokenizer, hf_token, device='cuda'):
     print("Generating captions...")
     encodeds = tokenizer(prompt, return_tensors="pt", add_special_tokens=True)
     model_inputs = encodeds.to(device)
+    print("sentiment", sentiment)
     base_model.set_adapter(sentiment)
     base_model.to(device)
     generated_ids = base_model.generate(**model_inputs, max_new_tokens=20, do_sample=True, pad_token_id=tokenizer.eos_token_id)
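
Note: query_clip now base64-encodes the in-memory image into the JSON payload (the Image.open(io.BytesIO(img_bytes)) line is unused), while query_blip builds a files-style dict but sends it via data=, which requests will form-encode rather than post as a raw body; the old code posted the raw image bytes, so this is worth verifying against the BLIP endpoint. A minimal sketch of the CLIP zero-shot request as the new code constructs it, assuming a valid HF token and an in-memory PIL image; classify_sentiment is an illustrative name, not from the commit:

import base64
import io

import requests

API_URL = "https://api-inference.huggingface.co/models/openai/clip-vit-base-patch32"

def classify_sentiment(img, hf_token, labels=("angry", "happy")):
    # img is a PIL image; serialize it in memory, mirroring image_to_bytes().
    buf = io.BytesIO()
    img.save(buf, format="JPEG")
    payload = {
        # Base64-encoded image as "inputs", as the commit's query_clip sends it.
        "inputs": base64.b64encode(buf.getvalue()).decode("utf-8"),
        "parameters": {"candidate_labels": list(labels)},
    }
    resp = requests.post(API_URL, headers={"Authorization": f"Bearer {hf_token}"}, json=payload)
    return resp.json()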