Spaces:

jitubutwal1441
/

image-to-story

Runtime error

App Files Files Community

jitendra.kasaudhan committed on Aug 25, 2023

Commit

c22d94a

•

1 Parent(s): 693660f

Initial project setup with all the required steps to run the project

Browse files

Files changed (2) hide show

app.py +100 -0
requirements.txt +6 -0

app.py ADDED Viewed

	@@ -0,0 +1,100 @@

+from dotenv import load_dotenv, find_dotenv
+from transformers import pipeline
+from langchain import LLMChain, OpenAI, PromptTemplate
+import requests
+import os
+# UI layer
+import streamlit as st
+load_dotenv(find_dotenv())  # load variables from the nearest .env file into the process environment
+HUGGINGFACEHUB_API_TOKEN = os.getenv('HUGGINGFACEHUB_API_TOKEN')  # None when unset; sent as the Bearer token on Hugging Face API calls
+# It involves 3 steps
+# image to text
+def image_to_text(url, use_api=True):
+    if use_api:
+        API_URL = "https://api-inference.huggingface.co/models/Salesforce/blip-image-captioning-large"
+        headers = {"Authorization": f"Bearer {HUGGINGFACEHUB_API_TOKEN}"}
+        filename = url.split("/")[-1]
+        with open(filename, "rb") as f:
+            data = f.read()
+        response = requests.post(API_URL, headers=headers, data=data)
+        return response.json()[0]['generated_text']
+    # Download the model and use it, which is slow
+    captioner = pipeline("image-to-text",model="Salesforce/blip-image-captioning-base")
+    # captioner("https://huggingface.co/datasets/Narsil/image_dummy/raw/main/parrots.png")
+    result = captioner(url)
+    return result[0]['generated_text']
+# Example captioner output: [{'generated_text': 'two birds are standing next to each other '}]
+# LLM
+def generate_story(story_idea):
+    template = """
+        You are a professional story teller;
+        Generate a short story based on a simple narrative, the story should be no more than 50 words;
+        CONTEXT: {story_idea}
+        STORY:
+        """
+    prompt = PromptTemplate(input_variables=["story_idea"], template=template)
+    story_llm = LLMChain(llm=OpenAI(model_name='gpt-3.5-turbo-0301', temperature=1), prompt=prompt, verbose=True)
+    story = story_llm.run(story_idea)
+    return story
+# text to speech
+def text_to_speech(story):
+    API_URL = "https://api-inference.huggingface.co/models/espnet/kan-bayashi_ljspeech_vits"
+    headers = {"Authorization": f"Bearer {HUGGINGFACEHUB_API_TOKEN}"}
+    payloads = {
+        "inputs": story
+    }
+    response = requests.post(API_URL, headers=headers, json=payloads)
+    with open("story_audio.flac", "wb") as file:
+        file.write(response.content)
+# caption = image_to_text("https://huggingface.co/datasets/Narsil/image_dummy/raw/main/parrots.png")
+# story = generate_story(story_idea="Two parrots singing a song")
+# text_to_speech(story="Two parrots singing a song")
+def main():
+    st.set_page_config(page_title="Upload any image to hear a nice story")
+    st.header("Listen to what your image has to tell you. JK DEMO APP")
+    uploaded_file = st.file_uploader("Choose an image...", type="jpg")
+    if uploaded_file is not None:
+        print(uploaded_file)
+        bytes_data = uploaded_file.getvalue()
+        with open(uploaded_file.name, "wb") as file:
+            file.write(bytes_data)
+        st.image(uploaded_file, caption="Uploaded image", use_column_width=True)
+        image_description = image_to_text(uploaded_file.name, use_api=True)
+        # Display image description on FE
+        with st.expander("Image Description"):
+            st.write(image_description)
+        story = generate_story(story_idea=image_description)
+        story_starter_text = "Yo ho Radio Nepal, prastut xa sun nai parne katha: "
+        story = story_starter_text + story
+        # Display story text on FE
+        with st.expander("Story"):
+            st.write(story)
+        # Display audio player on FE
+        text_to_speech(story=story)
+        st.audio("story_audio.flac")
+if __name__ == '__main__':
+    main()

requirements.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+python-dotenv
+transformers
+langchain
+tensorflow
+openai
+streamlit