jjz5463 committed on
Commit 78263be · 1 Parent(s): abcf835

pipeline build up

Experiments/Baseline-Experimental.ipynb DELETED
@@ -1,85 +0,0 @@
- {
- "cells": [
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "initial_id",
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
- "source": [
- "# Utilize the Google Cloud Vision API to recognize text in the input images (diary images), https://cloud.google.com/vision.\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "outputs": [],
- "source": [
- "# Utilize the PaLM 2 Bison for Text model to conduct NLP tasks such as text summarization and condensing on the diary text, https://ai.google.dev/palm_docs/palm.\n"
- ],
- "metadata": {
- "collapsed": false
- },
- "id": "204930c13fd1e579"
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "outputs": [],
- "source": [
- "# Utilize the Gemini 1.0 Pro Vision to input an image of the diary writer, and output a textual description of the image, https://ai.google.dev/gemini-api/docs/models/gemini.\n"
- ],
- "metadata": {
- "collapsed": false
- },
- "id": "7f0c7d788b8de177"
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "outputs": [],
- "source": [
- "# Now that you have text from the diary and text describing the diary writer, you can utilize the SDXL-Turbo stable diffusion model to generate images https://huggingface.co/stabilityai/sdxl-turbo. You can try to output several images for a diary entry. Analyze how accurate the results, and think about what could be improved.\n"
- ],
- "metadata": {
- "collapsed": false
- },
- "id": "c475ca58dea760da"
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "outputs": [],
- "source": [
- "# You can create a web or mobile-based GUI so that users can experience your solution. Suggested libraries include https://www.gradio.app/ or https://streamlit.io/.\n"
- ],
- "metadata": {
- "collapsed": false
- },
- "id": "ee3a3a8d4027bae3"
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 2
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython2",
- "version": "2.7.6"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 5
- }
Experiments/Baseline/GUI.py ADDED
@@ -0,0 +1,24 @@
+ import streamlit as st
+ from PIL import Image
+
+ # You can create a web or mobile-based GUI so that users can experience your solution. Suggested libraries include https://www.gradio.app/ or https://streamlit.io/.
+ st.title('Handwritten Diary to Cartoon Book')
+ uploaded_diary = st.file_uploader("Upload your diary image", type=["png", "jpg", "jpeg"])
+ uploaded_writer_image = st.file_uploader("Upload your photo", type=["png", "jpg", "jpeg"])
+
+ if uploaded_diary and uploaded_writer_image:
+     st.write("Analyzing your diary...")
+
+     diary_text = detect_text_in_image(uploaded_diary)
+     summarized_text = summarize_diary_text(diary_text)
+
+     st.write(f"Summarized Diary Text: {summarized_text}")
+
+     writer_description = analyze_writer_image(uploaded_writer_image)
+     st.write(f"Diary Writer Description: {writer_description}")
+
+     # Generate cartoon image
+     prompt = f"{summarized_text}, featuring a person who {writer_description}"
+     generated_image = generate_image(prompt)
+
+     st.image(generated_image, caption="Generated Cartoon Image")
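Note: as committed, GUI.py calls detect_text_in_image, summarize_diary_text, analyze_writer_image, and generate_image without importing them, and the implementations in baseline_utils.py below expect file paths plus credential/key arguments and save their output to disk rather than returning a PIL image. A minimal sketch of how the GUI could be wired to those utilities, assuming the keys module used by baseline.py (open_ai_keys, gemini_keys) and the service-account JSON path; the temp-file helper and file names are illustrative additions, not part of this commit:

import tempfile

import streamlit as st

from baseline_utils import (analyze_writer_image, detect_text_in_image,
                            generate_image, summarize_diary_text)
from keys.keys import open_ai_keys, gemini_keys  # assumed names, mirroring baseline.py

CREDENTIALS_PATH = "keys/service_account_credentials.json"


def save_upload(uploaded_file):
    # Streamlit hands back an in-memory UploadedFile; the utilities expect a path on disk.
    suffix = "." + uploaded_file.name.rsplit(".", 1)[-1]
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
        tmp.write(uploaded_file.getvalue())
        return tmp.name


st.title("Handwritten Diary to Cartoon Book")
uploaded_diary = st.file_uploader("Upload your diary image", type=["png", "jpg", "jpeg"])
uploaded_writer_image = st.file_uploader("Upload your photo", type=["png", "jpg", "jpeg"])

if uploaded_diary and uploaded_writer_image:
    st.write("Analyzing your diary...")
    diary_text = detect_text_in_image(save_upload(uploaded_diary), CREDENTIALS_PATH)
    summarized_text = summarize_diary_text(diary_text, open_ai_keys)
    st.write(f"Summarized Diary Text: {summarized_text}")

    writer_description = analyze_writer_image(save_upload(uploaded_writer_image), gemini_keys)
    st.write(f"Diary Writer Description: {writer_description}")

    # generate_image() in baseline_utils saves generated_image.png rather than returning
    # the image, so display the file it writes.
    generate_image(summarized_text, writer_description)
    st.image("generated_image.png", caption="Generated Cartoon Image")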
Experiments/Baseline/baseline.py ADDED
@@ -0,0 +1,12 @@
+ from baseline_utils import *
+ from keys.keys import *
+
+ diary_image_path = "images/test_sample.jpeg"
+ writer_image_path = "images/writer.jpg"
+ credentials_path = "keys/service_account_credentials.json"
+
+ # Detect text from the image using the provided credentials
+ detected_text = detect_text_in_image(diary_image_path, credentials_path)
+ diary_summary = summarize_diary_text(detected_text, open_ai_keys)
+ writer_summary = analyze_writer_image(writer_image_path, gemini_keys)
+ generate_image(diary_summary, writer_summary)
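baseline.py wildcard-imports open_ai_keys and gemini_keys from a keys/keys.py module that is not included in this commit. One plausible minimal layout, shown only for orientation; the environment-variable names and the file contents are assumptions:

# keys/keys.py (illustrative only; the real module is not part of this commit)
import os

# OpenAI API key consumed by summarize_diary_text()
open_ai_keys = os.environ.get("OPENAI_API_KEY", "")

# Google Generative AI (Gemini) API key consumed by analyze_writer_image()
gemini_keys = os.environ.get("GEMINI_API_KEY", "")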
Experiments/Baseline/baseline_utils.py ADDED
@@ -0,0 +1,107 @@
+ import openai
+ from google.cloud import vision
+ from google.oauth2 import service_account
+ import io
+ import google.generativeai as genai
+ from diffusers import AutoPipelineForText2Image
+ import torch
+
+
+ # Utilize the Google Cloud Vision API to recognize text in the
+ # input images (diary images), https://cloud.google.com/vision.
+ def detect_text_in_image(image_path, credentials_path):
+     # Load the service account key from the credentials JSON file
+     credentials = service_account.Credentials.from_service_account_file(credentials_path)
+
+     # Create a Vision API client using the credentials
+     client = vision.ImageAnnotatorClient(credentials=credentials)
+
+     # Open the image file
+     with io.open(image_path, 'rb') as image_file:
+         content = image_file.read()
+
+     # Create an image object for the Vision API
+     image = vision.Image(content=content)
+
+     # Use the Vision API to detect text
+     response = client.text_detection(image=image)
+     texts = response.text_annotations
+
+     # Check for errors in the response
+     if response.error.message:
+         raise Exception(f'{response.error.message}')
+
+     # Return the detected text or an empty string
+     return texts[0].description if texts else ''
+
+
+ # Summarize and condense the diary text. The original plan referenced the PaLM 2 Bison for Text
+ # model (https://ai.google.dev/palm_docs/palm); this baseline uses the OpenAI chat completions API (GPT-4) instead.
+ def summarize_diary_text(text, api_key):
+     # Initialize the OpenAI client
+     client = openai.Client(api_key=api_key)
+
+     # Use the client to call the chat completion API
+     response = client.chat.completions.create(
+         model="gpt-4",  # Use GPT-4
+         messages=[
+             {"role": "system", "content": "You are a helpful assistant."},
+             {"role": "user", "content": f"Summarize the following diary entry: {text}"}
+         ],
+         max_tokens=150,
+         temperature=0.7,
+         n=1  # Number of completions to generate
+     )
+
+     # Extract the summary from the response
+     return response.choices[0].message.content
+
+
+ # Describe the diary writer from a photo. The original plan referenced Gemini 1.0 Pro Vision
+ # (https://ai.google.dev/gemini-api/docs/models/gemini); this implementation uploads the image
+ # and calls the gemini-1.5-flash model through the google-generativeai SDK,
+ # returning the model's textual description of the image.
+ def analyze_writer_image(image_path, api_key):
+     genai.configure(api_key=api_key)
+     model = genai.GenerativeModel("gemini-1.5-flash")
+     myfile = genai.upload_file(image_path)
+     result = model.generate_content(
+         [myfile, "\n\n", "Can you give a textual description of the image?"]
+     )
+     return result.text
+
+
+ # Now that you have text from the diary and text describing the diary writer,
+ # you can utilize the SDXL-Turbo stable diffusion model to generate
+ # images, https://huggingface.co/stabilityai/sdxl-turbo.
+ # You can try to output several images for a diary entry. Analyze how accurate the results are,
+ # and think about what could be improved.
+ def generate_image(diary_text, writer_description):
+     pipe = AutoPipelineForText2Image.from_pretrained(
+         "stabilityai/sdxl-turbo",
+         torch_dtype=torch.float16,
+         variant="fp16",
+         cache_dir="./SDXL-Turbo")
+
+     # Check for available device: CUDA, MPS, or CPU
+     if torch.cuda.is_available():
+         device = "cuda"
+         print("Using CUDA backend.")
+     elif torch.backends.mps.is_available():
+         device = "mps"
+         print("Using MPS backend.")
+     else:
+         device = "cpu"
+         print("CUDA and MPS not available. Falling back to CPU.")
+
+     # Move the model to the selected device
+     pipe = pipe.to(device)
+
+     # Generate the image with a simple prompt
+     prompt = f'Writer Description: {writer_description} \n\n Diary: {diary_text}'
+     print(prompt)
+     image = pipe(prompt=prompt, num_inference_steps=1, guidance_scale=0.0).images[0]
+
+     # Save the generated image
+     image.save("generated_image.png")
+
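The comment above generate_image() suggests producing several candidate images per diary entry. A sketch of one way to do that with SDXL-Turbo, using a fixed seed per candidate so runs are reproducible; the function name, num_images parameter, and output file names are illustrative additions, not part of this commit:

import torch
from diffusers import AutoPipelineForText2Image


def generate_image_variants(diary_text, writer_description, num_images=4):
    # Same SDXL-Turbo pipeline as generate_image() in baseline_utils.py.
    # (On a CPU-only machine you would likely load with torch.float32 instead.)
    pipe = AutoPipelineForText2Image.from_pretrained(
        "stabilityai/sdxl-turbo",
        torch_dtype=torch.float16,
        variant="fp16",
        cache_dir="./SDXL-Turbo")
    device = "cuda" if torch.cuda.is_available() else "cpu"
    pipe = pipe.to(device)

    prompt = f"Writer Description: {writer_description} \n\n Diary: {diary_text}"
    paths = []
    for i in range(num_images):
        # A distinct seed per candidate keeps the variants different but reproducible.
        generator = torch.Generator(device=device).manual_seed(i)
        image = pipe(prompt=prompt, num_inference_steps=1, guidance_scale=0.0,
                     generator=generator).images[0]
        path = f"generated_image_{i}.png"
        image.save(path)
        paths.append(path)
    return paths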
Experiments/Baseline/images/test_sample.jpeg ADDED
Experiments/Baseline/images/writer.jpg ADDED
Source/requirements.txt DELETED
File without changes
requirements.txt ADDED
@@ -0,0 +1,6 @@
+ openai
+ google-cloud-vision
+ google-auth
+ google-generativeai
+ diffusers
+ torch
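Note: GUI.py also imports streamlit and PIL (pillow), and the SDXL-Turbo pipeline pulls in transformers for its text encoders; none of these are listed above. If both the GUI and image generation are exercised, the environment would presumably need something closer to the following (the extra entries are an assumption, not part of this commit):

openai
google-cloud-vision
google-auth
google-generativeai
diffusers
transformers
torch
streamlit
pillow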