Spaces:

asach
/

Catalog-Digitization

Sleeping

App Files Files Community

Vrushali committed on Feb 14, 2024

Commit

e6bde5b

1 Parent(s): 5429c6f

Add OpenAI Whisper integration and update requirements.txt

Browse files

Files changed (6) hide show

src/app.py +61 -32
src/app/api/module/llm_vision.py +1 -1
src/app/api/module/prompts/base.py +29 -1
src/app_utils.py +69 -0
src/{app/api/module/audio_text.py → audio_text.py} +4 -3
src/requirements.txt +4 -1

src/app.py CHANGED Viewed

@@ -1,45 +1,74 @@
 import gradio as gr
 import pymysql
 import pandas as pd
 def get_total_number_of_products():
-    connection = connect_to_db()
-    cursor = connection.cursor()
-    # Execute SQL query to count total number of products
-    sql = "SELECT COUNT(*) AS total_products FROM api_database"
-    cursor.execute(sql)
-    result = cursor.fetchone()
-    total_products = result['total_products']
-    connection.close()
-    return total_products
-def search_products(search_query):
-    search_query = " " + search_query.lower() + " "
-    connection = connect_to_db()
-    cursor = connection.cursor()
-    sql = """
-        SELECT * FROM api_database
-        WHERE product_name LIKE %s OR description LIKE %s
-    """
-    cursor.execute(sql, ('%' + search_query + '%', '%' + search_query + '%'))
-    search_results = cursor.fetchall()
-    connection.close()
-    search_results_formatted = []
-    for result in search_results:
-        search_results_formatted.append(list(result.values()))
-    return search_results_formatted
-def sample_fun(first_image,voice_input, text_input):
-    return
 with gr.Blocks(theme=gr.themes.Default(primary_hue=gr.themes.colors.red, secondary_hue=gr.themes.colors.pink)) as demo:
-    with gr.Tab("Add Your Image"):
-        voice_input = gr.Audio(label="Upload Audio")
         prodcut_id = gr.Textbox(label="Enter Product ID")
         with gr.Row():
             submit_button_tab_1 = gr.Button("Start")
@@ -56,7 +85,7 @@ with gr.Blocks(theme=gr.themes.Default(primary_hue=gr.themes.colors.red, seconda
                                                'quantity', 'promotion_on_the_pack', 'type_of_packaging', 'mrp'])
-    submit_button_tab_1.click(fn=sample_fun,inputs=[voice_input,prodcut_id])
     submit_button_tab_4.click(fn=search_products,inputs=[embbed_text_search] ,outputs= dataframe_output_tab_4)
-demo.launch(server_name="0.0.0.0",server_port=9003)

 import gradio as gr
 import pymysql
 import pandas as pd
+import soundfile as sf
+from audio_text import whisper_openai
+from app_utils import voice_edit, extract_json_from_text, getname
+import uuid
 def get_total_number_of_products():
+    pass
+def search_products(search_query):
+    pass
+# def get_total_number_of_products():
+#     connection = connect_to_db()
+#     cursor = connection.cursor()
+#     # Execute SQL query to count total number of products
+#     sql = "SELECT COUNT(*) AS total_products FROM api_database"
+#     cursor.execute(sql)
+#     result = cursor.fetchone()
+#     total_products = result['total_products']
+#     connection.close()
+#     return total_products
+# def search_products(search_query):
+#     search_query = " " + search_query.lower() + " "
+#     connection = connect_to_db()
+#     cursor = connection.cursor()
+#     sql = """
+#         SELECT * FROM api_database
+#         WHERE product_name LIKE %s OR description LIKE %s
+#     """
+#     cursor.execute(sql, ('%' + search_query + '%', '%' + search_query + '%'))
+#     search_results = cursor.fetchall()
+#     connection.close()
+#     search_results_formatted = []
+#     for result in search_results:
+#         search_results_formatted.append(list(result.values()))
+#     return search_results_formatted
+def sample_fun(voice_input, product_id):
+    """Transcribe a recorded clip and turn it into a product-details dict.
+
+    The clip is written to a temporary WAV file, transcribed with
+    ``whisper_openai``, inserted into the ``voice_edit`` prompt, sent to the
+    model through ``getname``, and the reply is parsed with
+    ``extract_json_from_text``.
+
+    Args:
+        voice_input: A ``(sample_rate, audio_data)`` pair; it is unpacked in
+            that order and handed to ``sf.write``. Presumably this is what the
+            ``gr.Audio`` component delivers -- confirm against the Gradio
+            version in use.
+        product_id: Value stored under the ``"product_id"`` key of the result.
+
+    Returns:
+        dict: Fields parsed from the model reply plus ``"product_id"``. When
+        the reply cannot be parsed into a dict, the result contains only
+        ``"product_id"`` instead of crashing the handler.
+    """
+    import os  # local import: only needed for temp-file cleanup
+
+    print(voice_input)
+    sample_rate, audio_data = voice_input
+    audio_path = uuid.uuid4().hex + ".wav"
+    try:
+        sf.write(audio_path, audio_data, sample_rate)
+        transcription = whisper_openai(audio_path)
+    finally:
+        # The clip is only needed for transcription; without this the
+        # uuid-named files pile up in the working directory.
+        if os.path.exists(audio_path):
+            os.remove(audio_path)
+    prompt = voice_edit.format(text=transcription)
+    name = getname(prompt)
+    print("Name:", name)
+    try:
+        json_data = extract_json_from_text(name)
+    except Exception as e:
+        print(f"-->Exception occurred while extracting JSON: {str(e)}")
+        json_data = None
+    # The extractor may raise or hand back a non-dict (e.g. an exception
+    # object); either way fall back to an empty record so the assignment
+    # below cannot fail with NameError/TypeError.
+    if not isinstance(json_data, dict):
+        json_data = {}
+    json_data['product_id'] = product_id
+    return json_data
 with gr.Blocks(theme=gr.themes.Default(primary_hue=gr.themes.colors.red, secondary_hue=gr.themes.colors.pink)) as demo:
+    with gr.Tab("Edit by Audio"):
+        voice_input = gr.Audio(sources=["microphone"])
         prodcut_id = gr.Textbox(label="Enter Product ID")
         with gr.Row():
             submit_button_tab_1 = gr.Button("Start")
                                                'quantity', 'promotion_on_the_pack', 'type_of_packaging', 'mrp'])
+    submit_button_tab_1.click(fn=sample_fun,inputs=[voice_input,prodcut_id], outputs=prodcut_id)
     submit_button_tab_4.click(fn=search_products,inputs=[embbed_text_search] ,outputs= dataframe_output_tab_4)
+demo.launch(server_name="0.0.0.0",server_port=8007)

src/app/api/module/llm_vision.py CHANGED Viewed

	@@ -70,7 +70,7 @@ class OpenAIVision:
70
71	return completion.choices[0].message
72
73	-
74
75
76


70
71	return completion.choices[0].message
72
73	+
74
75
76

src/app/api/module/prompts/base.py CHANGED Viewed

@@ -41,4 +41,32 @@ gpt3 = dedent(""" I am providing you with a OCR text about a product.
               I want you to provide me with the name of prodcut in following JSON format:
                "product_name" : "BRU instant coffee".
-              """)

               I want you to provide me with the name of prodcut in following JSON format:
                "product_name" : "BRU instant coffee".
+              """)
+voice_edit = dedent("""
+        ### Instruction:
+        audio transcription starts here
+        {text}
+        audio transcription ends here
+        I want you to provide the json format with all the details filled as mentioned below by getting information from the audio transcription.
+        ( return "null" where you don't have a answer)
+        "brand": "sample_brand",
+        "mrp": "12", ##price of product
+        "unit": "per pack",
+        "Quantity": 1,  ##num of products visible
+        "parent_category": "from the above given list",
+        "ingredients": ["ingredient1", "ingredient2", "ingredient3"],
+        "calorie_count": "Would be in numbers",
+        "marketed_by": "sample_marketer",
+        "manufactured_by": "sample_manufacturer",
+        "manufactured_in_country": "Country XYZ",
+        "type_of_packaging": "Box",
+        "promotion_on_the_pack": "if any",
+        "type_of_product": "give this your understanding",
+        "pack_of_or_no_of_units": "No. of Units"
+        Only return the output in the required json format with string in enclosed in double quotes.
+        """)

src/app_utils.py ADDED Viewed

	@@ -0,0 +1,69 @@

+from textwrap import dedent
+import base64
+import requests
+from openai import OpenAI
+import os
+from decouple import config
+import json
+OPENAI_API_KEY = config('OPENAI_API_KEY', default="")
+voice_edit = dedent("""
+        ### Instruction:
+        audio transcription starts here
+        {text}
+        audio transcription ends here
+        I want you to provide the json format with all the details filled as mentioned below by getting information from the audio transcription.
+        ( return "null" where you don't have a answer)
+        "brand": sample_brand
+        "mrp": 12, ##price of product
+        "unit": per pack
+        "Quantity": 1,  ##num of products visible
+        "parent_category": from the above given list"
+        "ingredients": ["ingredient1", "ingredient2", "ingredient3"] ##list of ingredients
+        "calorie_count": 12 ##calorie count
+        "marketed_by":  ##sample_marketer
+        "manufactured_by": ##manufacturer
+        "manufactured_in_country": India ##Country of manufacture
+        "type_of_packaging": Box
+        "promotion_on_the_pack":
+        "type_of_product":  ## give this your understanding of product
+        "pack_of_or_no_of_units":  ##No. of Units
+        "description": ##description of product
+        "weight": ##weight of product
+        Only return the output in the required json format with string in enclosed in double quotes.
+        """)
+def getname(prompt):
+    """Send *prompt* as a single user message and return the reply text.
+
+    A new ``OpenAI`` client is created with ``OPENAI_API_KEY`` on every call.
+    The request targets ``gpt-3.5-turbo-1106`` and asks for a
+    ``json_object`` response format.
+
+    Args:
+        prompt: Full prompt text placed in the user message.
+
+    Returns:
+        The ``content`` of the first choice's message.
+    """
+    chat_messages = [{"role": "user", "content": prompt}]
+    openai_client = OpenAI(api_key=OPENAI_API_KEY)
+    response = openai_client.chat.completions.create(
+        model="gpt-3.5-turbo-1106",
+        response_format={"type": "json_object"},
+        messages=chat_messages,
+    )
+    first_choice = response.choices[0]
+    return first_choice.message.content
+def extract_json_from_text(text):
+    """Parse the outermost ``{...}`` span found in *text* as JSON.
+
+    Everything before the first ``{`` and after the last ``}`` is ignored, so
+    a JSON object wrapped in surrounding prose can still be read.
+
+    Args:
+        text: Any value; it is converted with ``str()`` before searching.
+
+    Returns:
+        The decoded JSON object.
+
+    Raises:
+        ValueError: If no ``{...}`` span exists or the span is not valid JSON
+            (``json.JSONDecodeError`` is a ``ValueError`` subclass). The error
+            is logged and re-raised rather than returned as a value, so
+            callers can handle it with ``try/except``.
+    """
+    text = str(text)
+    print(f"Extracting JSON from text: {text}")
+    start_index = text.find('{')
+    end_index = text.rfind('}') + 1
+    try:
+        # find() yields -1 and rfind()+1 yields 0 when a brace is missing.
+        if start_index == -1 or end_index <= start_index:
+            raise ValueError("no JSON object found in text")
+        return json.loads(text[start_index:end_index])
+    except ValueError as e:
+        # \033[31m switches to red; \033[0m resets the terminal colour.
+        print(f"\033[31m Exception occurred while loading JSON: {str(e)} \033[0m")
+        raise

src/{app/api/module/audio_text.py → audio_text.py} RENAMED Viewed

@@ -1,11 +1,12 @@
 # from whisper_jax import FlaxWhisperPipline
 # import jax.numpy as jnp
-import whisper
-print(whisper.__file__)
 from openai import OpenAI
-from module.config import OPENAI_API_KEY
 import os
 client = OpenAI()
 os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY

 # from whisper_jax import FlaxWhisperPipline
 # import jax.numpy as jnp
+# import whisper
+# print(whisper.__file__)
 from openai import OpenAI
+from decouple import config
 import os
+OPENAI_API_KEY = config('OPENAI_API_KEY', default="")
 client = OpenAI()
 os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY

src/requirements.txt CHANGED Viewed

@@ -5,4 +5,7 @@ azure-ai-formrecognizer
 easyocr
 chromadb
 langchain_openai
-unstructured

 easyocr
 chromadb
 langchain_openai
+unstructured
+gradio
+pymysql
+whisper==1.1.10