Spaces:
Running
Running
Add OpenAI Whisper integration and update requirements.txt
Browse files- src/app.py +61 -32
- src/app/api/module/llm_vision.py +1 -1
- src/app/api/module/prompts/base.py +29 -1
- src/app_utils.py +69 -0
- src/{app/api/module/audio_text.py → audio_text.py} +4 -3
- src/requirements.txt +4 -1
src/app.py
CHANGED
@@ -1,45 +1,74 @@
|
|
1 |
import gradio as gr
|
2 |
import pymysql
|
3 |
import pandas as pd
|
|
|
|
|
|
|
|
|
4 |
|
5 |
def get_total_number_of_products():
|
6 |
-
|
7 |
-
cursor = connection.cursor()
|
8 |
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
|
15 |
-
|
|
|
|
|
|
|
|
|
16 |
|
17 |
-
|
18 |
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
38 |
|
39 |
with gr.Blocks(theme=gr.themes.Default(primary_hue=gr.themes.colors.red, secondary_hue=gr.themes.colors.pink)) as demo:
|
40 |
|
41 |
-
with gr.Tab("
|
42 |
-
voice_input = gr.Audio(
|
43 |
prodcut_id = gr.Textbox(label="Enter Product ID")
|
44 |
with gr.Row():
|
45 |
submit_button_tab_1 = gr.Button("Start")
|
@@ -56,7 +85,7 @@ with gr.Blocks(theme=gr.themes.Default(primary_hue=gr.themes.colors.red, seconda
|
|
56 |
'quantity', 'promotion_on_the_pack', 'type_of_packaging', 'mrp'])
|
57 |
|
58 |
|
59 |
-
submit_button_tab_1.click(fn=sample_fun,inputs=[voice_input,prodcut_id])
|
60 |
submit_button_tab_4.click(fn=search_products,inputs=[embbed_text_search] ,outputs= dataframe_output_tab_4)
|
61 |
|
62 |
-
demo.launch(server_name="0.0.0.0",server_port=
|
|
|
1 |
import gradio as gr
|
2 |
import pymysql
|
3 |
import pandas as pd
|
4 |
+
import soundfile as sf
|
5 |
+
from audio_text import whisper_openai
|
6 |
+
from app_utils import voice_edit, extract_json_from_text, getname
|
7 |
+
import uuid
|
8 |
|
9 |
def get_total_number_of_products():
    """Placeholder for the product-count lookup.

    The function currently performs no work and yields ``None``.
    """
    return None
|
|
|
11 |
|
12 |
+
def search_products(search_query):
    """Placeholder for the product search.

    ``search_query`` is accepted but ignored; the function currently
    performs no work and yields ``None``.
    """
    return None
|
14 |
+
# def get_total_number_of_products():
|
15 |
+
# connection = connect_to_db()
|
16 |
+
# cursor = connection.cursor()
|
17 |
|
18 |
+
# # Execute SQL query to count total number of products
|
19 |
+
# sql = "SELECT COUNT(*) AS total_products FROM api_database"
|
20 |
+
# cursor.execute(sql)
|
21 |
+
# result = cursor.fetchone()
|
22 |
+
# total_products = result['total_products']
|
23 |
|
24 |
+
# connection.close()
|
25 |
|
26 |
+
# return total_products
|
27 |
+
|
28 |
+
# def search_products(search_query):
|
29 |
+
# search_query = " " + search_query.lower() + " "
|
30 |
+
# connection = connect_to_db()
|
31 |
+
# cursor = connection.cursor()
|
32 |
+
# sql = """
|
33 |
+
# SELECT * FROM api_database
|
34 |
+
# WHERE product_name LIKE %s OR description LIKE %s
|
35 |
+
# """
|
36 |
+
# cursor.execute(sql, ('%' + search_query + '%', '%' + search_query + '%'))
|
37 |
+
# search_results = cursor.fetchall()
|
38 |
+
|
39 |
+
# connection.close()
|
40 |
+
# search_results_formatted = []
|
41 |
+
# for result in search_results:
|
42 |
+
# search_results_formatted.append(list(result.values()))
|
43 |
+
# return search_results_formatted
|
44 |
+
|
45 |
+
|
46 |
+
def sample_fun(voice_input, product_id):
    """Turn a recorded audio clip into a dict of product details.

    The clip is written to a temporary ``.wav`` file, transcribed with
    ``whisper_openai``, inserted into the ``voice_edit`` prompt, sent to
    ``getname`` and the reply is parsed with ``extract_json_from_text``.

    Args:
        voice_input: ``(sample_rate, audio_data)`` pair as unpacked below,
            or ``None`` when no audio was supplied.
        product_id: Identifier stored under the ``'product_id'`` key of the
            returned dict.

    Returns:
        dict: The parsed details plus ``'product_id'``. When there is no
        audio, or the model reply cannot be parsed into a dict, only
        ``'product_id'`` is present.
    """
    import os  # local import: only needed for temp-file cleanup

    if voice_input is None:
        # Nothing was recorded; unpacking None below would raise TypeError.
        return {"product_id": product_id}

    sample_rate, audio_data = voice_input
    audio_path = uuid.uuid4().hex + ".wav"
    try:
        sf.write(audio_path, audio_data, sample_rate)
        transcription = whisper_openai(audio_path)
    finally:
        # The file is only a hand-off to the transcriber and its path is
        # never returned, so remove it instead of letting clips pile up.
        if os.path.exists(audio_path):
            os.remove(audio_path)

    prompt = voice_edit.format(text=transcription)
    name = getname(prompt)
    print("Name:", name)

    json_data = None
    try:
        json_data = extract_json_from_text(name)
    except Exception as e:
        print(f"-->Exception occurred while extracting JSON: {str(e)}")

    if not isinstance(json_data, dict):
        # Extraction failed (raised, or handed back a non-dict such as an
        # exception object); fall back to an empty record so the
        # assignment below cannot fail.
        json_data = {}
    json_data['product_id'] = product_id

    return json_data
|
67 |
|
68 |
with gr.Blocks(theme=gr.themes.Default(primary_hue=gr.themes.colors.red, secondary_hue=gr.themes.colors.pink)) as demo:
|
69 |
|
70 |
+
with gr.Tab("Edit by Audio"):
|
71 |
+
voice_input = gr.Audio(sources=["microphone"])
|
72 |
prodcut_id = gr.Textbox(label="Enter Product ID")
|
73 |
with gr.Row():
|
74 |
submit_button_tab_1 = gr.Button("Start")
|
|
|
85 |
'quantity', 'promotion_on_the_pack', 'type_of_packaging', 'mrp'])
|
86 |
|
87 |
|
88 |
+
submit_button_tab_1.click(fn=sample_fun,inputs=[voice_input,prodcut_id], outputs=prodcut_id)
|
89 |
submit_button_tab_4.click(fn=search_products,inputs=[embbed_text_search] ,outputs= dataframe_output_tab_4)
|
90 |
|
91 |
+
demo.launch(server_name="0.0.0.0",server_port=8007)
|
src/app/api/module/llm_vision.py
CHANGED
@@ -70,7 +70,7 @@ class OpenAIVision:
|
|
70 |
|
71 |
return completion.choices[0].message
|
72 |
|
73 |
-
|
74 |
|
75 |
|
76 |
|
|
|
70 |
|
71 |
return completion.choices[0].message
|
72 |
|
73 |
+
|
74 |
|
75 |
|
76 |
|
src/app/api/module/prompts/base.py
CHANGED
@@ -41,4 +41,32 @@ gpt3 = dedent(""" I am providing you with a OCR text about a product.
|
|
41 |
I want you to provide me with the name of prodcut in following JSON format:
|
42 |
"product_name" : "BRU instant coffee".
|
43 |
|
44 |
-
""")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
41 |
I want you to provide me with the name of prodcut in following JSON format:
|
42 |
"product_name" : "BRU instant coffee".
|
43 |
|
44 |
+
""")
|
45 |
+
|
46 |
+
|
47 |
+
voice_edit = dedent("""
|
48 |
+
### Instruction:
|
49 |
+
audio transcription starts here
|
50 |
+
{text}
|
51 |
+
audio transcription ends here
|
52 |
+
|
53 |
+
I want you to provide the json format with all the details filled as mentioned below by getting information from the audio transcription.
|
54 |
+
( return "null" where you don't have a answer)
|
55 |
+
|
56 |
+
"brand": "sample_brand",
|
57 |
+
"mrp": "12", ##price of product
|
58 |
+
"unit": "per pack",
|
59 |
+
"Quantity": 1, ##num of products visible
|
60 |
+
"parent_category": "from the above given list",
|
61 |
+
"ingredients": ["ingredient1", "ingredient2", "ingredient3"],
|
62 |
+
"calorie_count": "Would be in numbers",
|
63 |
+
"marketed_by": "sample_marketer",
|
64 |
+
"manufactured_by": "sample_manufacturer",
|
65 |
+
"manufactured_in_country": "Country XYZ",
|
66 |
+
"type_of_packaging": "Box",
|
67 |
+
"promotion_on_the_pack": "if any",
|
68 |
+
"type_of_product": "give this your understanding",
|
69 |
+
"pack_of_or_no_of_units": "No. of Units"
|
70 |
+
|
71 |
+
Only return the output in the required json format with string in enclosed in double quotes.
|
72 |
+
""")
|
src/app_utils.py
ADDED
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from textwrap import dedent
|
2 |
+
import base64
|
3 |
+
import requests
|
4 |
+
from openai import OpenAI
|
5 |
+
import os
|
6 |
+
from decouple import config
|
7 |
+
import json
|
8 |
+
|
9 |
+
OPENAI_API_KEY = config('OPENAI_API_KEY', default="")
|
10 |
+
|
11 |
+
|
12 |
+
voice_edit = dedent("""
|
13 |
+
### Instruction:
|
14 |
+
audio transcription starts here
|
15 |
+
{text}
|
16 |
+
audio transcription ends here
|
17 |
+
|
18 |
+
I want you to provide the json format with all the details filled as mentioned below by getting information from the audio transcription.
|
19 |
+
( return "null" where you don't have a answer)
|
20 |
+
|
21 |
+
"brand": sample_brand
|
22 |
+
"mrp": 12, ##price of product
|
23 |
+
"unit": per pack
|
24 |
+
"Quantity": 1, ##num of products visible
|
25 |
+
"parent_category": from the above given list"
|
26 |
+
"ingredients": ["ingredient1", "ingredient2", "ingredient3"] ##list of ingredients
|
27 |
+
"calorie_count": 12 ##calorie count
|
28 |
+
"marketed_by": ##sample_marketer
|
29 |
+
"manufactured_by": ##manufacturer
|
30 |
+
"manufactured_in_country": India ##Country of manufacture
|
31 |
+
"type_of_packaging": Box
|
32 |
+
"promotion_on_the_pack":
|
33 |
+
"type_of_product": ## give this your understanding of product
|
34 |
+
"pack_of_or_no_of_units": ##No. of Units
|
35 |
+
"description": ##description of product
|
36 |
+
"weight": ##weight of product
|
37 |
+
|
38 |
+
|
39 |
+
Only return the output in the required json format with string in enclosed in double quotes.
|
40 |
+
""")
|
41 |
+
|
42 |
+
def getname(prompt):
    """Send ``prompt`` as a single user message and return the reply text.

    A fresh ``OpenAI`` client is built from ``OPENAI_API_KEY`` on every call
    and the ``gpt-3.5-turbo-1106`` model is asked for a ``json_object``
    response.

    Args:
        prompt: Text placed in the ``content`` of the user message.

    Returns:
        The ``content`` of the first choice's message.
    """
    openai_client = OpenAI(api_key=OPENAI_API_KEY)
    chat_messages = [{"role": "user", "content": prompt}]
    response = openai_client.chat.completions.create(
        model="gpt-3.5-turbo-1106",
        response_format={"type": "json_object"},
        messages=chat_messages,
    )
    first_choice = response.choices[0]
    return first_choice.message.content
|
54 |
+
|
55 |
+
|
56 |
+
def extract_json_from_text(text):
    """Parse the outermost ``{...}`` span of ``text`` as JSON.

    Args:
        text: Any object; it is converted with ``str()`` before searching.

    Returns:
        The decoded JSON value on success. On failure the exception
        instance is *returned* (not raised), so callers must check the
        result type before using it.
    """
    text = str(text)
    print(f"Extracting JSON from text: {text}")

    # Outermost candidate: first '{' through last '}' (inclusive).
    start_index = text.find('{')
    end_index = text.rfind('}') + 1

    try:
        if start_index == -1 or end_index <= start_index:
            # Without this guard the slice is empty or garbage and the
            # decoder error says nothing about the real cause.
            raise ValueError("no JSON object found in text")
        return json.loads(text[start_index:end_index])
    except ValueError as e:  # json.JSONDecodeError is a ValueError subclass
        # Red message; the reset sequence needs its own escape character,
        # otherwise the terminal stays red and "[0m" is printed literally.
        print(f"\033[31m Exception occurred while loading JSON: {str(e)} \033[0m")
        return e
|
src/{app/api/module/audio_text.py → audio_text.py}
RENAMED
@@ -1,11 +1,12 @@
|
|
1 |
# from whisper_jax import FlaxWhisperPipline
|
2 |
# import jax.numpy as jnp
|
3 |
-
import whisper
|
4 |
-
print(whisper.__file__)
|
5 |
from openai import OpenAI
|
6 |
-
from
|
7 |
import os
|
8 |
|
|
|
9 |
client = OpenAI()
|
10 |
os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY
|
11 |
|
|
|
1 |
# from whisper_jax import FlaxWhisperPipline
|
2 |
# import jax.numpy as jnp
|
3 |
+
# import whisper
|
4 |
+
# print(whisper.__file__)
|
5 |
from openai import OpenAI
|
6 |
+
from decouple import config
|
7 |
import os
|
8 |
|
9 |
+
# Read the key through decouple (falls back to an empty string when unset).
OPENAI_API_KEY = config('OPENAI_API_KEY', default="")
# Export the key BEFORE building the client, and pass it explicitly as well:
# a bare OpenAI() resolves the key from the environment at construction
# time, so creating the client first would miss a key that only decouple
# can see.
os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY
client = OpenAI(api_key=OPENAI_API_KEY)
|
12 |
|
src/requirements.txt
CHANGED
@@ -5,4 +5,7 @@ azure-ai-formrecognizer
|
|
5 |
easyocr
|
6 |
chromadb
|
7 |
langchain_openai
|
8 |
-
unstructured
|
|
|
|
|
|
|
|
5 |
easyocr
|
6 |
chromadb
|
7 |
langchain_openai
|
8 |
+
unstructured
|
9 |
+
gradio
|
10 |
+
pymysql
|
11 |
+
whisper==1.1.10
|