Vrushali committed on
Commit
e6bde5b
·
1 Parent(s): 5429c6f

Add OpenAI Whisper integration and update requirements.txt

Browse files
src/app.py CHANGED
@@ -1,45 +1,74 @@
1
  import gradio as gr
2
  import pymysql
3
  import pandas as pd
 
 
 
 
4
 
5
  def get_total_number_of_products():
6
- connection = connect_to_db()
7
- cursor = connection.cursor()
8
 
9
- # Execute SQL query to count total number of products
10
- sql = "SELECT COUNT(*) AS total_products FROM api_database"
11
- cursor.execute(sql)
12
- result = cursor.fetchone()
13
- total_products = result['total_products']
14
 
15
- connection.close()
 
 
 
 
16
 
17
- return total_products
18
 
19
- def search_products(search_query):
20
- search_query = " " + search_query.lower() + " "
21
- connection = connect_to_db()
22
- cursor = connection.cursor()
23
- sql = """
24
- SELECT * FROM api_database
25
- WHERE product_name LIKE %s OR description LIKE %s
26
- """
27
- cursor.execute(sql, ('%' + search_query + '%', '%' + search_query + '%'))
28
- search_results = cursor.fetchall()
29
-
30
- connection.close()
31
- search_results_formatted = []
32
- for result in search_results:
33
- search_results_formatted.append(list(result.values()))
34
- return search_results_formatted
35
-
36
- def sample_fun(first_image,voice_input, text_input):
37
- return
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
 
39
  with gr.Blocks(theme=gr.themes.Default(primary_hue=gr.themes.colors.red, secondary_hue=gr.themes.colors.pink)) as demo:
40
 
41
- with gr.Tab("Add Your Image"):
42
- voice_input = gr.Audio(label="Upload Audio")
43
  prodcut_id = gr.Textbox(label="Enter Product ID")
44
  with gr.Row():
45
  submit_button_tab_1 = gr.Button("Start")
@@ -56,7 +85,7 @@ with gr.Blocks(theme=gr.themes.Default(primary_hue=gr.themes.colors.red, seconda
56
  'quantity', 'promotion_on_the_pack', 'type_of_packaging', 'mrp'])
57
 
58
 
59
- submit_button_tab_1.click(fn=sample_fun,inputs=[voice_input,prodcut_id])
60
  submit_button_tab_4.click(fn=search_products,inputs=[embbed_text_search] ,outputs= dataframe_output_tab_4)
61
 
62
- demo.launch(server_name="0.0.0.0",server_port=9003)
 
1
  import gradio as gr
2
  import pymysql
3
  import pandas as pd
4
+ import soundfile as sf
5
+ from audio_text import whisper_openai
6
+ from app_utils import voice_edit, extract_json_from_text, getname
7
+ import uuid
8
 
9
  def get_total_number_of_products():
10
+ pass
 
11
 
12
+ def search_products(search_query):
13
+ pass
14
+ # def get_total_number_of_products():
15
+ # connection = connect_to_db()
16
+ # cursor = connection.cursor()
17
 
18
+ # # Execute SQL query to count total number of products
19
+ # sql = "SELECT COUNT(*) AS total_products FROM api_database"
20
+ # cursor.execute(sql)
21
+ # result = cursor.fetchone()
22
+ # total_products = result['total_products']
23
 
24
+ # connection.close()
25
 
26
+ # return total_products
27
+
28
+ # def search_products(search_query):
29
+ # search_query = " " + search_query.lower() + " "
30
+ # connection = connect_to_db()
31
+ # cursor = connection.cursor()
32
+ # sql = """
33
+ # SELECT * FROM api_database
34
+ # WHERE product_name LIKE %s OR description LIKE %s
35
+ # """
36
+ # cursor.execute(sql, ('%' + search_query + '%', '%' + search_query + '%'))
37
+ # search_results = cursor.fetchall()
38
+
39
+ # connection.close()
40
+ # search_results_formatted = []
41
+ # for result in search_results:
42
+ # search_results_formatted.append(list(result.values()))
43
+ # return search_results_formatted
44
+
45
+
46
def sample_fun(voice_input, product_id):
    """Transcribe a recorded audio clip and turn it into product JSON.

    The clip is written to a uniquely named ``.wav`` file, transcribed with
    ``whisper_openai``, wrapped in the ``voice_edit`` prompt and sent to
    ``getname``; the reply is parsed with ``extract_json_from_text``.

    Args:
        voice_input: A ``(sample_rate, audio_data)`` pair as unpacked below,
            or ``None`` when nothing was recorded.
        product_id: Identifier stored under the ``'product_id'`` key of the
            returned dict.

    Returns:
        dict: The parsed fields plus ``'product_id'``. When the model reply
        cannot be parsed, only ``'product_id'`` is present.

    Raises:
        ValueError: If ``voice_input`` is ``None``.
    """
    # Function-scope import: this module does not import ``os`` at file level.
    import os

    if voice_input is None:
        # Fail with a clear message instead of a tuple-unpacking TypeError.
        raise ValueError("No audio was provided; record audio before submitting.")

    print(voice_input)
    sample_rate, audio_data = voice_input

    audio_path = uuid.uuid4().hex + ".wav"
    try:
        sf.write(audio_path, audio_data, sample_rate)
        transcription = whisper_openai(audio_path)
    finally:
        # The recording is only needed for the transcription call, so remove
        # it to avoid piling up .wav files in the working directory.
        # NOTE(review): assumes whisper_openai does not reuse the path later.
        if os.path.exists(audio_path):
            os.remove(audio_path)

    prompt = voice_edit.format(text=transcription)
    name = getname(prompt)
    print("Name:", name)

    try:
        json_data = extract_json_from_text(name)
    except Exception as e:
        print(f"-->Exception occurred while extracting JSON: {str(e)}")
        json_data = None

    # extract_json_from_text may hand back an exception object instead of
    # raising; anything that is not a dict means "no usable fields".
    if not isinstance(json_data, dict):
        json_data = {}

    json_data['product_id'] = product_id
    return json_data
67
 
68
  with gr.Blocks(theme=gr.themes.Default(primary_hue=gr.themes.colors.red, secondary_hue=gr.themes.colors.pink)) as demo:
69
 
70
+ with gr.Tab("Edit by Audio"):
71
+ voice_input = gr.Audio(sources=["microphone"])
72
  prodcut_id = gr.Textbox(label="Enter Product ID")
73
  with gr.Row():
74
  submit_button_tab_1 = gr.Button("Start")
 
85
  'quantity', 'promotion_on_the_pack', 'type_of_packaging', 'mrp'])
86
 
87
 
88
+ submit_button_tab_1.click(fn=sample_fun,inputs=[voice_input,prodcut_id], outputs=prodcut_id)
89
  submit_button_tab_4.click(fn=search_products,inputs=[embbed_text_search] ,outputs= dataframe_output_tab_4)
90
 
91
+ demo.launch(server_name="0.0.0.0",server_port=8007)
src/app/api/module/llm_vision.py CHANGED
@@ -70,7 +70,7 @@ class OpenAIVision:
70
 
71
  return completion.choices[0].message
72
 
73
-
74
 
75
 
76
 
 
70
 
71
  return completion.choices[0].message
72
 
73
+
74
 
75
 
76
 
src/app/api/module/prompts/base.py CHANGED
@@ -41,4 +41,32 @@ gpt3 = dedent(""" I am providing you with a OCR text about a product.
41
  I want you to provide me with the name of prodcut in following JSON format:
42
  "product_name" : "BRU instant coffee".
43
 
44
- """)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
  I want you to provide me with the name of prodcut in following JSON format:
42
  "product_name" : "BRU instant coffee".
43
 
44
+ """)
45
+
46
+
47
+ voice_edit = dedent("""
48
+ ### Instruction:
49
+ audio transcription starts here
50
+ {text}
51
+ audio transcription ends here
52
+
53
+ I want you to provide the json format with all the details filled as mentioned below by getting information from the audio transcription.
54
+ ( return "null" where you don't have a answer)
55
+
56
+ "brand": "sample_brand",
57
+ "mrp": "12", ##price of product
58
+ "unit": "per pack",
59
+ "Quantity": 1, ##num of products visible
60
+ "parent_category": "from the above given list",
61
+ "ingredients": ["ingredient1", "ingredient2", "ingredient3"],
62
+ "calorie_count": "Would be in numbers",
63
+ "marketed_by": "sample_marketer",
64
+ "manufactured_by": "sample_manufacturer",
65
+ "manufactured_in_country": "Country XYZ",
66
+ "type_of_packaging": "Box",
67
+ "promotion_on_the_pack": "if any",
68
+ "type_of_product": "give this your understanding",
69
+ "pack_of_or_no_of_units": "No. of Units"
70
+
71
+ Only return the output in the required json format with string in enclosed in double quotes.
72
+ """)
src/app_utils.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from textwrap import dedent
2
+ import base64
3
+ import requests
4
+ from openai import OpenAI
5
+ import os
6
+ from decouple import config
7
+ import json
8
+
9
+ OPENAI_API_KEY = config('OPENAI_API_KEY', default="")
10
+
11
+
12
+ voice_edit = dedent("""
13
+ ### Instruction:
14
+ audio transcription starts here
15
+ {text}
16
+ audio transcription ends here
17
+
18
+ I want you to provide the json format with all the details filled as mentioned below by getting information from the audio transcription.
19
+ ( return "null" where you don't have a answer)
20
+
21
+ "brand": sample_brand
22
+ "mrp": 12, ##price of product
23
+ "unit": per pack
24
+ "Quantity": 1, ##num of products visible
25
+ "parent_category": from the above given list"
26
+ "ingredients": ["ingredient1", "ingredient2", "ingredient3"] ##list of ingredients
27
+ "calorie_count": 12 ##calorie count
28
+ "marketed_by": ##sample_marketer
29
+ "manufactured_by": ##manufacturer
30
+ "manufactured_in_country": India ##Country of manufacture
31
+ "type_of_packaging": Box
32
+ "promotion_on_the_pack":
33
+ "type_of_product": ## give this your understanding of product
34
+ "pack_of_or_no_of_units": ##No. of Units
35
+ "description": ##description of product
36
+ "weight": ##weight of product
37
+
38
+
39
+ Only return the output in the required json format with string in enclosed in double quotes.
40
+ """)
41
+
42
def getname(prompt):
    """Send ``prompt`` to the OpenAI chat-completions API and return the reply text.

    The request goes to ``gpt-3.5-turbo-1106`` with ``response_format`` set to
    ``json_object`` and the prompt as the single user message. The content of
    the first returned choice is handed back to the caller.
    """
    user_messages = [{"role": "user", "content": prompt}]
    openai_client = OpenAI(api_key=OPENAI_API_KEY)
    response = openai_client.chat.completions.create(
        response_format={"type": "json_object"},
        model="gpt-3.5-turbo-1106",
        messages=user_messages,
    )
    first_choice = response.choices[0]
    return first_choice.message.content
54
+
55
+
56
def extract_json_from_text(text):
    """Decode the JSON object embedded in ``text``.

    The span from the first ``{`` to the last ``}`` is cut out and passed to
    ``json.loads``; any text around it is ignored.

    Args:
        text: Value to search; it is converted with ``str()`` first.

    Returns:
        The decoded JSON value on success. On failure the caught exception
        instance is *returned* (not raised), so callers must check the type
        of the result before using it as a dict.
    """
    text = str(text)
    print(f"Extracting JSON from text: {text}")
    try:
        # Find the JSON part within the text
        start_index = text.find('{')
        end_index = text.rfind('}') + 1
        json_part = text[start_index:end_index]
        return json.loads(json_part.strip())
    except Exception as e:
        # The reset sequence needs its own ESC (\033); without it the
        # terminal stays red and a literal "[0m" is printed.
        print(f"\033[31m Exception occurred while loading JSON: {str(e)} \033[0m")
        return e
src/{app/api/module/audio_text.py → audio_text.py} RENAMED
@@ -1,11 +1,12 @@
1
  # from whisper_jax import FlaxWhisperPipline
2
  # import jax.numpy as jnp
3
- import whisper
4
- print(whisper.__file__)
5
  from openai import OpenAI
6
- from module.config import OPENAI_API_KEY
7
  import os
8
 
 
9
  client = OpenAI()
10
  os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY
11
 
 
1
  # from whisper_jax import FlaxWhisperPipline
2
  # import jax.numpy as jnp
3
+ # import whisper
4
+ # print(whisper.__file__)
5
  from openai import OpenAI
6
+ from decouple import config
7
  import os
8
 
9
OPENAI_API_KEY = config('OPENAI_API_KEY', default="")
# Export the key before building the client, and pass it explicitly as well:
# a client built first would not see a key that is only defined in the
# decouple-managed settings file.
os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY
client = OpenAI(api_key=OPENAI_API_KEY)
12
 
src/requirements.txt CHANGED
@@ -5,4 +5,7 @@ azure-ai-formrecognizer
5
  easyocr
6
  chromadb
7
  langchain_openai
8
- unstructured
 
 
 
 
5
  easyocr
6
  chromadb
7
  langchain_openai
8
+ unstructured
9
+ gradio
10
+ pymysql
11
+ whisper==1.1.10