Tilakraj0308 committed on
Commit
1d996bc
·
1 Parent(s): 20056be

Upload 3 files

Browse files
Files changed (3) hide show
  1. .gitignore +2 -0
  2. app.py +100 -0
  3. requirements.txt +4 -0
.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ .env
2
+ myenv
app.py ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from dotenv import load_dotenv
3
+ import gradio as gr
4
+ import openai
5
+ import os
6
+ import soundfile
7
+ from espnet2.bin.tts_inference import Text2Speech
8
+
9
+
10
+
11
+
12
def _load_text2speech():
    """Return the shared ``Text2Speech`` model, loading it on first use."""
    model = getattr(_load_text2speech, "_model", None)
    if model is None:
        # Loading the pretrained model is expensive, so it is done once per
        # process and reused instead of being repeated on every request.
        model = Text2Speech.from_pretrained("kan-bayashi/ljspeech_vits")
        _load_text2speech._model = model
    return model


def tts(text):
    """Synthesize ``text`` to speech and return the path of the WAV file.

    Args:
        text: The text to speak.

    Returns:
        Path of the written ``example_TTS.wav`` file, located next to this
        module.
    """
    # Write to exactly the path that is returned. Previously the file was
    # written relative to the current working directory while the returned
    # path was relative to this module, so the two could disagree.
    output_path = os.path.join(os.path.dirname(__file__), "example_TTS.wav")

    speech = _load_text2speech()(text)["wav"].numpy()

    # Save the waveform.
    # NOTE(review): 22050 Hz is assumed to match the model's output rate --
    # confirm against the pretrained model's configuration.
    soundfile.write(output_path, speech, 22050, 'PCM_24')
    return output_path
21
+
22
+
23
def transcribe(audio):
    """Transcribe a recorded audio file with the ``whisper-1`` model.

    Args:
        audio: Path of the audio file to transcribe.

    Returns:
        The value of the ``text`` field of the transcription response.
    """
    # The context manager closes the file handle even when the API call
    # raises; the handle was previously left open on every request.
    with open(audio, "rb") as audio_file:
        transcription = openai.Audio.transcribe("whisper-1", file=audio_file)
    return transcription['text']
31
+
32
+
33
def conversation(audio):
    """Run one voice chat turn: transcribe, query the chat model, speak the reply.

    Args:
        audio: Path of the recorded audio file.

    Returns:
        A ``(transcript, speech_path)`` tuple: the "You / AI" text of the
        exchange and the value returned by ``tts`` for the model's reply.
    """
    user_text = transcribe(audio)

    # Single-turn request: only the current utterance is sent to the model.
    chat_messages = [{"role": "user", "content": user_text}]
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=chat_messages,
    )
    ai_reply = response['choices'][0]['message']['content']

    # The displayed reply is stripped of surrounding whitespace; the reply
    # passed to speech synthesis is not.
    transcript = "".join(['You: ', user_text, '\n', 'AI: ', ai_reply.strip()])
    speech_path = tts(ai_reply)
    return transcript, speech_path
46
+
47
def generate_image(audio):
    """Generate an image from a spoken prompt and return its URL.

    Args:
        audio: Path of the recorded audio file holding the spoken prompt.

    Returns:
        The ``url`` field of the first entry in the image response's ``data``.
    """
    # The transcribed speech is used verbatim as the image prompt.
    image_request = {
        "prompt": transcribe(audio),
        "n": 1,
        "size": "1024x1024",
    }
    result = openai.Image.create(**image_request)
    first_image = result['data'][0]
    return first_image['url']
55
+
56
+
57
# Build the two-tab Gradio interface and start the app.
# NOTE(review): the layout nesting below is reconstructed from statement
# order -- confirm the Row/Tab/Accordion grouping against the running app.
with gr.Blocks() as demo:
    # Read the OpenAI key from the environment (optionally from a .env file).
    load_dotenv()
    openai.api_key = os.getenv('api_key')

    # Not read by any of the code visible here; kept as a module-level name.
    conv = []

    # Tab 1: spoken conversation with the chat model.
    with gr.Tab("Start a conversation"):
        with gr.Row():
            audio_input_conv = gr.Audio(source="microphone", type="filepath")
            text_output_conv = gr.Textbox(lines=10)
            audio_output_conv = gr.Audio()
        with gr.Row():
            clear_button_conv = gr.Button("Clear")
            submit_button_conv = gr.Button("Submit")

    # Tab 2: spoken prompt turned into an image.
    with gr.Tab("Generate image"):
        with gr.Row():
            audio_input_img = gr.Audio(source="microphone", type="filepath")
            image_output_img = gr.Image()
        with gr.Row():
            clear_button_img = gr.Button("Clear")
            submit_button_img = gr.Button("Submit")

    with gr.Accordion("How to use"):
        gr.Markdown("Record and submit your voice to talk to AI or to generate an image!!")

    # Event wiring. Each "Clear" handler returns None to reset its target.
    submit_button_conv.click(
        fn=conversation,
        inputs=audio_input_conv,
        outputs=[text_output_conv, audio_output_conv],
    )
    clear_button_conv.click(
        fn=lambda: None, inputs=None, outputs=audio_input_conv, queue=False
    )
    submit_button_img.click(
        fn=generate_image,
        inputs=audio_input_img,
        outputs=image_output_img,
    )
    clear_button_img.click(
        fn=lambda: None, inputs=None, outputs=audio_input_img, queue=False
    )
    clear_button_img.click(
        fn=lambda: None, inputs=None, outputs=image_output_img, queue=False
    )

demo.launch()
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ gradio==3.20.1
2
+ openai==0.27.0
3
+ python-dotenv==1.0.0
4
+ soundfile==0.12.1