PolarO3O committed on
Commit
bb519eb
·
verified ·
1 Parent(s): 7d3e7b9

Upload 4 files

Browse files
Files changed (4) hide show
  1. README.md +7 -6
  2. app.py +132 -0
  3. requirements.txt +6 -0
  4. style.css +3 -0
README.md CHANGED
@@ -1,12 +1,13 @@
1
  ---
2
- title: Sasha
3
  emoji: 🔥
4
- colorFrom: green
5
- colorTo: indigo
6
  sdk: gradio
7
- sdk_version: 4.31.5
8
  app_file: app.py
9
- pinned: false
 
10
  ---
11
 
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: JARVIS
3
  emoji: 🔥
4
+ colorFrom: blue
5
+ colorTo: green
6
  sdk: gradio
7
+ sdk_version: 4.28.3
8
  app_file: app.py
9
+ pinned: true
10
+ short_description: Voice Chat with JARVIS
11
  ---
12
 
13
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,132 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import pipeline
3
+ import requests
4
+ import json
5
+ import edge_tts
6
+ import asyncio
7
+ import tempfile
8
+ import os
9
+ import inflect
10
+ from huggingface_hub import InferenceClient
11
+ import re
12
+ import time
13
+ from streaming_stt_nemo import Model
14
+
15
# Language code of the speech-to-text model that is loaded at import time.
default_lang = "en"

# Speech-to-text models keyed by language code; only the default language
# is instantiated here.
engines = {}
engines[default_lang] = Model(default_lang)
18
+
19
def transcribe(audio):
    """Convert a recorded audio file to text with the default-language engine.

    Args:
        audio: Path of the audio file to transcribe, or ``None``/empty when
            there is no recording.

    Returns:
        The first item returned by the engine's ``stt_file`` call, or an
        empty string when no audio was supplied.
    """
    # No recording: return early instead of handing None to the model.
    if not audio:
        return ""
    # Look the engine up through the shared module-level language setting
    # rather than repeating the "en" literal.
    stt_engine = engines[default_lang]
    return stt_engine.stt_file(audio)[0]
24
+
25
# Inference client and persona prompt. Both names are assigned again further
# down in this file, so these first values are superseded once the module has
# finished loading.
client1 = InferenceClient(model="mistralai/Mixtral-8x7B-Instruct-v0.1")

system_instructions1 = "<s> Answer as Sasha Bingham, an American born in Florida. Sasha is energetic, outgoing, and a programming whiz. She's passionate about teaching English and travels frequently. Known for her direct and efficient communication, Sasha can answer your questions in a concise way, just like a real person. Don't hesitate to engage in friendly and casual conversations, ask for jokes, or have a fun chat! [USER]"
28
+
29
def model(text):
    """Generate the assistant's reply to *text* from the streaming endpoint.

    The prompt is the module-level system instructions, followed by *text*
    and the ``[SASHA]`` tag. Streamed tokens are joined into one string,
    leaving out any ``</s>`` token.

    Args:
        text: The user's message.

    Returns:
        The generated reply as a single string.
    """
    sampling_options = {
        "temperature": 0.9,
        "max_new_tokens": 512,
        "top_p": 0.95,
        "repetition_penalty": 1,
        "do_sample": True,
        "seed": 42,
    }
    prompt = system_instructions1 + text + "[SASHA]"

    token_stream = client1.text_generation(
        prompt,
        **sampling_options,
        stream=True,
        details=True,
        return_full_text=False,
    )
    pieces = [
        chunk.token.text
        for chunk in token_stream
        if chunk.token.text != "</s>"
    ]
    return "".join(pieces)
48
+
49
async def respond(audio):
    """Answer a spoken message with synthesized speech.

    The recording is transcribed, the transcript is sent to the language
    model, and the reply is rendered to an audio file by edge-tts.

    Args:
        audio: Path of the recorded audio file, or ``None``/empty when there
            is no recording.

    Yields:
        Path of the temporary file holding the spoken reply, or ``None`` when
        there is nothing to transcribe or nothing to speak.
    """
    # No recording: do not pass None on to the speech-to-text model.
    if not audio:
        yield None
        return

    user = transcribe(audio)
    reply = model(user)

    # A blank reply gives the synthesizer nothing to say; skip it.
    if not reply or not reply.strip():
        yield None
        return

    communicate = edge_tts.Communicate(reply)
    # Reserve a path and close our handle before edge-tts writes to it, so
    # the file is never open twice. delete=False keeps the file on disk so it
    # can be handed back by path; nothing in this function removes it.
    # NOTE(review): edge-tts presumably writes MP3 data by default, so the
    # ".wav" suffix may be a misnomer - confirm before changing it.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
        tmp_path = tmp_file.name
    await communicate.save(tmp_path)
    yield tmp_path
57
+
58
# --- Markdown snippets shown in the UI --------------------------------------

# Page header.
DESCRIPTION = """ # <center><b>SASHA ✨</b></center>
### <center>Your AI Assistant, Sasha Bingham</center>
### <center>Let's chat!</center>
"""

# Footer pointing at other Spaces.
MORE = """ ## TRY Other Models
### Instant Video: Create Amazing Videos in 5 Second -> https://huggingface.co/spaces/KingNish/Instant-Video
### Instant Image: 4k images in 5 Second -> https://huggingface.co/spaces/KingNish/Instant-Image
"""

# Section headings. Only FAST is referenced in the visible part of this file;
# BETA, Complex and Detail are defined but not used there.
BETA = """ ### Voice Chat (BETA)"""
FAST = """## Fastest Model"""
Complex = """## Best in Complex Question"""
Detail = """## Best for Detailed Generation or Long Answers"""

# --- Text-generation backend -------------------------------------------------

# Model id passed to the inference client.
base_loaded = "mistralai/Mixtral-8x7B-Instruct-v0.1"

client1 = InferenceClient(model=base_loaded)

# Persona prompt placed in front of every user message.
system_instructions1 = "[SYSTEM] Answer as Sasha Bingham, an American born in Florida. Sasha is energetic, outgoing, and a programming whiz. She's passionate about teaching English and travels frequently. Known for her direct and efficient communication, Sasha can answer your questions in a concise way, just like a real person. Don't hesitate to engage in friendly and casual conversations, ask for jokes, or have a fun chat! [USER]"
81
+
82
async def generate1(prompt):
    """Generate a reply to a text prompt and render it as speech.

    The prompt is wrapped with the module-level system instructions and the
    ``[SASHA]`` tag, streamed through the inference client, and the collected
    text is synthesized to an audio file by edge-tts.

    Args:
        prompt: The user's text message.

    Yields:
        Path of the temporary audio file holding the spoken reply, or
        ``None`` when the model produced no speakable text.
    """
    # NOTE(review): do_sample=False is combined with temperature/top_p here,
    # and return_full_text=True differs from model(), which passes False -
    # confirm which settings are intended.
    generate_kwargs = dict(
        temperature=0.7,
        max_new_tokens=512,
        top_p=0.95,
        repetition_penalty=1,
        do_sample=False,
    )
    formatted_prompt = system_instructions1 + prompt + "[SASHA]"
    stream = client1.text_generation(
        formatted_prompt,
        **generate_kwargs,
        stream=True,
        details=True,
        return_full_text=True,
    )
    # Collect the streamed tokens in one pass, leaving out "</s>" tokens.
    output = "".join(
        response.token.text
        for response in stream
        if response.token.text != "</s>"
    )

    # A blank reply gives the synthesizer nothing to say; skip it.
    if not output.strip():
        yield None
        return

    communicate = edge_tts.Communicate(output)
    # Reserve a path and close our handle before edge-tts writes to it, so
    # the file is never open twice. delete=False keeps the file on disk so it
    # can be handed back by path; nothing in this function removes it.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
        tmp_path = tmp_file.name
    await communicate.save(tmp_path)
    yield tmp_path
103
+
104
# UI definition: a voice-chat section wired to respond() and a text-prompt
# section wired to generate1().
# NOTE(review): the nesting below is reconstructed from a flattened listing -
# confirm the intended row layout.
with gr.Blocks(css="style.css") as demo:
    gr.Markdown(DESCRIPTION)

    # Voice chat: microphone recording in, spoken reply out. The components
    # are named voice_input/voice_output so the builtin input() is no longer
    # shadowed at module scope.
    with gr.Row():
        voice_input = gr.Audio(
            label="Voice Chat (BETA)",
            sources="microphone",
            type="filepath",
            waveform_options=False,
        )
        voice_output = gr.Audio(
            label="SASHA",
            type="filepath",
            interactive=False,
            autoplay=True,
            elem_classes="audio",
        )
        gr.Interface(
            fn=respond,
            inputs=[voice_input],
            outputs=[voice_output],
            live=True,
        )

    gr.Markdown(FAST)

    # Text prompt in, spoken reply out.
    with gr.Row():
        user_input = gr.Textbox(label="Prompt", value="What is Wikipedia")
        # Carries elem_id "important", which style.css hides; it is not wired
        # to any event in this block.
        input_text = gr.Textbox(label="Input Text", elem_id="important")
        output_audio = gr.Audio(
            label="SASHA",
            type="filepath",
            interactive=False,
            autoplay=True,
            elem_classes="audio",
        )
    with gr.Row():
        translate_btn = gr.Button("Response")
        # api_name is kept as "translate" so the exposed endpoint name does
        # not change.
        translate_btn.click(
            fn=generate1,
            inputs=user_input,
            outputs=output_audio,
            api_name="translate",
        )

    gr.Markdown(MORE)

if __name__ == "__main__":
    demo.queue(max_size=200).launch()
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ transformers
2
+ torch
3
+ inflect
4
+ edge-tts
5
+ asyncio
6
+ streaming-stt-nemo==0.2.0
style.css ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
/* Hide the element whose id is "important" (app.py assigns this id to its
   "Input Text" textbox). */
#important {
  display: none;
}