import logging
import os
import re
import warnings
import gradio as gr
import requests
import torch
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
from templates import starting_app_code, update_iframe_js, copy_snippet_js, download_code_js, load_js
# Silence the UserWarning raised by the audio component.
warnings.filterwarnings(
    action="ignore",
    message='Trying to convert audio automatically from int32 to 16-bit int format',
)

# Process-wide logging: INFO level, timestamp followed by the message.
logging.basicConfig(
    datefmt="%Y-%m-%d %H:%M:%S",
    format="%(asctime)s - %(message)s",
    level=logging.INFO,
)
logger = logging.getLogger("my_logger")

# The remote API needs a token, so refuse to start when it is missing or empty.
HF_TOKEN = os.environ.get("HF_TOKEN")
if not HF_TOKEN:
    raise Exception("HF_TOKEN environment variable is required to call remote API.")

# Endpoint and authorization header used for every language-model request.
API_URL = "https://api-inference.huggingface.co/models/HuggingFaceH4/zephyr-7b-beta"
headers = {"Authorization": "Bearer " + HF_TOKEN}
def init_speech_to_text_model():
    """Build the speech-recognition pipeline used for voice requests.

    Loads the ``distil-whisper/distil-medium.en`` model and its processor.
    When CUDA is available the model runs on ``cuda:0`` in float16;
    otherwise it runs on the CPU in float32.

    Returns:
        The ``transformers`` "automatic-speech-recognition" pipeline.
    """
    checkpoint = "distil-whisper/distil-medium.en"

    # Choose device and precision together so they can never disagree.
    if torch.cuda.is_available():
        target_device, dtype = "cuda:0", torch.float16
    else:
        target_device, dtype = "cpu", torch.float32

    asr_model = AutoModelForSpeechSeq2Seq.from_pretrained(
        checkpoint,
        torch_dtype=dtype,
        low_cpu_mem_usage=True,
        use_safetensors=True,
    )
    asr_model.to(target_device)
    asr_processor = AutoProcessor.from_pretrained(checkpoint)

    pipeline_options = {
        "model": asr_model,
        "tokenizer": asr_processor.tokenizer,
        "feature_extractor": asr_processor.feature_extractor,
        "max_new_tokens": 128,
        "torch_dtype": dtype,
        "device": target_device,
    }
    return pipeline("automatic-speech-recognition", **pipeline_options)
# Build the speech-to-text pipeline once at import time; transcribe() reuses it.
whisper_pipe = init_speech_to_text_model()
# Captures the body of a ```python fenced block. DOTALL lets `.` span newlines,
# and the non-greedy `.*?` stops at the nearest closing fence.
code_pattern = re.compile(r'```python\n(.*?)```', re.DOTALL)
def query(payload, timeout=120):
    """POST *payload* to the hosted inference endpoint and return the decoded JSON.

    Args:
        payload: JSON-serialisable request body (sent with ``json=``).
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, which would hang the
            request handler if the endpoint stalls.

    Returns:
        The decoded JSON body on success. When the request cannot be
        completed, or the body is not valid JSON, a dict of the form
        ``{"error": "<description>"}`` is returned so that callers checking
        for an ``error`` key handle transport failures the same way as
        errors reported by the API itself.
    """
    try:
        response = requests.post(API_URL, headers=headers, json=payload, timeout=timeout)
    except requests.RequestException as exc:
        # Covers connection errors, timeouts and other transport failures.
        return {"error": f"Request to inference API failed: {exc}"}

    try:
        return response.json()
    except ValueError:
        # requests raises a ValueError subclass when the body is not JSON
        # (for example an HTML error page from a gateway).
        return {"error": f"Inference API returned a non-JSON response (HTTP {response.status_code})"}
def generate_text(code, prompt):
    """Ask the remote language model to rewrite *code* according to *prompt*.

    Args:
        code: Current app source code.
        prompt: Natural-language change request.

    Returns:
        A 3-tuple ``(assistant_reply, code, None)``:

        * ``assistant_reply`` is the text following the ``<|assistant|>``
          marker in the generated text, or the whole generated text when the
          marker is absent.
        * the second item is the body of the first ```python fenced block in
          the reply, or the unchanged *code* when the reply has none.
        * the trailing ``None`` is kept so the return shape stays as it was.

    Raises:
        gr.Error: If the API response carries an ``error`` entry.
    """
    logger.info(f"Calling API with prompt:\n{prompt}")
    full_prompt = f"```python\n{code}```\nGiven the code above return only updated code for the following request:\n{prompt}\n<|assistant|>"
    output = query({
        "inputs": full_prompt,
        "parameters": {"max_new_tokens": 512},
    })
    if 'error' in output:
        logger.warning(f'Language model call failed: {output["error"]}')
        # gr.Warning is a plain function returning None, so it cannot be
        # raised; gr.Error is the exception Gradio surfaces in the UI.
        raise gr.Error(f'Language model call failed: {output["error"]}')

    generated_text = output[0]["generated_text"]
    logger.info(f'API RESPONSE\n{generated_text}')

    # The reply is whatever follows the first marker. Fall back to the whole
    # text instead of failing with IndexError when the marker is missing.
    segments = generated_text.split('<|assistant|>')
    assistant_reply = segments[1] if len(segments) > 1 else generated_text

    match = code_pattern.search(assistant_reply)
    if not match:
        # No fenced python block in the reply: keep the current code.
        return assistant_reply, code, None

    new_code = match.group(1)
    logger.info(f'NEW CODE:\n{new_code}')
    return assistant_reply, new_code, None
def transcribe(audio):
    """Turn a recorded voice request into text.

    Args:
        audio: Recording handed directly to the speech-recognition pipeline.

    Returns:
        ``(text, None)``: the transcript from the pipeline result, followed
        by ``None`` for the second output of the event this handler serves.
    """
    transcript = whisper_pipe(audio)["text"]
    return transcript, None
def copy_notify(code):
    """Show an info toast confirming that the app snippet was copied.

    Args:
        code: Value supplied by the triggering event; not used here.
    """
    message = "App code snippet copied!"
    gr.Info(message)
# Build the Gradio UI: a header followed by one tab per in-browser runtime
# ('stlite' and 'gradio-lite'), each with a preview, a request panel, a code
# editor and export buttons.
# NOTE(review): this section carries no indentation and the two Markdown
# string literals below span several lines inside ordinary quotes — looks like
# whitespace/markup was lost upstream; confirm against the original file.
with gr.Blocks() as demo:
gr.Markdown("
KiteWind 🪁🍃
")
gr.Markdown(
"Chat-assisted web app creator by @gstaff
")
# Key of the currently selected tab; updated by the tab select handlers below.
selectedTab = gr.State(value='stlite')
# ---- Streamlit (stlite) tab ----
with gr.Tab('Streamlit (stlite)') as stlite_tab:
with gr.Row():
with gr.Column():
gr.Markdown("## 1. Run your app in the browser!")
# Starts empty; presumably filled in by the JavaScript snippets — TODO confirm.
html = gr.HTML(value='')
gr.Markdown("## 2. Customize using voice requests!")
with gr.Row():
with gr.Column():
with gr.Group():
in_audio = gr.Audio(label="Record a voice request", source='microphone', type='filepath')
in_prompt = gr.Textbox(label="Or type a text request and press Enter",
placeholder="Need an idea? Try one of these:\n- Add a button\n- Change the greeting from hello to hey there")
out_text = gr.TextArea(label="Chat Assistant Response")
clear_btn = gr.ClearButton([in_prompt, in_audio, out_text])
with gr.Column():
code_area = gr.Code(label="App Code - You can also edit directly and then click Update App",
language='python', value=starting_app_code('stlite'))
update_btn = gr.Button("Update App", variant="primary")
# No Python callback (fn is None): the event only runs the JavaScript returned
# by update_iframe_js, with the code area as its input.
code_update_params = {'fn': None, 'inputs': code_area, 'outputs': None, '_js': update_iframe_js('stlite')}
# NOTE(review): generate_text returns three values but only two outputs are
# listed here — confirm the third is meant to be dropped.
gen_text_params = {'fn': generate_text, 'inputs': [code_area, in_prompt],
'outputs': [out_text, code_area]}
# transcribe returns (text, None): the text goes to the prompt box and None to the audio input.
transcribe_params = {'fn': transcribe, 'inputs': [in_audio], 'outputs': [in_prompt, in_audio]}
update_btn.click(**code_update_params)
# Typed request: generate new code, then refresh the running app.
in_prompt.submit(**gen_text_params).then(**code_update_params)
# Voice request: transcribe, generate new code, then refresh the running app.
in_audio.stop_recording(**transcribe_params).then(**gen_text_params).then(**code_update_params)
with gr.Row():
with gr.Column():
gr.Markdown("## 3. Export your app to share!")
copy_snippet_btn = gr.Button("Copy app snippet to paste in another page")
# Runs the copy JavaScript and copy_notify, which shows the confirmation toast.
copy_snippet_btn.click(copy_notify, code_area, None, _js=copy_snippet_js('stlite'))
download_btn = gr.Button("Download app as a standalone file")
download_btn.click(None, code_area, None, _js=download_code_js('stlite'))
with gr.Row():
with gr.Column():
gr.Markdown("## Current limitations")
with gr.Accordion("Click to view", open=False):
gr.Markdown(
"- Only Streamlit apps using libraries available in pyodide are supported\n- The chat hasn't been tuned on Streamlit library data; it may make mistakes")
# ---- Gradio (gradio-lite) tab ----
# The component names below are rebound for this tab; the Streamlit tab's
# event wiring above already holds references to its own components.
with gr.Tab('Gradio (gradio-lite)') as gradio_lite_tab:
with gr.Row():
with gr.Column():
gr.Markdown("## 1. Run your app in the browser!")
html = gr.HTML(value='')
gr.Markdown("## 2. Customize using voice requests!")
with gr.Row():
with gr.Column():
with gr.Group():
in_audio = gr.Audio(label="Record a voice request", source='microphone', type='filepath')
in_prompt = gr.Textbox(label="Or type a text request and press Enter",
placeholder="Need an idea? Try one of these:\n- Add a button to reverse the name\n- Change the greeting to Hola\n- Put the reversed name output into a separate textbox\n- Change the theme from monochrome to soft")
out_text = gr.TextArea(label="Chat Assistant Response")
clear = gr.ClearButton([in_prompt, in_audio, out_text])
with gr.Column():
code_area = gr.Code(label="App Code - You can also edit directly and then click Update App",
language='python', value=starting_app_code('gradio-lite'))
update_btn = gr.Button("Update App", variant="primary")
# Same wiring as the Streamlit tab, using the 'gradio-lite' JavaScript snippets.
code_update_params = {'fn': None, 'inputs': code_area, 'outputs': None,
'_js': update_iframe_js('gradio-lite')}
gen_text_params = {'fn': generate_text, 'inputs': [code_area, in_prompt], 'outputs': [out_text, code_area]}
transcribe_params = {'fn': transcribe, 'inputs': [in_audio], 'outputs': [in_prompt, in_audio]}
update_btn.click(**code_update_params)
in_prompt.submit(**gen_text_params).then(**code_update_params)
in_audio.stop_recording(**transcribe_params).then(**gen_text_params).then(**code_update_params)
with gr.Row():
with gr.Column():
gr.Markdown("## 3. Export your app to share!")
copy_snippet_btn = gr.Button("Copy app snippet to paste in another page")
copy_snippet_btn.click(copy_notify, code_area, None, _js=copy_snippet_js('gradio-lite'))
download_btn = gr.Button("Download app as a standalone file")
download_btn.click(None, code_area, None, _js=download_code_js('gradio-lite'))
with gr.Row():
with gr.Column():
gr.Markdown("## Current limitations")
with gr.Accordion("Click to view", open=False):
gr.Markdown(
"- Only gradio-lite apps using the python standard libraries and gradio are supported\n- The chat hasn't been tuned on gradio library data; it may make mistakes\n- The app needs to fully reload each time it is changed")
# Selecting a tab records its key in selectedTab, then runs that runtime's load_js snippet.
stlite_tab.select(lambda: "stlite", None, selectedTab).then(None, None, None, _js=load_js('stlite'))
gradio_lite_tab.select(lambda: "gradio-lite", None, selectedTab).then(None, None, None, _js=load_js('gradio-lite'))
# On page load, run the 'stlite' load_js snippet (the initial selectedTab value).
demo.load(None, None, None, _js=load_js('stlite'))
# CSS rule that hides the page footer.
demo.css = "footer {visibility: hidden}"
# Enable the request queue and start the server only when run as a script.
if __name__ == "__main__":
demo.queue().launch()