suric's picture
add transcribe button
4336e0a
raw
history blame
6.48 kB
import os
import gradio as gr
from gradio_components.prediction import predict, transcribe
theme = gr.themes.Glass(
primary_hue="fuchsia",
secondary_hue="indigo",
neutral_hue="slate",
font=[gr.themes.GoogleFont('Source Sans Pro'), 'ui-sans-serif', 'system-ui',
'sans-serif'],
).set(
body_background_fill_dark='*background_fill_primary',
embed_radius='*table_radius',
background_fill_primary='*neutral_50',
background_fill_primary_dark='*neutral_950',
background_fill_secondary_dark='*neutral_900',
border_color_accent='*neutral_600',
border_color_accent_subdued='*color_accent',
border_color_primary_dark='*neutral_700',
block_background_fill='*background_fill_primary',
block_background_fill_dark='*neutral_800',
block_border_width='1px',
block_label_background_fill='*background_fill_primary',
block_label_background_fill_dark='*background_fill_secondary',
block_label_text_color='*neutral_500',
block_label_text_size='*text_sm',
block_label_text_weight='400',
block_shadow='none',
block_shadow_dark='none',
block_title_text_color='*neutral_500',
block_title_text_weight='400',
panel_border_width='0',
panel_border_width_dark='0',
checkbox_background_color_dark='*neutral_800',
checkbox_border_width='*input_border_width',
checkbox_label_border_width='*input_border_width',
input_background_fill='*neutral_100',
input_background_fill_dark='*neutral_700',
input_border_color_focus_dark='*neutral_700',
input_border_width='0px',
input_border_width_dark='0px',
slider_color='#2563eb',
slider_color_dark='#2563eb',
table_even_background_fill_dark='*neutral_950',
table_odd_background_fill_dark='*neutral_900',
button_border_width='*input_border_width',
button_shadow_active='none',
button_primary_background_fill='*primary_200',
button_primary_background_fill_dark='*primary_700',
button_primary_background_fill_hover='*button_primary_background_fill',
button_primary_background_fill_hover_dark='*button_primary_background_fill',
button_secondary_background_fill='*neutral_200',
button_secondary_background_fill_dark='*neutral_600',
button_secondary_background_fill_hover='*button_secondary_background_fill',
button_secondary_background_fill_hover_dark='*button_secondary_background_fill',
button_cancel_background_fill='*button_secondary_background_fill',
button_cancel_background_fill_dark='*button_secondary_background_fill',
button_cancel_background_fill_hover='*button_cancel_background_fill',
button_cancel_background_fill_hover_dark='*button_cancel_background_fill'
)
_AUDIOCRAFT_MODELS = ["facebook/musicgen-melody",
"facebook/musicgen-medium",
"facebook/musicgen-small",
"facebook/musicgen-large",
"facebook/musicgen-melody-large"]
def generate_prompt(difficulty, style):
_DIFFICULTY_MAPPIN = {
"Easy": "beginner player",
"Medum": "player who has 2-3 years experience",
"Hard": "player who has more than 4 years experiences"
}
prompt = 'piano only music for a {} to pratice with the touch of {}'.format(
_DIFFICULTY_MAPPIN[difficulty], style
)
return prompt
def UI():
with gr.Blocks() as demo:
with gr.Tab("Generate Music by melody"):
with gr.Row():
with gr.Column():
with gr.Row():
model_path = gr.Dropdown(
choices=_AUDIOCRAFT_MODELS,
label="Select the model",
value="facebook/musicgen-melody-large"
)
with gr.Row():
duration = gr.Slider(
minimum=10,
maximum=60,
value=10,
label="Duration",
interactive=True
)
with gr.Row():
topk = gr.Number(label="Top-k", value=250, interactive=True)
topp = gr.Number(label="Top-p", value=0, interactive=True)
temperature = gr.Number(
label="Temperature", value=1.0, interactive=True
)
sample_rate = gr.Number(
label="output music sample rate", value=32000,
interactive=True
)
difficulty = gr.Radio(
["Easy", "Medium", "Hard"], label="Difficulty",
value="Easy", interactive=True
)
style = gr.Radio(
["Jazz", "Classical Music", "Hip Hop", "Others"],
value="Classical Music", label="music genre",
interactive=True
)
if style == "Others":
style = gr.Textbox(label="Type your music genre")
prompt = generate_prompt(difficulty.value, style.value)
customize = gr.Checkbox(
label="Customize the prompt", interactive=True
)
if customize:
prompt = gr.Textbox(label="Type your prompt")
with gr.Column():
with gr.Row():
melody = gr.Audio(
sources=["microphone", "upload"],
label="Record or upload your audio",
#interactive=True,
show_label=True,
)
with gr.Row():
submit = gr.Button("Generate Music")
output_audio = gr.Audio("listen to the generated music", type="filepath")
with gr.Row():
transcribe_button = gr.Button("Transcribe")
d = gr.DownloadButton("Download the file", visible=False)
transcribe_button.click(transcribe, inputs=[output_audio], outputs=d)
submit.click(
fn=predict,
inputs=[model_path, prompt, melody, duration, topk, topp, temperature,
sample_rate],
outputs=output_audio
)
gr.Examples(
examples=[
[
os.path.join(
os.path.dirname(__file__),
"./data/audio/twinkle_twinkle_little_stars_mozart_20sec.mp3"
),
"Easy",
32000,
20
],
[
os.path.join(
os.path.dirname(__file__),
"./data/audio/golden_hour_20sec.mp3"
),
"Easy",
32000,
20
],
[
os.path.join(
os.path.dirname(__file__),
"./data/audio/turkish_march_mozart_20sec.mp3"
),
"Easy",
32000,
20
],
[
os.path.join(
os.path.dirname(__file__),
"./data/audio/golden_hour_20sec.mp3"
),
"Hard",
32000,
20
],
[
os.path.join(
os.path.dirname(__file__),
"./data/audio/golden_hour_20sec.mp3"
),
"Hard",
32000,
40
],
[
os.path.join(
os.path.dirname(__file__),
"./data/audio/golden_hour_20sec.mp3"
),
"Hard",
16000,
20
],
],
inputs=[melody, difficulty, sample_rate, duration],
label="Audio Examples",
outputs=[output_audio],
# cache_examples=True,
)
demo.queue().launch()
if __name__ == "__main__":
UI()