import gradio as gr
import numpy as np

# Pre-load the four fairseq speech-to-speech translation demos from the Hub.
io1 = gr.Interface.load("huggingface/facebook/xm_transformer_s2ut_en-hk")
io2 = gr.Interface.load("huggingface/facebook/xm_transformer_s2ut_hk-en")
io3 = gr.Interface.load("huggingface/facebook/xm_transformer_unity_en-hk")
io4 = gr.Interface.load("huggingface/facebook/xm_transformer_unity_hk-en")

# Single source of truth: dropdown value -> loaded model interface.
# Keeps the UI choices and the dispatch logic from drifting apart.
MODELS = {
    "xm_transformer_s2ut_en-hk": io1,
    "xm_transformer_s2ut_hk-en": io2,
    "xm_transformer_unity_en-hk": io3,
    "xm_transformer_unity_hk-en": io4,
}


def inference(audio, model):
    """Translate recorded speech with the selected fairseq S2ST model.

    Parameters
    ----------
    audio : str
        Filepath of the recorded input audio (``gr.Audio`` with
        ``type="filepath"``).
    model : str
        One of the keys of ``MODELS``.

    Returns
    -------
    tuple
        ``(output_audio, "Success")`` on success, ``(None, error_message)``
        on failure — errors are surfaced via the status textbox instead of
        being raised, so the UI stays responsive.
    """
    try:
        if not audio:
            raise ValueError("No audio input provided")
        try:
            translate = MODELS[model]
        except KeyError:
            raise ValueError(f"Unsupported model: {model}") from None
        out_audio = translate(audio)
        if not out_audio:
            raise ValueError("Model failed to generate output")
        return out_audio, "Success"
    except Exception as e:
        # Top-level UI boundary: log and report rather than crash the app.
        print(f"Error during inference: {str(e)}")
        return None, str(e)


block = gr.Blocks()
with block:
    gr.HTML(
        """

Hokkien Translation

A demo for fairseq speech-to-speech translation models. It supports S2UT and UnitY models for bidirectional Hokkien and English translation. Please select the model and record the input to submit.

"""
    )
    with gr.Group():
        with gr.Box():
            with gr.Row().style(mobile_collapse=False, equal_height=True):
                audio = gr.Audio(
                    source="microphone", type="filepath", label="Input"
                )
                btn = gr.Button("Submit")
            # BUG FIX: the original dropdown listed only the two S2UT models,
            # leaving the loaded UnitY models (io3/io4) — and their dispatch
            # branches — unreachable from the UI. Offer all loaded models.
            model = gr.Dropdown(
                choices=list(MODELS),
                value="xm_transformer_s2ut_en-hk",
                type="value",
                label="Model",
            )
            out = gr.Audio(label="Output")
            status = gr.Textbox(label="Status", interactive=False)
    btn.click(
        inference,
        inputs=[audio, model],
        outputs=[out, status],
        api_name="inference",
    )
    gr.HTML(''' ''')

block.launch()