"""Gradio app that scores the toxicity of an uploaded audio or text file.

Audio uploads are transcribed with the ``whisper`` evaluate module first; text
uploads are read directly. The resulting text is scored by the ``toxicity``
evaluate measurement using the model selected in the UI.
"""

import tempfile

import evaluate
import gradio as gr
from evaluate.utils import launch_gradio_widget
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    RobertaForSequenceClassification,
    RobertaTokenizer,
    pipeline,
)

# NOTE(review): this temporary file is no longer read by classify_toxicity
# (the old code accessed a non-existent ``tmp.text_file`` attribute). The name
# is kept so anything importing it from this module keeps working.
tmp = tempfile.NamedTemporaryFile()

# Define the list of available models: model id -> human-readable label.
available_models = {
    "mskov/roberta-base-toxicity": "Roberta Finetuned Model",
}


def _read_text_file(text_file):
    """Return the full contents of an uploaded text file as one string.

    ``text_file`` may be a plain path string or an object exposing the path
    through a ``name`` attribute (as temporary-file wrappers do); both forms
    are accepted.
    """
    path = getattr(text_file, "name", text_file)
    with open(path, encoding="utf-8") as f:
        return f.read()


def classify_toxicity(audio_file, text_file, selected_model):
    """Transcribe or read the uploaded input and score its toxicity.

    Args:
        audio_file: Path of the uploaded audio file, or ``None``. When given,
            it takes precedence over ``text_file``.
        text_file: Uploaded text file (path or object with a ``name``
            attribute), or ``None``.
        selected_model: Key of ``available_models`` naming the toxicity model.

    Returns:
        A ``(text, message)`` tuple: the text that was classified and a
        formatted toxicity score. When an input or the model selection is
        missing, ``text`` is empty and ``message`` explains what is needed.
    """
    # With live=True the callback fires while inputs are still unset, so
    # missing values are reported to the user instead of raising.
    if selected_model not in available_models:
        return "", "Please select a model."

    if audio_file is not None:
        # Transcribe the audio file using Whisper ASR.
        whisper_module = evaluate.load("whisper")
        transcription_results = whisper_module.compute(uploaded=audio_file)
        # Extract the transcribed text.
        transcribed_text = transcription_results["transcription"]
    elif text_file is not None:
        transcribed_text = _read_text_file(text_file)
    else:
        return "", "Please upload an audio file or a text file."

    # Load the selected toxicity classification model.
    toxicity_module = evaluate.load("toxicity", selected_model)
    toxicity_results = toxicity_module.compute(predictions=[transcribed_text])
    toxicity_score = toxicity_results["toxicity"][0]

    return (
        transcribed_text,
        f"Toxicity Score ({available_models[selected_model]}): {toxicity_score:.4f}",
    )


# Create a Gradio interface with audio file and text file inputs.
iface = gr.Interface(
    fn=classify_toxicity,
    inputs=[
        gr.Audio(source="upload", type="filepath", label="Upload Audio File"),
        gr.File(label="Upload Text File"),
        # Radio expects a list of choices; the selected value is the model id.
        gr.Radio(list(available_models), type="value", label="Select Model"),
    ],
    # classify_toxicity returns two values, so two output components are needed.
    outputs=[
        gr.Textbox(label="Transcribed Text"),
        gr.Textbox(label="Toxicity"),
    ],
    live=True,
    title="Toxicity Classifier with ASR",
    description="Upload an audio file or enter text to classify its toxicity using the selected model.",
)

iface.launch()