import tempfile

import evaluate
import gradio as gr
from evaluate.utils import launch_gradio_widget
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    RobertaForSequenceClassification,
    RobertaTokenizer,
    pipeline,
)

# NOTE(review): `tmp` is not referenced anywhere else in this file; it is kept
# only so the module's top-level behavior stays the same. Confirm and remove.
tmp = tempfile.NamedTemporaryFile()

# Available toxicity models: identifier handed to `evaluate.load` -> readable name.
available_models = {
    "mskov/roberta-base-toxicity": "Roberta Finetuned Model"
}


def classify_toxicity(audio_file, text_input, selected_model):
    """Score the toxicity of an uploaded audio clip or of typed text.

    When an audio file is supplied it is transcribed first and the
    transcription is scored; otherwise the typed text is scored directly.

    Args:
        audio_file: Path of the uploaded audio file, or ``None`` when no
            file was uploaded.
        text_input: Text typed by the user; only used when ``audio_file``
            is ``None``. ``None`` is treated as an empty string.
        selected_model: Value chosen in the model selector. It is passed as
            the second argument to ``evaluate.load("toxicity", ...)``.

    Returns:
        A ``(toxicity_score, transcribed_text)`` tuple: the first entry of
        the ``"toxicity"`` result list and the text that was scored.
    """
    if audio_file is not None:
        # Transcribe the audio file using the "whisper" evaluate module.
        whisper_module = evaluate.load("whisper")
        transcription_results = whisper_module.compute(uploaded=audio_file)
        transcribed_text = transcription_results["transcription"]
    else:
        # The live interface may call this before any text exists; never
        # hand `None` to the toxicity module.
        transcribed_text = text_input or ""

    # Load the selected toxicity classification model and score the text.
    toxicity_module = evaluate.load("toxicity", selected_model)
    toxicity_results = toxicity_module.compute(predictions=[transcribed_text])
    toxicity_score = toxicity_results["toxicity"][0]
    print(toxicity_score)

    return toxicity_score, transcribed_text


# Create a Gradio interface with audio file and text inputs.
iface = gr.Interface(
    fn=classify_toxicity,
    inputs=[
        gr.Audio(source="upload", type="filepath", label="Upload Audio File"),
        gr.Textbox(type="text", label="Enter Text", placeholder="Enter text here..."),
        # Pass the model identifiers as an explicit list of choices rather
        # than the dict itself.
        gr.Radio(list(available_models), type="value", label="Select Model"),
    ],
    # One output component per value returned by `classify_toxicity`.
    outputs=[
        gr.Textbox(label="Toxicity Score"),
        gr.Textbox(label="Transcribed Text"),
    ],
    live=True,
    title="Toxicity Classifier with ASR",
    description="Upload an audio file or enter text to classify its toxicity using the selected model.",
)

iface.launch()