|
import gradio as gr |
|
import librosa |
|
from transformers import AutoFeatureExtractor, pipeline |
|
|
|
|
|
def load_and_fix_data(input_file, model_sampling_rate):
    """Load an audio file as a mono waveform at the model's sampling rate.

    Args:
        input_file: Path of the audio file to read.
        model_sampling_rate: Sampling rate (in Hz) the audio is resampled to.

    Returns:
        The waveform returned by ``librosa.load``: mono samples at
        ``model_sampling_rate``.
    """
    # Let librosa downmix and resample in one step. Loading at librosa's
    # default rate and then calling ``librosa.resample`` with positional
    # rates resamples the signal twice and fails on librosa >= 0.10, where
    # ``orig_sr`` / ``target_sr`` are keyword-only.
    speech, _ = librosa.load(input_file, sr=model_sampling_rate, mono=True)
    return speech
|
|
|
|
|
# --- Speech recognition -----------------------------------------------------
# Hub identifier of the Spanish ASR checkpoint used for transcription.
model_name1 = "jonatasgrosman/wav2vec2-xls-r-1b-spanish"

# The feature extractor is loaded only to learn which sampling rate the
# checkpoint expects; audio is resampled to this rate before transcription.
feature_extractor = AutoFeatureExtractor.from_pretrained(model_name1)
sampling_rate = feature_extractor.sampling_rate

# Built once at import time so every request reuses the same loaded model.
asr = pipeline(task="automatic-speech-recognition", model=model_name1)
|
|
|
|
|
# --- Sexism detection -------------------------------------------------------
# Hub identifier of the text classifier applied to the transcription.
model_name2 = "hackathon-pln-es/twitter_sexismo-finetuned-robertuito-exist2021"

# Built once at import time so every request reuses the same loaded model.
classifier = pipeline(task="text-classification", model=model_name2)
|
|
|
|
|
def speech_to_text(input_file):
    """Transcribe the audio file at ``input_file`` and return the text.

    The audio is first loaded at the module-level ``sampling_rate`` and then
    passed to the module-level ``asr`` pipeline.
    """
    waveform = load_and_fix_data(input_file, sampling_rate)
    # Ask the pipeline to work in 15-second chunks with a 1-second stride.
    asr_output = asr(waveform, chunk_length_s=15, stride_length_s=1)
    return asr_output["text"]
|
|
|
|
|
def sexism_detector(transcribed_text):
    """Classify ``transcribed_text`` and return the first prediction's label.

    The text is passed to the module-level ``classifier`` pipeline; only the
    ``"label"`` field of the first result is returned.
    """
    predictions = classifier(transcribed_text)
    top_prediction = predictions[0]
    return top_prediction["label"]
|
|
|
|
|
def asr_and_sexism_detection(input_file):
    """Transcribe an audio file and report whether the text is sexist.

    Args:
        input_file: Path of the recorded audio file, or an empty value
            (``None`` / ``""``) when nothing was submitted.

    Returns:
        A user-facing message: a notice that no audio was provided, the
        non-sexist verdict when the classifier answers ``"LABEL_0"``, or the
        sexist verdict for any other label.
    """
    # Guard against an empty submission (e.g. ``None`` when nothing was
    # recorded); without it the audio loader fails on the missing path.
    if not input_file:
        return "No audio was provided. Please record an audio clip and try again."

    transcribed_text = speech_to_text(input_file)
    sexism_detection = sexism_detector(transcribed_text)

    if sexism_detection == "LABEL_0":
        return "The input audio contains NON-SEXIST language"
    return "SEXIST LANGUAGE DETECTED"
|
|
|
|
|
# --- Gradio UI configuration ------------------------------------------------
# One microphone recording, handed to the callback as a file path.
inputs = [
    gr.inputs.Audio(
        source="microphone",
        type="filepath",
        label="Record your audio",
    ),
]

# One text box that shows the verdict message.
outputs = [gr.outputs.Textbox(label="Predicción")]

# Each example is a one-element row, matching the single audio input.
examples = [
    [clip]
    for clip in (
        "audio1.wav",
        "audio2.wav",
        "audio3.wav",
        "audio4.wav",
        "sample_audio.wav",
    )
]

title = "Spanish Audio Transcription based Sexism Detection"

# Markdown-formatted text passed to the interface as its description.
description = """ This is a Gradio demo for Spanish audio transcription-based Sexism detection. The key objective is to detect whether the sexist language is present in the audio or not. To use this app, simply provide an audio input (audio recording or via microphone), which will subsequently be transcribed and classified as sexism/non-sexism pertaining to audio (transcription) with the help of pre-trained models.







**Note regarding the predicted label: LABEL_0: "NON SEXISM" or LABEL_1: "SEXISM"**







Pre-trained Model used for Spanish ASR: [jonatasgrosman/wav2vec2-xls-r-1b-spanish](https://huggingface.co/jonatasgrosman/wav2vec2-xls-r-1b-spanish)



Pre-trained Model used for Sexism Detection : [hackathon-pln-es/twitter_sexismo-finetuned-robertuito-exist2021](https://huggingface.co/hackathon-pln-es/twitter_sexismo-finetuned-robertuito-exist2021)



"""
|
|
|
|
|
# Wire the callback and the UI configuration together, then start serving
# with request queuing enabled.
demo = gr.Interface(
    fn=asr_and_sexism_detection,
    inputs=inputs,
    outputs=outputs,
    examples=examples,
    title=title,
    description=description,
    layout="horizontal",
    theme="huggingface",
)
demo.launch(enable_queue=True)
|
|