# Spaces: Running
import gradio as gr | |
from transformers import Wav2Vec2ForSequenceClassification, Wav2Vec2Processor | |
import torch | |
import torchaudio | |
# Device: use CUDA when it is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Load the classification model and its processor; both are resolved from
# the same identifier so they stay in sync.
model_name = "Mrkomiljon/voiceGUARD"
model = Wav2Vec2ForSequenceClassification.from_pretrained(model_name)
processor = Wav2Vec2Processor.from_pretrained(model_name)

# The script only runs inference: place the weights on the chosen device
# and switch the module to evaluation mode.
model.to(device)
model.eval()
# Class mapping: position in the tuple is the class index produced by the
# model, the string is the label shown to the user.
# NOTE(review): "wavnet" may be a misspelling of "wavenet"; it is kept as-is
# because the string is emitted at runtime -- confirm against the model config.
id2label = dict(
    enumerate(
        (
            "diffwave",
            "melgan",
            "parallel_wave_gan",
            "Real",
            "wavegrad",
            "wavnet",
            "wavernn",
        )
    )
)
# Prediction function
def predict_audio(file_path):
    """Classify one audio file and report the predicted class and confidence.

    The file is loaded with torchaudio, reduced to its first channel,
    resampled to 16 kHz when its sample rate differs, and then truncated or
    zero-padded to exactly 10 seconds before being passed through the
    module-level ``processor`` and ``model``.

    Args:
        file_path: Path of the audio file to classify. A missing value
            (``None`` or an empty string) is reported as an error without
            attempting to load anything.

    Returns:
        A ``(class_name, confidence)`` tuple. ``class_name`` is looked up in
        ``id2label`` (``"Unknown Class"`` when the index is not present) and
        ``confidence`` is the softmax probability of the predicted class as
        a float. When loading or inference raises an exception, the tuple is
        ``("Error", message)`` where ``message`` is the exception text.
    """
    target_sample_rate = 16000
    max_length = target_sample_rate * 10  # fixed 10-second window, in samples

    # A missing path (e.g. the form was submitted without any audio) would
    # otherwise surface as an opaque loader error; report it clearly instead.
    if not file_path:
        return "Error", "No audio file provided."

    try:
        # Load the uploaded audio file
        waveform, sample_rate = torchaudio.load(file_path)

        # Keep only the first channel; any other channels are discarded.
        # Done before any length handling so that a 1-D tensor is also valid.
        if waveform.ndim > 1:
            waveform = waveform[0]

        # Resample when the file's sample rate does not match the target
        if sample_rate != target_sample_rate:
            resampler = torchaudio.transforms.Resample(
                orig_freq=sample_rate,
                new_freq=target_sample_rate,
            )
            waveform = resampler(waveform)

        # Truncate or zero-pad (on the right) to exactly max_length samples
        num_samples = waveform.size(-1)
        if num_samples > max_length:
            waveform = waveform[..., :max_length]
        elif num_samples < max_length:
            waveform = torch.nn.functional.pad(
                waveform, (0, max_length - num_samples)
            )

        # Preprocess input
        inputs = processor(
            waveform.numpy(),
            sampling_rate=target_sample_rate,
            return_tensors="pt",
            padding=True,
        )
        input_values = inputs["input_values"].to(device)

        # Inference (no gradients are needed)
        with torch.no_grad():
            logits = model(input_values).logits
            probabilities = torch.nn.functional.softmax(logits, dim=-1)

        # The batch holds a single item, so row 0 carries the result
        predicted_label = torch.argmax(probabilities, dim=-1).item()
        confidence = probabilities[0, predicted_label].item()
        class_name = id2label.get(predicted_label, "Unknown Class")

        # Return the two values separately, one per output component
        return class_name, float(confidence)
    except Exception as e:
        # UI boundary: report the failure as output values instead of raising
        return "Error", str(e)
# Gradio interface
# The audio component hands the function a path on disk (type="filepath").
_audio_input = gr.Audio(type="filepath")

# One output component per value returned by predict_audio.
_prediction_outputs = [
    gr.Label(label="Predicted Class"),
    gr.Label(label="Confidence"),
]

iface = gr.Interface(
    title="Human or AI-generated voice classification",
    description="Upload an audio file to classify it into one of the predefined categories.",
    fn=predict_audio,
    inputs=_audio_input,
    outputs=_prediction_outputs,
)

# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()