import gradio as gr
from transformers import Wav2Vec2ForSequenceClassification, Wav2Vec2Processor
import torch
import torchaudio
# Load the pretrained model and feature processor
model_name = "Mrkomiljon/voiceGUARD"
model = Wav2Vec2ForSequenceClassification.from_pretrained(model_name)
processor = Wav2Vec2Processor.from_pretrained(model_name)
model.eval()
# Run on GPU if available, otherwise CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
# Mapping from class index to label: "Real" is genuine human speech, the rest are neural vocoders
id2label = {
0: "diffwave",
1: "melgan",
2: "parallel_wave_gan",
3: "Real",
4: "wavegrad",
5: "wavnet",
6: "wavernn"
}
# Prediction function
def predict_audio(file_path):
    target_sample_rate = 16000
    max_length = target_sample_rate * 10  # fixed 10-second window

    try:
        # Load the audio file
        waveform, sample_rate = torchaudio.load(file_path)

        # Resample if the sample rate does not match the target
        if sample_rate != target_sample_rate:
            resampler = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=target_sample_rate)
            waveform = resampler(waveform)

        # Truncate or zero-pad to the fixed length
        if waveform.size(1) > max_length:
            waveform = waveform[:, :max_length]
        elif waveform.size(1) < max_length:
            waveform = torch.nn.functional.pad(waveform, (0, max_length - waveform.size(1)))

        # Keep only the first channel if the audio is multi-channel
        if waveform.ndim > 1:
            waveform = waveform[0]

        # Preprocess the input for the model
        inputs = processor(
            waveform.numpy(),
            sampling_rate=target_sample_rate,
            return_tensors="pt",
            padding=True
        )
        input_values = inputs["input_values"].to(device)

        # Inference
        with torch.no_grad():
            logits = model(input_values).logits
        probabilities = torch.nn.functional.softmax(logits, dim=-1)
        predicted_label = torch.argmax(probabilities, dim=-1).item()
        confidence = probabilities[0, predicted_label].item()
        class_name = id2label.get(predicted_label, "Unknown Class")

        # Return the label and the confidence as separate values
        return class_name, float(confidence)
    except Exception as e:
        # On failure, surface the error message in the UI
        return "Error", str(e)
# Gradio interface
iface = gr.Interface(
    fn=predict_audio,
    inputs=gr.Audio(type="filepath"),  # pass the uploaded audio to predict_audio as a file path
    outputs=[
        gr.Label(label="Predicted Class"),
        gr.Label(label="Confidence")
    ],
    title="Human or AI-Generated Voice Classification",
    description="Upload an audio file to classify it as real human speech or as output from one of several neural vocoders."
)
if __name__ == "__main__":
    iface.launch()