import gradio as gr
from transformers import Wav2Vec2ForSequenceClassification, Wav2Vec2Processor
import torch
import torchaudio

# Load the pretrained classifier and its matching processor, then switch the
# model to inference mode.
model_name = "Mrkomiljon/voiceGUARD"
model = Wav2Vec2ForSequenceClassification.from_pretrained(model_name)
processor = Wav2Vec2Processor.from_pretrained(model_name)
model.eval()

# Run on the GPU when one is available, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Class index -> class name; the position in the list is the class index.
id2label = dict(
    enumerate(
        [
            "diffwave",
            "melgan",
            "parallel_wave_gan",
            "Real",
            "wavegrad",
            "wavnet",
            "wavernn",
        ]
    )
)


def _load_fixed_length_waveform(file_path, target_sample_rate: int, max_length: int):
    """Load an audio file and return a waveform of exactly ``max_length`` samples.

    The audio is resampled to ``target_sample_rate`` when its native rate
    differs, truncated or right-padded with zeros along dimension 1 to
    ``max_length`` samples, and finally reduced to its first row (the first
    channel, assuming ``torchaudio.load`` returns channels first).
    """
    waveform, sample_rate = torchaudio.load(file_path)

    # Resample only when the file's rate differs from the target rate.
    if sample_rate != target_sample_rate:
        resample = torchaudio.transforms.Resample(
            orig_freq=sample_rate,
            new_freq=target_sample_rate,
        )
        waveform = resample(waveform)

    # Force a fixed length: cut long clips, zero-pad short ones on the right.
    num_samples = waveform.size(1)
    if num_samples > max_length:
        waveform = waveform[:, :max_length]
    elif num_samples < max_length:
        waveform = torch.nn.functional.pad(waveform, (0, max_length - num_samples))

    # Keep only the first row of a multi-dimensional waveform.
    if waveform.ndim > 1:
        waveform = waveform[0]

    return waveform


def predict_audio(file_path):
    """Classify one audio file and report the winning class and its probability.

    Args:
        file_path: Path of the audio file to classify (the Gradio input is
            configured with ``type="filepath"``).

    Returns:
        A 2-tuple. On success: ``(class_name, confidence)`` where
        ``class_name`` comes from ``id2label`` (``"Unknown Class"`` for an
        unmapped index) and ``confidence`` is the softmax probability of that
        class as a float. On any exception: ``("Error", <exception text>)``.
    """
    target_sample_rate = 16000
    max_length = target_sample_rate * 10  # ten seconds of audio

    try:
        samples = _load_fixed_length_waveform(
            file_path, target_sample_rate, max_length
        )

        # Turn the raw samples into model inputs.
        encoded = processor(
            samples.numpy(),
            sampling_rate=target_sample_rate,
            return_tensors="pt",
            padding=True,
        )
        input_values = encoded["input_values"].to(device)

        # Inference without gradient tracking.
        with torch.no_grad():
            logits = model(input_values).logits

        probs = logits.softmax(dim=-1)
        best_index = probs.argmax(dim=-1).item()
        best_prob = probs[0, best_index].item()
        label = id2label.get(best_index, "Unknown Class")

        # Return the two values separately, one per output component.
        return label, float(best_prob)
    except Exception as exc:
        # Any failure is reported through the same two outputs instead of
        # being raised to the UI.
        return "Error", str(exc)


# Gradio interface: one audio input passed as a file path, two label outputs.
iface = gr.Interface(
    fn=predict_audio,
    inputs=gr.Audio(type="filepath"),
    outputs=[
        gr.Label(label="Predicted Class"),
        gr.Label(label="Confidence"),
    ],
    title="Human or AI-generated voice classification",
    description="Upload an audio file to classify it into one of the predefined categories.",
)

if __name__ == "__main__":
    iface.launch()