import gradio as gr
from transformers import Wav2Vec2ForSequenceClassification, Wav2Vec2Processor
import torch
import torchaudio
# Load the pretrained model and feature processor
model_name = "Mrkomiljon/voiceGUARD"
model = Wav2Vec2ForSequenceClassification.from_pretrained(model_name)
processor = Wav2Vec2Processor.from_pretrained(model_name)
model.eval()
# Run on GPU if available, otherwise CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
# Mapping from class index to label: "Real" is genuine human speech, the rest are neural vocoders
id2label = {
0: "diffwave",
1: "melgan",
2: "parallel_wave_gan",
3: "Real",
4: "wavegrad",
5: "wavnet",
6: "wavernn"
}
# Prediction function
def predict_audio(file_path):
    target_sample_rate = 16000
    max_length = target_sample_rate * 10  # fixed 10-second window

    try:
        # Load the audio file
        waveform, sample_rate = torchaudio.load(file_path)

        # Resample if the sample rate does not match the target
        if sample_rate != target_sample_rate:
            resampler = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=target_sample_rate)
            waveform = resampler(waveform)

        # Truncate or zero-pad to the fixed length
        if waveform.size(1) > max_length:
            waveform = waveform[:, :max_length]
        elif waveform.size(1) < max_length:
            waveform = torch.nn.functional.pad(waveform, (0, max_length - waveform.size(1)))

        # Keep only the first channel if the audio is multi-channel
        if waveform.ndim > 1:
            waveform = waveform[0]

        # Preprocess the input for the model
        inputs = processor(
            waveform.numpy(),
            sampling_rate=target_sample_rate,
            return_tensors="pt",
            padding=True
        )
        input_values = inputs["input_values"].to(device)

        # Inference
        with torch.no_grad():
            logits = model(input_values).logits
        probabilities = torch.nn.functional.softmax(logits, dim=-1)
        predicted_label = torch.argmax(probabilities, dim=-1).item()
        confidence = probabilities[0, predicted_label].item()
        class_name = id2label.get(predicted_label, "Unknown Class")

        # Return the label and the confidence as separate values
        return class_name, float(confidence)
    except Exception as e:
        # On failure, surface the error message in the UI
        return "Error", str(e)
# Gradio interface
iface = gr.Interface(
    fn=predict_audio,
    inputs=gr.Audio(type="filepath"),  # pass the uploaded audio to predict_audio as a file path
    outputs=[
        gr.Label(label="Predicted Class"),
        gr.Label(label="Confidence")
    ],
    title="Human or AI-Generated Voice Classification",
    description="Upload an audio file to classify it as real human speech or as output from one of several neural vocoders."
)
if __name__ == "__main__":
    iface.launch()