# Audio-Deep-fake / app.py
# kushal1506's picture
# Upload 7 files
# f526c1c verified
# raw
# history blame
# 1.81 kB
import gradio as gr
import librosa
import numpy as np
import torch
from torch import Tensor
import torch.nn as nn
from model import Model
# Checkpoint file whose state dict is loaded into the model at start-up.
# The path is relative, so it is resolved against the working directory.
model_path = 'final_model.pth'
def load_data(path):
    """Read an audio file and shape it into a single-item model batch.

    The waveform is loaded with ``librosa.load``, forced to exactly 64600
    samples by ``pad`` and wrapped in a tensor with a leading batch axis of
    size 1.

    Args:
        path: Location of the audio file, handed straight to ``librosa.load``.

    Returns:
        A ``(batch, sample_rate)`` tuple; ``sample_rate`` is the rate reported
        by librosa for the loaded signal.
    """
    # NOTE(review): no ``sr`` is passed, so librosa resamples to its own
    # default rate -- confirm this matches the rate the model was trained on.
    waveform, sample_rate = librosa.load(path)
    fixed_length = pad(waveform, 64600)
    batch = Tensor(fixed_length).unsqueeze(0)
    return batch, sample_rate
def pad(x, max_len=64600):
    """Force a 1-D signal to exactly ``max_len`` samples.

    Signals that are long enough are truncated; shorter ones are repeated
    end-to-end (tiled) and then cut to length, so the padding is a looped
    copy of the signal rather than silence.

    Args:
        x: One-dimensional NumPy array of samples.
        max_len: Required output length in samples.

    Returns:
        A 1-D array of length ``max_len`` (or ``x[:max_len]`` when ``x`` is
        already at least that long).

    Raises:
        ValueError: If ``x`` is empty and padding is required; there is
            nothing to repeat.
    """
    x_len = x.shape[0]
    if x_len >= max_len:
        return x[:max_len]
    if x_len == 0:
        # Previously this fell through to a division by zero.
        raise ValueError("cannot pad an empty signal to {} samples".format(max_len))
    # One repeat more than the floor quotient always covers max_len.
    num_repeats = max_len // x_len + 1
    return np.tile(x, (1, num_repeats))[0, :max_len]
# Run on the GPU when one is visible to torch, otherwise on the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

model = Model(None, device)
# Total number of scalar parameters, counted before the model is wrapped.
nb_params = sum(param.numel() for param in model.parameters())

# The model is wrapped in DataParallel *before* the weights are loaded, so
# the state dict is applied to the wrapper (presumably the checkpoint was
# saved from a DataParallel model -- verify against the training script).
model = nn.DataParallel(model).to(device)
model.load_state_dict(torch.load(model_path, map_location=device))
print(f"Model loaded : {model_path}")

# Inference only: switch the model to evaluation mode.
model.eval()

# Maps the predicted class index to the label shown to the user.
prediction_dict = {
    0: 'Fake',
    1: 'Real',
}
def Detection(audio_1):
    """Classify an audio clip as ``'Fake'`` or ``'Real'``.

    Args:
        audio_1: Path of the audio file to analyse, as passed in by the
            Gradio audio input.

    Returns:
        The ``prediction_dict`` label of the highest-scoring class.

    Raises:
        gr.Error: If no audio was supplied, so the UI shows a readable
            message instead of a failure from inside the audio loader.
    """
    if not audio_1:
        raise gr.Error("Please upload or record an audio clip first.")

    x_inp, _ = load_data(audio_1)
    print(x_inp.shape)

    # Pure inference: skip autograd bookkeeping to save memory and time.
    with torch.no_grad():
        logits = model(x_inp)
        validity_probs = torch.nn.functional.softmax(logits, dim=1)

    # Arg-max over the class axis of the single batch item (the original
    # took it over the flattened tensor, which only works for batch size 1).
    predicted_idx = torch.argmax(validity_probs, dim=1)[0].item()
    print(predicted_idx)
    return prediction_dict[predicted_idx]
# Gradio UI: one audio input (delivered to ``Detection`` as a file path) and
# one text box that shows the returned label.
audio_1 = gr.Audio(type="filepath", label="Audio 1")
# The box displays the 'Fake'/'Real' label, not a similarity score.
text_output = gr.Textbox(label="Prediction")

# Build the interface and start serving it as soon as the script runs.
gr.Interface(
    fn=Detection,
    inputs=audio_1,
    outputs=text_output,
    title="Audio Deepfake Detection",
    description="Audio Deepfake Detection using finetuned model on for-2seconds dataset.",
).launch()