import gradio as gr
import librosa
import numpy as np
import torch
import torch.nn as nn
from torch import Tensor

from model import Model

model_path = 'DF_FOR_finetune_best.pt'

def load_data(path):
    # librosa.load resamples the audio to its default sampling rate (22050 Hz)
    X, fs = librosa.load(path)
    X_pad = pad(X, 64600)
    x_inp = Tensor(X_pad).unsqueeze(0)
    return x_inp, fs


def pad(x, max_len=64600):
    # Crop long clips to max_len samples; repeat short clips until they reach it
    x_len = x.shape[0]
    if x_len >= max_len:
        return x[:max_len]

    num_repeats = int(max_len / x_len) + 1
    padded_x = np.tile(x, (1, num_repeats))[:, :max_len][0]
    return padded_x

device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = Model(None, device)

# Total number of model parameters (kept as a quick sanity check)
nb_params = sum(p.numel() for p in model.parameters())

model = nn.DataParallel(model).to(device)

# The checkpoint stores the network weights under the 'model' key
model.load_state_dict(torch.load(model_path, map_location=device)['model'])
print("Model loaded : {}".format(model_path))

model.eval()

prediction_dict = {0: 'Fake', 1: 'Real'}

def Detection(audio_1):
    x_inp, fs = load_data(audio_1)
    print(x_inp.shape)

    # Inference only, so no gradients are needed
    with torch.no_grad():
        validity_probs = model(x_inp)
        validity_probs = torch.nn.functional.softmax(validity_probs, dim=1)

    # Map the two class probabilities to their labels for the gr.Label output
    validity = {prediction_dict[i]: float(validity_probs[0][i]) for i in range(2)}
    return validity

audio_1 = gr.Audio(sources="upload", type="filepath", label="Audio 1")

gr.Interface(
    fn=Detection,
    inputs=audio_1,
    outputs='label',
    title="Audio Deepfake Detection",
    description="Audio deepfake detection using a model fine-tuned on the for-2seconds dataset.",
).launch()
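
# Minimal sketch of exercising the detector outside the Gradio UI, assuming a
# local audio file (hypothetical name "example.wav") is available. Detection
# returns a {label: probability} dict, which is what the gr.Label output expects.
# Example (uncomment to run):
# print(Detection("example.wav"))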