Spaces:
Running
on
Zero
Running
on
Zero
import spaces | |
import gradio as gr | |
import os | |
import torch | |
from model import Wav2Vec2BERT_Llama # 自定义模型模块 | |
import dataset # 自定义数据集模块 | |
from huggingface_hub import hf_hub_download | |
def dummy():
    """Do nothing and return ``None``; this function is only a placeholder.

    NOTE(review): nothing in this file calls it. Presumably it exists to
    satisfy a hosting-platform requirement -- confirm before removing.
    """
# Revised load_model: resolve the checkpoint through the Hugging Face Hub.
def load_model():
    """Download the model checkpoint and return its local file path.

    Despite the name, no model object is built here: the function only asks
    ``hf_hub_download`` for the checkpoint file of the
    ``amphion/deepfake_detection`` model repository and hands back the path.

    Returns:
        The local path reported by ``hf_hub_download``.

    Raises:
        FileNotFoundError: If the reported path does not exist on disk.
    """
    checkpoint_path = hf_hub_download(
        repo_type="model",
        repo_id="amphion/deepfake_detection",
        filename="checkpoints_wav2vec2bert_ft_llama_labels_ASVspoof2019_RandomPrompts_6/model_checkpoint.pth",
    )
    # Sanity check on the returned path before anyone tries to load it.
    if os.path.exists(checkpoint_path):
        return checkpoint_path
    raise FileNotFoundError(f"Checkpoint not found: {checkpoint_path}")
# Resolve the checkpoint path once, at import time; detect_on_gpu reads this
# module-level name every time it loads the weights.
checkpoint_path = load_model()
# Detection routine, moved so that it can sit under the GPU decorator.
# NOTE(review): no decorator is visible on this function in this view of the
# file -- confirm whether one is expected here.
def _align_module_prefix(model, state_dict):
    """Return ``state_dict`` with keys matching whether ``model`` is wrapped.

    A model exposing a ``module`` attribute expects keys starting with
    ``'module.'``; a bare model expects keys without it. Only the *leading*
    prefix is added or removed -- a ``'module.'`` substring occurring later in
    a key is part of the parameter's real name and must be left alone.
    """
    prefix = 'module.'
    model_is_wrapped = hasattr(model, 'module')
    keys_are_prefixed = any(key.startswith(prefix) for key in state_dict)

    if model_is_wrapped and not keys_are_prefixed:
        print("添加 'module.' 前缀到状态字典的 key")
        return {prefix + key: value for key, value in state_dict.items()}

    if not model_is_wrapped and keys_are_prefixed:
        print("移除状态字典 key 中的 'module.' 前缀")
        return {
            (key[len(prefix):] if key.startswith(prefix) else key): value
            for key, value in state_dict.items()
        }

    return state_dict


def detect_on_gpu(dataset, threshold=0.9996):
    """Run audio deepfake detection on the first batch of ``dataset``.

    The model is instantiated and its weights are loaded from the module-level
    ``checkpoint_path`` on every call. CUDA is used when available, otherwise
    the CPU. Only the first batch yielded by ``dataset`` is scored.

    Args:
        dataset: Iterable of batches. Each batch is indexed as
            ``batch['main_features']`` (with ``'input_features'`` and
            ``'attention_mask'`` tensors), ``batch['prompt_features']`` (a
            sequence of dicts with the same two keys) and
            ``batch['prompt_labels']`` (a tensor, read only when prompt
            features are present). Note that this parameter name shadows the
            imported ``dataset`` module inside this function.
        threshold: Score strictly above which the query is reported as fake.

    Returns:
        A dict ``{"is_fake": bool, "confidence": float}``. ``confidence`` is
        the deepfake score when ``is_fake`` is true, otherwise one minus it.

    Raises:
        ValueError: If ``dataset`` yields no batch at all.
    """
    print("\n=== 开始音频检测 ===")
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"使用设备: {device}")

    print("正在初始化模型...")
    model = Wav2Vec2BERT_Llama().to(device)

    print(f"正在加载模型权重: {checkpoint_path}")
    # NOTE(security): torch.load unpickles the file, so only checkpoints from
    # a trusted source should be loaded here. Consider ``weights_only=True``
    # if the checkpoint holds nothing but tensors -- TODO confirm.
    checkpoint = torch.load(checkpoint_path, map_location=device)
    model_state_dict = checkpoint['model_state_dict']
    print(f"检测阈值设置为: {threshold}")

    # Reconcile the state-dict keys with the (un)wrapped model layout.
    model_state_dict = _align_module_prefix(model, model_state_dict)
    model.load_state_dict(model_state_dict)
    model.eval()
    print("模型加载完成,进入评估模式")

    print("\n开始处理音频数据...")
    result = None
    with torch.no_grad():
        for batch_idx, batch in enumerate(dataset):
            print(f"\n处理批次 {batch_idx + 1}")

            print("准备主特征...")
            main_features = {
                'input_features': batch['main_features']['input_features'].to(device),
                'attention_mask': batch['main_features']['attention_mask'].to(device),
            }
            print(f"主特征形状: {main_features['input_features'].shape}")

            if len(batch['prompt_features']) > 0:
                print("\n准备提示特征...")
                prompt_features = [{
                    'input_features': pf['input_features'].to(device),
                    'attention_mask': pf['attention_mask'].to(device),
                } for pf in batch['prompt_features']]
                print(f"提示特征数量: {len(prompt_features)}")
                print(f"第一个提示特征形状: {prompt_features[0]['input_features'].shape}")

                print("\n准备提示标签...")
                prompt_labels = batch['prompt_labels'].to(device)
                print(f"提示标签形状: {prompt_labels.shape}")
                print(f"提示标签值: {prompt_labels}")
            else:
                # No demonstrations supplied: the model receives empty prompts.
                prompt_features = []
                prompt_labels = []

            print("\n执行模型推理...")
            outputs = model({
                'main_features': main_features,
                'prompt_features': prompt_features,
                'prompt_labels': prompt_labels,
            })

            print("\n处理模型输出...")
            avg_scores = outputs['avg_logits'].softmax(dim=-1)
            # Column 1 of the softmax output is used as the deepfake score.
            deepfake_scores = avg_scores[:, 1].cpu()
            # Convert to a plain float so the result holds no tensor objects.
            score = deepfake_scores[0].item()
            is_fake = score > threshold
            result = {"is_fake": is_fake, "confidence": score if is_fake else 1 - score}
            # Only the first batch is evaluated.
            break

    if result is None:
        # Previously this surfaced as an UnboundLocalError on ``return``.
        raise ValueError("dataset yielded no batches; nothing to detect")

    print("\n=== 检测完成 ===")
    return result
# Revised main entry point for audio deepfake detection.
def audio_deepfake_detection(demonstrations, query_audio_path):
    """Classify the query audio, using labelled demonstrations as context.

    Args:
        demonstrations: Sequence of ``(audio_path, label)`` pairs. A pair is
            ignored when either element is ``None`` (e.g. an upload slot that
            was left empty).
        query_audio_path: Path of the audio file to classify.

    Returns:
        A dict with ``"Is AI Generated"`` (the ``is_fake`` flag from
        ``detect_on_gpu``) and ``"Confidence"`` (a percentage string with two
        decimals).
    """
    # Filter paths and labels *together*. Filtering them independently and
    # truncating afterwards mis-pairs them whenever an earlier slot has a
    # label but no audio: the later audio would inherit the earlier label.
    usable_pairs = [
        (demo[0], demo[1])
        for demo in demonstrations
        if demo[0] is not None and demo[1] is not None
    ]
    demonstration_paths = [path for path, _ in usable_pairs]
    demonstration_labels = [label for _, label in usable_pairs]

    # Build the dataset from the aligned demonstrations and the query.
    audio_dataset = dataset.DemoDataset(demonstration_paths, demonstration_labels, query_audio_path)

    # Run the detection routine.
    result = detect_on_gpu(audio_dataset)

    # float() accepts both a plain number and a zero-dimensional tensor.
    confidence = float(result['confidence'])
    return {
        "Is AI Generated": result["is_fake"],
        "Confidence": f"{100 * confidence:.2f}%",
    }
# Gradio interface
def gradio_ui():
    """Build and return the Gradio interface for the detector.

    The form has three demonstration slots (an audio upload plus a
    bonafide/spoof dropdown each) followed by one query-audio upload. The
    result of ``audio_deepfake_detection`` is shown in a JSON panel.
    """

    def _upload(label):
        # Upload-only audio input that hands the callback a file path.
        return gr.Audio(sources=["upload"], type="filepath", label=label)

    def _label_choice(label):
        # Demonstration label selector, defaulting to "bonafide".
        return gr.Dropdown(choices=["bonafide", "spoof"], value="bonafide", label=label)

    def detection_wrapper(demonstration_audio1, label1, demonstration_audio2, label2, demonstration_audio3, label3, query_audio):
        # Regroup the flat form values into (audio, label) pairs.
        audios = (demonstration_audio1, demonstration_audio2, demonstration_audio3)
        labels = (label1, label2, label3)
        demonstrations = list(zip(audios, labels))
        return audio_deepfake_detection(demonstrations, query_audio)

    # The component order must match detection_wrapper's parameter order.
    input_components = [
        _upload("Demonstration Audio 1"),
        _label_choice("Label 1"),
        _upload("Demonstration Audio 2"),
        _label_choice("Label 2"),
        _upload("Demonstration Audio 3"),
        _label_choice("Label 3"),
        _upload("Query Audio (Audio for Detection)"),
    ]

    return gr.Interface(
        fn=detection_wrapper,
        inputs=input_components,
        outputs=gr.JSON(label="Detection Results"),
        title="Audio Deepfake Detection System",
        description="Upload demonstration audios and a query audio to detect whether the query is AI-generated.",
    )
# Script entry point: build the interface and start serving it.
if __name__ == "__main__":
    demo = gradio_ui()
    demo.launch()