faraday committed on
Commit 9f49ca0
0 parents (root commit)

initial commit


This is without efficient caching etc.: the model weights are re-cloned via Git LFS on every cold start. One possible caching approach is sketched below.
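As a hedged aside (not part of this commit): the weights could instead be fetched with `huggingface_hub.snapshot_download`, which stores them in the local Hugging Face cache and reuses it across restarts rather than re-cloning the repo with Git LFS. `fetch_model_ckpts` is a hypothetical helper name.

```python
# Sketch only, not part of this commit: cache the tk93/V-Express weights
# in the standard Hugging Face cache instead of `git lfs` cloning them.
from huggingface_hub import snapshot_download

def fetch_model_ckpts() -> str:
    # The first call downloads; later calls (and restarts with a persistent
    # cache) resolve from ~/.cache/huggingface without re-downloading.
    return snapshot_download(repo_id="tk93/V-Express")
```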

Files changed (4)
  1. .gitattributes +35 -0
  2. README.md +13 -0
  3. app.py +126 -0
  4. requirements.txt +16 -0
.gitattributes ADDED
@@ -0,0 +1,35 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tar filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
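These rules route large binary artifacts (model weights such as `*.pth`, `*.safetensors`, and `*.ckpt`, plus archives and serialized arrays) through Git LFS instead of storing them directly in Git. As a quick sanity check (a sketch, not part of this commit), the standard `git lfs ls-files` subcommand lists which committed files the patterns actually capture:

```python
# Sketch only: list the files currently tracked by Git LFS in this repo.
import subprocess

lfs_files = subprocess.run(
    ["git", "lfs", "ls-files"],   # standard Git LFS subcommand
    capture_output=True, text=True, check=True,
).stdout
print(lfs_files or "no LFS-tracked files yet")
```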
README.md ADDED
@@ -0,0 +1,13 @@
+ ---
+ title: V Express
+ emoji: 🐠
+ colorFrom: blue
+ colorTo: indigo
+ sdk: gradio
+ sdk_version: 4.31.5
+ app_file: app.py
+ pinned: false
+ license: apache-2.0
+ ---
+
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,126 @@
+ import gradio as gr
+ import git
+ import os
+ import shutil
+ import subprocess
+ import torchaudio
+ import torch
+
+ # Clone the V-Express repository if not already cloned
+ repo_url = "https://github.com/tencent-ailab/V-Express"
+ repo_dir = "V-Express"
+
+ hf_model_repo_url = "https://huggingface.co/tk93/V-Express"
+ hf_model_repo_dir = "V-Express-models"
+
+ output_dir = "output"
+ temp_audio_path = "temp.mp3"
+
+ if not os.path.exists(repo_dir):
+     git.Repo.clone_from(repo_url, repo_dir)
+
+ # Install Git LFS and clone the Hugging Face model repository
+ def setup_models():
+     subprocess.run(["git", "lfs", "install"], check=True)
+
+     if not os.path.exists(hf_model_repo_dir):
+         git.Repo.clone_from(hf_model_repo_url, hf_model_repo_dir)
+
+     # Move the model_ckpts directory to the location the inference code expects
+     src = os.path.join(hf_model_repo_dir, "model_ckpts")
+     dst = os.path.join(repo_dir, "model_ckpts")
+     if os.path.exists(src):
+         if os.path.exists(dst):
+             shutil.rmtree(dst)
+         shutil.move(src, dst)
+
+
+ setup_models()
+
+ result_path = os.path.join(repo_dir, output_dir)
+ if not os.path.exists(result_path):
+     os.mkdir(result_path)
+
+ os.chdir(repo_dir)
+
+
+ # Run the V-Express demo
+ def run_demo(
+         reference_image, audio, video,
+         kps_path, output_path, retarget_strategy,
+         reference_attention_weight=0.95,
+         audio_attention_weight=3.0,
+         progress=gr.Progress()):
+     # Step 1: extract keypoints from the video (or reuse a provided kps file)
+     progress((0, 100), desc="Starting...")
+
+     kps_sequence_save_path = f"./{output_dir}/kps.pth"
+
+     if video is not None:
+         # Run the script that extracts the keypoint sequence and audio track
+         progress((25, 100), desc="Extracting keypoints and audio...")
+         audio_path = video.replace(".mp4", ".mp3")
+
+         subprocess.run([
+             "python",
+             "scripts/extract_kps_sequence_and_audio.py",
+             "--video_path", video,
+             "--kps_sequence_save_path", kps_sequence_save_path,
+             "--audio_save_path", audio_path
+         ], check=True)
+         progress((50, 100), desc="Keypoints and audio extracted successfully.")
+         rem_progress = (75, 100)
+     else:
+         rem_progress = (50, 100)
+         audio_path = audio
+         shutil.copy(kps_path, kps_sequence_save_path)
+
+     # Re-encode the audio to a clean mp3 before inference
+     subprocess.run(["ffmpeg", "-y", "-i", audio_path, "-c:a", "libmp3lame", temp_audio_path], check=True)
+     shutil.move(temp_audio_path, audio_path)
+
+     # Step 2: run inference with the reference image and audio,
+     # using the selected retargeting strategy
+     progress(rem_progress, desc="Inference...")
+     inference_script = "inference.py"
+     inference_params = [
+         "--reference_image_path", reference_image,
+         "--audio_path", audio_path,
+         "--kps_path", kps_sequence_save_path,
+         "--output_path", output_path,
+         "--retarget_strategy", retarget_strategy,
+         "--num_inference_steps", "30",  # hardcoded for now, can be adjusted
+         "--reference_attention_weight", str(reference_attention_weight),
+         "--audio_attention_weight", str(audio_attention_weight)
+     ]
+
+     # Run the inference script with the assembled parameters
+     subprocess.run(["python", inference_script] + inference_params, check=True)
+     status = f"Video generated successfully. Saved at: {output_path}"
+     progress((100, 100), desc=status)
+     return output_path, kps_sequence_save_path
+
+ # Create the Gradio interface
+ inputs = [
+     gr.Image(label="Reference Image", type="filepath"),
+     gr.Audio(label="Audio", type="filepath"),
+     gr.Video(label="Video"),
+     gr.File(label="KPS sequences", value="test_samples/short_case/10/kps.pth"),
+     gr.Textbox(label="Output Path for generated video", value=f"./{output_dir}/output_video.mp4"),
+     gr.Dropdown(label="Retargeting Strategy", choices=["no_retarget", "fix_face", "offset_retarget", "naive_retarget"], value="no_retarget"),
+     gr.Slider(label="Reference Attention Weight", minimum=0.0, maximum=1.0, step=0.01, value=0.95),
+     gr.Slider(label="Audio Attention Weight", minimum=1.0, maximum=3.0, step=0.1, value=3.0)
+ ]
+
+ output = [
+     gr.Video(label="Generated Video"),
+     gr.File(label="Generated KPS Sequences File (kps.pth)")
+ ]
+
+ # Title and description for the interface
+ title = "V-Express Gradio Interface"
+ description = "An interactive interface for generating talking-face videos with V-Express."
+
+ # Launch the Gradio app
+ demo = gr.Interface(run_demo, inputs, output, title=title, description=description)
+ demo.queue().launch()
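Since `gr.Interface` exposes `run_demo` over Gradio's API, the Space could also be driven programmatically with `gradio_client`. A minimal sketch, assuming a hypothetical Space id of `faraday/V-Express` and local input files; exact file-argument handling varies by `gradio_client` version:

```python
# Sketch only: call the deployed Space via the Gradio client API.
from gradio_client import Client

client = Client("faraday/V-Express")  # hypothetical Space id
video_path, kps_file = client.predict(
    "ref.jpg",                              # reference image (local path)
    "speech.mp3",                           # driving audio (local path)
    None,                                   # driving video (optional)
    "test_samples/short_case/10/kps.pth",   # keypoint sequence file
    "./output/output_video.mp4",            # output path on the server
    "no_retarget",                          # retargeting strategy
    0.95,                                   # reference attention weight
    3.0,                                    # audio attention weight
    api_name="/predict",
)
print(video_path, kps_file)
```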
requirements.txt ADDED
@@ -0,0 +1,16 @@
+ av==10.0.0
+ diffusers==0.24.0
+ imageio-ffmpeg==0.4.9
+ insightface==0.7.3
+ omegaconf==2.2.3
+ onnxruntime==1.16.3
+ safetensors==0.4.2
+ torch==2.0.1
+ torchaudio==2.0.2
+ torchvision==0.15.2
+ transformers==4.30.2
+ einops==0.4.1
+ tqdm==4.66.1
+ xformers==0.0.20
+ accelerate==0.19.0
+ gitpython==3.1.31