Commit
·
d0bf7de
1
Parent(s):
9875c73
Update app.py
Browse files
app.py
CHANGED
@@ -90,6 +90,7 @@ def calculate(image_in, audio_in):
|
|
90 |
torchaudio.save("/content/audio.wav", waveform, sample_rate, encoding="PCM_S", bits_per_sample=16)
|
91 |
image = Image.open(image_in)
|
92 |
image = pad_image(image)
|
|
|
93 |
image.save("image.png")
|
94 |
|
95 |
pocketsphinx_run = subprocess.run(['pocketsphinx', '-phone_align', 'yes', 'single', '/content/audio.wav'], check=True, capture_output=True)
|
@@ -97,6 +98,7 @@ def calculate(image_in, audio_in):
|
|
97 |
with open("test.json", "w") as f:
|
98 |
f.write(jq_run.stdout.decode('utf-8').strip())
|
99 |
# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
|
|
100 |
os.system(f"cd /content/one-shot-talking-face && python3 -B test_script.py --img_path /content/image.png --audio_path /content/audio.wav --phoneme_path /content/test.json --save_dir /content/train")
|
101 |
return "/content/train/image_audio.mp4"
|
102 |
|
@@ -116,7 +118,7 @@ def merge_frames():
|
|
116 |
filenames.sort() # this iteration technique has no built in order, so sort the frames
|
117 |
print(filenames)
|
118 |
images = list(map(lambda filename: imageio.imread("/content/video_results/restored_imgs/"+filename), filenames))
|
119 |
-
|
120 |
imageio.mimsave('/content/video_output.mp4', images, fps=25.0) # modify the frame duration as needed
|
121 |
return "/content/video_output.mp4"
|
122 |
|
@@ -140,6 +142,8 @@ def one_shot_talking(image_in,audio_in):
|
|
140 |
os.system(f"python /content/GFPGAN/inference_gfpgan.py --upscale 2 -i /content/image_pre.png -o /content/results --bg_upsampler realesrgan")
|
141 |
# time.sleep(60)
|
142 |
image_in_one_shot='/content/results/restored_imgs/image_pre.png'
|
|
|
|
|
143 |
#One Shot Talking Face algorithm
|
144 |
calculate(image_in_one_shot,audio_in)
|
145 |
|
@@ -149,6 +153,8 @@ def one_shot_talking(image_in,audio_in):
|
|
149 |
os.system(f"python /content/PyVideoFramesExtractor/extract.py --video=/content/train/image_audio.mp4")
|
150 |
|
151 |
#2. Improve image quality using GFPGAN on each frame
|
|
|
|
|
152 |
os.system(f"python /content/GFPGAN/inference_gfpgan.py --upscale 2 -i /content/extracted_frames/image_audio_frames -o /content/video_results --bg_upsampler realesrgan")
|
153 |
|
154 |
#3. Merge all the frames into one video using imageio
|
|
|
90 |
torchaudio.save("/content/audio.wav", waveform, sample_rate, encoding="PCM_S", bits_per_sample=16)
|
91 |
image = Image.open(image_in)
|
92 |
image = pad_image(image)
|
93 |
+
os.system(f"rm -rf /content/image.png")
|
94 |
image.save("image.png")
|
95 |
|
96 |
pocketsphinx_run = subprocess.run(['pocketsphinx', '-phone_align', 'yes', 'single', '/content/audio.wav'], check=True, capture_output=True)
|
|
|
98 |
with open("test.json", "w") as f:
|
99 |
f.write(jq_run.stdout.decode('utf-8').strip())
|
100 |
# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
101 |
+
os.system(f"rm -rf /content/image_audio.mp4")
|
102 |
os.system(f"cd /content/one-shot-talking-face && python3 -B test_script.py --img_path /content/image.png --audio_path /content/audio.wav --phoneme_path /content/test.json --save_dir /content/train")
|
103 |
return "/content/train/image_audio.mp4"
|
104 |
|
|
|
118 |
filenames.sort() # this iteration technique has no built in order, so sort the frames
|
119 |
print(filenames)
|
120 |
images = list(map(lambda filename: imageio.imread("/content/video_results/restored_imgs/"+filename), filenames))
|
121 |
+
os.system(f"rm -rf /content/video_output.mp4")
|
122 |
imageio.mimsave('/content/video_output.mp4', images, fps=25.0) # modify the frame duration as needed
|
123 |
return "/content/video_output.mp4"
|
124 |
|
|
|
142 |
os.system(f"python /content/GFPGAN/inference_gfpgan.py --upscale 2 -i /content/image_pre.png -o /content/results --bg_upsampler realesrgan")
|
143 |
# time.sleep(60)
|
144 |
image_in_one_shot='/content/results/restored_imgs/image_pre.png'
|
145 |
+
os.system(f"rm -rf /content/results/restored_imgs/image_pre.png")
|
146 |
+
|
147 |
#One Shot Talking Face algorithm
|
148 |
calculate(image_in_one_shot,audio_in)
|
149 |
|
|
|
153 |
os.system(f"python /content/PyVideoFramesExtractor/extract.py --video=/content/train/image_audio.mp4")
|
154 |
|
155 |
#2. Improve image quality using GFPGAN on each frame
|
156 |
+
os.system(f"rm -rf /content/extracted_frames/image_audio_frames")
|
157 |
+
os.system(f"rm -rf /content/video_results/")
|
158 |
os.system(f"python /content/GFPGAN/inference_gfpgan.py --upscale 2 -i /content/extracted_frames/image_audio_frames -o /content/video_results --bg_upsampler realesrgan")
|
159 |
|
160 |
#3. Merge all the frames into one video using imageio
|