File size: 5,337 Bytes
4af4e17 48b8a2c 4af4e17 b4bce9a 4af4e17 b4bce9a 4af4e17 b4bce9a 4af4e17 6c4f574 c68830c b4bce9a 11da134 4af4e17 668ed0d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 |
import pathlib
import uuid
import os
import gradio as gr
from tqdm import tqdm
import requests
import urllib.request
import json
import time
# Path of the MP3 file holding the synthesized speech that is uploaded to D-ID.
output_mp3 = "output.mp3"
def upload_image(img: str, d_id_key: str):
    """Upload a local image file to D-ID and return the URL from the response.

    Args:
        img: Path of the image file to upload.
        d_id_key: D-ID API key, sent after ``Basic `` in the authorization
            header.

    Returns:
        The value of the ``url`` field of the JSON response.

    Raises:
        RuntimeError: If D-ID answers with an HTTP error status.
    """
    url = "https://api.d-id.com/images"
    headers = {
        "accept": "application/json",
        "authorization": "Basic " + d_id_key,
    }
    # The context manager closes the file handle even if the request raises.
    with open(img, "rb") as image_file:
        # The multipart filename and MIME type are fixed, whatever the real file is.
        files = {"image": ("hero.jpg", image_file, "image/jpg")}
        response = requests.post(url, files=files, headers=headers)
    if not response.ok:
        # Fail with the server's message instead of a KeyError on "url" below.
        raise RuntimeError(
            f"D-ID image upload failed ({response.status_code}): {response.text}"
        )
    img_url = response.json()["url"]
    print(img_url)
    return img_url
def upload_audio(audio: str, d_id_key: str):
    """Upload a local audio file to D-ID and return the URL from the response.

    Args:
        audio: Path of the audio file to upload; also used as the multipart
            filename.
        d_id_key: D-ID API key, sent after ``Basic `` in the authorization
            header.

    Returns:
        The value of the ``url`` field of the JSON response.

    Raises:
        RuntimeError: If D-ID answers with an HTTP error status.
    """
    url = "https://api.d-id.com/audios"
    headers = {
        "accept": "application/json",
        "authorization": "Basic " + d_id_key,
    }
    # The context manager closes the file handle even if the request raises.
    with open(audio, "rb") as audio_file:
        files = {"audio": (audio, audio_file, "audio/mpeg")}
        response = requests.post(url, files=files, headers=headers)
    if not response.ok:
        # Fail with the server's message instead of a KeyError on "url" below.
        raise RuntimeError(
            f"D-ID audio upload failed ({response.status_code}): {response.text}"
        )
    audio_url = response.json()["url"]
    print(audio_url)
    return audio_url
def get_did_video(process_video_url, d_id_key, poll_interval=1, timeout=None):
    """Poll a D-ID talk until its ``result_url`` is available and return it.

    Args:
        process_video_url: Talk id appended to ``https://api.d-id.com/talks/``
            (despite the name, it is used as a path segment, not a full URL).
        d_id_key: D-ID API key, sent after ``Basic `` in the authorization
            header.
        poll_interval: Seconds to sleep between two status requests.
        timeout: Maximum number of seconds to keep polling, or ``None`` to
            poll without a time limit.

    Returns:
        The value of the ``result_url`` field of the talk.

    Raises:
        RuntimeError: If the talk reports a failure status.
        TimeoutError: If ``timeout`` elapses before ``result_url`` appears.
    """
    url = "https://api.d-id.com/talks/" + process_video_url
    headers = {
        "accept": "application/json",
        "authorization": "Basic " + d_id_key,
    }
    deadline = None if timeout is None else time.monotonic() + timeout
    while True:
        response = requests.get(url, headers=headers)
        response_dict = response.json()
        print(response.text)
        if "result_url" in response_dict:
            break
        # NOTE(review): "error" and "rejected" are assumed to be D-ID's terminal
        # failure statuses -- confirm against the API reference. Without this
        # check a failed talk would be polled forever.
        if response_dict.get("status") in ("error", "rejected"):
            raise RuntimeError(f"D-ID talk failed: {response.text}")
        if deadline is not None and time.monotonic() >= deadline:
            raise TimeoutError(
                f"D-ID talk {process_video_url} not ready after {timeout} seconds"
            )
        # Sleep only when another request will follow.
        time.sleep(poll_interval)
    result_url = response_dict["result_url"]
    print("From did_video \n\n\n")
    print(result_url)
    return result_url
def text_to_speach_api(text: str, elv_key, voice_id: str):
    """Synthesize ``text`` with ElevenLabs and write the audio to ``output_mp3``.

    Args:
        text: Text to be spoken.
        elv_key: ElevenLabs API key, sent in the ``xi-api-key`` header.
        voice_id: Id of the ElevenLabs voice, inserted into the request URL.

    Raises:
        ValueError: If ``voice_id`` is empty or ``None``.
        RuntimeError: If ElevenLabs answers with an HTTP error status.
    """
    if not voice_id:
        raise ValueError("voice_id is required for text-to-speech")
    url = "https://api.elevenlabs.io/v1/text-to-speech/" + voice_id + "/stream"
    headers = {
        "accept": "*/*",
        "xi-api-key": elv_key,
        "Content-Type": "application/json",
    }
    data = {
        "text": text,
        "voice_settings": {
            "stability": 0,
            "similarity_boost": 0,
        },
    }
    response = requests.post(url, headers=headers, json=data)
    if not response.ok:
        # Raise instead of printing, so the caller never continues with a
        # stale or missing audio file from an earlier run.
        raise RuntimeError(
            f"ElevenLabs text-to-speech failed ({response.status_code}): "
            f"{response.text}"
        )
    # Write to the module-level path that the caller uploads afterwards.
    with open(output_mp3, "wb") as f:
        f.write(response.content)
def get_voice_names():
    """Return the ``name`` of every entry under ``voices`` in ``data.json``.

    The file is read from the current working directory; order is preserved.
    """
    catalogue = json.loads(pathlib.Path("data.json").read_text())
    names = []
    for entry in catalogue["voices"]:
        names.append(entry["name"])
    return names
# Look up a voice id by the voice's display name.
def get_voice_id(name):
    """Return the ``voice_id`` of the first voice in ``data.json`` named ``name``.

    Returns ``None`` when no entry under ``voices`` has that name.
    """
    with open("data.json") as handle:
        catalogue = json.load(handle)
    # Lazy scan: stops at the first entry whose name matches.
    matching_ids = (
        entry['voice_id'] for entry in catalogue['voices'] if entry['name'] == name
    )
    return next(matching_ids, None)
# D-ID API: create a talk from an uploaded image and audio track.
def d_id_api(image_url, d_id_key, audio_url):
    """Create a D-ID talk and return the ``id`` field of the response.

    Args:
        image_url: URL sent as ``source_url`` in the request payload.
        d_id_key: D-ID API key, sent after ``Basic `` in the authorization
            header.
        audio_url: URL sent as ``script.audio_url`` in the request payload.

    Returns:
        The value of the ``id`` field of the JSON response.

    Raises:
        RuntimeError: If D-ID answers with an HTTP error status.
    """
    print("D-id API")
    url = "https://api.d-id.com/talks"
    payload = {
        "source_url": image_url,
        "script": {
            "type": "audio",
            "audio_url": audio_url,
        },
    }
    headers = {
        "accept": "application/json",
        "content-type": "application/json",
        "authorization": "Basic " + d_id_key,
    }
    response = requests.post(url, json=payload, headers=headers)
    print("From D-id API \n\n\n")
    print(response.text)
    if not response.ok:
        # Fail with the server's message instead of a KeyError on "id" below.
        raise RuntimeError(
            f"D-ID talk creation failed ({response.status_code}): {response.text}"
        )
    process_video = response.json()["id"]
    print(process_video)
    return process_video
def transcribe_video(d_id_key: str, elv_key: str, full_text: str, voice_name: str, img):
    """Generate a talking-photo video and return the path of the downloaded file.

    Steps: resolve the voice id, synthesize the speech to ``output_mp3``,
    upload audio and image to D-ID, create the talk, wait for its result and
    download it to ``hero.mp4``.

    Args:
        d_id_key: D-ID API key.
        elv_key: ElevenLabs API key.
        full_text: Text the photo should speak.
        voice_name: Name of a voice listed in ``data.json``.
        img: Path of the image file to animate.

    Returns:
        The local file name of the downloaded video (``'hero.mp4'``).

    Raises:
        ValueError: If ``voice_name`` is not a known voice or ``img`` is empty.
    """
    print(voice_name)
    voice_id = get_voice_id(voice_name)
    # Validate inputs before any remote call, so a missing voice or image is
    # reported clearly and no speech is synthesized for a doomed request.
    if voice_id is None:
        raise ValueError(f"Unknown voice: {voice_name!r}")
    if not img:
        raise ValueError("An image is required")
    text_to_speach_api(full_text, elv_key, voice_id)
    audio_url = upload_audio(output_mp3, d_id_key)
    image_url = upload_image(img, d_id_key)
    process_video_url = d_id_api(image_url, d_id_key, audio_url)
    video_url = get_did_video(process_video_url, d_id_key)
    file_name = 'hero.mp4'
    urllib.request.urlretrieve(video_url, file_name)
    return file_name
# Example rows for the UI, in input order: D-ID key, ElevenLabs key, text,
# voice name, image path. The key fields are left empty.
examples = [
    [
        "",
        "",
        "Good morning, it's great to see you! I hope you're having a wonderful day. I just wanted to say thank you for taking the time to speak with me. Is there anything new or exciting happening in your life? I'd love to hear about it. Let's catch up soon!",
        "Arnold",
        "./images/hero.jpg",
    ],
    [
        "",
        "",
        "Hello there, I'm a talking photo! I can speak any text you type here. Try it out!",
        "Domi",
        "./images/3.jpg",
    ],
    [
        "",
        "",
        "Hello there, I'm a talking photo! I can speak any text you type here. Try it out!",
        "Domi",
        "./images/2.jpg",
    ],
]

# Gradio interface: the five inputs map positionally onto transcribe_video's
# parameters, and the returned file path is shown as a video.
demo = gr.Interface(
    fn=transcribe_video,
    inputs=[
        gr.Textbox(label="D-Id API Key", placeholder="Paste your D-Id", type='password'),
        gr.Textbox(label="Elevenlabs API Keys", placeholder="Paste Elevenlabs", type='password'),
        gr.Textbox(
            lines=4,
            label=" Please input the text you wish to generate in order to make the photo speak.",
            placeholder="English Text here",
        ),
        # The voice list is read from data.json once, when the module loads.
        gr.Dropdown(choices=get_voice_names(), label="Select a voice"),
        gr.Image(label="photo of a Person", type="filepath"),
    ],
    outputs="video",
    title="Bring your images to life with the talking animation feature now!",
    examples=examples,
    cache_examples=False,
)
demo.launch()