Spaces:

H-Liu1997
/

TANGO

Running on L40S

File size: 19,128 Bytes

31f2f28

{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "accelerator": "GPU",
    "colab": {
      "provenance": []
    },
    "kernelspec": {
      "display_name": "Python 3",
      "name": "python3"
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "source": [
        "**Fixes by: [justinjohn-03](https://github.com/justinjohn0306)**"
      ],
      "metadata": {
        "id": "9Uyk6DCBGHuW"
      }
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "U1xFNFU58_2j"
      },
      "source": [
        "## Goal: Make anyone speak anything (LipSync)\n",
        "\n",
        "* Github: https://github.com/Rudrabha/Wav2Lip\n",
        "* Paper: https://arxiv.org/abs/2008.10010\n",
        "*Original notebook: https://colab.research.google.com/drive/1tZpDWXz49W6wDcTprANRGLo2D_EbD5J8?usp=sharing\n",
        "\n",
        "\n",
        "\n"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "Qgo-oaI3JU2u",
        "cellView": "form"
      },
      "source": [
        "#@title <h1>Step1: Setup Wav2Lip</h1>\n",
        "#@markdown * Install dependency\n",
        "#@markdown * Download pretrained model\n",
        "!rm -rf /content/sample_data\n",
        "!mkdir /content/sample_data\n",
        "\n",
        "!git clone https://github.com/zabique/Wav2Lip\n",
        "\n",
        "#download the pretrained model\n",
        "!wget 'https://iiitaphyd-my.sharepoint.com/personal/radrabha_m_research_iiit_ac_in/_layouts/15/download.aspx?share=EdjI7bZlgApMqsVoEUUXpLsBxqXbn5z8VTmoxp55YNDcIA' -O '/content/Wav2Lip/checkpoints/wav2lip_gan.pth'\n",
        "a = !pip install https://raw.githubusercontent.com/AwaleSajil/ghc/master/ghc-1.0-py3-none-any.whl\n",
        "\n",
        "# !pip uninstall tensorflow tensorflow-gpu\n",
        "!cd Wav2Lip && pip install -r requirements.txt\n",
        "\n",
        "#download pretrained model for face detection\n",
        "!wget \"https://www.adrianbulat.com/downloads/python-fan/s3fd-619a316812.pth\" -O \"/content/Wav2Lip/face_detection/detection/sfd/s3fd.pth\"\n",
        "\n",
        "!pip install -q youtube-dl\n",
        "!pip install ffmpeg-python\n",
        "!pip install librosa==0.9.1\n",
        "\n",
        "#this code for recording audio\n",
        "\"\"\"\n",
        "To write this piece of code I took inspiration/code from a lot of places.\n",
        "It was late night, so I'm not sure how much I created or just copied o.O\n",
        "Here are some of the possible references:\n",
        "https://blog.addpipe.com/recording-audio-in-the-browser-using-pure-html5-and-minimal-javascript/\n",
        "https://stackoverflow.com/a/18650249\n",
        "https://hacks.mozilla.org/2014/06/easy-audio-capture-with-the-mediarecorder-api/\n",
        "https://air.ghost.io/recording-to-an-audio-file-using-html5-and-js/\n",
        "https://stackoverflow.com/a/49019356\n",
        "\"\"\"\n",
        "from IPython.display import HTML, Audio\n",
        "from google.colab.output import eval_js\n",
        "from base64 import b64decode\n",
        "import numpy as np\n",
        "from scipy.io.wavfile import read as wav_read\n",
        "import io\n",
        "import ffmpeg\n",
        "\n",
        "AUDIO_HTML = \"\"\"\n",
        "<script>\n",
        "var my_div = document.createElement(\"DIV\");\n",
        "var my_p = document.createElement(\"P\");\n",
        "var my_btn = document.createElement(\"BUTTON\");\n",
        "var t = document.createTextNode(\"Press to start recording\");\n",
        "\n",
        "my_btn.appendChild(t);\n",
        "//my_p.appendChild(my_btn);\n",
        "my_div.appendChild(my_btn);\n",
        "document.body.appendChild(my_div);\n",
        "\n",
        "var base64data = 0;\n",
        "var reader;\n",
        "var recorder, gumStream;\n",
        "var recordButton = my_btn;\n",
        "\n",
        "var handleSuccess = function(stream) {\n",
        "  gumStream = stream;\n",
        "  var options = {\n",
        "    //bitsPerSecond: 8000, //chrome seems to ignore, always 48k\n",
        "    mimeType : 'audio/webm;codecs=opus'\n",
        "    //mimeType : 'audio/webm;codecs=pcm'\n",
        "  };            \n",
        "  //recorder = new MediaRecorder(stream, options);\n",
        "  recorder = new MediaRecorder(stream);\n",
        "  recorder.ondataavailable = function(e) {            \n",
        "    var url = URL.createObjectURL(e.data);\n",
        "    var preview = document.createElement('audio');\n",
        "    preview.controls = true;\n",
        "    preview.src = url;\n",
        "    document.body.appendChild(preview);\n",
        "\n",
        "    reader = new FileReader();\n",
        "    reader.readAsDataURL(e.data); \n",
        "    reader.onloadend = function() {\n",
        "      base64data = reader.result;\n",
        "      //console.log(\"Inside FileReader:\" + base64data);\n",
        "    }\n",
        "  };\n",
        "  recorder.start();\n",
        "  };\n",
        "\n",
        "recordButton.innerText = \"Recording... press to stop\";\n",
        "\n",
        "navigator.mediaDevices.getUserMedia({audio: true}).then(handleSuccess);\n",
        "\n",
        "\n",
        "function toggleRecording() {\n",
        "  if (recorder && recorder.state == \"recording\") {\n",
        "      recorder.stop();\n",
        "      gumStream.getAudioTracks()[0].stop();\n",
        "      recordButton.innerText = \"Saving the recording... pls wait!\"\n",
        "  }\n",
        "}\n",
        "\n",
        "// https://stackoverflow.com/a/951057\n",
        "function sleep(ms) {\n",
        "  return new Promise(resolve => setTimeout(resolve, ms));\n",
        "}\n",
        "\n",
        "var data = new Promise(resolve=>{\n",
        "//recordButton.addEventListener(\"click\", toggleRecording);\n",
        "recordButton.onclick = ()=>{\n",
        "toggleRecording()\n",
        "\n",
        "sleep(2000).then(() => {\n",
        "  // wait 2000ms for the data to be available...\n",
        "  // ideally this should use something like await...\n",
        "  //console.log(\"Inside data:\" + base64data)\n",
        "  resolve(base64data.toString())\n",
        "\n",
        "});\n",
        "\n",
        "}\n",
        "});\n",
        "      \n",
        "</script>\n",
        "\"\"\"\n",
        "\n",
        "%cd /\n",
        "from ghc.l_ghc_cf import l_ghc_cf\n",
        "%cd content\n",
        "\n",
        "def get_audio():\n",
        "  display(HTML(AUDIO_HTML))\n",
        "  data = eval_js(\"data\")\n",
        "  binary = b64decode(data.split(',')[1])\n",
        "  \n",
        "  process = (ffmpeg\n",
        "    .input('pipe:0')\n",
        "    .output('pipe:1', format='wav')\n",
        "    .run_async(pipe_stdin=True, pipe_stdout=True, pipe_stderr=True, quiet=True, overwrite_output=True)\n",
        "  )\n",
        "  output, err = process.communicate(input=binary)\n",
        "  \n",
        "  riff_chunk_size = len(output) - 8\n",
        "  # Break up the chunk size into four bytes, held in b.\n",
        "  q = riff_chunk_size\n",
        "  b = []\n",
        "  for i in range(4):\n",
        "      q, r = divmod(q, 256)\n",
        "      b.append(r)\n",
        "\n",
        "  # Replace bytes 4:8 in proc.stdout with the actual size of the RIFF chunk.\n",
        "  riff = output[:4] + bytes(b) + output[8:]\n",
        "\n",
        "  sr, audio = wav_read(io.BytesIO(riff))\n",
        "\n",
        "  return audio, sr\n",
        "\n",
        "\n",
        "from IPython.display import HTML\n",
        "from base64 import b64encode\n",
        "def showVideo(path):\n",
        "  mp4 = open(str(path),'rb').read()\n",
        "  data_url = \"data:video/mp4;base64,\" + b64encode(mp4).decode()\n",
        "  return HTML(\"\"\"\n",
        "  <video width=700 controls>\n",
        "        <source src=\"%s\" type=\"video/mp4\">\n",
        "  </video>\n",
        "  \"\"\" % data_url)\n",
        "\n",
        "from IPython.display import clear_output"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "SEdy6PWDXMRL"
      },
      "source": [
        "# LipSync Youtube Video"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "QI4kcm8QEeGZ",
        "cellView": "form"
      },
      "source": [
        "#@title STEP2: Select a Youtube Video\n",
        "# Install yt-dlp\n",
        "!pip install yt-dlp\n",
        "\n",
        "#@markdown ### Find YouTube video ID from URL\n",
        "from urllib import parse as urlparse\n",
        "YOUTUBE_URL = 'https://www.youtube.com/watch?v=vAnWYLTdvfY' #@param {type:\"string\"}\n",
        "url_data = urlparse.urlparse(YOUTUBE_URL)\n",
        "query = urlparse.parse_qs(url_data.query)\n",
        "YOUTUBE_ID = query[\"v\"][0]\n",
        "\n",
        "#@markdown ### Trim the video (start, end) seconds\n",
        "start = 35 #@param {type:\"integer\"}\n",
        "end = 62 #@param {type:\"integer\"}\n",
        "interval = end - start\n",
        "\n",
        "# Download the YouTube video using yt-dlp\n",
        "!yt-dlp -f 'bestvideo[ext=mp4]' --output \"youtube.%(ext)s\" https://www.youtube.com/watch?v=$YOUTUBE_ID\n",
        "\n",
        "# Cut the video using FFmpeg\n",
        "!ffmpeg -y -i youtube.mp4 -ss {start} -t {interval} -async 1 /content/sample_data/input_vid.mp4\n",
        "\n",
        "# Preview the trimmed video\n",
        "from IPython.display import HTML\n",
        "from base64 import b64encode\n",
        "mp4 = open('/content/sample_data/input_vid.mp4','rb').read()\n",
        "data_url = \"data:video/mp4;base64,\" + b64encode(mp4).decode()\n",
        "HTML(f\"\"\"<video width=600 controls><source src=\"{data_url}\"></video>\"\"\")\n",
        "\n"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "zS_RAeh-IfZy",
        "cellView": "form"
      },
      "source": [
        "#@title STEP3: Select Audio (Record or Upload)\n",
        "from IPython.display import Audio \n",
        "from IPython.core.display import display\n",
        "\n",
        "record_or_upload = 'Upload' #@param ['Record', 'Upload']\n",
        "\n",
        "def displayAudio():\n",
        "  display(Audio('/content/sample_data/input_audio.wav'))\n",
        "if record_or_upload == 'Record':\n",
        "  audio, sr = get_audio()\n",
        "  import scipy\n",
        "  scipy.io.wavfile.write('/content/sample_data/input_audio.wav', sr, audio)\n",
        "elif record_or_upload == 'Upload':\n",
        "  from google.colab import files\n",
        "  uploaded = files.upload()\n",
        "  for fn in uploaded.keys():\n",
        "    print('User uploaded file \"{name}\" with length {length} bytes'.format(\n",
        "        name=fn, length=len(uploaded[fn])))\n",
        "  \n",
        "  #concider only the first file\n",
        "  audio_file = str(list(uploaded.keys())[0])\n",
        "  \n",
        "  # Load audio with specified sampling rate\n",
        "  import librosa\n",
        "  audio, sr = librosa.load(audio_file, sr=None)\n",
        "  \n",
        "  # Save audio with specified sampling rate\n",
        "  import soundfile as sf\n",
        "  sf.write('/content/sample_data/input_audio.wav', audio, sr, format='wav')\n",
        "  \n",
        "  clear_output()\n",
        "  displayAudio()"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "BQPLXJ8L0gms",
        "cellView": "form"
      },
      "source": [
        "#@title STEP4: Start Crunching and Preview Output\n",
        "#@markdown <b>Note: Only change these, if you have to</b>\n",
        "pad_top =  0#@param {type:\"integer\"}\n",
        "pad_bottom =  10#@param {type:\"integer\"}\n",
        "pad_left =  0#@param {type:\"integer\"}\n",
        "pad_right =  0#@param {type:\"integer\"}\n",
        "rescaleFactor =  1#@param {type:\"integer\"}\n",
        "nosmooth = False #@param {type:\"boolean\"}\n",
        "\n",
        "\n",
        "if nosmooth == False:\n",
        "  !cd Wav2Lip && python inference.py --checkpoint_path checkpoints/wav2lip_gan.pth --face \"../sample_data/input_vid.mp4\" --audio \"../sample_data/input_audio.wav\" --pads $pad_top $pad_bottom $pad_left $pad_right --resize_factor $rescaleFactor\n",
        "else:\n",
        "  !cd Wav2Lip && python inference.py --checkpoint_path checkpoints/wav2lip_gan.pth --face \"../sample_data/input_vid.mp4\" --audio \"../sample_data/input_audio.wav\" --pads $pad_top $pad_bottom $pad_left $pad_right --resize_factor $rescaleFactor --nosmooth\n",
        "#Preview output video\n",
        "clear_output()\n",
        "print(\"Final Video Preview\")\n",
        "print(\"Download this video from\", '/content/Wav2Lip/results/result_voice.mp4')\n",
        "showVideo('/content/Wav2Lip/results/result_voice.mp4')\n"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "vYxpPeie1CYL"
      },
      "source": [
        "# LipSync on Your Video File"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "nDuM7tfZ1F0t",
        "cellView": "form"
      },
      "source": [
        "import os\n",
        "from google.colab import files\n",
        "from IPython.display import HTML\n",
        "\n",
        "def showVideo(file_path):\n",
        "    \"\"\"Function to display video in Colab\"\"\"\n",
        "    mp4 = open(file_path,'rb').read()\n",
        "    data_url = \"data:video/mp4;base64,\" + b64encode(mp4).decode()\n",
        "    display(HTML(\"\"\"\n",
        "    <video controls width=600>\n",
        "        <source src=\"%s\" type=\"video/mp4\">\n",
        "    </video>\n",
        "    \"\"\" % data_url))\n",
        "\n",
        "#@markdown ### Select an uploading method\n",
        "upload_or_path = \"Upload\" #@param [\"Upload\", \"Custom Path\"]\n",
        "\n",
        "if upload_or_path == \"Upload\":\n",
        "    uploaded = files.upload()\n",
        "    for filename in uploaded.keys():\n",
        "        os.rename(filename, '/content/sample_data/input_vid.mp4')\n",
        "    PATH_TO_YOUR_VIDEO = '/content/sample_data/input_vid.mp4'\n",
        "else:\n",
        "    PATH_TO_YOUR_VIDEO = '/content/test.mp4' #@param {type:\"string\"}\n",
        "    if not os.path.isfile(PATH_TO_YOUR_VIDEO):\n",
        "        print(\"ERROR: File not found!\")\n",
        "        raise SystemExit(0)\n",
        "\n",
        "#@markdown ### Trim the video (start, end) seconds\n",
        "start_time = 0 #@param {type:\"integer\"}\n",
        "end_time = 0 #@param {type:\"integer\"}\n",
        "\n",
        "if start_time == 0 and end_time == 0:\n",
        "    print(\"No trimming applied\")\n",
        "else:\n",
        "    duration = end_time - start_time\n",
        "    os.system(f\"ffmpeg -i {PATH_TO_YOUR_VIDEO} -ss {start_time} -t {duration} -async 1 /content/sample_data/trimmed_vid.mp4\")\n",
        "    PATH_TO_YOUR_VIDEO = \"/content/sample_data/input_vid.mp4\"\n",
        "    print(f\"Video trimmed from {start_time} to {end_time} seconds\")\n",
        "\n",
        "print(f\"PATH_TO_YOUR_VIDEO: {PATH_TO_YOUR_VIDEO}\")\n",
        "\n",
        "if upload_or_path == \"Upload\":\n",
        "    clear_output()\n",
        "    print(\"Input Video\")\n",
        "    showVideo(PATH_TO_YOUR_VIDEO)\n",
        "else:\n",
        "    if os.path.isfile(PATH_TO_YOUR_VIDEO):\n",
        "        print(\"Input Video\")\n",
        "        showVideo(PATH_TO_YOUR_VIDEO)\n"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "XgF4794r7sWK",
        "cellView": "form"
      },
      "source": [
        "#@title STEP3: Select Audio (Record or Upload)\n",
        "from IPython.display import Audio \n",
        "from IPython.core.display import display\n",
        "\n",
        "record_or_upload = 'Upload' #@param ['Record', 'Upload']\n",
        "\n",
        "def displayAudio():\n",
        "  display(Audio('/content/sample_data/input_audio.wav'))\n",
        "if record_or_upload == 'Record':\n",
        "  audio, sr = get_audio()\n",
        "  import scipy\n",
        "  scipy.io.wavfile.write('/content/sample_data/input_audio.wav', sr, audio)\n",
        "elif record_or_upload == 'Upload':\n",
        "  from google.colab import files\n",
        "  uploaded = files.upload()\n",
        "  for fn in uploaded.keys():\n",
        "    print('User uploaded file \"{name}\" with length {length} bytes'.format(\n",
        "        name=fn, length=len(uploaded[fn])))\n",
        "  \n",
        "  #concider only the first file\n",
        "  audio_file = str(list(uploaded.keys())[0])\n",
        "  \n",
        "  # Load audio with specified sampling rate\n",
        "  import librosa\n",
        "  audio, sr = librosa.load(audio_file, sr=None)\n",
        "  \n",
        "  # Save audio with specified sampling rate\n",
        "  import soundfile as sf\n",
        "  sf.write('/content/sample_data/input_audio.wav', audio, sr, format='wav')\n",
        "  \n",
        "  clear_output()\n",
        "  displayAudio()"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "ZgtO08V28ANf",
        "cellView": "form"
      },
      "source": [
        "#@title STEP4: Start Crunching and Preview Output\n",
        "#@markdown <b>Note: Only change these, if you have to</b>\n",
        "pad_top =  0#@param {type:\"integer\"}\n",
        "pad_bottom =  10#@param {type:\"integer\"}\n",
        "pad_left =  0#@param {type:\"integer\"}\n",
        "pad_right =  0#@param {type:\"integer\"}\n",
        "rescaleFactor =  1#@param {type:\"integer\"}\n",
        "nosmooth = False #@param {type:\"boolean\"}\n",
        "\n",
        "if nosmooth == False:\n",
        "  !cd Wav2Lip && python inference.py --checkpoint_path checkpoints/wav2lip_gan.pth --face \"../sample_data/input_vid.mp4\" --audio \"../sample_data/input_audio.wav\" --pads $pad_top $pad_bottom $pad_left $pad_right --resize_factor $rescaleFactor\n",
        "else:\n",
        "  !cd Wav2Lip && python inference.py --checkpoint_path checkpoints/wav2lip_gan.pth --face \"../sample_data/input_vid.mp4\" --audio \"../sample_data/input_audio.wav\" --pads $pad_top $pad_bottom $pad_left $pad_right --resize_factor $rescaleFactor --nosmooth\n",
        "\n",
        "#Preview output video\n",
        "clear_output()\n",
        "print(\"Final Video Preview\")\n",
        "print(\"Dowload this video from\", '/content/Wav2Lip/results/result_voice.mp4')\n",
        "showVideo('/content/Wav2Lip/results/result_voice.mp4')\n"
      ],
      "execution_count": null,
      "outputs": []
    }
  ]
}