import json
import os
import time

import numpy as np
import torch
from scipy.io.wavfile import write

from env import AttrDict
from meldataset import MAX_WAV_VALUE
from models import Generator

for dev in ("cpu", "cuda"):
    print(f"loading model in {dev}")
    device = torch.device(dev)

    # Load a pre-computed mel spectrogram and tile it to get longer inputs.
    y1 = torch.load("/speech/arun/tts/hifigan/denorm/test_243.npy.pt", map_location=device)
    y2 = torch.cat([y1] * 5, dim=1)
    y3 = torch.cat([y1] * 10, dim=1)

    # Load the generator configuration.
    config_file = os.path.join('/speech/arun/tts/hifigan/cp_hifigan/config.json')
    with open(config_file) as f:
        data = f.read()
    json_config = json.loads(data)
    h = AttrDict(json_config)
    torch.manual_seed(h.seed)

    # Build the generator and restore the trained checkpoint.
    generator = Generator(h).to(device)
    state_dict_g = torch.load("/speech/arun/tts/hifigan/cp_hifigan/g_00120000", map_location=device)
    generator.load_state_dict(state_dict_g['generator'])
    generator.eval()
    generator.remove_weight_norm()

    # Time three inference runs for each input length (timing includes the wav write).
    for i in range(3):
        print("Run", i)
        for x in [y1, y2, y3]:
            with torch.no_grad():
                st = time.time()
                y_g_hat = generator(x)
                audio = y_g_hat.squeeze()
                audio = audio * MAX_WAV_VALUE
                audio = audio.cpu().numpy().astype('int16')
                output_file = "gen.wav"
                write(output_file, h.sampling_rate, audio)
                et = time.time()
                elapsed = et - st
                print("Elapsed time:", elapsed)