|
from diffusers import AutoencoderOobleck |
|
import torch |
|
from transformers import T5EncoderModel,T5TokenizerFast |
|
from diffusers import FluxTransformer2DModel |
|
from torch import nn |
|
from typing import List |
|
from diffusers import FlowMatchEulerDiscreteScheduler |
|
from diffusers.training_utils import compute_density_for_timestep_sampling |
|
import copy |
|
import torch.nn.functional as F |
|
import numpy as np |
|
from src.model import TangoFlux |
|
from huggingface_hub import snapshot_download |
|
from tqdm import tqdm |
|
from typing import Optional,Union,List |
|
from datasets import load_dataset, Audio |
|
from math import pi |
|
import json |
|
import inspect |
|
import yaml |
|
from safetensors.torch import load_file |
|
|
|
|
|
class TangoFluxInference:
    """Inference-only wrapper that turns a text prompt into a waveform.

    The constructor downloads a Hugging Face Hub repository snapshot and
    loads two sets of weights from it: ``vae.safetensors`` into an
    ``AutoencoderOobleck`` and ``tangoflux.safetensors`` into a ``TangoFlux``
    model configured from the snapshot's ``config.json``.

    Attributes are documented here rather than inline:
        vae: The ``AutoencoderOobleck`` used to decode latents to audio.
        model: The ``TangoFlux`` model that produces latents from a prompt.
    """

    def __init__(
        self,
        name='declare-lab/TangoFlux',
        device="cuda" if torch.cuda.is_available() else "cpu",
    ):
        """Download the checkpoint snapshot and build both models.

        Args:
            name: Hub repository id passed to ``snapshot_download``.
            device: Device both models are moved to. Defaults to ``"cuda"``
                when CUDA is available and otherwise to ``"cpu"``, so the
                class can be constructed on CPU-only hosts. An explicitly
                passed value is used as given.
        """
        self.vae = AutoencoderOobleck()

        # Local directory of the downloaded snapshot.
        paths = snapshot_download(repo_id=name)

        vae_weights = load_file(f"{paths}/vae.safetensors")
        self.vae.load_state_dict(vae_weights)

        weights = load_file(f"{paths}/tangoflux.safetensors")
        with open(f"{paths}/config.json", "r", encoding="utf-8") as f:
            config = json.load(f)

        self.model = TangoFlux(config)
        # strict=False tolerates key mismatches between the checkpoint and
        # the model. NOTE(review): presumably some sub-module weights are
        # deliberately absent from the checkpoint -- confirm which keys are
        # expected to be missing.
        self.model.load_state_dict(weights, strict=False)

        self.vae.to(device)
        self.model.to(device)

        # This class only runs inference: switch off train-only behaviour
        # (e.g. dropout) in any layer that has it.
        self.vae.eval()
        self.model.eval()

    def generate(self, prompt, steps=25, duration=10, guidance_scale=4.5):
        """Generate audio for ``prompt`` and trim it to ``duration``.

        Args:
            prompt: Text prompt forwarded to ``self.model.inference_flow``.
            steps: Forwarded as ``num_inference_steps``.
            duration: Forwarded to the model and also used, multiplied by
                the VAE's ``config.sampling_rate``, as the number of samples
                kept along the last axis (presumably seconds -- confirm).
            guidance_scale: Forwarded to ``inference_flow`` unchanged.

        Returns:
            A CPU tensor: the first item of the decoded batch, truncated
            along its last axis to ``int(duration * sampling_rate)`` entries.
        """
        with torch.no_grad():
            latents = self.model.inference_flow(
                prompt,
                duration=duration,
                num_inference_steps=steps,
                guidance_scale=guidance_scale,
            )
            # Swap axes 1 and 2 before decoding; assumes the model emits
            # (batch, length, channels) and the VAE expects
            # (batch, channels, length) -- TODO confirm.
            decoded = self.vae.decode(latents.transpose(2, 1)).sample
            # Only the first batch element is returned.
            wave = decoded.cpu()[0]

        # Drop any samples the decoder produced beyond the requested length.
        waveform_end = int(duration * self.vae.config.sampling_rate)
        return wave[:, :waveform_end]
|
|
|
|
|
|
|
|
|
|