Spaces:

versantus
/

genmoai

Runtime error

App Files Files Community

genmoai / contrib /modal /main.py

versantus

Upload folder using huggingface_hub

d6c2737 verified 8 months ago

raw

history blame contribute delete

9.99 kB

	import modal
	from pathlib import Path

	# The Modal App that every remote function and class in this file is registered on.
	app = modal.App("mochi-finetune")

	# Named Modal volumes (created on first use if they do not exist yet) that hold
	# the input data, the intermediate data, and the produced weights/outputs.
	videos_volume = modal.Volume.from_name(
	    "mochi-tune-videos",
	    create_if_missing=True,
	)
	videos_prepared_volume = modal.Volume.from_name(
	    "mochi-tune-videos-prepared",
	    create_if_missing=True,
	)
	weights_volume = modal.Volume.from_name(
	    "mochi-tune-weights",
	    create_if_missing=True,
	)
	finetunes_volume = modal.Volume.from_name(
	    "mochi-tune-finetunes",
	    create_if_missing=True,
	)
	outputs_volume = modal.Volume.from_name(
	    "mochi-tune-outputs",
	    create_if_missing=True,
	)

	# GitHub coordinates of the repository that gets cloned into the container image.
	USERNAME = "genmoai"
	REPOSITORY = "mochi"
	CLONE_CMD = "git clone https://github.com/" + USERNAME + "/" + REPOSITORY + ".git"

	# Building our container image, one layer at a time:
	#   1. Debian slim base plus the system packages we need
	#   2. clone the repository and make it the working directory
	#   3. install the build helpers, then the repository itself in editable mode
	base_img = modal.Image.debian_slim()
	base_img = base_img.apt_install(
	    "git",
	    "ffmpeg",
	    "bc",
	    "zlib1g-dev",
	    "libjpeg-dev",
	    "wget",
	)
	base_img = base_img.run_commands(CLONE_CMD)
	base_img = base_img.workdir(REPOSITORY)
	base_img = base_img.pip_install("gdown", "setuptools", "wheel")
	base_img = base_img.run_commands("pip install -e . --no-build-isolation")

	# Duration helpers used for the `timeout=` arguments below
	# (expressed as multiples of one another; MINUTES is the base unit of 60).
	MINUTES = 60
	HOURS = MINUTES * 60

	# Remote function for downloading a labeled video dataset from Google Drive
	# Run it with:
	# modal run main::download_videos
	@app.function(
	    image=base_img,
	    volumes={
	        "/videos": videos_volume,
	    },
	)
	def download_videos():
	    """Download the example video dataset from Google Drive into our volume.

	    The archive is fetched with gdown into the container's working directory
	    and then extracted into ``/videos``, where the videos volume is mounted.

	    Raises:
	        RuntimeError: if gdown reports that the download did not succeed.
	    """
	    import gdown
	    import zipfile

	    name = "dissolve"
	    url = "https://drive.google.com/uc?id=1ldoBppcsv5Ueoikh0zCmNviojRCrGXQN"
	    output = f"{name}.zip"

	    # gdown returns the output path on success; a None result means nothing
	    # usable was written, so fail here instead of with a confusing zip error.
	    downloaded = gdown.download(url, output, quiet=False)
	    if downloaded is None:
	        raise RuntimeError(f"Failed to download {url} to {output}")

	    with zipfile.ZipFile(output, "r") as zip_ref:
	        zip_ref.extractall("/videos")

	# Remote function for downloading the model weights from Hugging Face
	# Run it with:
	# modal run main::download_weights
	@app.function(
	    image=base_img,
	    volumes={
	        "/weights": weights_volume,
	    },
	    timeout=1 * HOURS,
	)
	def download_weights():
	    """Download the Mochi weight files into the weights volume with wget.

	    HF-transfer and snapshot download tend to hang on the large model, so the
	    files are downloaded manually with wget, one after the other.

	    Raises:
	        subprocess.CalledProcessError: if any wget invocation exits non-zero,
	            so a failed download is not mistaken for a usable weight file.
	    """
	    import subprocess

	    base_url = "https://huggingface.co/genmo/mochi-1-preview/resolve/main"
	    # Largest file first; rough durations: dit ~30 min, decoder ~1 min, encoder ~20 sec.
	    filenames = ("dit.safetensors", "decoder.safetensors", "encoder.safetensors")

	    print("🍡 Downloading weights from Hugging Face. This may take 30 minutes.")
	    for filename in filenames:
	        subprocess.run(
	            ["wget", f"{base_url}/{filename}", "-O", f"/weights/{filename}"],
	            check=True,  # stop on the first failed download
	        )

	# Remote function for preprocessing the video dataset
	# Run it with:
	# modal run main::preprocess
	@app.function(
	    image=base_img,
	    volumes={
	        "/videos": videos_volume,
	        "/videos_prepared": videos_prepared_volume,
	        "/weights": weights_volume,
	    },
	    timeout=30 * MINUTES,
	    gpu="H100",
	)
	def preprocess():
	    """Run the repository's preprocessing script on the downloaded videos.

	    Invokes ``demos/fine_tuner/preprocess.bash`` with the videos directory,
	    the prepared-videos output directory and the weights directory, all of
	    which are mounted Modal volumes.

	    Raises:
	        subprocess.CalledProcessError: if the script exits with a non-zero
	            status, so a failed run is not reported as success.
	    """
	    import subprocess

	    print("🍡 Preprocessing videos. This may take 2-3 minutes.")
	    video_dir = "videos_dissolve"
	    subprocess.run(
	        [
	            "bash", "demos/fine_tuner/preprocess.bash",
	            "-v", f"/videos/{video_dir}/",
	            "-o", "/videos_prepared/",
	            "-w", "/weights/",
	            # NOTE(review): the meaning of -n is defined by preprocess.bash
	            # (presumably a frame count) - confirm against the script.
	            "-n", "37",
	        ],
	        check=True,
	    )

	# Remote function for finetuning the model using the prepared dataset
	# Configure the run in lora.yaml
	# Run it with:
	# modal run main::finetune
	@app.function(
	    image=base_img,
	    volumes={
	        "/videos": videos_volume,
	        "/videos_prepared": videos_prepared_volume,
	        "/weights": weights_volume,
	        "/finetunes": finetunes_volume,
	    },
	    mounts=[modal.Mount.from_local_file("lora.yaml", remote_path=f"{REPOSITORY}/lora.yaml")],
	    timeout=4 * HOURS,
	    gpu="H100",
	)
	def finetune():
	    """Run the repository's finetuning script with the locally mounted config.

	    Invokes ``demos/fine_tuner/run.bash`` with ``lora.yaml`` as its config.

	    Raises:
	        subprocess.CalledProcessError: if the script exits with a non-zero
	            status, so a failed training run is not reported as success.
	    """
	    import subprocess

	    print("🍡 Finetuning Mochi. This may take 3 hours.")
	    print("🍡 See your mochi-tune-finetunes volume for intermediate checkpoints and samples.")
	    subprocess.run(
	        [
	            "bash", "demos/fine_tuner/run.bash",
	            "-c", "lora.yaml",  # from our locally mounted yaml file
	            "-n", "1",
	        ],
	        check=True,
	    )

	# Remote function (Modal @cls) for running inference on one or multiple videos
	# Run it with the @local_entrypoint below
	@app.cls(
	    image=base_img,
	    volumes={
	        "/weights": weights_volume,
	        "/finetunes": finetunes_volume,
	        "/outputs": outputs_volume,
	    },
	    timeout=30 * MINUTES,
	    gpu="H100",
	)
	class MochiLora:
	    """Modal class that loads a Mochi pipeline once and generates videos with it.

	    Attributes:
	        model_dir: directory containing ``dit.safetensors`` and ``decoder.safetensors``.
	        lora_path: path to the LoRA weights to apply, or None for no LoRA.
	        cpu_offload: forwarded to the single-GPU pipeline as ``cpu_offload``.
	    """

	    def __init__(self, model_dir: str = "/weights", lora_path: str = None, cpu_offload: bool = False):
	        self.model_dir = model_dir
	        self.lora_path = lora_path
	        self.cpu_offload = cpu_offload

	    @modal.enter()
	    def start(self):
	        """Initialize the model - this runs once when the container starts"""
	        from genmo.mochi_preview.pipelines import (
	            DecoderModelFactory,
	            DitModelFactory,
	            MochiMultiGPUPipeline,
	            MochiSingleGPUPipeline,
	            T5ModelFactory,
	        )
	        import torch

	        print("🍡 Loading Mochi model.")

	        self.num_gpus = torch.cuda.device_count()

	        # Configure pipeline based on GPU count
	        klass = MochiSingleGPUPipeline if self.num_gpus == 1 else MochiMultiGPUPipeline

	        kwargs = dict(
	            text_encoder_factory=T5ModelFactory(),
	            dit_factory=DitModelFactory(
	                model_path=f"{self.model_dir}/dit.safetensors",
	                lora_path=self.lora_path,
	                model_dtype="bf16",
	            ),
	            decoder_factory=DecoderModelFactory(
	                model_path=f"{self.model_dir}/decoder.safetensors",
	            ),
	        )

	        if self.num_gpus > 1:
	            assert not self.lora_path, "Lora not supported in multi-GPU mode"
	            assert not self.cpu_offload, "CPU offload not supported in multi-GPU mode"
	            kwargs["world_size"] = self.num_gpus
	        else:
	            kwargs["cpu_offload"] = self.cpu_offload
	            kwargs["decode_type"] = "tiled_spatial"
	            # Fast init and strict loading are only enabled when no LoRA is applied.
	            kwargs["fast_init"] = not self.lora_path
	            kwargs["strict_load"] = not self.lora_path
	            kwargs["decode_args"] = dict(overlap=8)

	        self.pipeline = klass(**kwargs)
	        print(f"🍡 Model loaded successfully with {self.num_gpus} GPUs")

	    @modal.method()
	    def generate(
	        self,
	        prompt: str,
	        negative_prompt: str = "",
	        width: int = 848,
	        height: int = 480,
	        num_frames: int = 163,
	        seed: int = 1710977262,
	        cfg_scale: float = 6.0,
	        num_inference_steps: int = 64,
	    ) -> str:
	        """Generate video based on the prompt and parameters

	        The video is written to the outputs volume as ``output_<timestamp>.mp4``
	        together with a ``.json`` file holding the generation arguments, and the
	        volume is committed afterwards.

	        Returns:
	            The file name (without directory) of the saved video.
	        """

	        print("🍡 Generating video.")

	        import json
	        import os
	        import time

	        import numpy as np

	        from genmo.lib.progress import progress_bar
	        from genmo.lib.utils import save_video
	        from genmo.mochi_preview.pipelines import linear_quadratic_schedule

	        # Create sigma schedule
	        sigma_schedule = linear_quadratic_schedule(num_inference_steps, 0.025)
	        cfg_schedule = [cfg_scale] * num_inference_steps

	        args = {
	            "height": height,
	            "width": width,
	            "num_frames": num_frames,
	            "sigma_schedule": sigma_schedule,
	            "cfg_schedule": cfg_schedule,
	            "num_inference_steps": num_inference_steps,
	            "batch_cfg": False,
	            "prompt": prompt,
	            "negative_prompt": negative_prompt,
	            "seed": seed,
	        }

	        with progress_bar(type="tqdm"):
	            final_frames = self.pipeline(**args)
	            final_frames = final_frames[0]

	        assert isinstance(final_frames, np.ndarray)
	        assert final_frames.dtype == np.float32

	        # Save to mounted volume
	        output_dir = "/outputs"  # Assuming this path exists in the mounted volume
	        os.makedirs(output_dir, exist_ok=True)
	        output_path = os.path.join(output_dir, f"output_{int(time.time())}.mp4")

	        save_video(final_frames, output_path)

	        # Save generation parameters; the context manager guarantees the file is
	        # flushed and closed before the volume is committed below.
	        json_path = os.path.splitext(output_path)[0] + ".json"
	        with open(json_path, "w") as json_file:
	            json.dump(args, json_file, indent=4)

	        print(f"🍡 Video saved to {output_path}")
	        outputs_volume.commit()
	        return os.path.basename(output_path)

	# Local entrypoint for using the MochiLora class
	# Select the lora_path you want to use from the finetunes volume
	# Then run it with:
	# modal run main
	@app.local_entrypoint()
	def main(
	    prompt="A pristine snowglobe featuring a winter scene sits peacefully. The glass begins to crumble into fine powder, as the entire sphere deteriorates into sparkling dust that drifts outward. The fake snow mingles with the crystalline particles, creating a glittering cloud captured in high-speed photography.",
	    negative_prompt="blurry, low quality",
	    width=848,
	    height=480,
	    num_frames=49,  # (num_frames - 1) must be divisible by 6
	    seed=1710977262,
	    cfg_scale=6.0,
	    num_inference_steps=64,
	    lora_path="/finetunes/my_mochi_lora/model_2000.lora.safetensors",
	    cpu_offload=True,
	):
	    """Generate one video remotely with MochiLora and copy it to /tmp/mochi."""
	    # Everything except the model configuration is forwarded to generate().
	    generation_kwargs = {
	        "prompt": prompt,
	        "negative_prompt": negative_prompt,
	        "width": width,
	        "height": height,
	        "num_frames": num_frames,
	        "seed": seed,
	        "cfg_scale": cfg_scale,
	        "num_inference_steps": num_inference_steps,
	    }

	    # lora_path is your lora path inside the finetunes volume
	    model = MochiLora(lora_path=lora_path, cpu_offload=cpu_offload)
	    remote_name = model.generate.remote(**generation_kwargs)

	    # Fetch the generated file from the outputs volume and store it locally.
	    download_dir = Path("/tmp/mochi")
	    download_dir.mkdir(parents=True, exist_ok=True)
	    destination = download_dir / remote_name
	    video_bytes = b"".join(outputs_volume.read_file(remote_name))
	    destination.write_bytes(video_bytes)
	    print(f"🍡 video saved locally at {destination}")