svjack/GenshinImpact_XL_Base

This model is derived from CivitAI.

Acknowledgments

Special thanks to mobeimunan for their contributions to the development of this model.

Supported Characters

The model currently supports the following 73 characters from Genshin Impact:

# Lookup table from each supported character's Chinese name (key) to an
# English name string (value).
# NOTE(review): the values look like the character tags used in prompts (the
# usage examples write "ZHONGLI" and "PAIMON" in front of "\(genshin impact\)").
# A few values ('lumine', 'aether', 'charlotte', 'scaramouche') are lowercase
# while the rest are uppercase — confirm whether the casing is significant.
name_dict = {
    '旅行者女': 'lumine',  # Traveler (female)
    '旅行者男': 'aether',  # Traveler (male)
    '派蒙': 'PAIMON',
    '迪奥娜': 'DIONA',
    '菲米尼': 'FREMINET',
    '甘雨': 'GANYU',
    '凯亚': 'KAEYA',
    '莱依拉': 'LAYLA',
    '罗莎莉亚': 'ROSARIA',
    '七七': 'QIQI',
    '申鹤': 'SHENHE',
    '神里绫华': 'KAMISATO AYAKA',
    '优菈': 'EULA',
    '重云': 'CHONGYUN',
    '夏洛蒂': 'charlotte',
    '莱欧斯利': 'WRIOTHESLEY',
    '艾尔海森': 'ALHAITHAM',
    '柯莱': 'COLLEI',
    '纳西妲': 'NAHIDA',
    '绮良良': 'KIRARA',
    '提纳里': 'TIGHNARI',
    '瑶瑶': 'YAOYAO',
    '珐露珊': 'FARUZAN',
    '枫原万叶': 'KAEDEHARA KAZUHA',
    '琳妮特': 'LYNETTE',
    '流浪者 散兵': 'scaramouche',  # key holds two names (Wanderer / Scaramouche) separated by a space
    '鹿野院平藏': 'SHIKANOIN HEIZOU',
    '琴': 'JEAN',
    '砂糖': 'SUCROSE',
    '温迪': 'VENTI',
    '魈': 'XIAO',
    '早柚': 'SAYU',
    '安柏': 'AMBER',
    '班尼特': 'BENNETT',
    '迪卢克': 'DILUC',
    '迪西娅': 'DEHYA',
    '胡桃': 'HU TAO',
    '可莉': 'KLEE',
    '林尼': 'LYNEY',
    '托马': 'THOMA',
    '香菱': 'XIANG LING',
    '宵宫': 'YOIMIYA',
    '辛焱': 'XINYAN',
    '烟绯': 'YANFEI',
    '八重神子': 'YAE MIKO',
    '北斗': 'BEIDOU',
    '菲谢尔': 'FISCHL',
    '九条裟罗': 'KUJO SARA',
    '久岐忍': 'KUKI SHINOBU',
    '刻晴': 'KEQING',
    '雷电将军': 'RAIDEN SHOGUN',
    '雷泽': 'RAZOR',
    '丽莎': 'LISA',
    '赛诺': 'CYNO',
    '芙宁娜': 'FURINA',
    '芭芭拉': 'BARBARA',
    '公子 达达利亚': 'TARTAGLIA',  # key holds two names (Childe / Tartaglia) separated by a space
    '坎蒂丝': 'CANDACE',
    '莫娜': 'MONA',
    '妮露': 'NILOU',
    '珊瑚宫心海': 'SANGONOMIYA KOKOMI',
    '神里绫人': 'KAMISATO AYATO',
    '行秋': 'XINGQIU',
    '夜兰': 'YELAN',
    '那维莱特': 'NEUVILLETTE',
    '娜维娅': 'NAVIA',
    '阿贝多': 'ALBEDO',
    '荒泷一斗': 'ARATAKI ITTO',
    '凝光': 'NING GUANG',
    '诺艾尔': 'NOELLE',
    '五郎': 'GOROU',
    '云堇': 'YUN JIN',
    '钟离': 'ZHONGLI'
}

Installation

To use this model, you need to install the following dependencies:

# System packages, installed through apt (needs sudo): refresh the package index, then install.
sudo apt-get update && sudo apt-get install git-lfs ffmpeg cbm
# Python dependencies; -U upgrades any versions that are already installed.
pip install -U diffusers transformers sentencepiece peft controlnet-aux moviepy

Example Usage

Generating an Image of Zhongli

Here's an example of how to generate an image of Zhongli using this model:

from diffusers import StableDiffusionXLPipeline
import torch

# Load the checkpoint in half precision and move the pipeline to the GPU.
pipeline = StableDiffusionXLPipeline.from_pretrained(
    "svjack/GenshinImpact_XL_Base",
    torch_dtype=torch.float16
).to("cuda")

# Raw string: the backslashes before the parentheses are part of the prompt
# text. In a normal string literal "\(" is an invalid escape sequence, which
# Python reports with a warning and plans to reject in a future version.
prompt = r"solo,ZHONGLI\(genshin impact\),1boy,portrait,upper_body,highres,"
negative_prompt = "nsfw,lowres,(bad),text,error,fewer,extra,missing,worst quality,jpeg artifacts,low quality,watermark,unfinished,displeasing,oldest,early,chromatic aberration,signature,extra digits,artistic error,username,scan,[abstract],"
image = pipeline(
    prompt=prompt,
    negative_prompt=negative_prompt,
    generator=torch.manual_seed(0),  # fixed seed so the example is repeatable
).images[0]
image  # bare expression: displays the image when run as a notebook cell
image.save("zhongli_1024x1024.png")

钟离

Using Canny ControlNet to Restore 2D Images from 3D Toy Photos

Here's an example of how to use Canny ControlNet to restore 2D images from 3D toy photos:

Genshin Impact 3D Toys

钟离

派蒙

from diffusers import AutoPipelineForText2Image, ControlNetModel
from diffusers.utils import load_image
import torch
from PIL import Image
from controlnet_aux import CannyDetector

# Canny ControlNet weights for SDXL, loaded in half precision.
controlnet = ControlNetModel.from_pretrained(
    "diffusers/controlnet-canny-sdxl-1.0",
    torch_dtype=torch.float16,
)

# Build the text-to-image pipeline from the Genshin Impact checkpoint with the
# ControlNet attached, then move it to the GPU as a separate step.
pipeline = AutoPipelineForText2Image.from_pretrained(
    "svjack/GenshinImpact_XL_Base",
    torch_dtype=torch.float16,
    controlnet=controlnet,
)
pipeline = pipeline.to("cuda")
# Left disabled in this example:
#pipeline.enable_model_cpu_offload()

# Run the Canny detector on the toy photo, save the edge map to disk, then
# load that file back as the conditioning image for the pipeline.
canny = CannyDetector()
canny(Image.open("zhongli-cb.jpg")).save("zhongli-cb-canny.jpg")
canny_image = load_image(
    "zhongli-cb-canny.jpg"
)

controlnet_conditioning_scale = 0.5
generator = torch.Generator(device="cpu").manual_seed(1)  # fixed seed for repeatability
images = pipeline(
    # Raw string: the backslashes are part of the prompt; "\(" in a normal
    # string literal is an invalid escape sequence and triggers a warning.
    prompt=r"solo,ZHONGLI\(genshin impact\),1boy,portrait,highres",
    controlnet_conditioning_scale=controlnet_conditioning_scale,
    image=canny_image,
    num_inference_steps=50,
    guidance_scale=7.0,
    generator=generator,
).images
images[0]  # bare expression: displays the image when run as a notebook cell
images[0].save("zhongli_trans.png")

# Same steps for the Paimon toy photo: detect edges, save the edge map, then
# load it back as the conditioning image.
canny = CannyDetector()
canny(Image.open("paimon-cb-crop.jpg")).save("paimon-cb-canny.jpg")
canny_image = load_image(
    "paimon-cb-canny.jpg"
)

controlnet_conditioning_scale = 0.7
generator = torch.Generator(device="cpu").manual_seed(3)  # fixed seed for repeatability
images = pipeline(
    # Raw string: the backslashes are part of the prompt; "\(" in a normal
    # string literal is an invalid escape sequence and triggers a warning.
    prompt=r"solo,PAIMON\(genshin impact\),1girl,portrait,highres, bright, shiny, high detail, anime",
    controlnet_conditioning_scale=controlnet_conditioning_scale,
    image=canny_image,
    num_inference_steps=50,
    guidance_scale=8.0,
    generator=generator,
).images
images[0]  # bare expression: displays the image when run as a notebook cell
images[0].save("paimon_trans.png")

Creating a Grid Image

You can also create a grid image from a list of PIL Image objects:

from PIL import Image

def create_grid_image(image_list, rows, cols, cell_width, cell_height):
    """
    Tile PIL images into a single RGB image arranged as a rows x cols grid.

    Images are placed in list order, left to right and then top to bottom.
    Every image is resized to exactly (cell_width, cell_height) before it is
    pasted into its cell.

    :param image_list: A list of PIL Image objects
    :param rows: Number of rows in the grid
    :param cols: Number of columns in the grid
    :param cell_width: Width of each cell in the grid
    :param cell_height: Height of each cell in the grid
    :return: The resulting grid image
    """
    canvas_size = (cols * cell_width, rows * cell_height)
    grid_image = Image.new('RGB', canvas_size)
    cell_size = (cell_width, cell_height)

    for index, picture in enumerate(image_list):
        # Position in the grid: quotient is the row, remainder is the column.
        grid_row, grid_col = divmod(index, cols)
        top_left = (grid_col * cell_width, grid_row * cell_height)
        grid_image.paste(picture.resize(cell_size), top_left)

    return grid_image

# Zhongli: toy photo, Canny edge map and generated image in a 1 x 3 grid of 512 x 768 cells.
zhongli_panels = [
    Image.open("zhongli-cb.jpg"),
    Image.open("zhongli-cb-canny.jpg"),
    Image.open("zhongli_trans.png"),
]
create_grid_image(zhongli_panels, 1, 3, 512, 768)

# Paimon: the same three-panel layout.
paimon_panels = [
    Image.open("paimon-cb-crop.jpg"),
    Image.open("paimon-cb-canny.jpg"),
    Image.open("paimon_trans.png"),
]
create_grid_image(paimon_panels, 1, 3, 512, 768)

This will create a grid image showing the original, Canny edge detection, and transformed images side by side.

The images below are shown in this order: Genshin Impact toy photo / Canny edge image / Genshin Impact restored 2D image.

钟离

派蒙

Generating an Animation of Zhongli

Here's an example of how to generate an animation of Zhongli using the AnimateDiffSDXLPipeline:

import torch
from diffusers.models import MotionAdapter
from diffusers import AnimateDiffSDXLPipeline, DDIMScheduler
from diffusers.utils import export_to_gif

model_id = "svjack/GenshinImpact_XL_Base"

# Motion adapter checkpoint, loaded in half precision.
adapter = MotionAdapter.from_pretrained(
    "a-r-r-o-w/animatediff-motion-adapter-sdxl-beta",
    torch_dtype=torch.float16,
)

# DDIM scheduler loaded from the base model's "scheduler" subfolder, with the
# settings below passed explicitly.
scheduler = DDIMScheduler.from_pretrained(
    model_id,
    subfolder="scheduler",
    beta_schedule="linear",
    clip_sample=False,
    steps_offset=1,
    timestep_spacing="linspace",
)

# Assemble the animation pipeline, then move it to the GPU as a separate step.
pipe = AnimateDiffSDXLPipeline.from_pretrained(
    model_id,
    torch_dtype=torch.float16,
    motion_adapter=adapter,
    scheduler=scheduler,
)
pipe = pipe.to("cuda")

# Memory-saving options for the VAE.
pipe.enable_vae_tiling()
pipe.enable_vae_slicing()

output = pipe(
    # Raw string: the backslashes are part of the prompt; "\(" in a normal
    # string literal is an invalid escape sequence and triggers a warning.
    prompt=r"solo,ZHONGLI\(genshin impact\),1boy,portrait,upper_body,highres, keep eyes forward.",
    negative_prompt="low quality, worst quality",
    num_inference_steps=20,
    guidance_scale=8,
    width=1024,
    height=1024,
    num_frames=16,
    generator=torch.manual_seed(4),  # fixed seed so the example is repeatable
)
# frames[0] is the frame sequence of the first (and only) generated animation.
frames = output.frames[0]
export_to_gif(frames, "zhongli_animation.gif")

# Also write the same frames as an MP4 and show it inline (IPython/Jupyter).
from diffusers.utils import export_to_video
export_to_video(frames, "zhongli_animation.mp4")
from IPython import display
display.Video("zhongli_animation.mp4", width=512, height=512)

Use AutoPipelineForImage2Image to enhance output:

from moviepy.editor import VideoFileClip
from PIL import Image
# Decode every frame of the generated video into a PIL image. The context
# manager closes the clip once the frames are held in memory, so the video
# reader it opened is not left running.
with VideoFileClip("zhongli_animation.mp4") as clip:
    frames = [Image.fromarray(frame) for frame in clip.iter_frames()]

from diffusers import AutoPipelineForText2Image, AutoPipelineForImage2Image
from diffusers.utils import load_image, make_image_grid
import torch

# Load the checkpoint once as a text-to-image pipeline (half precision).
pipeline_text2image = AutoPipelineForText2Image.from_pretrained(
    "svjack/GenshinImpact_XL_Base",
    torch_dtype=torch.float16,
)

# from_pipe builds the image-to-image pipeline from the one already loaded, to
# avoid consuming additional memory for a second checkpoint load; the result is
# then moved to the GPU.
pipeline = AutoPipelineForImage2Image.from_pipe(pipeline_text2image)
pipeline = pipeline.to("cuda")

from tqdm import tqdm
# The prompt is identical for every frame, so it is defined once outside the
# loop. Raw string: the backslashes are part of the prompt; "\(" in a normal
# string literal is an invalid escape sequence and triggers a warning.
prompt = r"solo,ZHONGLI\(genshin impact\),1boy,portrait,upper_body,highres, keep eyes forward."

req = []  # image-to-image result for each input frame, in frame order
for init_image in tqdm(frames):
    image = pipeline(prompt, image=init_image, strength=0.8, guidance_scale=10.5).images[0]
    req.append(image)

# Write the processed frames as an MP4 and show it inline (IPython/Jupyter).
from diffusers.utils import export_to_video
export_to_video(req, "zhongli_animation_im2im.mp4")
from IPython import display
display.Video("zhongli_animation_im2im.mp4", width=512, height=512)

Enhancing Animation with RIFE

To enhance the animation using RIFE (Real-Time Intermediate Flow Estimation):

# Clone the Practical-RIFE repository, enter it and install its Python requirements.
git clone https://github.com/svjack/Practical-RIFE && cd Practical-RIFE && pip install -r requirements.txt
# Run the interpolation script on the image-to-image video with --multi=128.
# NOTE(review): the previous command leaves the shell inside Practical-RIFE/, so the
# relative --video path is resolved from there — confirm where the .mp4 must be placed.
python inference_video.py --multi=128 --video=zhongli_animation_im2im.mp4

from moviepy.editor import VideoFileClip
# Open the interpolated video.
# NOTE(review): presumably this is the file written by the RIFE command run with
# --multi=128 — confirm the exact file name and its location.
clip = VideoFileClip("zhongli_animation_im2im_128X_1280fps.mp4")

def speed_change_video(video_clip, speed_factor, output_path):
    """
    Write video_clip to output_path, re-timed by speed_factor.

    :param video_clip: Clip object that provides speedx() and write_videofile()
    :param speed_factor: Factor passed to speedx(); a value of 1 writes the
        clip unchanged
    :param output_path: Path of the video file to write
    """
    if speed_factor == 1:
        # A factor of 1 needs no re-timing: write the original clip as is.
        output_clip = video_clip
    else:
        # Otherwise let the clip adjust its own speed by the given factor.
        output_clip = video_clip.speedx(speed_factor)
    output_clip.write_videofile(output_path, codec="libx264")

# Re-time the interpolated clip with a speed factor of 0.05 and write the result to a new file.
speed_change_video(clip, 0.05, "zhongli_animation_im2im_128X_1280fps_wrt.mp4")

# Reopen the re-timed file, set its duration to 10 and write that version to a separate file.
VideoFileClip("zhongli_animation_im2im_128X_1280fps_wrt.mp4").set_duration(10).write_videofile("zhongli_animation_im2im_128X_1280fps_wrt_10s.mp4", codec="libx264")
from IPython import display
# Show the resulting file inline at 512 x 512 (IPython/Jupyter).
display.Video("zhongli_animation_im2im_128X_1280fps_wrt_10s.mp4", width=512, height=512)

Merging Videos Horizontally

You can merge two videos horizontally using the following function:

from moviepy.editor import VideoFileClip, CompositeVideoClip

def merge_videos_horizontally(video_path1, video_path2, output_video_path):
    """
    Place two videos side by side and write the combined video to a file.

    The first video is positioned on the left and the second on the right,
    both vertically centered. The shorter video is looped so that it lasts as
    long as the longer one. The output frame is as wide as both videos
    together and as tall as the taller of the two.

    :param video_path1: Path of the video placed on the left
    :param video_path2: Path of the video placed on the right
    :param output_video_path: Path the merged video is written to
    """
    # Context managers close both source clips when the block exits, including
    # when composing or encoding raises, so their readers are not leaked.
    with VideoFileClip(video_path1) as source1, VideoFileClip(video_path2) as source2:
        max_duration = max(source1.duration, source2.duration)

        # Loop whichever clip is shorter so neither side ends early.
        clip1 = source1
        clip2 = source2
        if clip1.duration < max_duration:
            clip1 = clip1.loop(duration=max_duration)
        if clip2.duration < max_duration:
            clip2 = clip2.loop(duration=max_duration)

        total_width = clip1.w + clip2.w
        total_height = max(clip1.h, clip2.h)

        final_clip = CompositeVideoClip([
            clip1.set_position(("left", "center")),
            clip2.set_position(("right", "center"))
        ], size=(total_width, total_height))

        # Encode while the source clips are still open.
        final_clip.write_videofile(output_video_path, codec='libx264')

    print(f"Merged video saved to {output_video_path}")

# Example usage
video_path1 = "zhongli_animation.mp4"  # path of the first video file
video_path2 = "zhongli_animation_im2im_128X_1280fps_wrt_10s.mp4"  # path of the second video file
output_video_path = "zhongli_inter_video_im2im_compare.mp4"  # path of the output video
merge_videos_horizontally(video_path1, video_path2, output_video_path)

Left: zhongli_animation.mp4 (generated by AnimateDiffSDXLPipeline). Right: zhongli_animation_im2im_128X_1280fps_wrt_10s.mp4 (enhanced with AutoPipelineForImage2Image and Practical-RIFE).

钟离