svjack/GenshinImpact_XL_Base
This model is derived from CivitAI.
Acknowledgments
Special thanks to mobeimunan for their contributions to the development of this model.
Supported Characters
The model currently supports the following 73 characters from Genshin Impact:
# Chinese character name -> English character token.
# The example prompts below use these tokens, e.g. ZHONGLI\(genshin impact\).
name_dict = {
    '旅行者女': 'lumine',
    '旅行者男': 'aether',
    '派蒙': 'PAIMON',
    '迪奥娜': 'DIONA',
    '菲米尼': 'FREMINET',
    '甘雨': 'GANYU',
    '凯亚': 'KAEYA',
    '莱依拉': 'LAYLA',
    '罗莎莉亚': 'ROSARIA',
    '七七': 'QIQI',
    '申鹤': 'SHENHE',
    '神里绫华': 'KAMISATO AYAKA',
    '优菈': 'EULA',
    '重云': 'CHONGYUN',
    '夏洛蒂': 'charlotte',
    '莱欧斯利': 'WRIOTHESLEY',
    '艾尔海森': 'ALHAITHAM',
    '柯莱': 'COLLEI',
    '纳西妲': 'NAHIDA',
    '绮良良': 'KIRARA',
    '提纳里': 'TIGHNARI',
    '瑶瑶': 'YAOYAO',
    '珐露珊': 'FARUZAN',
    '枫原万叶': 'KAEDEHARA KAZUHA',
    '琳妮特': 'LYNETTE',
    '流浪者 散兵': 'scaramouche',
    '鹿野院平藏': 'SHIKANOIN HEIZOU',
    '琴': 'JEAN',
    '砂糖': 'SUCROSE',
    '温迪': 'VENTI',
    '魈': 'XIAO',
    '早柚': 'SAYU',
    '安柏': 'AMBER',
    '班尼特': 'BENNETT',
    '迪卢克': 'DILUC',
    '迪西娅': 'DEHYA',
    '胡桃': 'HU TAO',
    '可莉': 'KLEE',
    '林尼': 'LYNEY',
    '托马': 'THOMA',
    '香菱': 'XIANG LING',
    '宵宫': 'YOIMIYA',
    '辛焱': 'XINYAN',
    '烟绯': 'YANFEI',
    '八重神子': 'YAE MIKO',
    '北斗': 'BEIDOU',
    '菲谢尔': 'FISCHL',
    '九条裟罗': 'KUJO SARA',
    '久岐忍': 'KUKI SHINOBU',
    '刻晴': 'KEQING',
    '雷电将军': 'RAIDEN SHOGUN',
    '雷泽': 'RAZOR',
    '丽莎': 'LISA',
    '赛诺': 'CYNO',
    '芙宁娜': 'FURINA',
    '芭芭拉': 'BARBARA',
    '公子 达达利亚': 'TARTAGLIA',
    '坎蒂丝': 'CANDACE',
    '莫娜': 'MONA',
    '妮露': 'NILOU',
    '珊瑚宫心海': 'SANGONOMIYA KOKOMI',
    '神里绫人': 'KAMISATO AYATO',
    '行秋': 'XINGQIU',
    '夜兰': 'YELAN',
    '那维莱特': 'NEUVILLETTE',
    '娜维娅': 'NAVIA',
    '阿贝多': 'ALBEDO',
    '荒泷一斗': 'ARATAKI ITTO',
    '凝光': 'NING GUANG',
    '诺艾尔': 'NOELLE',
    '五郎': 'GOROU',
    '云堇': 'YUN JIN',
    '钟离': 'ZHONGLI',
}
Installation
To use this model, you need to install the following dependencies:
# System packages: git-lfs, ffmpeg and cbm.
sudo apt-get update && sudo apt-get install git-lfs ffmpeg cbm
# Python packages; diffusers, controlnet-aux and moviepy are imported directly by the examples below.
pip install -U diffusers transformers sentencepiece peft controlnet-aux moviepy
Example Usage
Generating an Image of Zhongli
Here's an example of how to generate an image of Zhongli using this model:
from diffusers import StableDiffusionXLPipeline
import torch
# Load the checkpoint in half precision and move the pipeline to the GPU.
pipeline = StableDiffusionXLPipeline.from_pretrained(
    "svjack/GenshinImpact_XL_Base",
    torch_dtype=torch.float16,
).to("cuda")

# Raw string: the prompt contains literal backslashes before the parentheses,
# and "\(" / "\)" are invalid escape sequences in a normal string literal.
prompt = r"solo,ZHONGLI\(genshin impact\),1boy,portrait,upper_body,highres,"
negative_prompt = "nsfw,lowres,(bad),text,error,fewer,extra,missing,worst quality,jpeg artifacts,low quality,watermark,unfinished,displeasing,oldest,early,chromatic aberration,signature,extra digits,artistic error,username,scan,[abstract],"

# The generator is seeded with a fixed value (0) for this call.
image = pipeline(
    prompt=prompt,
    negative_prompt=negative_prompt,
    generator=torch.manual_seed(0),
).images[0]
# Bare expression: only has a visible effect in an interactive/notebook session.
image
image.save("zhongli_1024x1024.png")
钟离
Using Canny ControlNet to Restore 2D Images from 3D Toy Photos
Here's an example of how to use Canny ControlNet to restore 2D images from 3D toy photos:
Genshin Impact 3D Toys
钟离
派蒙
from diffusers import AutoPipelineForText2Image, ControlNetModel
from diffusers.utils import load_image
import torch
from PIL import Image
from controlnet_aux import CannyDetector
# Canny ControlNet weights, loaded in half precision.
controlnet = ControlNetModel.from_pretrained(
    "diffusers/controlnet-canny-sdxl-1.0", torch_dtype=torch.float16
)

# Text-to-image pipeline for the base checkpoint with the ControlNet attached.
pipeline = AutoPipelineForText2Image.from_pretrained(
    "svjack/GenshinImpact_XL_Base",
    controlnet=controlnet,
    torch_dtype=torch.float16,
).to("cuda")
#pipeline.enable_model_cpu_offload()

# --- Zhongli: toy photo -> Canny edge map -> generated image ---
canny = CannyDetector()
# The edge map is written to disk and loaded back so it is also available
# as a file for the grid comparison later in this document.
canny(Image.open("zhongli-cb.jpg")).save("zhongli-cb-canny.jpg")
canny_image = load_image(
    "zhongli-cb-canny.jpg"
)

controlnet_conditioning_scale = 0.5
generator = torch.Generator(device="cpu").manual_seed(1)
images = pipeline(
    # Raw string: "\(" and "\)" are invalid escape sequences in a normal
    # string literal; the backslashes must reach the pipeline unchanged.
    prompt=r"solo,ZHONGLI\(genshin impact\),1boy,portrait,highres",
    controlnet_conditioning_scale=controlnet_conditioning_scale,
    image=canny_image,
    num_inference_steps=50,
    guidance_scale=7.0,
    generator=generator,
).images
# Bare expression: only has a visible effect in an interactive/notebook session.
images[0]
images[0].save("zhongli_trans.png")

# --- Paimon: same steps with a different input, seed and scales ---
canny = CannyDetector()
canny(Image.open("paimon-cb-crop.jpg")).save("paimon-cb-canny.jpg")
canny_image = load_image(
    "paimon-cb-canny.jpg"
)

controlnet_conditioning_scale = 0.7
generator = torch.Generator(device="cpu").manual_seed(3)
images = pipeline(
    # Raw string for the same reason as the Zhongli prompt.
    prompt=r"solo,PAIMON\(genshin impact\),1girl,portrait,highres, bright, shiny, high detail, anime",
    controlnet_conditioning_scale=controlnet_conditioning_scale,
    image=canny_image,
    num_inference_steps=50,
    guidance_scale=8.0,
    generator=generator,
).images
images[0]
images[0].save("paimon_trans.png")
Creating a Grid Image
You can also create a grid image from a list of PIL Image objects:
from PIL import Image
def create_grid_image(image_list, rows, cols, cell_width, cell_height):
    """
    Create a grid image from a list of PIL Image objects.

    Images are placed left to right, top to bottom. Each image is resized to
    exactly ``cell_width`` x ``cell_height``, so its aspect ratio is not
    preserved. Cells that receive no image keep the fill of the new canvas
    (black, Pillow's default for ``Image.new``).

    :param image_list: A list (or other iterable) of PIL Image objects
    :param rows: Number of rows in the grid
    :param cols: Number of columns in the grid
    :param cell_width: Width of each cell in the grid
    :param cell_height: Height of each cell in the grid
    :return: The resulting grid image
    :raises ValueError: If there are more images than grid cells
    """
    images = list(image_list)
    capacity = rows * cols
    # Extra images would be pasted outside the canvas and silently lost,
    # so reject them up front instead of returning an incomplete grid.
    if len(images) > capacity:
        raise ValueError(
            f"got {len(images)} images for a {rows}x{cols} grid "
            f"({capacity} cells)"
        )

    grid_image = Image.new('RGB', (cols * cell_width, rows * cell_height))
    for index, img in enumerate(images):
        row, col = divmod(index, cols)
        resized = img.resize((cell_width, cell_height))
        grid_image.paste(resized, (col * cell_width, row * cell_height))
    return grid_image
# One row of three 512x768 cells per character: toy photo, Canny edge map, generated image.
create_grid_image(
    [
        Image.open("zhongli-cb.jpg"),
        Image.open("zhongli-cb-canny.jpg"),
        Image.open("zhongli_trans.png"),
    ],
    1,
    3,
    512,
    768,
)
create_grid_image(
    [
        Image.open("paimon-cb-crop.jpg"),
        Image.open("paimon-cb-canny.jpg"),
        Image.open("paimon_trans.png"),
    ],
    1,
    3,
    512,
    768,
)
This will create a grid image showing the original, Canny edge detection, and transformed images side by side.
The images below are shown in this order: (Genshin Impact Toy / Canny Image / Genshin Impact Restored 2D Image)
钟离
派蒙
Generating an Animation of Zhongli
Here's an example of how to generate an animation of Zhongli using the AnimateDiffSDXLPipeline:
import torch
from diffusers.models import MotionAdapter
from diffusers import AnimateDiffSDXLPipeline, DDIMScheduler
from diffusers.utils import export_to_gif
# Motion adapter weights for AnimateDiff on SDXL, loaded in half precision.
adapter = MotionAdapter.from_pretrained(
    "a-r-r-o-w/animatediff-motion-adapter-sdxl-beta", torch_dtype=torch.float16
)

model_id = "svjack/GenshinImpact_XL_Base"
# DDIM scheduler built from the checkpoint's "scheduler" subfolder, with the
# listed settings overriding the stored configuration.
scheduler = DDIMScheduler.from_pretrained(
    model_id,
    subfolder="scheduler",
    clip_sample=False,
    timestep_spacing="linspace",
    beta_schedule="linear",
    steps_offset=1,
)
pipe = AnimateDiffSDXLPipeline.from_pretrained(
    model_id,
    motion_adapter=adapter,
    scheduler=scheduler,
    torch_dtype=torch.float16,
).to("cuda")

# enable memory savings
pipe.enable_vae_slicing()
pipe.enable_vae_tiling()

output = pipe(
    # Raw string: "\(" and "\)" are invalid escape sequences in a normal
    # string literal; the backslashes must reach the pipeline unchanged.
    prompt=r"solo,ZHONGLI\(genshin impact\),1boy,portrait,upper_body,highres, keep eyes forward.",
    negative_prompt="low quality, worst quality",
    num_inference_steps=20,
    guidance_scale=8,
    width=1024,
    height=1024,
    num_frames=16,
    generator=torch.manual_seed(4),
)

# Frames of the first (and only requested) animation.
frames = output.frames[0]
export_to_gif(frames, "zhongli_animation.gif")

from diffusers.utils import export_to_video
export_to_video(frames, "zhongli_animation.mp4")

from IPython import display
display.Video("zhongli_animation.mp4", width=512, height=512)
Use AutoPipelineForImage2Image to enhance the output:
from moviepy.editor import VideoFileClip
from PIL import Image

# Decode every frame of the generated video into a PIL image, then close the
# clip so the underlying video reader is released.
clip = VideoFileClip("zhongli_animation.mp4")
try:
    frames = list(map(Image.fromarray, clip.iter_frames()))
finally:
    clip.close()

from diffusers import AutoPipelineForText2Image, AutoPipelineForImage2Image
from diffusers.utils import load_image, make_image_grid
import torch

pipeline_text2image = AutoPipelineForText2Image.from_pretrained(
    "svjack/GenshinImpact_XL_Base",
    torch_dtype=torch.float16,
)
# use from_pipe to avoid consuming additional memory when loading a checkpoint
pipeline = AutoPipelineForImage2Image.from_pipe(pipeline_text2image).to("cuda")

from tqdm import tqdm

# The prompt is the same for every frame, so it is defined once outside the loop.
# Raw string: "\(" and "\)" are invalid escape sequences in a normal string literal.
prompt = r"solo,ZHONGLI\(genshin impact\),1boy,portrait,upper_body,highres, keep eyes forward."
req = []
for init_image in tqdm(frames):
    # Re-render each frame with image-to-image, using the frame as the init image.
    image = pipeline(prompt, image=init_image, strength=0.8, guidance_scale=10.5).images[0]
    req.append(image)

from diffusers.utils import export_to_video
export_to_video(req, "zhongli_animation_im2im.mp4")

from IPython import display
display.Video("zhongli_animation_im2im.mp4", width=512, height=512)
Enhancing Animation with RIFE
To enhance the animation using RIFE (Real-Time Intermediate Flow Estimation):
# Clone Practical-RIFE, enter the repository and install its requirements.
git clone https://github.com/svjack/Practical-RIFE && cd Practical-RIFE && pip install -r requirements.txt
# NOTE(review): --video is a relative path and the shell is now inside Practical-RIFE — confirm the mp4 is reachable from there.
python inference_video.py --multi=128 --video=zhongli_animation_im2im.mp4
from moviepy.editor import VideoFileClip
# Presumably the file written by the inference_video.py run above — confirm its name and location.
clip = VideoFileClip("zhongli_animation_im2im_128X_1280fps.mp4")
def speed_change_video(video_clip, speed_factor, output_path):
    """
    Write ``video_clip`` to ``output_path``, optionally changing its speed.

    :param video_clip: Clip object providing ``speedx`` and
        ``write_videofile`` (e.g. a moviepy ``VideoFileClip``)
    :param speed_factor: Factor passed to ``speedx``; with a factor of 1 the
        clip is written unchanged
    :param output_path: Path of the output file, encoded with libx264
    :raises ValueError: If ``speed_factor`` is not greater than zero
    """
    if speed_factor <= 0:
        raise ValueError(f"speed_factor must be positive, got {speed_factor!r}")

    # A factor of 1 needs no retiming, so the clip is written as it is;
    # any other factor is applied before writing.
    if speed_factor != 1:
        video_clip = video_clip.speedx(speed_factor)
    video_clip.write_videofile(output_path, codec="libx264")
# Write a copy of the interpolated clip with a speed factor of 0.05.
speed_change_video(
    clip,
    0.05,
    "zhongli_animation_im2im_128X_1280fps_wrt.mp4",
)

# Re-open that copy, set its duration to 10 and write the final file.
(
    VideoFileClip("zhongli_animation_im2im_128X_1280fps_wrt.mp4")
    .set_duration(10)
    .write_videofile(
        "zhongli_animation_im2im_128X_1280fps_wrt_10s.mp4",
        codec="libx264",
    )
)

from IPython import display
display.Video(
    "zhongli_animation_im2im_128X_1280fps_wrt_10s.mp4",
    width=512,
    height=512,
)
Merging Videos Horizontally
You can merge two videos horizontally using the following function:
from moviepy.editor import VideoFileClip, CompositeVideoClip
def merge_videos_horizontally(video_path1, video_path2, output_video_path):
    """
    Place two videos side by side and write the result to a new file.

    The shorter video is looped until it matches the duration of the longer
    one. The output is as wide as both inputs together and as tall as the
    taller input; the first video is positioned left and the second right,
    both vertically centred.

    :param video_path1: Path of the video shown on the left
    :param video_path2: Path of the video shown on the right
    :param output_video_path: Path of the merged output video (libx264)
    """
    # Context managers close both source clips (and their readers) even if
    # compositing or encoding fails.
    with VideoFileClip(video_path1) as source1, VideoFileClip(video_path2) as source2:
        clip1, clip2 = source1, source2

        max_duration = max(clip1.duration, clip2.duration)
        if clip1.duration < max_duration:
            clip1 = clip1.loop(duration=max_duration)
        if clip2.duration < max_duration:
            clip2 = clip2.loop(duration=max_duration)

        total_width = clip1.w + clip2.w
        total_height = max(clip1.h, clip2.h)

        final_clip = CompositeVideoClip(
            [
                clip1.set_position(("left", "center")),
                clip2.set_position(("right", "center")),
            ],
            size=(total_width, total_height),
        )
        final_clip.write_videofile(output_video_path, codec='libx264')

    print(f"Merged video saved to {output_video_path}")
# Example usage
video_path1 = "zhongli_animation.mp4"  # path of the first video file
video_path2 = "zhongli_animation_im2im_128X_1280fps_wrt_10s.mp4"  # path of the second video file
output_video_path = "zhongli_inter_video_im2im_compare.mp4"  # path of the output video
merge_videos_horizontally(
    video_path1=video_path1,
    video_path2=video_path2,
    output_video_path=output_video_path,
)
Left is zhongli_animation.mp4 (By AnimateDiffSDXLPipeline), Right is zhongli_animation_im2im_128X_1280fps_wrt_10s.mp4 (By AutoPipelineForImage2Image + Practical-RIFE)
钟离