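<!--
Text that preceded the doctype in this copy of the file (appears to be hosting-page
metadata rather than slide content; kept here so it no longer renders or forces quirks mode):
JosephusCheung's picture
Upload 121 files
8832500
raw
history blame
4.66 kB
-->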
<!DOCTYPE html>
<html lang="en">
<head>
<!-- Encoding declaration comes first so it is seen before any other head content. -->
<meta charset="utf-8" />
<meta http-equiv="X-UA-Compatible" content="IE=Edge" />
<!-- Document title, taken from this slide's heading text. -->
<title>Problems and Proposed Solutions</title>
</head>
<body style="margin: 0;">
<div id="p9" style="overflow: hidden; position: relative; background-color: white; width: 2200px; height: 1237px;">
<!-- Begin shared CSS values -->
<style class="shared-css" type="text/css">
/* Base rule for every absolutely positioned text run (elements with class "t"). */
.t {
  position: absolute;
  z-index: 2;                    /* stacks above the z-index 1 page overlay */
  transform-origin: bottom left; /* any transform on a run pivots on its bottom-left corner */
  overflow: visible;
  white-space: pre;              /* keep the spaces inside each run exactly as written */
  line-height: 1.5;
}

/* Wrapper around all text runs: preserve whitespace by default. */
.text-container {
  white-space: pre;
}

/* Engines that recognise -webkit-touch-callout (in practice iOS WebKit)
   get normal whitespace handling on the wrapper instead. */
@supports (-webkit-touch-callout: none) {
  .text-container {
    white-space: normal;
  }
}
</style>
<!-- End shared CSS values -->
<!-- Begin inline CSS -->
<style type="text/css">
/* Per-run placement for page 9. Each id is one text span; "left" and "bottom"
   are offsets inside the page box, and letter-spacing is tuned per run. */

/* Large heading runs at the left edge (t1-t4). */
#t1_9 { left: 0px; bottom: 1054px; letter-spacing: -0.23px; }
#t2_9 { left: 0px; bottom: 917px;  letter-spacing: -0.24px; }
#t3_9 { left: 0px; bottom: 781px;  letter-spacing: -0.24px; }
#t4_9 { left: 0px; bottom: 645px;  letter-spacing: -0.21px; }

/* Runs in the upper right-hand area (t5-td). */
#t5_9 { left: 565px; bottom: 998px;  letter-spacing: 0.2px; }
#t6_9 { left: 565px; bottom: 934px;  letter-spacing: 0.2px; }
#t7_9 { left: 565px; bottom: 870px;  letter-spacing: 0.19px; }
#t8_9 { left: 609px; bottom: 1080px; letter-spacing: 0.15px; }
#t9_9 { left: 565px; bottom: 731px;  letter-spacing: 0.21px; }
#ta_9 { left: 861px; bottom: 731px;  letter-spacing: 0.23px; }
#tb_9 { left: 565px; bottom: 667px;  letter-spacing: 0.2px; }
#tc_9 { left: 823px; bottom: 667px;  letter-spacing: 0.19px; }
#td_9 { left: 565px; bottom: 603px;  letter-spacing: 0.21px; }

/* Runs in the lower area (te-tj). */
#te_9 { left: 208px; bottom: 456px; letter-spacing: 0.21px; }
#tf_9 { left: 504px; bottom: 456px; letter-spacing: 0.2px; }
#tg_9 { left: 208px; bottom: 391px; letter-spacing: 0.22px; }
#th_9 { left: 208px; bottom: 327px; letter-spacing: 0.22px; }
#ti_9 { left: 208px; bottom: 263px; letter-spacing: 0.21px; }
#tj_9 { left: 208px; bottom: 199px; letter-spacing: 0.21px; }

/* Type styles: size, embedded font family and colour, shared by the runs above. */
.s1_9 { font-size: 115px; font-family: IBMPlexSans_2d;        color: #000; }
.s2_9 { font-size: 115px; font-family: IBMPlexSans-Bold_2l;   color: #000; }
.s3_9 { font-size: 53px;  font-family: IBMPlexSans-Italic_2o; color: #000; }
.s4_9 { font-size: 99px;  font-family: IBMPlexSans-Bold_2l;   color: #000; }
.s5_9 { font-size: 53px;  font-family: IBMPlexSans_2d;        color: #000; }
</style>
<!-- End inline CSS -->
<!-- Begin embedded font definitions -->
<style id="fonts9" type="text/css">
/* Embedded WOFF fonts referenced by the .s*_9 type styles on this page.
   Each family name matches its file name under fonts/. */

/* Bold face */
@font-face {
  font-family: IBMPlexSans-Bold_2l;
  src: url("fonts/IBMPlexSans-Bold_2l.woff") format("woff");
}

/* Italic face */
@font-face {
  font-family: IBMPlexSans-Italic_2o;
  src: url("fonts/IBMPlexSans-Italic_2o.woff") format("woff");
}

/* Regular face */
@font-face {
  font-family: IBMPlexSans_2d;
  src: url("fonts/IBMPlexSans_2d.woff") format("woff");
}
</style>
<!-- End embedded font definitions -->
<!-- Begin page background -->
<!-- Transparent full-page layer at z-index 1, i.e. below the z-index 2 text runs; text selection is disabled on it.
     The standard user-select property follows the prefixed one so engines that only honour the unprefixed form behave the same. -->
<div id="pg9Overlay" style="width:100%; height:100%; position:absolute; z-index:1; background-color:rgba(0,0,0,0); -webkit-user-select: none; user-select: none;"></div>
<!-- Page background: the graphic for this page is loaded from 9/9.svg at the page's pixel size; selection is disabled on the wrapper. -->
<div id="pg9" style="-webkit-user-select: none; user-select: none;"><object width="2200" height="1237" data="9/9.svg" type="image/svg+xml" id="pdf9" style="width:2200px; height:1237px; -moz-transform:scale(1); z-index: 0;"></object></div>
<!-- End page background -->
<!-- Begin text definitions (Positioned/styled in CSS) -->
<div class="text-container"><span id="t1_9" class="t s1_9">Problems </span>
<span id="t2_9" class="t s1_9">and </span>
<span id="t3_9" class="t s2_9">Proposed </span>
<span id="t4_9" class="t s2_9">Solutions </span>
<span id="t5_9" class="t s3_9">Fine-tuning a diffusion model on a small set of subject images </span>
<span id="t6_9" class="t s3_9">causes it to lose the ability to generate generic images of the same </span>
<span id="t7_9" class="t s3_9">class and forget the class-specific prior. </span>
<span id="t8_9" class="t s4_9">1.Language Drift </span>
<span id="t9_9" class="t s5_9">Solution 1 </span><span id="ta_9" class="t s5_9">Dreambooth uses the model's own generated samples </span>
<span id="tb_9" class="t s5_9">by adding </span><span id="tc_9" class="t s5_9">a relative weight of the prior-preservation loss. </span>
<span id="td_9" class="t s5_9">However, the ratio of prior-preservation is not easy to determine. </span>
<span id="te_9" class="t s5_9">Solution 2 </span><span id="tf_9" class="t s5_9">This is a method that requires a lot of GPU time - during the regular </span>
<span id="tg_9" class="t s5_9">training process, we add auto-generated images from the current model with </span>
<span id="th_9" class="t s5_9">prompt of a single word, with words chosen from a pre-estimated word frequency </span>
<span id="ti_9" class="t s5_9">list randomly according to a certain ratio (we chose our word list from Danbooru </span>
<span id="tj_9" class="t s5_9">Tags). To avoid overfitting, each auto-generated image is used only once. </span></div>
<!-- End text definitions -->
</div>
</body>
</html>