Remove WandB integration from hf_ui.py and run_modal_from_hf.py: drop all references to WandB logging and the related Modal secret to streamline the training process.
Files changed:
- hf_ui.py (+0 −4)
- run_modal_from_hf.py (+1 −21)
hf_ui.py CHANGED

@@ -115,7 +115,6 @@ def start_training(
     use_more_advanced_options,
     more_advanced_options,
     push_to_hub,
-    use_wandb,
 ):
     print("Starting training from gradio app")
 
@@ -228,7 +227,6 @@ def start_training(
     # add wandb if needed
     config['config']['process'][0]['logging'] = {
         "log_every": 10,
-        "use_wandb": use_wandb,
         "verbose": False
     }
 
@@ -369,7 +367,6 @@ with gr.Blocks(theme=theme, css=css) as demo:
 
     with gr.Row():
         push_to_hub = gr.Checkbox(label="Push to Hub", value=True)
-        use_wandb = gr.Checkbox(label="Use WandB", value=False)
     start = gr.Button("Start training")
     output_components.append(start)
     progress_area = gr.Markdown("")
@@ -408,7 +405,6 @@ with gr.Blocks(theme=theme, css=css) as demo:
            use_more_advanced_options,
            more_advanced_options,
            push_to_hub,
-           use_wandb
        ],
        outputs=progress_area,
    )
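For context on why the checkbox, the callback parameter, and the `inputs` entry all change together: Gradio passes the components listed in `inputs` positionally to the callback, so the three removals have to stay in sync. A minimal sketch of that wiring, with a reduced illustrative signature rather than the app's real one:

    import gradio as gr

    def start_training(push_to_hub):
        # Illustrative reduced signature; the real function takes many more inputs.
        return f"Starting training (push_to_hub={push_to_hub})"

    with gr.Blocks() as demo:
        push_to_hub = gr.Checkbox(label="Push to Hub", value=True)
        start = gr.Button("Start training")
        progress_area = gr.Markdown("")
        # Components in `inputs` are matched to the callback's parameters
        # by position, so removing use_wandb from the UI also requires
        # removing it from the signature and from this list.
        start.click(fn=start_training, inputs=[push_to_hub], outputs=progress_area)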
run_modal_from_hf.py CHANGED

@@ -23,7 +23,6 @@ sys.path.insert(0, "/root/ai-toolkit")
 os.environ['DISABLE_TELEMETRY'] = 'YES'
 # Declare secrets
 hf_secret = modal.Secret.from_name("huggingface-secret")
-wandb_secret = modal.Secret.from_name("wandb-secret")
 
 # define the volume for storing model outputs, using "creating volumes lazily": https://modal.com/docs/guide/volumes
 # you will find your model, samples and optimizer stored in: https://modal.com/storage/your-username/main/flux-lora-models
@@ -93,7 +92,6 @@ if os.environ.get("DEBUG_TOOLKIT", "0") == "1":
 
 import argparse
 from toolkit.job import get_job
-from toolkit.logging import WandbLogger
 
 def print_end_message(jobs_completed, jobs_failed):
     failure_string = f"{jobs_failed} failure{'' if jobs_failed == 1 else 's'}" if jobs_failed > 0 else ""
@@ -115,7 +113,7 @@ def print_end_message(jobs_completed, jobs_failed):
     gpu="A100", # gpu="H100"
     # more about modal timeouts: https://modal.com/docs/guide/timeouts
     timeout=7200, # 2 hours, increase or decrease if needed
-    secrets=[hf_secret, wandb_secret]
+    secrets=[hf_secret]
 )
 def main(config_file_list_str: str, recover: bool = False, name: str = None):
     # Secrets are automatically injected into environment variables
@@ -166,21 +164,6 @@ def main(config_file_list_str: str, recover: bool = False, name: str = None):
     os.makedirs(MOUNT_DIR, exist_ok=True)
     print(f"Training outputs will be saved to: {MOUNT_DIR}")
 
-    # setup wandb
-    if config['config']['process'][0]['logging']['use_wandb']:
-        wandb_token = os.environ.get('WANDB_API_KEY', None)
-        if wandb_token:
-            wandb_logger = WandbLogger(
-                project="flux-lora-training",
-                run_name=name,
-                config=job.raw_config,
-            )
-            job.meta["wandb"] = wandb_logger.run.id
-            job.process[0].logger = wandb_logger
-        else:
-            print("WandB token not found, skipping WandB logging")
-            config['config']['process'][0]['logging']['use_wandb'] = False  # disable if no key was given
-
     # handle dataset zip
     datasets = config['config']['process'][0].get('datasets', None)
     if datasets is not None and isinstance(datasets, list):
@@ -208,9 +191,6 @@ def main(config_file_list_str: str, recover: bool = False, name: str = None):
 
     # run the job
     job.run()
-
-    if config['config']['process'][0]['logging']['use_wandb']:
-        wandb_logger.finish()
 
     # commit the volume after training
     model_volume.commit()
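The decorator change is the functional core of this file's edit: Modal injects each key of every secret listed in `secrets=` as an environment variable inside the container, so dropping `wandb_secret` simply means `WANDB_API_KEY` is never present at runtime. A minimal sketch of that mechanism (the app name is hypothetical, and `HF_TOKEN` is an assumption about which key the "huggingface-secret" stores):

    import os
    import modal

    app = modal.App("secret-demo")  # hypothetical app name
    hf_secret = modal.Secret.from_name("huggingface-secret")

    @app.function(secrets=[hf_secret])
    def show_secrets():
        # Each key stored in an attached secret becomes an environment variable.
        print("HF_TOKEN present:", "HF_TOKEN" in os.environ)
        # With wandb_secret removed from `secrets`, this is False:
        print("WANDB_API_KEY present:", "WANDB_API_KEY" in os.environ)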
|