BiRefNet_plus

Build error

App Files Community

ginipick committed 10 days ago

Commit

adfe191

verified ·

1 Parent(s): ec38b03

Update app.py

Browse files

Files changed (1) hide show

app.py +59 -74

app.py CHANGED Viewed

@@ -25,30 +25,49 @@ from transformers import (
     AutoConfig,
     AutoModelForImageSegmentation,
 )
-# 1) Config를 먼저 로드하여 tie_weights 충돌을 방지
 config = AutoConfig.from_pretrained(
-    "zhengpeng7/BiRefNet",          # 👉 원하는 Hugging Face 모델 Repo
     trust_remote_code=True
 )
-# 2) config.get_text_config 에 더미 메서드 부여 (tie_word_embeddings=False)
 def dummy_get_text_config(decoder=True):
     return type("DummyTextConfig", (), {"tie_word_embeddings": False})()
 config.get_text_config = dummy_get_text_config
-# 3) 모델 구조만 만들기 (from_config) -> tie_weights 자동 호출 안 됨
 birefnet = AutoModelForImageSegmentation.from_config(config, trust_remote_code=True)
 birefnet.eval()
 device = "cuda" if torch.cuda.is_available() else "cpu"
 birefnet.to(device)
 birefnet.half()
-# 4) state_dict 로드 (가중치) - 로컬 파일 사용 예시
-#    실제로는 hf_hub_download / snapshot_download 등으로 "model.safetensors"를 미리 받은 뒤 사용
-print("Loading BiRefNet weights from local file: model.safetensors")
-state_dict = torch.load("model.safetensors", map_location="cpu")  # 예시
 missing, unexpected = birefnet.load_state_dict(state_dict, strict=False)
 print("[Info] Missing keys:", missing)
 print("[Info] Unexpected keys:", unexpected)
@@ -56,7 +75,7 @@ torch.cuda.empty_cache()
 ##########################################################
-# 1. 이미지 후처리 함수들
 ##########################################################
 def refine_foreground(image, mask, r=90):
@@ -85,7 +104,6 @@ def FB_blur_fusion_foreground_estimator(image, F, B, alpha, r=90):
     F = np.clip(F, 0, 1)
     return F, blurred_B
 class ImagePreprocessor():
     def __init__(self, resolution: Tuple[int, int] = (1024, 1024)) -> None:
         self.transform_image = transforms.Compose([
@@ -99,7 +117,7 @@ class ImagePreprocessor():
 ##########################################################
-# 2. 예제 설정 및 유틸
 ##########################################################
 usage_to_weights_file = {
@@ -130,30 +148,24 @@ descriptions = (
     "We also maintain the HF model of BiRefNet at https://huggingface.co/ZhengPeng7/BiRefNet for easier access."
 )
 ##########################################################
-# 3. 추론 함수 (이미 로드된 birefnet 모델 사용)
 ##########################################################
 @spaces.GPU
 def predict(images, resolution, weights_file):
-    """
-    여기서는, 단일 birefnet 모델만 유지하고 있으며,
-    weight_file을 바꾸더라도 실제로는 이미 로드된 'birefnet' 모델만 사용.
-    (만약 다른 가중치를 로드하고 싶다면, 아래처럼 로컬 state_dict 교체 방식 추가 가능.)
-    """
     assert images is not None, 'Images cannot be None.'
-    # Resolution parse
     try:
-        w, h = resolution.strip().split('x')
-        w, h = int(int(w)//32*32), int(int(h)//32*32)
-        resolution_list = (w, h)
     except:
-        print('[WARN] Invalid resolution input. Fallback to 1024x1024.')
-        resolution_list = (1024, 1024)
-    # 이미지가 여러 장일 수 있으므로 리스트로 처리
     if isinstance(images, list):
         is_batch = True
         outputs, save_paths = [], []
@@ -164,65 +176,57 @@ def predict(images, resolution, weights_file):
         is_batch = False
     for idx, image_src in enumerate(images):
-        # str이면 파일 경로 혹은 URL
         if isinstance(image_src, str):
             if os.path.isfile(image_src):
                 image_ori = Image.open(image_src)
             else:
                 resp = requests.get(image_src)
                 image_ori = Image.open(BytesIO(resp.content))
-        # numpy 배열이면 Pillow 변환
         elif isinstance(image_src, np.ndarray):
             image_ori = Image.fromarray(image_src)
         else:
             image_ori = image_src.convert('RGB')
-        image = image_ori.convert('RGB')
-        preproc = ImagePreprocessor(resolution_list)
-        image_proc = preproc.proc(image).unsqueeze(0).to(device).half()
-        # 실제 추론
         with torch.inference_mode():
-            # 결과 맨 마지막 레이어 preds
             preds = birefnet(image_proc)[-1].sigmoid().cpu()
         pred_mask = preds[0].squeeze()
         # 후처리
         pred_pil = transforms.ToPILImage()(pred_mask)
-        image_masked = refine_foreground(image, pred_pil)
-        image_masked.putalpha(pred_pil.resize(image.size))
         if is_batch:
-            file_name = (
-                os.path.splitext(os.path.basename(image_src))[0]
-                if isinstance(image_src, str)
-                else f"img_{idx}"
-            )
-            out_path = os.path.join(save_dir, f"{file_name}.png")
-            image_masked.save(out_path)
-            save_paths.append(out_path)
             outputs.append(image_masked)
         else:
             outputs = [image_masked, image_ori]
         torch.cuda.empty_cache()
-    # 배치라면 갤러리 + ZIP 반환
     if is_batch:
-        zip_path = os.path.join(save_dir, f"{save_dir}.zip")
-        with zipfile.ZipFile(zip_path, 'w') as zipf:
             for fpath in save_paths:
                 zipf.write(fpath, os.path.basename(fpath))
-        return (save_paths, zip_path)
     else:
         return outputs
 ##########################################################
-# 4. Gradio UI
 ##########################################################
-# 커스텀 CSS
 css = """
 body {
     background: linear-gradient(135deg, #667eea, #764ba2);
@@ -280,14 +284,13 @@ button:hover, .btn:hover {
 title_html = """
 <h1 align="center" style="margin-bottom: 0.2em;">BiRefNet Demo (No Tie-Weights Crash)</h1>
 <p align="center" style="font-size:1.1em; color:#555;">
-    Using <code>from_config()</code> + local <code>state_dict</code> to bypass tie_weights issues
 </p>
 """
 with gr.Blocks(css=css, title="BiRefNet Demo") as demo:
     gr.Markdown(title_html)
     with gr.Tabs():
-        # 탭 1: Image
         with gr.Tab("Image"):
             with gr.Row():
                 with gr.Column(scale=1):
@@ -297,13 +300,8 @@ with gr.Blocks(css=css, title="BiRefNet Demo") as demo:
                     predict_btn = gr.Button("Predict")
                 with gr.Column(scale=2):
                     output_slider = ImageSlider(label="Result", type="pil")
-            gr.Examples(
-                examples=examples_image,
-                inputs=[image_input, resolution_input, weights_radio],
-                label="Examples"
-            )
-        # 탭 2: Text(URL)
         with gr.Tab("Text"):
             with gr.Row():
                 with gr.Column(scale=1):
@@ -313,36 +311,23 @@ with gr.Blocks(css=css, title="BiRefNet Demo") as demo:
                     predict_btn_text = gr.Button("Predict")
                 with gr.Column(scale=2):
                     output_slider_text = ImageSlider(label="Result", type="pil")
-            gr.Examples(
-                examples=examples_text,
-                inputs=[image_url, resolution_input_text, weights_radio_text],
-                label="Examples"
-            )
-        # 탭 3: Batch
         with gr.Tab("Batch"):
             with gr.Row():
                 with gr.Column(scale=1):
-                    file_input = gr.File(
-                        label="Upload Multiple Images",
-                        type="filepath",
-                        file_count="multiple"
-                    )
                     resolution_input_batch = gr.Textbox(lines=1, placeholder="e.g., 1024x1024", label="Resolution")
                     weights_radio_batch = gr.Radio(list(usage_to_weights_file.keys()), value="General", label="Weights")
                     predict_btn_batch = gr.Button("Predict")
                 with gr.Column(scale=2):
                     output_gallery = gr.Gallery(label="Results", scale=1)
                     zip_output = gr.File(label="Zip Download")
-            gr.Examples(
-                examples=examples_batch,
-                inputs=[file_input, resolution_input_batch, weights_radio_batch],
-                label="Examples"
-            )
     gr.Markdown("<p align='center'>Model by <a href='https://huggingface.co/ZhengPeng7/BiRefNet'>ZhengPeng7/BiRefNet</a></p>")
-    # 버튼 이벤트 연결
     predict_btn.click(
         fn=predict,
         inputs=[image_input, resolution_input, weights_radio],

     AutoConfig,
     AutoModelForImageSegmentation,
 )
+# Hugging Face Hub
+from huggingface_hub import hf_hub_download
+##########################################################
+# 1. Config 및 from_config() 초기화
+##########################################################
+# 1) Config만 먼저 로드
 config = AutoConfig.from_pretrained(
+    "zhengpeng7/BiRefNet",  # 예시
     trust_remote_code=True
 )
+# 2) config.get_text_config에 더미 메서드 부여 (tie_word_embeddings=False)
 def dummy_get_text_config(decoder=True):
     return type("DummyTextConfig", (), {"tie_word_embeddings": False})()
 config.get_text_config = dummy_get_text_config
+# 3) 모델 구조만 만들기
 birefnet = AutoModelForImageSegmentation.from_config(config, trust_remote_code=True)
 birefnet.eval()
 device = "cuda" if torch.cuda.is_available() else "cpu"
 birefnet.to(device)
 birefnet.half()
+##########################################################
+# 2. 모델 가중치 다운로드 & 로드
+##########################################################
+# huggingface_hub에서 safetensors 또는 bin 파일 다운로드
+# (repo_id, filename 등은 실제 사용 환경에 맞게 변경)
+weights_path = hf_hub_download(
+    repo_id="zhengpeng7/BiRefNet",       # 예시
+    filename="model.safetensors",        # 또는 "pytorch_model.bin"
+    trust_remote_code=True
+)
+print("Downloaded weights to:", weights_path)
+# state_dict 로드
+print("Loading BiRefNet weights from HF Hub file:", weights_path)
+state_dict = torch.load(weights_path, map_location="cpu")
 missing, unexpected = birefnet.load_state_dict(state_dict, strict=False)
 print("[Info] Missing keys:", missing)
 print("[Info] Unexpected keys:", unexpected)
 ##########################################################
+# 3. 이미지 후처리 함수들
 ##########################################################
 def refine_foreground(image, mask, r=90):
     F = np.clip(F, 0, 1)
     return F, blurred_B
 class ImagePreprocessor():
     def __init__(self, resolution: Tuple[int, int] = (1024, 1024)) -> None:
         self.transform_image = transforms.Compose([
 ##########################################################
+# 4. 예제 설정 및 기타
 ##########################################################
 usage_to_weights_file = {
     "We also maintain the HF model of BiRefNet at https://huggingface.co/ZhengPeng7/BiRefNet for easier access."
 )
 ##########################################################
+# 5. 추론 함수 (이미 로드된 birefnet 모델 사용)
 ##########################################################
 @spaces.GPU
 def predict(images, resolution, weights_file):
+    # weights_file은 여기서는 무시하고, 이미 로드된 birefnet 사용
     assert images is not None, 'Images cannot be None.'
+    # Parse resolution
     try:
+        w, h = map(int, resolution.strip().split('x'))
+        w, h = int(w//32*32), int(h//32*32)
     except:
+        w, h = 1024, 1024
+    resolution_tuple = (w, h)
+    # 리스트인지 확인
     if isinstance(images, list):
         is_batch = True
         outputs, save_paths = [], []
         is_batch = False
     for idx, image_src in enumerate(images):
+        # 파일 경로 혹은 URL
         if isinstance(image_src, str):
             if os.path.isfile(image_src):
                 image_ori = Image.open(image_src)
             else:
                 resp = requests.get(image_src)
                 image_ori = Image.open(BytesIO(resp.content))
+        # numpy array → PIL
         elif isinstance(image_src, np.ndarray):
             image_ori = Image.fromarray(image_src)
         else:
             image_ori = image_src.convert('RGB')
+        # 전처리
+        preproc = ImagePreprocessor(resolution_tuple)
+        image_proc = preproc.proc(image_ori.convert('RGB')).unsqueeze(0).to(device).half()
+        # 추론
         with torch.inference_mode():
             preds = birefnet(image_proc)[-1].sigmoid().cpu()
         pred_mask = preds[0].squeeze()
         # 후처리
         pred_pil = transforms.ToPILImage()(pred_mask)
+        image_masked = refine_foreground(image_ori, pred_pil)
+        image_masked.putalpha(pred_pil.resize(image_ori.size))
         if is_batch:
+            fbase = (os.path.splitext(os.path.basename(image_src))[0] if isinstance(image_src, str) else f"img_{idx}")
+            outpath = os.path.join(save_dir, f"{fbase}.png")
+            image_masked.save(outpath)
+            save_paths.append(outpath)
             outputs.append(image_masked)
         else:
             outputs = [image_masked, image_ori]
         torch.cuda.empty_cache()
     if is_batch:
+        zippath = os.path.join(save_dir, f"{save_dir}.zip")
+        with zipfile.ZipFile(zippath, 'w') as zipf:
             for fpath in save_paths:
                 zipf.write(fpath, os.path.basename(fpath))
+        return outputs, zippath
     else:
         return outputs
 ##########################################################
+# 6. Gradio UI
 ##########################################################
 css = """
 body {
     background: linear-gradient(135deg, #667eea, #764ba2);
 title_html = """
 <h1 align="center" style="margin-bottom: 0.2em;">BiRefNet Demo (No Tie-Weights Crash)</h1>
 <p align="center" style="font-size:1.1em; color:#555;">
+    Using <code>from_config()</code> + local <code>state_dict</code> or <code>hf_hub_download</code> to bypass tie_weights issues
 </p>
 """
 with gr.Blocks(css=css, title="BiRefNet Demo") as demo:
     gr.Markdown(title_html)
     with gr.Tabs():
         with gr.Tab("Image"):
             with gr.Row():
                 with gr.Column(scale=1):
                     predict_btn = gr.Button("Predict")
                 with gr.Column(scale=2):
                     output_slider = ImageSlider(label="Result", type="pil")
+            gr.Examples(examples=examples_image, inputs=[image_input, resolution_input, weights_radio], label="Examples")
         with gr.Tab("Text"):
             with gr.Row():
                 with gr.Column(scale=1):
                     predict_btn_text = gr.Button("Predict")
                 with gr.Column(scale=2):
                     output_slider_text = ImageSlider(label="Result", type="pil")
+            gr.Examples(examples=examples_text, inputs=[image_url, resolution_input_text, weights_radio_text], label="Examples")
         with gr.Tab("Batch"):
             with gr.Row():
                 with gr.Column(scale=1):
+                    file_input = gr.File(label="Upload Multiple Images", type="filepath", file_count="multiple")
                     resolution_input_batch = gr.Textbox(lines=1, placeholder="e.g., 1024x1024", label="Resolution")
                     weights_radio_batch = gr.Radio(list(usage_to_weights_file.keys()), value="General", label="Weights")
                     predict_btn_batch = gr.Button("Predict")
                 with gr.Column(scale=2):
                     output_gallery = gr.Gallery(label="Results", scale=1)
                     zip_output = gr.File(label="Zip Download")
+            gr.Examples(examples=examples_batch, inputs=[file_input, resolution_input_batch, weights_radio_batch], label="Examples")
     gr.Markdown("<p align='center'>Model by <a href='https://huggingface.co/ZhengPeng7/BiRefNet'>ZhengPeng7/BiRefNet</a></p>")
+    # 이벤트 연결
     predict_btn.click(
         fn=predict,
         inputs=[image_input, resolution_input, weights_radio],