Thouph
/

eva02-clip-vit-large-7704

ONNX

Model card Files Files and versions Community

Thouph committed on Nov 24, 2023

Commit

fc68fe3

1 Parent(s): 5033331

Upload batched_inference.py

Browse files

Files changed (1) hide show

batched_inference.py +104 -112

batched_inference.py CHANGED Viewed

@@ -1,7 +1,4 @@
-import csv
 import torch.multiprocessing as multiprocessing
-import pandas as pd
-import numpy as np
 import torchvision.transforms as transforms
 from torch import autocast
 from torch.utils.data import Dataset, DataLoader
@@ -9,79 +6,84 @@ from PIL import Image
 import torch
 from torchvision.transforms import InterpolationMode
 from tqdm import tqdm
-import random
 import json
 torch.backends.cuda.matmul.allow_tf32 = True
 torch.backends.cudnn.allow_tf32 = True
 torch.autograd.set_detect_anomaly(False)
 torch.autograd.profiler.emit_nvtx(enabled=False)
 torch.autograd.profiler.profile(enabled=False)
 torch.backends.cudnn.benchmark = True
 class ImageDataset(Dataset):
-    def __init__(self, csv_file, train, base_path):
-        self.csv_file = csv_file
-        self.train = train
-        self.all_image_names = self.csv_file[:]['md5'].apply(str)
-        self.all_image_ext = self.csv_file[:]['file_ext'].apply(str)
-        self.train_size = len(self.csv_file)
-        self.base_path = base_path
-        if self.train == True:
-            print(f"Number of training images: {self.train_size}")
-            self.thin_transform = transforms.Compose([
-                transforms.Resize(224, interpolation=InterpolationMode.BICUBIC),
-                transforms.CenterCrop(224),
-                transforms.ToTensor(),
-                transforms.Normalize(mean=[
-                    0.48145466,
-                    0.4578275,
-                    0.40821073
-                ], std=[
-                    0.26862954,
-                    0.26130258,
-                    0.27577711
-                ])  # Normalize image
-            ])
-            self.normal_transform = transforms.Compose([
-                transforms.Resize((224, 224), interpolation=InterpolationMode.BICUBIC),
-                transforms.ToTensor(),
-                transforms.Normalize(mean=[
-                    0.48145466,
-                    0.4578275,
-                    0.40821073
-                ], std=[
-                    0.26862954,
-                    0.26130258,
-                    0.27577711
-                ])  # Normalize image
-            ])
     def __len__(self):
-        return len(self.all_image_names)
     def __getitem__(self, index):
-        image = Image.open(self.base_path+"/"+str(self.all_image_names[index])+str(self.all_image_ext[index])).convert("RGB")
-        ratio = image.height/image.width
         if ratio > 2.0 or ratio < 0.5:
             image = self.thin_transform(image)
         else:
             image = self.normal_transform(image)
         return {
             'image': image,
-            "image_name": self.all_image_names[index]
         }
-def prepare_model():
-    model = torch.load("path/to/your/model.pth").to("cuda")
     model.to(memory_format=torch.channels_last)
     model = model.eval()
     return model
@@ -94,20 +96,19 @@ def train(tagging_is_running, model, dataloader, train_data, output_queue):
     with torch.no_grad():
         for i, data in tqdm(enumerate(dataloader), total=int(len(train_data) / dataloader.batch_size)):
-            data, image_names = data['image'].to("cuda"), data["image_name"]
             with autocast(device_type='cuda', dtype=torch.bfloat16):
-                outputs = model(data)
                 probabilities = torch.nn.functional.sigmoid(outputs)
-                output_queue.put((probabilities.to("cpu"), image_names))
             counter += 1
     _ = tagging_is_running.get()
     print("Tagging finished!")
-def tag_writer(tagging_is_running, output_queue, output_file_name):
     with open("tags.json", "r") as file:
         tags = json.load(file)
     allowed_tags = sorted(tags)
@@ -116,78 +117,69 @@ def tag_writer(tagging_is_running, output_queue, output_file_name):
     tag_count = len(allowed_tags)
     assert tag_count == 7704, f"The length of loss scaling factor is not correct. Correct: 7704, current: {tag_count}"
-    with open(output_file_name, "w") as output_csv:
-        writer = csv.writer(output_csv)
-        writer.writerow(["image_name", "tags", "tag_probs"])
-        while not (tagging_is_running.qsize()>0 and output_queue.qsize()>0):
-            tag_probabilities, image_names = output_queue.get()
-            tag_probabilities = tag_probabilities.tolist()
-            for per_image_tag_probabilities,image_name  in zip(tag_probabilities, image_names, strict=True):
-                this_image_tags = []
-                this_image_tag_probabilities = []
-                for index, per_tag_probability in enumerate(per_image_tag_probabilities):
-                    if per_tag_probability > 0.3:
-                        tag = allowed_tags[index]
-                        if "placeholder" not in tag:
-                            this_image_tags.append(tag)
-                            this_image_tag_probabilities.append(str(int(round(per_tag_probability, 3) * 1000)))
-                image_row = [image_name," ".join(this_image_tags)," ".join(this_image_tag_probabilities)]
-                writer.writerow(image_row)
-def set_seed(seed: int = 42) -> None:
-    np.random.seed(seed)
-    random.seed(seed)
-    torch.manual_seed(seed)
-    torch.cuda.manual_seed(seed)
-    # When running on the CuDNN backend, two further options must be set
-    torch.backends.cudnn.deterministic = True
-    torch.backends.cudnn.benchmark = False
-    # Set a fixed value for the hash seed
-    print(f"Random seed set as {seed}")
-if __name__ == "__main__":
-    steps = 0
-    output_file_name = "your_file.csv"
-    set_seed()
     multiprocessing.set_start_method('spawn')
     output_queue = multiprocessing.Queue()
     tagging_is_running = multiprocessing.Queue(maxsize=5)
     tagging_is_running.put("Running!")
     # initialize the computation device
-    if torch.cuda.is_available():
-        device = torch.device('cuda')
-    else:
         raise RuntimeError("CUDA is not available!")
-    model = prepare_model().to("cuda")
-    batch_size = 128
     # read the training csv file
-    train_csv = pd.read_csv('/path/to/a/list/of/files/and/their/extensions.csv')
     # train dataset
-    train_data = ImageDataset(
-        train_csv, train=True
-    )
-    train_loader = DataLoader(
-        train_data,
         batch_size=batch_size,
         shuffle=False,
-        num_workers=6,
-        pin_memory=True
     )
-    process_writer = multiprocessing.Process(target=tag_writer, args=(tagging_is_running, output_queue, output_file_name))
     process_writer.start()
-    process_tagger = multiprocessing.Process(target=train, args=(tagging_is_running, model, train_loader, train_data, output_queue,))
     process_tagger.start()
     process_writer.join()
     process_tagger.join()

 import torch.multiprocessing as multiprocessing
 import torchvision.transforms as transforms
 from torch import autocast
 from torch.utils.data import Dataset, DataLoader
 import torch
 from torchvision.transforms import InterpolationMode
 from tqdm import tqdm
 import json
+import os
 torch.backends.cuda.matmul.allow_tf32 = True
 torch.backends.cudnn.allow_tf32 = True
 torch.autograd.set_detect_anomaly(False)
 torch.autograd.profiler.emit_nvtx(enabled=False)
 torch.autograd.profiler.profile(enabled=False)
 torch.backends.cudnn.benchmark = True
 class ImageDataset(Dataset):
+    def __init__(self, image_folder_path, allowed_extensions):
+        self.allowed_extensions = allowed_extensions
+        self.all_image_paths, self.all_image_names, self.image_base_paths = self.get_image_paths(image_folder_path)
+        self.train_size = len(self.all_image_paths)
+        print(f"Number of images to be tagged: {self.train_size}")
+        self.thin_transform = transforms.Compose([
+            transforms.Resize(224, interpolation=InterpolationMode.BICUBIC),
+            transforms.CenterCrop(224),
+            transforms.ToTensor(),
+            transforms.Normalize(mean=[
+                0.48145466,
+                0.4578275,
+                0.40821073
+            ], std=[
+                0.26862954,
+                0.26130258,
+                0.27577711
+            ])  # Normalize image
+        ])
+        self.normal_transform = transforms.Compose([
+            transforms.Resize((224, 224), interpolation=InterpolationMode.BICUBIC),
+            transforms.ToTensor(),
+            transforms.Normalize(mean=[
+                0.48145466,
+                0.4578275,
+                0.40821073
+            ], std=[
+                0.26862954,
+                0.26130258,
+                0.27577711
+            ])  # Normalize image
+        ])
+    def get_image_paths(self, folder_path):
+        image_paths = []
+        image_file_names = []
+        image_base_paths = []
+        for root, dirs, files in os.walk(folder_path):
+            for file in files:
+                if file.lower().split(".")[-1] in self.allowed_extensions:
+                    image_paths.append((os.path.abspath(os.path.join(root, file))))
+                    image_file_names.append(file.split(".")[0])
+                    image_base_paths.append(root)
+        return image_paths, image_file_names, image_base_paths
     def __len__(self):
+        return len(self.all_image_paths)
     def __getitem__(self, index):
+        image = Image.open(self.all_image_paths[index]).convert("RGB")
+        ratio = image.height / image.width
         if ratio > 2.0 or ratio < 0.5:
             image = self.thin_transform(image)
         else:
             image = self.normal_transform(image)
         return {
             'image': image,
+            "image_name": self.all_image_names[index],
+            "image_root": self.image_base_paths[index]
         }
def prepare_model(model_path: str):
    """Load a fully pickled model and prepare it for inference.

    Args:
        model_path: Path to a file written with ``torch.save(model, ...)``.

    Returns:
        The loaded model, converted to channels-last memory format and
        switched to evaluation mode.
    """
    # The checkpoint is a whole pickled module, not a state dict. Recent
    # PyTorch releases default ``torch.load`` to ``weights_only=True``, which
    # refuses such files, so the full-unpickling mode is requested explicitly.
    # SECURITY: full unpickling can execute arbitrary code. Only load model
    # files obtained from a source you trust.
    model = torch.load(model_path, weights_only=False)
    model.to(memory_format=torch.channels_last)
    model = model.eval()
    return model
     with torch.no_grad():
         for i, data in tqdm(enumerate(dataloader), total=int(len(train_data) / dataloader.batch_size)):
+            this_data = data['image'].to("cuda")
             with autocast(device_type='cuda', dtype=torch.bfloat16):
+                outputs = model(this_data)
                 probabilities = torch.nn.functional.sigmoid(outputs)
+                output_queue.put((probabilities.to("cpu"), data["image_name"], data["image_root"]))
             counter += 1
     _ = tagging_is_running.get()
     print("Tagging finished!")
+def tag_writer(tagging_is_running, output_queue, threshold):
     with open("tags.json", "r") as file:
         tags = json.load(file)
     allowed_tags = sorted(tags)
     tag_count = len(allowed_tags)
     assert tag_count == 7704, f"The length of loss scaling factor is not correct. Correct: 7704, current: {tag_count}"
+    while not (tagging_is_running.qsize() > 0 and output_queue.qsize() > 0):
+        tag_probabilities, image_names, image_roots = output_queue.get()
+        tag_probabilities = tag_probabilities.tolist()
+        for per_image_tag_probabilities, image_name, image_root in zip(tag_probabilities, image_names, image_roots,
+                                                                       strict=True):
+            this_image_tags = []
+            this_image_tag_probabilities = []
+            for index, per_tag_probability in enumerate(per_image_tag_probabilities):
+                if per_tag_probability > threshold:
+                    tag = allowed_tags[index]
+                    if "placeholder" not in tag:
+                        this_image_tags.append(tag)
+                        this_image_tag_probabilities.append(str(int(round(per_tag_probability, 3) * 1000)))
+            output_file = os.path.join(image_root, os.path.splitext(image_name)[0] + ".txt")
+            with open(output_file, "w", encoding="utf-8") as this_output:
+                this_output.write(" ".join(this_image_tags))
+                this_output.write("\n")
+                this_output.write(" ".join(this_image_tag_probabilities))
def main(
    image_folder_path="/path/to/your/folder/",
    model_path="/path/to/your/model.pth",
    allowed_extensions=frozenset({"jpg", "jpeg", "png", "webp"}),
    batch_size=64,
    threshold=0.3,
    num_workers=6,
):
    """Tag every image under a folder and write a text file next to each one.

    Calling ``main()`` without arguments behaves as before; the settings that
    used to be edited in the function body are now parameters.

    Args:
        image_folder_path: Folder holding the images, directly or in
            sub-folders. One ``.txt`` file is generated per image.
        model_path: Path to the pickled model file.
        allowed_extensions: Lower-case file extensions (no dot) to tag.
        batch_size: Images per batch. With a 24GB card, 256 may be worth
            trying.
        threshold: Minimum probability for a tag to be written.
        num_workers: DataLoader worker processes. With a big batch size, a
            good CPU and enough CPU memory, 12 may be worth trying.

    Raises:
        RuntimeError: If CUDA is not available.
    """
    # Fail fast, before any queue or process is created.
    if not torch.cuda.is_available():
        raise RuntimeError("CUDA is not available!")

    multiprocessing.set_start_method('spawn')
    output_queue = multiprocessing.Queue()
    # Holds one item while tagging is in progress; the tagging process
    # removes it when it is done.
    tagging_is_running = multiprocessing.Queue(maxsize=5)
    tagging_is_running.put("Running!")

    model = prepare_model(model_path).to("cuda")

    dataset = ImageDataset(image_folder_path, allowed_extensions)
    batched_loader = DataLoader(
        dataset,
        batch_size=batch_size,
        shuffle=False,
        num_workers=num_workers,
        pin_memory=True,
        drop_last=False,
    )

    # One process writes results to disk while the other runs the model.
    process_writer = multiprocessing.Process(target=tag_writer,
                                             args=(tagging_is_running, output_queue, threshold))
    process_writer.start()
    process_tagger = multiprocessing.Process(target=train,
                                             args=(tagging_is_running, model, batched_loader, dataset, output_queue,))
    process_tagger.start()
    process_writer.join()
    process_tagger.join()


if __name__ == "__main__":
    main()