# Captured from the Hugging Face Space "rjx/rjxai_image_identification"
# (Space status at capture time: Sleeping; file size: 3,792 bytes).
# Short commit hashes listed beside the file in the captured page:
# 242ac91, d5b8eea, 0098847, e414a19, 36e5a07, bd5fe6f, e1e83fc, de5ad1a, a3ecec7

import torch
import supervision as sv
import cv2
import numpy as np
import os
from segment_anything import SamPredictor, sam_model_registry
from diffusers import StableDiffusionInpaintPipeline
from torchvision.ops import box_convert
from typing import List

class SelfSupervised:
    """Hold the SAM, Stable Diffusion inpainting and Grounding DINO models.

    The constructor loads all three models; ``imagePredict`` runs a
    text-prompted Grounding DINO detection on an image file and writes an
    annotated copy of the image to disk.
    """

    def __init__(self):
        """Load the three models, on CUDA when available and on CPU otherwise.

        The SAM checkpoint, the Grounding DINO config and the Grounding DINO
        checkpoint are read from paths relative to the working directory.
        """
        # groundingdino is imported at call time rather than at module import.
        from groundingdino.util.inference import load_model

        # ----- Device selection
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        # ----- SAM parameters
        self.model_type = "vit_h"
        self.predictor = SamPredictor(sam_model_registry[self.model_type](
            checkpoint="./weights/sam_vit_h_4b8939.pth").to(device=self.device))
        # ----- Stable Diffusion inpainting
        # NOTE(review): float16 is requested even when the device falls back
        # to CPU -- confirm half precision is intended/supported there.
        self.pipe = StableDiffusionInpaintPipeline.from_pretrained(
            "stabilityai/stable-diffusion-2-inpainting", torch_dtype=torch.float16,).to(self.device)
        # ----- Grounding DINO
        self.groundingdino_model = load_model(
            "fengxai/config/GroundingDINO_SwinT_OGC.py", "weights/groundingdino_swint_ogc.pth")

    @staticmethod
    def _boxSizes(xyxy: np.ndarray) -> List[tuple]:
        """Return the truncated integer (width, height) of each xyxy box."""
        return [(int(x2 - x1), int(y2 - y1)) for x1, y1, x2, y2 in xyxy]

    def checkAnnotate(self, image_source: np.ndarray, boxes: torch.Tensor, logits: torch.Tensor, phrases: List[str]):
        """Draw the detected boxes on the image and return them in pixel xyxy.

        Args:
            image_source: Image array of shape (height, width, channels). It
                is converted with ``COLOR_RGB2BGR``, so it is presumably RGB.
            boxes: Boxes in ``cxcywh`` format. They are multiplied by
                ``[w, h, w, h]``, so they are presumably normalised to the
                image size -- confirm against the caller.
            logits: One score per box, formatted into the label.
            phrases: One text label per box.

        Returns:
            A tuple ``(annotated_frame, xyxy)``: the BGR image with boxes and
            labels drawn on it, and the boxes as a NumPy array in ``xyxy``
            format (x1, y1 = top-left corner; x2, y2 = bottom-right corner).
        """
        # Scale the boxes to the pixel size of the original image.
        h, w, _ = image_source.shape
        boxes = boxes * torch.Tensor([w, h, w, h])
        # See https://pytorch.org/vision/main/generated/torchvision.ops.box_convert.html
        # xyxy:   (x1, y1) is the top-left corner, (x2, y2) the bottom-right.
        # cxcywh: (cx, cy) is the box centre, (w, h) its width and height.
        xyxy = box_convert(boxes=boxes, in_fmt="cxcywh", out_fmt="xyxy").numpy()
        detections = sv.Detections(xyxy=xyxy)
        # Each label carries the size of its own box (not the first box's),
        # and an empty detection set simply yields no labels.
        labels = [
            f"{phrase} {logit:.2f} w:{boxWidth} h:{boxHeight}"
            for phrase, logit, (boxWidth, boxHeight)
            in zip(phrases, logits, self._boxSizes(xyxy))
        ]
        box_annotator = sv.BoxAnnotator()
        annotated_frame = cv2.cvtColor(image_source, cv2.COLOR_RGB2BGR)
        annotated_frame = box_annotator.annotate(scene=annotated_frame, detections=detections, labels=labels)
        return annotated_frame, xyxy

    def imagePredict(self, imageFile, item="clothing", boxThreshold=0.3, textTreshold=0.25):
        """Detect ``item`` in an image file and save an annotated copy.

        Args:
            imageFile: Image path handed to groundingdino's ``load_image``.
            item: Text prompt passed to Grounding DINO as the caption.
            boxThreshold: Passed to ``predict`` as ``box_threshold``.
            textTreshold: Passed to ``predict`` as ``text_threshold``.

        Returns:
            A dict with the output path (``imageOutput``), the source image
            size (``imageHeight``, ``imageWidth``) and the pixel size of the
            first detected box (``objectHeight``, ``objectWidth``). Both
            object sizes are 0 when nothing was detected.

        Raises:
            OSError: If the annotated image could not be written.
        """
        from groundingdino.util.inference import load_image, predict

        src, img = load_image(imageFile)
        h, w, _ = src.shape
        boxes, logits, phrases = predict(
            model=self.groundingdino_model,
            image=img,
            caption=item,
            box_threshold=boxThreshold,
            text_threshold=textTreshold
        )
        # checkAnnotate returns a BGR frame, which is the channel order
        # cv2.imwrite expects, so it is written out without reversing channels.
        imgAnnotated, xyxy = self.checkAnnotate(
            image_source=src, boxes=boxes, logits=logits, phrases=phrases
        )

        # Report the first box; fall back to 0 x 0 when there is no detection.
        boxSizes = self._boxSizes(xyxy)
        boxesWidth, boxesHeight = boxSizes[0] if boxSizes else (0, 0)

        imageOutPutFile = "data/annotated_image.jpg"
        os.makedirs(os.path.dirname(imageOutPutFile), exist_ok=True)
        # cv2.imwrite reports failure through its return value.
        if not cv2.imwrite(imageOutPutFile, imgAnnotated):
            raise OSError(f"could not write annotated image to {imageOutPutFile}")

        return {
            "imageOutput": imageOutPutFile,
            "imageHeight": h,
            "imageWidth": w,
            "objectHeight": boxesHeight,
            "objectWidth": boxesWidth
        }