Xenova (HF staff) committed · commit 8b5cd60 · verified · 1 parent: 4e4527c

Update README.md

Files changed (1): README.md (+84 -0)
README.md CHANGED
@@ -59,6 +59,90 @@ processor = AutoProcessor.from_pretrained(model_id)

## Code to export to ONNX

The script below works around an ONNX exporter limitation by monkey-patching `torch.Tensor.__ior__`, trims the model's forward pass to return only `logits` and `pred_boxes`, runs the PyTorch model once as a reference, and then exports it with `torch.onnx.export` using dynamic axes for the text inputs and the outputs.

```python
import requests

import torch
from PIL import Image
from transformers import AutoProcessor, AutoModelForZeroShotObjectDetection
from transformers.models.grounding_dino.modeling_grounding_dino import (
    GroundingDinoObjectDetectionOutput,
)

# torch.onnx.errors.UnsupportedOperatorError: Exporting the operator 'aten::__ior_' to ONNX opset version 16 is not supported.
# Please feel free to request support or submit a pull request on PyTorch GitHub: https://github.com/pytorch/pytorch/issues.
torch.Tensor.__ior__ = lambda self, other: self.__or__(other)

# model_id = "IDEA-Research/grounding-dino-tiny"
model_id = "hf-internal-testing/tiny-random-GroundingDinoForObjectDetection"
processor = AutoProcessor.from_pretrained(model_id)
model = AutoModelForZeroShotObjectDetection.from_pretrained(model_id)

# Wrap the forward pass so the exported graph only returns the tensors we need
old_forward = model.forward
def new_forward(*args, **kwargs):
    output = old_forward(*args, **kwargs, return_dict=True)
    # Only return the logits and pred_boxes
    return GroundingDinoObjectDetectionOutput(
        logits=output.logits, pred_boxes=output.pred_boxes
    )
model.forward = new_forward

image_url = "http://images.cocodataset.org/val2017/000000039769.jpg"
image = Image.open(requests.get(image_url, stream=True).raw).resize((800, 800))
text = "a cat."  # NB: text queries must be lowercased and end with a dot

# Run the PyTorch model once as a reference
inputs = processor(images=image, text=text, return_tensors="pt")
with torch.no_grad():
    outputs = model(**inputs)
results = processor.post_process_grounded_object_detection(
    outputs,
    inputs.input_ids,
    box_threshold=0.4,
    text_threshold=0.3,
    target_sizes=[image.size[::-1]],
)

# Dynamic axes: the text sequence length and the number of queries vary at runtime
text_axes = {
    "input_ids": {1: "sequence_length"},
    "token_type_ids": {1: "sequence_length"},
    "attention_mask": {1: "sequence_length"},
}
image_axes = {}
output_axes = {
    "logits": {1: "num_queries"},
    "pred_boxes": {1: "num_queries"},
}
input_names = [
    "pixel_values",
    "input_ids",
    "token_type_ids",
    "attention_mask",
    "pixel_mask",
]

# Input to the model
x = tuple(inputs[key] for key in input_names)

# Export the model
torch.onnx.export(
    model,                     # model being run
    x,                         # model input (or a tuple for multiple inputs)
    "model.onnx",              # where to save the model (can be a file or file-like object)
    export_params=True,        # store the trained parameter weights inside the model file
    opset_version=16,          # the ONNX version to export the model to
    do_constant_folding=True,  # whether to execute constant folding for optimization
    input_names=input_names,
    output_names=list(output_axes.keys()),
    dynamic_axes={
        **text_axes,
        **image_axes,
        **output_axes,
    },
)
```
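
As a sanity check (not part of the original commit), here is a minimal sketch that reloads `model.onnx` with ONNX Runtime, feeds it the same tensors that were traced during export, and compares the results against the PyTorch `outputs` computed above; the `onnxruntime` dependency and the tolerance values are assumptions:

```python
# Hypothetical verification sketch (assumes `pip install onnxruntime`);
# reuses `inputs`, `input_names`, and `outputs` from the export script above.
import numpy as np
import onnxruntime as ort

session = ort.InferenceSession("model.onnx", providers=["CPUExecutionProvider"])

# Feed the exact tensors that were used for the export
ort_inputs = {name: inputs[name].numpy() for name in input_names}
logits, pred_boxes = session.run(["logits", "pred_boxes"], ort_inputs)

# Tolerances are a guess; tighten or loosen depending on the model
np.testing.assert_allclose(logits, outputs.logits.numpy(), atol=1e-4)
np.testing.assert_allclose(pred_boxes, outputs.pred_boxes.numpy(), atol=1e-4)
print("ONNX Runtime outputs match PyTorch within tolerance")
```

If the dynamic axes are set up correctly, the same session should also accept a different text prompt (and hence a different `sequence_length`) without re-exporting.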

## Model Details