wzk1015 committed
Commit a0ddf17
1 Parent(s): ad75055

Update README.md

Files changed (1):
  1. README.md (+32 -1)

README.md CHANGED
@@ -173,7 +173,7 @@ def load_image(image_file, input_size=448, max_num=12):
     pixel_values = torch.stack(pixel_values)
     return pixel_values
 
-# If you want to load a model using multiple GPUs, please refer to the `Multiple GPUs` section.
+
 path = 'OpenGVLab/Mono-InternVL-2B'
 model = AutoModel.from_pretrained(
     path,
@@ -225,6 +225,21 @@ If you find this project useful in your research, please consider citing:
   journal={arXiv preprint arXiv:2410.TODO},
   year={2024}
 }
+
+@article{chen2024far,
+  title={How Far Are We to GPT-4V? Closing the Gap to Commercial Multimodal Models with Open-Source Suites},
+  author={Chen, Zhe and Wang, Weiyun and Tian, Hao and Ye, Shenglong and Gao, Zhangwei and Cui, Erfei and Tong, Wenwen and Hu, Kongzhi and Luo, Jiapeng and Ma, Zheng and others},
+  journal={arXiv preprint arXiv:2404.16821},
+  year={2024}
+}
+
+@inproceedings{chen2024internvl,
+  title={InternVL: Scaling up vision foundation models and aligning for generic visual-linguistic tasks},
+  author={Chen, Zhe and Wu, Jiannan and Wang, Wenhai and Su, Weijie and Chen, Guo and Xing, Sen and Zhong, Muyan and Zhang, Qinglong and Zhu, Xizhou and Lu, Lewei and others},
+  booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
+  pages={24185--24198},
+  year={2024}
+}
 ```
@@ -298,4 +313,20 @@ Mono-InternVL outperforms the current state-of-the-art MLLM Mini-InternVL-2B-1.5 and
   journal={arXiv preprint arXiv:2410.TODO},
   year={2024}
 }
+
+@article{chen2024far,
+  title={How Far Are We to GPT-4V? Closing the Gap to Commercial Multimodal Models with Open-Source Suites},
+  author={Chen, Zhe and Wang, Weiyun and Tian, Hao and Ye, Shenglong and Gao, Zhangwei and Cui, Erfei and Tong, Wenwen and Hu, Kongzhi and Luo, Jiapeng and Ma, Zheng and others},
+  journal={arXiv preprint arXiv:2404.16821},
+  year={2024}
+}
+
+@inproceedings{chen2024internvl,
+  title={InternVL: Scaling up vision foundation models and aligning for generic visual-linguistic tasks},
+  author={Chen, Zhe and Wu, Jiannan and Wang, Wenhai and Su, Weijie and Chen, Guo and Xing, Sen and Zhong, Muyan and Zhang, Qinglong and Zhu, Xizhou and Lu, Lewei and others},
+  booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
+  pages={24185--24198},
+  year={2024}
+}
+
 ```
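For context, the first hunk sits inside the README's quick-start snippet, which loads Mono-InternVL-2B in a single process; the removed comment pointed readers to the README's `Multiple GPUs` section instead. Below is a minimal sketch of the loading pattern that snippet belongs to, assuming the usual InternVL-style arguments (`torch_dtype=torch.bfloat16`, `low_cpu_mem_usage=True`, `trust_remote_code=True`) and the `.eval().cuda()` chain, none of which are visible in the hunk context above.

```python
import torch
from transformers import AutoModel, AutoTokenizer

# Single-GPU loading pattern around which hunk 1 is centered.
# torch_dtype, low_cpu_mem_usage, and .eval().cuda() are assumed from the
# common InternVL quick-start style; only `path` and the start of the
# from_pretrained call appear in the diff context itself.
path = 'OpenGVLab/Mono-InternVL-2B'
model = AutoModel.from_pretrained(
    path,
    torch_dtype=torch.bfloat16,
    low_cpu_mem_usage=True,
    trust_remote_code=True,  # the repo ships custom modeling code
).eval().cuda()
tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True)
```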