alessandro trinca tornidor committed on
Commit
88b60fb
·
1 Parent(s): 8c42585

[feat] add .idea files, update README.md

Browse files
.idea/LISA_REFACTOR.iml ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <module type="PYTHON_MODULE" version="4">
3
+ <component name="NewModuleRootManager">
4
+ <content url="file://$MODULE_DIR$" />
5
+ <orderEntry type="inheritedJdk" />
6
+ <orderEntry type="sourceFolder" forTests="false" />
7
+ </component>
8
+ <component name="PyDocumentationSettings">
9
+ <option name="format" value="GOOGLE" />
10
+ <option name="myDocStringFormat" value="Google" />
11
+ </component>
12
+ <component name="SonarLintModuleSettings">
13
+ <option name="uniqueId" value="875f314e-5ed1-4106-8048-37fa08d9c6e3" />
14
+ </component>
15
+ </module>
.idea/inspectionProfiles/Project_Default.xml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ <component name="InspectionProjectProfileManager">
2
+ <profile version="1.0">
3
+ <option name="myName" value="Project Default" />
4
+ <inspection_tool class="Eslint" enabled="true" level="WARNING" enabled_by_default="true" />
5
+ </profile>
6
+ </component>
.idea/inspectionProfiles/profiles_settings.xml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ <component name="InspectionProjectProfileManager">
2
+ <settings>
3
+ <option name="USE_PROJECT_PROFILE" value="false" />
4
+ <version value="1.0" />
5
+ </settings>
6
+ </component>
.idea/modules.xml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="ProjectModuleManager">
4
+ <modules>
5
+ <module fileurl="file://$PROJECT_DIR$/.idea/LISA_REFACTOR.iml" filepath="$PROJECT_DIR$/.idea/LISA_REFACTOR.iml" />
6
+ </modules>
7
+ </component>
8
+ </project>
README.md CHANGED
@@ -1,3 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
1
  [![Gradio](https://img.shields.io/badge/Gradio-Online%20Demo-blue)](http://103.170.5.190:7860/)
2
  [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/app-center/openxlab_app.svg)](https://openxlab.org.cn/apps/detail/openxlab-app/LISA)
3
 
@@ -7,7 +18,7 @@
7
 
8
  <font size=7><div align='center' > <a href=https://arxiv.org/pdf/2308.00692.pdf>**Paper**</a> | <a href="https://huggingface.co/xinlai">**Models**</a> | [**Training**](#training) | [**Inference**](#inference) | [**Local Deployment**](#deployment) | [**Dataset**](#dataset) | <a href="http://103.170.5.190:7860/">**Online Demo**</a></div></font>
9
 
10
- <!-- <p align="center"> <img src="imgs/teaser.jpg" width="100%"> </p> -->
11
 
12
  <table class="center">
13
  <tr>
@@ -17,9 +28,9 @@
17
  <td style="text-align:center;"><b>Output</b></td>
18
  </tr>
19
  <tr>
20
- <td><img src="imgs/obama.jpg"></td>
21
  <td><img src="vis_output/obama.jpg"></td>
22
- <td><img src="imgs/trump.jpg"></td>
23
  <td><img src="vis_output/trump.jpg"></td>
24
  </tr>
25
  <tr>
@@ -30,9 +41,9 @@
30
  </tr>
31
 
32
  <tr>
33
- <td><img src="imgs/stand_higher.jpg"></td>
34
  <td><img src="vis_output/stand_higher.jpg"></td>
35
- <td><img src="imgs/camera_lens.jpg"></td>
36
  <td><img src="vis_output/camera_lens.jpg"></td>
37
  </tr>
38
  <tr>
@@ -43,9 +54,9 @@
43
  </tr>
44
 
45
  <tr>
46
- <td><img src="imgs/dog_with_horn.jpg"></td>
47
  <td><img src="vis_output/dog_with_horn.jpg"></td>
48
- <td><img src="imgs/wash_hands.jpg"></td>
49
  <td><img src="vis_output/wash_hands.jpg"></td>
50
  </tr>
51
  <tr>
@@ -56,9 +67,9 @@
56
  </tr>
57
 
58
  <tr>
59
- <td><img src="imgs/jackma.jpg"></td>
60
  <td><img src="vis_output/jackma.jpg"></td>
61
- <td><img src="imgs/blackpink.jpg"></td>
62
  <td><img src="vis_output/blackpink.jpg"></td>
63
  </tr>
64
  <tr>
@@ -69,7 +80,7 @@
69
  </tr>
70
  </table>
71
 
72
- <p align="center"> <img src="imgs/fig_overview.jpg" width="100%"> </p>
73
 
74
  ## News
75
  - [x] [2023.8.30] Release three new models [LISA-7B-v1](https://huggingface.co/xinlai/LISA-7B-v1), [LISA-7B-v1-explanatory](https://huggingface.co/xinlai/LISA-7B-v1-explanatory), and [LISA-13B-llama2-v1-explanatory](https://huggingface.co/xinlai/LISA-13B-llama2-v1-explanatory). Welcome to check them out!
@@ -103,7 +114,7 @@ For more details, please refer to the [paper](https://arxiv.org/abs/2308.00692).
103
  **LISA** also demonstrates robust zero-shot capability when trained exclusively on reasoning-free datasets. In addition, fine-tuning the model with merely 239 reasoning segmentation image-instruction pairs results in further performance enhancement.
104
 
105
  ## Experimental results
106
- <p align="center"> <img src="imgs/table1.jpg" width="80%"> </p>
107
 
108
  ## Installation
109
  ```
@@ -131,44 +142,44 @@ Download them from the above links, and organize them as follows.
131
 
132
  ```
133
  ├── dataset
134
-    ├── ade20k
135
-       ├── annotations
136
-       └── images
137
-    ├── coco
138
-       └── train2017
139
-       ├── 000000000009.jpg
140
-       └── ...
141
-    ├── cocostuff
142
-       └── train2017
143
-       ├── 000000000009.png
144
-       └── ...
145
-    ├── llava_dataset
146
-       └── llava_instruct_150k.json
147
-    ├── mapillary
148
-       ├── config_v2.0.json
149
-       ├── testing
150
-       ├── training
151
-       └── validation
152
-    ├── reason_seg
153
-       └── ReasonSeg
154
-       ├── train
155
-       ├── val
156
-       └── explanatory
157
-    ├── refer_seg
158
-       ├── images
159
-       | ├── saiapr_tc-12
160
-       | └── mscoco
161
-       | └── images
162
-       | └── train2014
163
-       ├── refclef
164
-       ├── refcoco
165
-       ├── refcoco+
166
-       └── refcocog
167
-    └── vlpart
168
-    ├── paco
169
  │ │ └── annotations
170
-    └── pascal_part
171
-    ├── train.json
172
  │ └── VOCdevkit
173
  ```
174
 
@@ -253,7 +264,7 @@ After that, input the text prompt and then the image path. For example,
253
  - Please input the image path: imgs/example2.jpg
254
  ```
255
  The results should be like:
256
- <p align="center"> <img src="imgs/example1.jpg" width="22%"> <img src="vis_output/example1_masked_img_0.jpg" width="22%"> <img src="imgs/example2.jpg" width="25%"> <img src="vis_output/example2_masked_img_0.jpg" width="25%"> </p>
257
 
258
  ## Deployment
259
  ```
 
1
+ ---
2
+ title: Lisa On Gpu
3
+ emoji: 📊
4
+ colorFrom: yellow
5
+ colorTo: red
6
+ sdk: docker
7
+ pinned: false
8
+ ---
9
+
10
+ (Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference)
11
+
12
  [![Gradio](https://img.shields.io/badge/Gradio-Online%20Demo-blue)](http://103.170.5.190:7860/)
13
  [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/app-center/openxlab_app.svg)](https://openxlab.org.cn/apps/detail/openxlab-app/LISA)
14
 
 
18
 
19
  <font size=7><div align='center' > <a href=https://arxiv.org/pdf/2308.00692.pdf>**Paper**</a> | <a href="https://huggingface.co/xinlai">**Models**</a> | [**Training**](#training) | [**Inference**](#inference) | [**Local Deployment**](#deployment) | [**Dataset**](#dataset) | <a href="http://103.170.5.190:7860/">**Online Demo**</a></div></font>
20
 
21
+ <!-- <p align="center"> <img src="resources/imgs/teaser.jpg" width="100%"> </p> -->
22
 
23
  <table class="center">
24
  <tr>
 
28
  <td style="text-align:center;"><b>Output</b></td>
29
  </tr>
30
  <tr>
31
+ <td><img src="resources/imgs/obama.jpg"></td>
32
  <td><img src="vis_output/obama.jpg"></td>
33
+ <td><img src="resources/imgs/trump.jpg"></td>
34
  <td><img src="vis_output/trump.jpg"></td>
35
  </tr>
36
  <tr>
 
41
  </tr>
42
 
43
  <tr>
44
+ <td><img src="resources/imgs/stand_higher.jpg"></td>
45
  <td><img src="vis_output/stand_higher.jpg"></td>
46
+ <td><img src="resources/imgs/camera_lens.jpg"></td>
47
  <td><img src="vis_output/camera_lens.jpg"></td>
48
  </tr>
49
  <tr>
 
54
  </tr>
55
 
56
  <tr>
57
+ <td><img src="resources/imgs/dog_with_horn.jpg"></td>
58
  <td><img src="vis_output/dog_with_horn.jpg"></td>
59
+ <td><img src="resources/imgs/wash_hands.jpg"></td>
60
  <td><img src="vis_output/wash_hands.jpg"></td>
61
  </tr>
62
  <tr>
 
67
  </tr>
68
 
69
  <tr>
70
+ <td><img src="resources/imgs/jackma.jpg"></td>
71
  <td><img src="vis_output/jackma.jpg"></td>
72
+ <td><img src="resources/imgs/blackpink.jpg"></td>
73
  <td><img src="vis_output/blackpink.jpg"></td>
74
  </tr>
75
  <tr>
 
80
  </tr>
81
  </table>
82
 
83
+ <p align="center"> <img src="resources/imgs/fig_overview.jpg" width="100%"> </p>
84
 
85
  ## News
86
  - [x] [2023.8.30] Release three new models [LISA-7B-v1](https://huggingface.co/xinlai/LISA-7B-v1), [LISA-7B-v1-explanatory](https://huggingface.co/xinlai/LISA-7B-v1-explanatory), and [LISA-13B-llama2-v1-explanatory](https://huggingface.co/xinlai/LISA-13B-llama2-v1-explanatory). Welcome to check them out!
 
114
  **LISA** also demonstrates robust zero-shot capability when trained exclusively on reasoning-free datasets. In addition, fine-tuning the model with merely 239 reasoning segmentation image-instruction pairs results in further performance enhancement.
115
 
116
  ## Experimental results
117
+ <p align="center"> <img src="resources/imgs/table1.jpg" width="80%"> </p>
118
 
119
  ## Installation
120
  ```
 
142
 
143
  ```
144
  ├── dataset
145
+ ├── ade20k
146
+ ├── annotations
147
+ └── images
148
+ ├── coco
149
+ └── train2017
150
+ ├── 000000000009.jpg
151
+ └── ...
152
+ ├── cocostuff
153
+ └── train2017
154
+ ├── 000000000009.png
155
+ └── ...
156
+ ├── llava_dataset
157
+ └── llava_instruct_150k.json
158
+ ├── mapillary
159
+ ├── config_v2.0.json
160
+ ├── testing
161
+ ├── training
162
+ └── validation
163
+ ├── reason_seg
164
+ └── ReasonSeg
165
+ ├── train
166
+ ├── val
167
+ └── explanatory
168
+ ├── refer_seg
169
+ ├── images
170
+ | ├── saiapr_tc-12
171
+ | └── mscoco
172
+ | └── images
173
+ | └── train2014
174
+ ├── refclef
175
+ ├── refcoco
176
+ ├── refcoco+
177
+ └── refcocog
178
+ └── vlpart
179
+ ├── paco
180
  │ │ └── annotations
181
+ └── pascal_part
182
+ ├── train.json
183
  │ └── VOCdevkit
184
  ```
185
 
 
264
  - Please input the image path: imgs/example2.jpg
265
  ```
266
  The results should be like:
267
+ <p align="center"> <img src="resources/imgs/example1.jpg" width="22%"> <img src="vis_output/example1_masked_img_0.jpg" width="22%"> <img src="resources/imgs/example2.jpg" width="25%"> <img src="vis_output/example2_masked_img_0.jpg" width="25%"> </p>
268
 
269
  ## Deployment
270
  ```