test3
#8
by
amirsala7
- opened
- .gitattributes +0 -2
- README.md +0 -6
- modeling_kosmos2.py +1 -2
- pikachu.png +0 -3
- pikachu.webp +0 -0
- pikachu_bbox.png +0 -3
- tokenization_kosmos2_fast.py +1 -4
.gitattributes
CHANGED
@@ -34,5 +34,3 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
snowman.png filter=lfs diff=lfs merge=lfs -text
|
37 |
-
pikachu.png filter=lfs diff=lfs merge=lfs -text
|
38 |
-
pikachu_bbox.png filter=lfs diff=lfs merge=lfs -text
|
|
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
snowman.png filter=lfs diff=lfs merge=lfs -text
|
|
|
|
README.md
CHANGED
@@ -5,12 +5,6 @@
|
|
5 |
---
|
6 |
# Kosmos-2: Grounding Multimodal Large Language Models to the World
|
7 |
|
8 |
-
**This model (remote code on the Hub) is deprecated. Please use https://huggingface.co/microsoft/kosmos-2-patch14-224**
|
9 |
-
|
10 |
-
**There are some changes in terms of input formats: see the model card in https://huggingface.co/microsoft/kosmos-2-patch14-224**
|
11 |
-
|
12 |
-
~~**(There is an on going effort to port `Kosmos-2` directly into `transformers`. This repository (remote code) might need some more bug fixes later, including breaking changes.)**~~
|
13 |
-
|
14 |
<a href="https://huggingface.co/ydshieh/kosmos-2-patch14-224/resolve/main/annotated_snowman.jpg" target="_blank"><figure><img src="https://huggingface.co/ydshieh/kosmos-2-patch14-224/resolve/main/annotated_snowman.jpg" width="384"><figcaption><b>[An image of a snowman warming himself by a fire.]</b></figcaption></figure></a>
|
15 |
|
16 |
|
|
|
5 |
---
|
6 |
# Kosmos-2: Grounding Multimodal Large Language Models to the World
|
7 |
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
<a href="https://huggingface.co/ydshieh/kosmos-2-patch14-224/resolve/main/annotated_snowman.jpg" target="_blank"><figure><img src="https://huggingface.co/ydshieh/kosmos-2-patch14-224/resolve/main/annotated_snowman.jpg" width="384"><figcaption><b>[An image of a snowman warming himself by a fire.]</b></figcaption></figure></a>
|
9 |
|
10 |
|
modeling_kosmos2.py
CHANGED
@@ -22,7 +22,6 @@ from typing import List, Optional, Tuple, Union
|
|
22 |
import torch
|
23 |
import torch.utils.checkpoint
|
24 |
from torch import nn
|
25 |
-
from torch.nn import CrossEntropyLoss
|
26 |
|
27 |
from transformers.activations import ACT2FN
|
28 |
from transformers.modeling_outputs import (
|
@@ -1008,7 +1007,7 @@ class Kosmos2TextTransformer(nn.Module):
|
|
1008 |
inputs_embeds = self.embed_tokens(input_ids)
|
1009 |
|
1010 |
if img_features is not None:
|
1011 |
-
inputs_embeds[img_input_mask.to(dtype=torch.bool)] = img_features
|
1012 |
|
1013 |
inputs_embeds = inputs_embeds * self.embed_scale
|
1014 |
|
|
|
22 |
import torch
|
23 |
import torch.utils.checkpoint
|
24 |
from torch import nn
|
|
|
25 |
|
26 |
from transformers.activations import ACT2FN
|
27 |
from transformers.modeling_outputs import (
|
|
|
1007 |
inputs_embeds = self.embed_tokens(input_ids)
|
1008 |
|
1009 |
if img_features is not None:
|
1010 |
+
inputs_embeds[img_input_mask.to(dtype=torch.bool)] = img_features
|
1011 |
|
1012 |
inputs_embeds = inputs_embeds * self.embed_scale
|
1013 |
|
pikachu.png
DELETED
Git LFS Details
|
pikachu.webp
DELETED
Binary file (35.4 kB)
|
|
pikachu_bbox.png
DELETED
Git LFS Details
|
tokenization_kosmos2_fast.py
CHANGED
@@ -137,6 +137,7 @@ class Kosmos2TokenizerFast(PreTrainedTokenizerFast):
|
|
137 |
)
|
138 |
|
139 |
self.vocab_file = vocab_file
|
|
|
140 |
|
141 |
self.eod_token = "</doc>"
|
142 |
|
@@ -178,10 +179,6 @@ class Kosmos2TokenizerFast(PreTrainedTokenizerFast):
|
|
178 |
# we need to set `special_tokens=False` to be the same as in the slow tokenizer.
|
179 |
self.add_tokens(AddedToken(token, lstrip=True, rstrip=False), special_tokens=False)
|
180 |
|
181 |
-
@property
|
182 |
-
def can_save_slow_tokenizer(self) -> bool:
|
183 |
-
return os.path.isfile(self.vocab_file) if self.vocab_file else False
|
184 |
-
|
185 |
def build_inputs_with_special_tokens(
|
186 |
self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
|
187 |
) -> List[int]:
|
|
|
137 |
)
|
138 |
|
139 |
self.vocab_file = vocab_file
|
140 |
+
self.can_save_slow_tokenizer = False if not self.vocab_file else True
|
141 |
|
142 |
self.eod_token = "</doc>"
|
143 |
|
|
|
179 |
# we need to set `special_tokens=False` to be the same as in the slow tokenizer.
|
180 |
self.add_tokens(AddedToken(token, lstrip=True, rstrip=False), special_tokens=False)
|
181 |
|
|
|
|
|
|
|
|
|
182 |
def build_inputs_with_special_tokens(
|
183 |
self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
|
184 |
) -> List[int]:
|