ddobokki commited on
Commit
fdef93d
β€’
1 Parent(s): b632735

add: README.md

Browse files
Files changed (1) hide show
  1. README.md +10 -2
README.md CHANGED
@@ -1,4 +1,5 @@
1
- ```
 
2
  import requests
3
  from PIL import Image
4
  from transformers import (
@@ -7,18 +8,25 @@ from transformers import (
7
  PreTrainedTokenizerFast,
8
  )
9
 
 
10
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 
 
11
  encoder_model_name_or_path = "ddobokki/vision-encoder-decoder-vit-gpt2-coco-ko"
12
  feature_extractor = ViTFeatureExtractor.from_pretrained(encoder_model_name_or_path)
13
  tokenizer = PreTrainedTokenizerFast.from_pretrained(encoder_model_name_or_path)
 
 
14
  model = VisionEncoderDecoderModel.from_pretrained(encoder_model_name_or_path)
15
  model.to(device)
16
 
 
17
  url = 'http://images.cocodataset.org/val2017/000000039769.jpg'
18
  with Image.open(requests.get(url, stream=True).raw) as img:
19
  pixel_values = feature_extractor(images=img, return_tensors="pt").pixel_values
20
 
21
  generated_ids = model.generate(pixel_values.to(device),num_beams=5)
22
  generated_text = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
23
- print(generated_text)
 
24
  ```
 
1
+ ## EXAMPLE
2
+ ```python
3
  import torch
  import requests
4
  from PIL import Image
5
  from transformers import (
 
8
  PreTrainedTokenizerFast,
9
  )
10
 
11
+ # device setting
12
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
13
+
14
+ # load feature extractor and tokenizer
15
  encoder_model_name_or_path = "ddobokki/vision-encoder-decoder-vit-gpt2-coco-ko"
16
  feature_extractor = ViTFeatureExtractor.from_pretrained(encoder_model_name_or_path)
17
  tokenizer = PreTrainedTokenizerFast.from_pretrained(encoder_model_name_or_path)
18
+
19
+ # load model
20
  model = VisionEncoderDecoderModel.from_pretrained(encoder_model_name_or_path)
21
  model.to(device)
22
 
23
+ # inference
24
  url = 'http://images.cocodataset.org/val2017/000000039769.jpg'
25
  with Image.open(requests.get(url, stream=True).raw) as img:
26
  pixel_values = feature_extractor(images=img, return_tensors="pt").pixel_values
27
 
28
  generated_ids = model.generate(pixel_values.to(device), num_beams=5)
29
  generated_text = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
30
+
31
+ # >> ['고양이 λ‘λ§ˆλ¦¬κ°€ μ†ŒνŒŒμ— λˆ„μ›Œ μžˆλ‹€.']
32
  ```