Pittawat Taveekitworachai committed on
Commit
75ec781
·
0 Parent(s):

feat: add demo

Browse files
Files changed (2) hide show
  1. demo.py +115 -0
  2. requirements.txt +7 -0
demo.py ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Typhoon 2 vision-language chat demo: a Qwen2-VL checkpoint served via Gradio.
from transformers import (
    Qwen2VLForConditionalGeneration,
    AutoProcessor,
    TextIteratorStreamer,
)
from PIL import Image
from threading import Thread
import gradio as gr

# Load the model once at startup. device_map="auto" lets accelerate place the
# weights on whatever device(s) are available; torch_dtype="auto" keeps the
# dtype stored in the checkpoint instead of upcasting to float32.
model_name = "scb10x/typhoon2-qwen2vl-7b-vision-instruct"
model = Qwen2VLForConditionalGeneration.from_pretrained(
    model_name, torch_dtype="auto", device_map="auto"
)
# The processor bundles the tokenizer/chat template with image preprocessing.
processor = AutoProcessor.from_pretrained(model_name)
15
+
16
+
17
def bot_streaming(message, history, max_new_tokens=512):
    """Stream a chat reply from the Typhoon 2 vision model.

    Args:
        message: Gradio multimodal message dict with "text" and "files" keys.
        history: Prior turns as [user, assistant] pairs; an image upload is a
            turn whose user part is a tuple of file path(s).
        max_new_tokens: Upper bound on generated tokens (wired to the UI
            slider; defaults to 512).

    Yields:
        str: The accumulated response text after each streamed token chunk.
    """
    txt = message["text"]

    messages = []
    images = []

    # Rebuild the chat-template message list (and the parallel image list)
    # from the gradio history. An image upload occupies one history entry
    # whose user part is a tuple of path(s); the text the user typed with it
    # lands in the *next* entry, so the two get merged into one user message.
    for i, msg in enumerate(history):
        if isinstance(msg[0], tuple):
            # Image turn: pair the image with the following text turn.
            messages.append(
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": history[i + 1][0]},
                        {"type": "image"},
                    ],
                }
            )
            messages.append(
                {
                    "role": "assistant",
                    "content": [{"type": "text", "text": history[i + 1][1]}],
                }
            )
            images.append(Image.open(msg[0][0]).convert("RGB"))
        elif i > 0 and isinstance(history[i - 1][0], tuple) and isinstance(msg[0], str):
            # Text turn directly after an image turn: already merged into the
            # image message above, so skip it. (Fix: the original checked
            # isinstance(history[i - 1], tuple), which is never true because
            # gradio history entries are [user, assistant] lists.)
            pass
        elif isinstance(msg[0], str) and (
            i == 0 or isinstance(history[i - 1][0], str)
        ):
            # Plain text turn. The i == 0 guard fixes the original's
            # history[i - 1] wrap-around to history[-1], which silently
            # dropped the first turn whenever the most recent turn was an
            # image upload.
            messages.append(
                {"role": "user", "content": [{"type": "text", "text": msg[0]}]}
            )
            messages.append(
                {"role": "assistant", "content": [{"type": "text", "text": msg[1]}]}
            )

    # Append the current user turn, with its image if one was attached.
    if len(message["files"]) == 1:
        # Gradio may hand the upload over as a bare path or as a file dict.
        if isinstance(message["files"][0], str):
            image = Image.open(message["files"][0]).convert("RGB")
        else:
            image = Image.open(message["files"][0]["path"]).convert("RGB")
        images.append(image)
        messages.append(
            {
                "role": "user",
                "content": [{"type": "text", "text": txt}, {"type": "image"}],
            }
        )
    else:
        messages.append({"role": "user", "content": [{"type": "text", "text": txt}]})

    texts = processor.apply_chat_template(messages, add_generation_prompt=True)

    # Fix: move inputs to the model's own device instead of hard-coding
    # "cuda", so the demo also works on CPU-only hosts and respects whatever
    # placement device_map="auto" chose.
    if not images:
        inputs = processor(text=texts, return_tensors="pt").to(model.device)
    else:
        inputs = processor(text=texts, images=images, return_tensors="pt").to(
            model.device
        )

    streamer = TextIteratorStreamer(
        processor, skip_special_tokens=True, skip_prompt=True
    )

    generation_kwargs = dict(
        inputs,
        streamer=streamer,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature=0.6,
        top_p=0.9,
    )

    # Run generation on a worker thread so tokens can be yielded as they
    # arrive from the streamer.
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()
    buffer = ""

    for new_text in streamer:
        buffer += new_text
        yield buffer
94
+
95
+
96
# UI: a single multimodal chat pane plus a token-budget slider that is passed
# through to bot_streaming's max_new_tokens parameter.
token_slider = gr.Slider(
    minimum=512,
    maximum=1024,
    value=512,
    step=1,
    label="Maximum number of new tokens to generate",
)

demo = gr.ChatInterface(
    bot_streaming,
    multimodal=True,
    textbox=gr.MultimodalTextbox(),
    additional_inputs=[token_slider],
    title="Typhoon 2 Vision",
    stop_btn="Stop Generation",
    cache_examples=False,
    fill_height=True,
)

# debug=True surfaces server-side tracebacks in the console while developing.
demo.launch(debug=True)
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ torch
2
+ torchvision
3
+ torchaudio
4
+ transformers
5
+ pillow
6
+ gradio
7
+ accelerate