sandz7 committed
Commit
a423985
·
1 Parent(s): 5613b72

added async

Files changed (1)
  1. app.py +33 -65
app.py CHANGED
@@ -18,18 +18,17 @@ DESCRIPTION = '''
 <p>This contains a Stable Diffusor from <a href="https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0"><b>stabilityai/stable-diffusion-xl-base-1.0</b></a> and a Multimodal from <a href="https://huggingface.co/xtuner/llava-llama-3-8b-v1_1-transformers"><b>xtuner/llava-llama-3-8b-v1_1-transformers</b></a></p>
 </div>
 '''
+
 # Llava Installed
 llava_model = LlavaForConditionalGeneration.from_pretrained(
     "xtuner/llava-llama-3-8b-v1_1-transformers",
     torch_dtype=torch.float16,
     low_cpu_mem_usage=True,
-)
+)
 
 llava_model.to("cuda:0")
-
 processor = AutoProcessor.from_pretrained("xtuner/llava-llama-3-8b-v1_1-transformers")
-
-llava_model.generation_config.eos_token_id=128009
+llava_model.generation_config.eos_token_id = 128009
 
 # Stable Diffusor Installed
 base = DiffusionPipeline.from_pretrained(
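
Aside (not part of the commit): 128009 is the id of the <|eot_id|> end-of-turn token in the Llama 3 tokenizer this checkpoint ships with, so the assignment above makes generation stop at that token. If the hard-coded value ever needs checking, a short sketch like this can look it up from the loaded processor; it assumes the checkpoint can be pulled from the Hub, as in the code above.

# Sketch only: look the token id up rather than hard-coding 128009.
from transformers import AutoProcessor

processor = AutoProcessor.from_pretrained("xtuner/llava-llama-3-8b-v1_1-transformers")
eot_id = processor.tokenizer.convert_tokens_to_ids("<|eot_id|>")
print(eot_id)  # expected to print 128009
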
@@ -50,67 +49,44 @@ refiner = DiffusionPipeline.from_pretrained(
 )
 refiner.to('cuda')
 
-# All Installed. Let's instance them in the function
 def multimodal_and_generation(message, history):
-    """
-    Receives input from gradio from the prompt but also
-    if any images were passed that i also placed for formatting
-    for PIL and with the prompt both are passed to proper generation,
-    depending on the request from prompt, that prompt output will return here.
-    """
     print(f"Message:\n{message}\nType:\n{type(message)}")
     image_path = None
     if message["files"]:
-        if type(message["files"][-1]) == dict:
+        if isinstance(message["files"][-1], dict):
             image_path = message["files"][-1]["path"]
         else:
             image_path = message["files"][-1]
     else:
-        # If no image was uploaded than look for past ones
         for hist in history:
-            if type(hist[0]) == tuple:
-                image_path = hist[0][0] # item inside items for history
+            if isinstance(hist[0], tuple):
+                image_path = hist[0][0]
 
     if image_path is None:
         input_prompt = message["text"]
-        # base_prompt = '''gpt response: {input_prompt}'''
-        # prompt_formatted = base_prompt.format(input_prompt=input_prompt)
-        # GPT Generation
         client = OpenAI(api_key=API_KEY)
         stream = client.chat.completions.create(
             model="gpt-3.5-turbo",
-            messages=[{"role": "system", "content": "You are a helpful assistant called 'chimera'."},
-                      {"role": "user", "content": input_prompt}],
+            messages=[
+                {"role": "system", "content": "You are a helpful assistant called 'chimera'."},
+                {"role": "user", "content": input_prompt}
+            ],
             stream=True,
         )
         return stream
-
     else:
-        prompt = f"<|start_header_id|>user<|end_header_id|>\n\n<image>\n{message['text']}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"
-        # Time to instance the llava
+        prompt = f"<|start_header_id|>user<|end_header_id|>\n\n<image>\n{message['text']}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"
         image = Image.open(image_path)
         inputs = processor(prompt, image, return_tensors='pt').to(0, torch.float16)
-        streamer = TextIteratorStreamer(processor, **{"skip_special_tokens": False, "skip_prompt": True})
+        streamer = TextIteratorStreamer(processor.tokenizer, **{"skip_special_tokens": False, "skip_prompt": True})
        generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=1024, do_sample=False)
 
        thread = threading.Thread(target=llava_model.generate, kwargs=generation_kwargs)
        thread.start()
 
-        # buffer = ""
-        # for new_text in streamer:
-        #     # find <|eot_id|> and remove it from the new_text
-        #     if "<|eot_id|>" in new_text:
-        #         new_text = new_text.split("<|eot_id|>")[0]
-        #     buffer += new_text
-        #     generated_text_no_prompt = buffer
-        #     yield generated_text_no_prompt
        return streamer
-
+
 def diffusing(prompt):
-    """
-    Uses stable diffusion on the prompt and
-    returns the image.
-    """
    image = base(
        prompt=prompt,
        num_inference_steps=40,
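
Aside (not part of the commit): the hunk above hands the streamer processor.tokenizer rather than the whole processor, which is what TextIteratorStreamer decodes with, and it relies on the generate-in-a-thread pattern: generation runs in a worker thread while the caller iterates the streamer for partial text. A self-contained sketch of that pattern, using gpt2 purely as a stand-in model (an illustrative assumption, not this app's model):

import threading

from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")

inputs = tokenizer("Streaming text generation works by", return_tensors="pt")
streamer = TextIteratorStreamer(tokenizer, skip_special_tokens=True, skip_prompt=True)
generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=32, do_sample=False)

# generate() blocks until it finishes, so it runs in a worker thread while the
# main thread drains the streamer and receives each decoded chunk as it is produced.
thread = threading.Thread(target=model.generate, kwargs=generation_kwargs)
thread.start()
for new_text in streamer:
    print(new_text, end="", flush=True)
thread.join()
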
@@ -135,62 +111,54 @@ def check_cuda_availability():
 mode = ""
 
 @spaces.GPU(duration=120)
-def bot_comms(message,
-              history):
-    """
-    Communication between gradio and the models.
-    """
+async def bot_comms(message, history):
     global mode
 
     if message == "check cuda":
         result = check_cuda_availability()
         yield result
         return
-
+
     if message == "imagery":
         mode = message
         yield "Imagery On! Type your prompt to make the image 🖼️"
         return
-
+
     if message == "chatting":
         mode = message
         yield "Imagery Off. Ask me any questions. ☄️"
         return
-
+
     if mode == "imagery":
         print("On imagery\n\n")
         image = diffusing(
-            message=message,
-            history=history,
+            prompt=message,
         )
-        return image
-
-    buffer = ""
-    gpt_outputs = []
+        yield image
+        return
+
     if mode == "chatting" or mode == "":
         print("On chatting or no mode.\n\n")
         stream = multimodal_and_generation(
             message=message,
             history=history,
         )
-        streamer_text = [text for text in stream]
-        if "<|eot_id|>" not in streamer_text:
+        if isinstance(stream, TextIteratorStreamer):
+            buffer = ""
+            for new_text in stream:
+                if "<|eot_id|>" in new_text:
+                    new_text = new_text.split("<|eot_id|>")[0]
+                buffer += new_text
+                yield buffer
+        else:
+            gpt_outputs = []
             for chunk in stream:
                 if chunk.choices[0].delta.content is not None:
                     text = chunk.choices[0].delta.content
                     gpt_outputs.append(text)
                     yield "".join(gpt_outputs)
-        else:
-            for text in stream:
-                # find <|eot_id|> and remove it from the text
-                if "<|eot_id|>" in text:
-                    text = text.split("<|eot_id|>")[0]
-                buffer += text
-                generated_text = buffer
-                yield generated_text
-
-
-chatbot=gr.Chatbot(height=600, label="Chimera AI")
+
+chatbot = gr.Chatbot(height=600, label="Chimera AI")
 chat_input = gr.MultimodalTextbox(interactive=True, file_types=["images"], placeholder="Enter your question or upload an image.", show_label=False)
 with gr.Blocks(fill_height=True) as demo:
     gr.Markdown(DESCRIPTION)
@@ -203,4 +171,4 @@ with gr.Blocks(fill_height=True) as demo:
     )
 
 if __name__ == "__main__":
-    demo.launch()
+    demo.launch()
 
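
Aside (not part of the commit): the headline change is that bot_comms becomes an async generator, which Gradio can call directly and stream from, so each yielded string replaces the in-progress reply in the chat UI. A stripped-down sketch of that shape; the echo reply and the gr.ChatInterface wiring are illustrative assumptions rather than this app's actual setup, which builds a gr.Blocks layout around a MultimodalTextbox.

import asyncio

import gradio as gr

async def bot_comms(message, history):
    # Yield progressively longer strings; Gradio shows each one as the
    # current state of the streamed reply.
    buffer = ""
    for word in f"Echo: {message}".split():
        buffer += word + " "
        await asyncio.sleep(0.05)
        yield buffer

demo = gr.ChatInterface(fn=bot_comms)

if __name__ == "__main__":
    demo.launch()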