sandz7 committed
Commit 3f8d160 · 1 Parent(s): 0d88f51

revised it

Files changed (1)
  1. app.py +58 -117
app.py CHANGED
@@ -3,15 +3,12 @@ from transformers import TextIteratorStreamer, AutoProcessor, LlavaForConditionalGeneration
 from diffusers import DiffusionPipeline
 import gradio as gr
 import numpy as np
-import accelerate
-import spaces
 from PIL import Image
 import threading
-from openai import OpenAI
+import openai
 import os
-import asyncio
-from typing import Any
 
+# Retrieve the OpenAI API key from the environment
 API_KEY = os.getenv('OPEN_AI_API_KEYS')
 
 DESCRIPTION = '''
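A note on the import change above: the commit swaps "from openai import OpenAI" for a bare "import openai". Both resolve to the v1 SDK, where a client object is constructed explicitly instead of calling module-level functions. A minimal sketch, assuming OPEN_AI_API_KEYS is set in the Space's secrets:

    import os
    import openai

    # v1.x SDK style: build an explicit client; module-level calls are the legacy (<1.0) API
    client = openai.OpenAI(api_key=os.getenv('OPEN_AI_API_KEYS'))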
@@ -21,7 +18,7 @@ DESCRIPTION = '''
 </div>
 '''
 
-# Llava Installed
+# Initialize the models
 llava_model = LlavaForConditionalGeneration.from_pretrained(
     "xtuner/llava-llama-3-8b-v1_1-transformers",
     torch_dtype=torch.float16,
@@ -29,10 +26,12 @@ llava_model = LlavaForConditionalGeneration.from_pretrained(
 )
 
 llava_model.to("cuda:0")
+
 processor = AutoProcessor.from_pretrained("xtuner/llava-llama-3-8b-v1_1-transformers")
+
 llava_model.generation_config.eos_token_id = 128009
 
-# Stable Diffusor Installed
+# Initialize Stable Diffusion pipelines
 base = DiffusionPipeline.from_pretrained(
     "stabilityai/stable-diffusion-xl-base-1.0",
     torch_dtype=torch.float16,
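The refiner initialization (old lines 39-51) is collapsed by the diff; only refiner.to('cuda') is visible below. For orientation, the conventional SDXL two-stage setup in diffusers hands the base pipeline's second text encoder and VAE to the refiner; a sketch of that standard pattern, not necessarily this repo's exact code:

    import torch
    from diffusers import DiffusionPipeline

    base = DiffusionPipeline.from_pretrained(
        "stabilityai/stable-diffusion-xl-base-1.0",
        torch_dtype=torch.float16,
        use_safetensors=True,
    )
    base.to("cuda")

    # The refiner reuses text_encoder_2 and the VAE from the base pipeline to save VRAM
    refiner = DiffusionPipeline.from_pretrained(
        "stabilityai/stable-diffusion-xl-refiner-1.0",
        text_encoder_2=base.text_encoder_2,
        vae=base.vae,
        torch_dtype=torch.float16,
        use_safetensors=True,
    )
    refiner.to("cuda")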
@@ -52,27 +51,28 @@ refiner = DiffusionPipeline.from_pretrained(
 refiner.to('cuda')
 
 def multimodal_and_generation(message, history):
+    """
+    Generates a response based on the input message and optionally an image.
+    """
     print(f"Message:\n{message}\nType:\n{type(message)}")
     image_path = None
-    if message["files"]:
-        if isinstance(message["files"][-1], dict):
+    if "files" in message and message["files"]:
+        if type(message["files"][-1]) == dict:
             image_path = message["files"][-1]["path"]
         else:
             image_path = message["files"][-1]
     else:
         for hist in history:
-            if isinstance(hist[0], tuple):
+            if type(hist[0]) == tuple:
                 image_path = hist[0][0]
 
     if image_path is None:
         input_prompt = message["text"]
-        client = OpenAI(api_key=API_KEY)
-        stream = client.chat.completions.create(
+        client = openai.OpenAI(api_key=API_KEY)
+        stream = client.ChatCompletion.create(
             model="gpt-3.5-turbo",
-            messages=[
-                {"role": "system", "content": "You are a helpful assistant called 'chimera'."},
-                {"role": "user", "content": input_prompt}
-            ],
+            messages=[{"role": "system", "content": "You are a helpful assistant called 'chimera'."},
+                      {"role": "user", "content": input_prompt}],
             stream=True,
         )
         return stream
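One caution on the rewritten branch above: client.ChatCompletion.create mixes two generations of the OpenAI SDK. ChatCompletion was the module-level API removed in v1.0, and it is not an attribute of the openai.OpenAI client this code constructs, so this branch would raise AttributeError at call time. The removed lines already used the correct v1 call; a corrected sketch of the streaming request:

    client = openai.OpenAI(api_key=API_KEY)
    stream = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": "You are a helpful assistant called 'chimera'."},
            {"role": "user", "content": input_prompt},
        ],
        stream=True,  # iterate the result to receive ChatCompletionChunk deltas
    )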
@@ -87,8 +87,11 @@ def multimodal_and_generation(message, history):
     thread.start()
 
     return streamer
-
+
 def diffusing(prompt):
+    """
+    Generates an image using Stable Diffusion based on the input prompt.
+    """
     image = base(
         prompt=prompt,
         num_inference_steps=40,
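The LLaVA branch that builds streamer and thread sits in lines the diff elides (old 79-86), so only thread.start() and return streamer are visible above. The usual transformers pattern those lines imply looks roughly like the sketch below; the prompt template and generation kwargs are assumptions, not the repo's code:

    import threading
    import torch
    from transformers import TextIteratorStreamer

    # Hypothetical reconstruction of the elided setup; assumes processor, llava_model,
    # prompt, and image come from the surrounding file
    inputs = processor(text=prompt, images=image, return_tensors="pt").to("cuda:0", torch.float16)
    streamer = TextIteratorStreamer(processor.tokenizer, skip_prompt=True, skip_special_tokens=True)
    # generate() runs on a worker thread so the caller can iterate the streamer as tokens arrive
    thread = threading.Thread(
        target=llava_model.generate,
        kwargs=dict(**inputs, streamer=streamer, max_new_tokens=1024),
    )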
@@ -105,124 +108,62 @@ def diffusing(prompt):
 
 def check_cuda_availability():
     if torch.cuda.is_available():
-        result = f"GPU: {torch.cuda.get_device_name(0)}"
-        return result
+        return f"GPU: {torch.cuda.get_device_name(0)}"
     else:
         return "No CUDA device found."
 
 mode = ""
 
-# @spaces.GPU(duration=120)
-# async def bot_comms(message, history):
-#     global mode
-
-#     if message == "check cuda":
-#         result = check_cuda_availability()
-#         yield result
-#         return
-
-#     if message == "imagery":
-#         mode = message
-#         yield "Imagery On! Type your prompt to make the image 🖼️"
-#         return
-
-#     if message == "chatting":
-#         mode = message
-#         yield "Imagery Off. Ask me any questions. ☄️"
-#         return
-
-#     if mode == "imagery":
-#         print("On imagery\n\n")
-#         image = diffusing(
-#             prompt=message,
-#         )
-#         yield image
-#         return
-
-#     if mode == "chatting" or mode == "":
-#         print("On chatting or no mode.\n\n")
-#         stream = multimodal_and_generation(
-#             message=message,
-#             history=history,
-#         )
-#         gpt_outputs = []
-#         async for chunk in stream:
-#             if chunk.choices[0].delta.content is not None:
-#                 text = chunk.choices[0].delta.content
-#                 gpt_outputs.append(text)
-#                 yield "".join(gpt_outputs)
-
-async def bot_comms_async(message, history):
+@spaces.GPU(duration=120)
+def bot_comms(message, history):
+    """
+    Handles communication between Gradio and the models.
+    """
     global mode
 
     if message == "check cuda":
-        result = check_cuda_availability()
-        return [result]
-
+        yield check_cuda_availability()
+        return
+
     if message == "imagery":
         mode = message
-        return ["Imagery On! Type your prompt to make the image 🖼️"]
-
+        yield "Imagery On! Type your prompt to make the image 🖼️"
+        return
+
     if message == "chatting":
         mode = message
-        return ["Imagery Off. Ask me any questions. ☄️"]
-
+        yield "Imagery Off. Ask me any questions. ☄️"
+        return
+
     if mode == "imagery":
         print("On imagery\n\n")
-        image = diffusing(prompt=message)
-        return [image]
-
+        image = diffusing(message)
+        yield image
+        return
+
+    buffer = ""
+    gpt_outputs = []
     if mode == "chatting" or mode == "":
         print("On chatting or no mode.\n\n")
-        stream = multimodal_and_generation(message=message, history=history)
-        gpt_outputs = []
-        async for chunk in stream:
-            if chunk.choices[0].delta.content is not None:
+        stream = multimodal_and_generation(message, history)
+        for chunk in stream:
+            if hasattr(chunk.choices[0].delta, "content"):
                 text = chunk.choices[0].delta.content
                 gpt_outputs.append(text)
-        return ["".join(gpt_outputs)]
-
-@spaces.GPU(duration=120)
-def bot_comms(message: str, history: Any):
-    return asyncio.run(bot_comms_async(message, history))
-
-# Define your Gradio UI as usual
-import gradio as gr
-
-with gr.Blocks() as demo:
-    chatbot = gr.Chatbot()
-    msg = gr.Textbox()
-    with gr.Row():
-        submit = gr.Button("Submit")
-
-    def user(message, history):
-        return "", history + [[message, None]]
-
-    def bot_response(message, history):
-        response = bot_comms(message, history)
-        return history + [[message, response]]
-
-    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
-        bot_response, [msg, chatbot], [msg, chatbot]
-    )
-    submit.click(user, [msg, chatbot], [msg, chatbot], queue=False).then(
-        bot_response, [msg, chatbot], [msg, chatbot]
+                yield "".join(gpt_outputs)
+
+chatbot = gr.Chatbot(height=600, label="Chimera AI")
+chat_input = gr.MultimodalTextbox(interactive=True, file_types=["images"], placeholder="Enter your question or upload an image.", show_label=False)
+
+with gr.Blocks(fill_height=True) as demo:
+    gr.Markdown(DESCRIPTION)
+    gr.ChatInterface(
+        fn=bot_comms,
+        chatbot=chatbot,
+        fill_height=True,
+        multimodal=True,
+        textbox=chat_input,
     )
 
 if __name__ == "__main__":
-    demo.launch(share=True)
-
-# chatbot = gr.Chatbot(height=600, label="Chimera AI")
-# chat_input = gr.MultimodalTextbox(interactive=True, file_types=["images"], placeholder="Enter your question or upload an image.", show_label=False)
-# with gr.Blocks(fill_height=True) as demo:
-#     gr.Markdown(DESCRIPTION)
-#     gr.ChatInterface(
-#         fn=bot_comms,
-#         chatbot=chatbot,
-#         fill_height=True,
-#         multimodal=True,
-#         textbox=chat_input,
-#     )
-
-# if __name__ == "__main__":
-#     demo.launch()
+    demo.launch()
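Two follow-ups on the new bot_comms: it keeps the @spaces.GPU(duration=120) decorator, but this same commit deletes "import spaces" at the top of the file, so the Space will fail with NameError at import time unless that import is restored. Separately, on the v1 SDK every streamed delta object has a content attribute (its value is simply None on non-content chunks), so the hasattr guard always passes and a None can reach "".join; the buffer variable is also assigned but never used. A safer loop, as a sketch against the names in this file:

    import spaces  # still required by the @spaces.GPU decorator used below

    for chunk in stream:
        text = chunk.choices[0].delta.content
        if text is not None:  # check the value; hasattr() is always true on v1 deltas
            gpt_outputs.append(text)
            yield "".join(gpt_outputs)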
 