sandz7 committed
Commit fc1b391 · 1 Parent(s): 8b17745

added bot_comms and revised chimera, which is now multimodal_and_generation()

Files changed (1)
app.py +94 -23
app.py CHANGED
@@ -7,6 +7,10 @@ import accelerate
 import spaces
 from PIL import Image
 import threading
+from openai import OpenAI
+import os
+
+API_KEY = os.getenv('OPEN_AI_API_KEYS')
 
 DESCRIPTION = '''
 <div>
@@ -47,8 +51,7 @@ refiner = DiffusionPipeline.from_pretrained(
 refiner.to('cuda')
 
 # All installed. Let's instantiate them in the function
-@spaces.GPU(duration=120)
-def chimera(message, history):
+def multimodal_and_generation(message, history):
     """
     Receives input from gradio: the prompt, and also any
     images that were passed in, formatted for the model.
@@ -70,22 +73,17 @@ def chimera(message, history):
     prompt = f"<|start_header_id|>user<|end_header_id|>\n\n<image>\n{message['text']}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"
 
     if image_path is None:
-        image = base(
-            prompt=prompt,
-            num_inference_steps=40,
-            denoising_end=0.8,
-            output_type="latent",
-        ).images
-        image = refiner(
-            prompt=prompt,
-            num_inference_steps=40,
-            denoising_start=0.8,
-            image=image
-        ).images[0]
-        return image
+        # GPT generation
+        client = OpenAI(api_key=API_KEY)
+        stream = client.chat.completions.create(
+            model="gpt-3.5-turbo",
+            messages=[{"role": "system", "content": "You are a helpful assistant called 'chimera'."},
+                      {"role": "user", "content": message['text']}],  # the textbox delivers a dict; send its text
+            stream=True,
+        )
+        return stream
 
     else:
-
         # Time to instantiate the llava
         image = Image.open(image_path)
         inputs = processor(prompt, image, return_tensors='pt').to(0, torch.float16)
@@ -95,12 +93,85 @@ def chimera(message, history):
         thread = threading.Thread(target=llava_model.generate, kwargs=generation_kwargs)
         thread.start()
 
-    buffer = ""
-    for new_text in streamer:
-        # find <|eot_id|> and remove it from the new_text
-        if "<|eot_id|>" in new_text:
-            new_text = new_text.split("<|eot_id|>")[0]
-        buffer += new_text
+        # buffer = ""
+        # for new_text in streamer:
+        #     # find <|eot_id|> and remove it from the new_text
+        #     if "<|eot_id|>" in new_text:
+        #         new_text = new_text.split("<|eot_id|>")[0]
+        #     buffer += new_text
+        #     generated_text_no_prompt = buffer
+        #     yield generated_text_no_prompt
+        return streamer
+
+def diffusing(prompt):
+    """
+    Uses stable diffusion on the prompt and
+    returns the image.
+    """
+    image = base(
+        prompt=prompt,
+        num_inference_steps=40,
+        denoising_end=0.8,
+        output_type="latent",
+    ).images
+    image = refiner(
+        prompt=prompt,
+        num_inference_steps=40,
+        denoising_start=0.8,
+        image=image,
+    ).images[0]
+    return image
+
+def check_cuda_availability():
+    if torch.cuda.is_available():
+        result = f"GPU: {torch.cuda.get_device_name(0)}"
+        return result
+    else:
+        return "No CUDA device found."
+
+mode = ""
+
+@spaces.GPU(duration=120)
+def bot_comms(message, history):
+    """
+    Communication between gradio and the models.
+    """
+    global mode
+
+    # gradio's multimodal textbox delivers a dict {"text": ..., "files": [...]}
+    text_input = message["text"] if isinstance(message, dict) else message
+
+    if text_input == "check cuda":
+        result = check_cuda_availability()
+        yield result
+        return
+
+    if text_input == "imagery":
+        mode = text_input
+        yield "Imagery On! Type your prompt to make the image 🖼️"
+        return
+
+    if text_input == "chatting":
+        mode = text_input
+        yield "Imagery Off. Ask me any questions. ☄️"
+        return
+
+    if mode == "imagery":
+        # diffusing() only takes the prompt text; yield (not return) so gradio receives it
+        image = diffusing(prompt=text_input)
+        yield image
+        return
+
+    buffer = ""
+    if mode == "chatting" or mode == "":
+        stream = multimodal_and_generation(
+            message=message,
+            history=history,
+        )
+        for chunk in stream:
+            # the llava streamer yields strings; the OpenAI stream yields chunk objects
+            text = chunk if isinstance(chunk, str) else (chunk.choices[0].delta.content or "")
+            # find <|eot_id|> and remove it from the text
+            if "<|eot_id|>" in text:
+                text = text.split("<|eot_id|>")[0]
+            buffer += text
     generated_text_no_prompt = buffer
     yield generated_text_no_prompt
 
@@ -110,7 +181,7 @@ chat_input = gr.MultimodalTextbox(interactive=True, file_types=["images"], place
 with gr.Blocks(fill_height=True) as demo:
     gr.Markdown(DESCRIPTION)
     gr.ChatInterface(
-        fn=chimera,
+        fn=bot_comms,
         chatbot=chatbot,
         fill_height=True,
         multimodal=True,
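
A note on the new diffusing helper: it uses the SDXL base-plus-refiner ensemble, where the base pipeline runs the first 80% of the 40 denoising steps and hands off latents (output_type="latent", denoising_end=0.8), and the refiner finishes the remaining 20% (denoising_start=0.8). A quick standalone check, with a made-up prompt and assuming base and refiner are loaded as in app.py:

    image = diffusing("a watercolor fox in a snowy forest")  # PIL.Image from the refiner
    image.save("fox.png")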
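The streamer and generation_kwargs names in the llava branch are defined above the hunks shown here. The pattern is the standard transformers one: llava_model.generate runs on a worker thread while TextIteratorStreamer yields decoded text to the caller. A sketch of how that pair is typically wired up; the max_new_tokens value and skip_prompt flag are assumptions, not taken from this diff, and skip_special_tokens=False matches the manual <|eot_id|> stripping done downstream:

    from transformers import TextIteratorStreamer

    streamer = TextIteratorStreamer(processor.tokenizer, skip_prompt=True, skip_special_tokens=False)
    generation_kwargs = dict(**inputs, streamer=streamer, max_new_tokens=512)  # max_new_tokens assumed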
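Since bot_comms is now the single entry point, it can also be exercised outside gradio. A minimal smoke test, assuming the spaces.GPU decorator is a no-op off-Spaces and OPEN_AI_API_KEYS is set; the dict payload mirrors what gr.MultimodalTextbox sends, and the prompt text is made up:

    # "check cuda" is intercepted before any model call
    print(next(bot_comms({"text": "check cuda", "files": []}, history=[])))
    # default "" mode routes to the GPT branch and yields the buffered reply
    for reply in bot_comms({"text": "hello there", "files": []}, history=[]):
        print(reply)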