vtrv.vls committed
Commit 4fa4c7b · 1 Parent(s): 9540a56

Functionality rework

Files changed (3):
  1. app.py +92 -59
  2. models.py +114 -5
  3. utils.py +42 -68
app.py CHANGED
@@ -2,70 +2,92 @@ import gradio
 import argparse
 import os
 import boto3
-from datetime import datetime
 import pandas as pd
 from copy import copy
 
-from utils import generate, send_to_s3
-from models import get_tinyllama, get_qwen2ins1b, response_tinyllama, response_qwen2ins1b
-from constants import css, js_code, js_light
-
-MERA_table = None
-TINYLLAMA = None
-QWEN2INS1B = None
-
-RIGHT_MODEL = None
-LEFT_MODEL = None
+import queue
+
+from constants import css, js_code, js_light
+from utils import model_response, clear_chat
+from models import get_tinyllama, get_qwen2ins1b, GigaChat, response_gigachat, response_qwen2ins1b, response_tinyllama
 
+INIT_MODELS = dict()
 S3_SESSION = None
-
-def giga_gen(content, chat_history):
-    chat_history.append([content])
-    res = generate(chat_history, 'auth_token.json')
-    chat_history[-1].append(res)
-    send_to_s3(res, f'protobench/giga_{str(datetime.now()).replace(" ", "_")}.json', S3_SESSION)
-    return '', chat_history
-
-def tiny_gen(content, chat_history):
-    chat_history.append([content])
-    res = response_tinyllama(TINY_LLAMA, chat_history)
-    chat_history[-1].append(res)
-    send_to_s3(res, f'protobench/tiny_{str(datetime.now()).replace(" ", "_")}.json', S3_SESSION)
-    return '', chat_history
-
-def qwen_gen(content, chat_history):
-    chat_history.append([content])
-    res = response_qwen2ins1b(QWEN2INS1B, chat_history)
-    chat_history[-1].append(res)
-    send_to_s3(res, f'protobench/tiny_{str(datetime.now()).replace(" ", "_")}.json', S3_SESSION)
-    return '', chat_history
-
-def model_gen(content, chat_history, model_type: str):
+CURRENT_MODELS = queue.LifoQueue()
+MODEL_LIB = {'TINYLLAMA': get_tinyllama, "QWEN2INS1B": get_qwen2ins1b, "RUBASE": GigaChat.get_giga}
+GEN_LIB = {'TINYLLAMA': response_tinyllama, "QWEN2INS1B": response_qwen2ins1b, "RUBASE": response_gigachat}
+
+def model_gen(
+    content,
+    chat_history,
+    model_name: str,
+    top_p,
+    temp,
+    max_tokens,
+    no_context=False
+):
+
+    global INIT_MODELS, S3_SESSION, GEN_LIB, MODEL_LIB
+    model_manager(model_name, MODEL_LIB, 3)
     if content is None:
         return '', []
     if len(content) == 0:
         return '', []
-    gen = MODEL_LIB[model_type]
-    return gen(content, chat_history)
+
+    chat_history = chat_history[-1] if no_context else chat_history
+
+    return model_response(
+        content,
+        chat_history,
+        S3_SESSION,
+        INIT_MODELS,
+        GEN_LIB,
+        model_name,
+        {"top_p": top_p, "temperature": temp, "max_tokens": max_tokens}
+    )
 
-def model_regen(content, chat_history, model_type: str):
+def model_regen(
+    content,
+    chat_history,
+    model_name: str,
+    top_p,
+    temp,
+    max_tokens,
+    no_context=False
+):
+
+    global INIT_MODELS, S3_SESSION, GEN_LIB, MODEL_LIB
+    model_manager(model_name, MODEL_LIB, 3)
     if chat_history is None:
         return '', []
-    if len(chat_history) == 0:
-        return '', []
 
-    gen = MODEL_LIB[model_type]
-
-    msg = copy(chat_history[-1][0])
-
-    return gen(msg, chat_history[:-1])
+    chat_history = chat_history[-1] if no_context else chat_history
+    content = copy(chat_history[-1][0])
+
+    return model_response(
+        content,
+        chat_history[:-1],
+        S3_SESSION,
+        INIT_MODELS,
+        GEN_LIB,
+        model_name,
+        {"top_p": top_p, "temperature": temp, "max_tokens": max_tokens}
+    )
 
-def clear_chat():
-    return '', []
+def model_manager(
+    add_model,
+    model_lib,
+    max_models=3
+):
+    global INIT_MODELS, CURRENT_MODELS
+    while CURRENT_MODELS.qsize() >= max_models:
+        model_del = CURRENT_MODELS.get()
+        INIT_MODELS[model_del] = None
+    CURRENT_MODELS.put(add_model)
+    INIT_MODELS[add_model] = model_lib[add_model]()
 
-MODEL_LIB = {'RUBASE': giga_gen, 'TINYLLAMA': tiny_gen, 'QWEN2INS1B': qwen_gen}
-
 def tab_arena():
+    global S3_SESSION, GEN_LIB, MODEL_LIB, INIT_MODELS, CURRENT_MODELS
     with gradio.Row():
         with gradio.Column():
             model_left = gradio.Dropdown(["TINYLLAMA", "QWEN2INS1B", "RUBASE"], value="TINYLLAMA", interactive=True, multiselect=False, label="Left model")
@@ -85,7 +107,7 @@ def tab_arena():
 
     with gradio.Row():
         with gradio.Accordion("Parameters", open=False):
-            context = gradio.Checkbox(label="No context", value=False)
+            no_context = gradio.Checkbox(label="No context", value=False)
            top_p = gradio.Slider(label='Top P', minimum=0, maximum=1, value=1, step=0.05, interactive=True)
            temp = gradio.Slider(label='Temperature', minimum=0, maximum=1, value=0.7, step=0.05, interactive=True)
            max_tokens = gradio.Slider(label='Max ouput tokens', minimum=1, maximum=2048, value=512, step=1, interactive=True)
@@ -94,14 +116,30 @@ def tab_arena():
        clear = gradio.ClearButton([msg, chatbot_left, chatbot_right], value='Clear history')
        regen_left = gradio.Button(value='Regenerate left answer')
        regen_right = gradio.Button(value='Regenerate right answer')
-       regen_left.click(model_regen, [msg, chatbot_left, model_left], [msg, chatbot_left])
-       regen_right.click(model_regen, [msg, chatbot_right, model_right], [msg, chatbot_right])
+       regen_left.click(
+           model_gen,
+           [msg, chatbot_left, model_left, top_p, temp, max_tokens, no_context],
+           [msg, chatbot_left]
+       )
+       regen_right.click(
+           model_gen,
+           [msg, chatbot_right, model_right, top_p, temp, max_tokens, no_context],
+           [msg, chatbot_right]
+       )
 
    with gradio.Blocks():
        model_left.change(clear_chat, [], [msg, chatbot_left])
        model_right.change(clear_chat, [], [msg, chatbot_right])
-       msg.submit(model_gen, [msg, chatbot_left, model_left], [msg, chatbot_left])
-       msg.submit(model_gen, [msg, chatbot_right, model_right], [msg, chatbot_right])
+       msg.submit(
+           model_gen,
+           [msg, chatbot_left, model_left, top_p, temp, max_tokens, no_context],
+           [msg, chatbot_left]
+       )
+       msg.submit(
+           model_gen,
+           [msg, chatbot_right, model_right, top_p, temp, max_tokens, no_context],
+           [msg, chatbot_right]
+       )
 
    # with gradio.Column():
    #     gradio.ChatInterface(
@@ -161,8 +199,6 @@ def tab_leaderboard():
    with open("test.md", "r") as f:
        TEST_MD = f.read()
 
-available_models = ["GigaChat", ""] # list(model_info.keys())
-
 def build_demo():
    # global original_dfs, available_models, gpt4t_dfs, haiku_dfs, llama_dfs
 
@@ -206,9 +242,6 @@ if __name__ == "__main__":
    # data_load(args.result_file)
    # TYPES = ["number", "markdown", "number"]
 
-    TINY_LLAMA = get_tinyllama()
-    QWEN2INS1B = get_qwen2ins1b()
-
    try:
        session = boto3.session.Session()
        S3_SESSION = session.client(
@@ -220,8 +253,8 @@ if __name__ == "__main__":
    except:
        print('Failed to start s3 session')
 
-    demo = build_demo()
-    demo.launch(share=args.share, height=3000, width="110%") # share=args.share
+    app = build_demo()
+    app.launch(share=args.share, height=3000, width="110%") # share=args.share
 
    # demo = gradio.Interface(fn=gen, inputs="text", outputs="text")
    # demo.launch()
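
Aside (not part of the commit): a minimal, self-contained sketch of the eviction policy the new model_manager implements. queue.LifoQueue pops the most recently queued name first, so once the cap is reached the newest loaded model is released rather than the oldest; swapping in queue.Queue would give least-recently-loaded eviction instead. The toy loaders below are hypothetical stand-ins for get_tinyllama / get_qwen2ins1b / GigaChat.get_giga.

    import queue

    INIT_MODELS = {}                    # name -> loaded model (None once evicted)
    CURRENT_MODELS = queue.LifoQueue()  # names in load order

    def model_manager(add_model, model_lib, max_models=3):
        # mirrors the commit's function: evict until under the cap, then load
        while CURRENT_MODELS.qsize() >= max_models:
            model_del = CURRENT_MODELS.get()   # LIFO: drops the newest entry
            INIT_MODELS[model_del] = None      # release the reference
        CURRENT_MODELS.put(add_model)
        INIT_MODELS[add_model] = model_lib[add_model]()

    # toy loaders standing in for the real model factories
    lib = {name: (lambda n=name: f"<{n} weights>") for name in ("A", "B", "C", "D")}
    for name in ("A", "B", "C", "D"):
        model_manager(name, lib, max_models=3)
    print(INIT_MODELS)
    # {'A': '<A weights>', 'B': '<B weights>', 'C': None, 'D': '<D weights>'}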
models.py CHANGED
@@ -1,6 +1,77 @@
+import requests
+import json
 import torch
+import os
+from datetime import datetime, timedelta
 from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
 
+class GigaChat:
+    def __init__(self, auth_file='auth_token.json'):
+        # url = "https://ngw.devices.sberbank.ru:9443/api/v2/oauth"
+        self.auth_url = "https://api.mlrnd.ru/api/v2/oauth"
+
+        # url = "https://gigachat.devices.sberbank.ru/api/v1/chat/completions"
+        self.gen_url = "https://api.mlrnd.ru/api/v1/chat/completions"
+
+        # payload='scope=GIGACHAT_API_CORP'
+        self.payload = 'scope=API_v1'
+
+        self.auth_file = None
+
+        if self.auth_file is None or not os.path.isfile(auth_file):
+            self.gen_giga_token(auth_file)
+
+    @classmethod
+    def get_giga(cls, auth_file='auth_token.json'):
+        return cls(auth_file)
+
+    def gen_giga_token(self, auth_file):
+        headers = {
+            'Content-Type': 'application/x-www-form-urlencoded',
+            'Accept': 'application/json',
+            'RqUID': '1b519047-0ee9-4b63-8599-e5ffc9c77e72',
+            'Authorization': os.getenv('GIGACHAT_API_TOKEN')
+        }
+
+        response = requests.request(
+            "POST",
+            self.auth_url,
+            headers=headers,
+            data=self.payload,
+            verify=False
+        )
+
+        with open(auth_file, 'w') as f:
+            json.dump(json.loads(response.text), f, ensure_ascii=False)
+
+    def get_text(self, content, auth_token=None, params=None):
+        if params is None:
+            params = dict()
+
+        payload = json.dumps(
+            {
+                "model": "Test_model",
+                "messages": content,
+                "temperature": params.get("temperature") if params.get("temperature") else 1,
+                "top_p": params.get("top_p") if params.get("top_p") else 0.9,
+                "n": params.get("n") if params.get("n") else 1,
+                "stream": False,
+                "max_tokens": params.get("max_tokens") if params.get("max_tokens") else 512,
+                "repetition_penalty": params.get("repetition_penalty") if params.get("repetition_penalty") else 1
+            }
+        )
+        headers = {
+            'Content-Type': 'application/json',
+            'Accept': 'application/json',
+            'Authorization': f'Bearer {auth_token}'
+        }
+
+        response = requests.request("POST", self.gen_url, headers=headers, data=payload, verify=False)
+
+        return json.loads(response.text)
+
+
 def get_tinyllama():
    tinyllama = pipeline("text-generation", model="TinyLlama/TinyLlama-1.1B-Chat-v1.0", torch_dtype=torch.float16, device_map="auto")
    return tinyllama
@@ -17,9 +88,13 @@ def get_qwen2ins1b():
 
 def response_tinyllama(
    model=None,
-    messages=None
+    messages=None,
+    params=None
 ):
 
+    if params is None:
+        params = dict()
+
    messages_dict = [
        {
            "role": "system",
@@ -32,13 +107,20 @@ def response_tinyllama(
        messages_dict.append({'role': 'assistant', 'content': step[1]})
 
    prompt = model.tokenizer.apply_chat_template(messages_dict, tokenize=False, add_generation_prompt=True)
-    outputs = model(prompt, max_new_tokens=64, do_sample=True, temperature=0.7, top_k=50, top_p=0.95)
+    outputs = model(
+        prompt,
+        max_new_tokens=params.get("max_tokens") if params.get("max_tokens") else 512,
+        temperature=params.get("temperature") if params.get("temperature") else 1,
+        top_p=params.get("top_p") if params.get("top_p") else 0.9,
+        repetition_penalty=params.get("repetition_penalty") if params.get("repetition_penalty") else 1
+    )
 
    return outputs[0]['generated_text'].split('<|assistant|>')[1].strip()
 
 def response_qwen2ins1b(
    model=None,
-    messages=None
+    messages=None,
+    params=None
 ):
 
    messages_dict = [
@@ -61,7 +143,10 @@ def response_qwen2ins1b(
 
    generated_ids = model['model'].generate(
        model_inputs.input_ids,
-        max_new_tokens=512
+        max_new_tokens=params.get("max_tokens") if params.get("max_tokens") else 512,
+        temperature=params.get("temperature") if params.get("temperature") else 1,
+        top_p=params.get("top_p") if params.get("top_p") else 0.9,
+        repetition_penalty=params.get("repetition_penalty") if params.get("repetition_penalty") else 1
    )
    generated_ids = [
        output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
@@ -69,4 +154,28 @@ def response_qwen2ins1b(
 
    response = model['tokenizer'].batch_decode(generated_ids, skip_special_tokens=True)[0]
 
-    return response # outputs[0]['generated_text'] #.split('<|assistant|>')[1].strip()
+    return response # outputs[0]['generated_text'] #.split('<|assistant|>')[1].strip()
+
+def response_gigachat(
+    model=None,
+    messages=None,
+    model_params=None
+): # content=None, auth_file=None
+
+    with open(model.auth_file) as f:
+        auth_token = json.load(f)
+
+    if datetime.fromtimestamp(auth_token['expires_at']/1000) <= datetime.now() - timedelta(seconds=60):
+        model.gen_giga_token(model.auth_file)
+        with open(model.auth_file) as f:
+            auth_token = json.load(f)
+
+    content = []
+    for step in messages:
+        content.append({'role': 'user', 'content': step[0]})
+        if len(step) >= 2:
+            content.append({'role': 'assistant', 'content': step[1]})
+
+    resp = model.get_text(content, auth_token['access_token'], model_params)
+
+    return resp["choices"][0]["message"]["content"]
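
Aside (not part of the commit): two reviewer notes on the added code. First, GigaChat.__init__ sets self.auth_file = None and never stores the auth_file argument, so response_gigachat's open(model.auth_file) would fail at runtime; presumably the intent was self.auth_file = auth_file. Second, the repeated fallback idiom params.get(k) if params.get(k) else default is not equivalent to params.get(k, default): it also replaces explicit falsy values, so a caller passing temperature=0 or repetition_penalty=0 is silently overridden. A quick check:

    params = {"temperature": 0.0}
    v_idiom = params.get("temperature") if params.get("temperature") else 1  # -> 1 (0.0 is falsy)
    v_get = params.get("temperature", 1)                                     # -> 0.0
    print(v_idiom, v_get)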
utils.py CHANGED
@@ -1,76 +1,50 @@
 import requests
 import json
 import os
-from datetime import datetime, timedelta
 import boto3
-
-
-def gen_auth_token(auth_file):
-    # url = "https://ngw.devices.sberbank.ru:9443/api/v2/oauth"
-    url = "https://api.mlrnd.ru/api/v2/oauth"
-
-    # payload='scope=GIGACHAT_API_CORP'
-    payload = 'scope=API_v1'
-    headers = {
-        'Content-Type': 'application/x-www-form-urlencoded',
-        'Accept': 'application/json',
-        'RqUID': '1b519047-0ee9-4b63-8599-e5ffc9c77e72',
-        'Authorization': os.getenv('GIGACHAT_API_TOKEN')
-    }
-
-    response = requests.request("POST", url, headers=headers, data=payload, verify=False)
-
-    with open(auth_file, 'w') as f:
-        json.dump(json.loads(response.text), f, ensure_ascii=False)
-
-
-def get_text(content, auth_token=None):
-    # url = "https://gigachat.devices.sberbank.ru/api/v1/chat/completions"
-    url = "https://api.mlrnd.ru/api/v1/chat/completions"
-
-    payload = json.dumps({
-        "model": "Test_model",
-        "messages": content,
-        "temperature": 1,
-        "top_p": 0.1,
-        "n": 1,
-        "stream": False,
-        "max_tokens": 512,
-        "repetition_penalty": 1
-    })
-    headers = {
-        'Content-Type': 'application/json',
-        'Accept': 'application/json',
-        'Authorization': f'Bearer {auth_token}'
-    }
-
-    response = requests.request("POST", url, headers=headers, data=payload, verify=False)
-
-    return json.loads(response.text)
-
-
-def generate(content=None, auth_file=None):
-    if auth_file is None or not os.path.isfile(auth_file):
-        gen_auth_token(auth_file)
-
-    with open(auth_file) as f:
-        auth_token = json.load(f)
-
-    if datetime.fromtimestamp(auth_token['expires_at']/1000) <= datetime.now() - timedelta(seconds=60):
-        gen_auth_token(auth_file)
-        with open(auth_file) as f:
-            auth_token = json.load(f)
-
-    content_giga = []
-    for step in content:
-        content_giga.append({'role': 'user', 'content': step[0]})
-        if len(step) >= 2:
-            content_giga.append({'role': 'assistant', 'content': step[1]})
-
-    resp = get_text(content_giga, auth_token['access_token'])
-
-    return resp["choices"][0]["message"]["content"]
+from datetime import datetime
+from copy import copy
+
+def clear_chat():
+    return '', []
+
+def model_response(
+    content,
+    chat_history,
+    s3_session,
+    initialized_models,
+    gen_lib,
+    model_name,
+    model_params
+):
+    chat_history.append([content])
+    res = gen_lib[model_name](initialized_models[model_name], chat_history, model_params)
+    chat_history[-1].append(res)
+    send_to_s3(res, f'protobench/{model_name}_{str(datetime.now()).replace(" ", "_")}.json', s3_session)
+    return '', chat_history
 
 def send_to_s3(data, name, session):
    session.put_object(Bucket=os.getenv('S3_BUCKET'), Key=name, Body=json.dumps(data))
 
+# def giga_gen(content, chat_history, model, s3_session):
+#     chat_history.append([content])
+#     res = response_gigachat(chat_history, 'auth_token.json')
+#     chat_history[-1].append(res)
+#     send_to_s3(res, f'protobench/giga_{str(datetime.now()).replace(" ", "_")}.json', s3_session)
+#     return '', chat_history
+
+# def tiny_gen(content, chat_history, model, s3_session):
+#     chat_history.append([content])
+#     res = response_tinyllama(model, chat_history)
+#     chat_history[-1].append(res)
+#     send_to_s3(res, f'protobench/tiny_{str(datetime.now()).replace(" ", "_")}.json', s3_session)
+#     return '', chat_history
+
+# def qwen_gen(content, chat_history, model, s3_session):
+#     chat_history.append([content])
+#     res = response_qwen2ins1b(model, chat_history)
+#     chat_history[-1].append(res)
+#     send_to_s3(res, f'protobench/qwen_{str(datetime.now()).replace(" ", "_")}.json', s3_session)
+#     return '', chat_history
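
Aside (not part of the commit): a minimal usage sketch of the new model_response contract, which is what lets one handler replace the per-model giga_gen/tiny_gen/qwen_gen functions. It returns ('', chat_history), so Gradio clears the textbox and refreshes the chatbot in one event. FakeS3 and fake_gen are hypothetical stand-ins, and the import assumes utils.py is on the path.

    from utils import model_response  # assumes utils.py is importable

    class FakeS3:
        def put_object(self, Bucket=None, Key=None, Body=None):  # boto3 client stand-in
            print(f"logged {Key}")

    def fake_gen(model, chat_history, params):  # same signature as the response_* functions
        return f"echo: {chat_history[-1][0]}"

    msg, history = model_response(
        "hello",                  # user message
        [],                       # empty chat history
        FakeS3(),                 # s3_session
        {"TINYLLAMA": object()},  # initialized_models, as populated by model_manager
        {"TINYLLAMA": fake_gen},  # gen_lib
        "TINYLLAMA",
        {"temperature": 0.7},
    )
    print(msg, history)           # -> '' [['hello', 'echo: hello']]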