AlanXian committed
Commit 33032fd · 1 Parent(s): 2003c2f

update app.py chat format

Files changed (1):
  1. app.py (+21 -15)
app.py CHANGED

@@ -51,14 +51,6 @@ h1 {
 # Load the tokenizer and model
 tokenizer = AutoTokenizer.from_pretrained("FreedomIntelligence/Apollo-7B")
 
-chat = [
-    {"role": "user", "content": "Hello, how are you?"},
-    {"role": "assistant", "content": "I'm doing great. How can I help you today?"},
-    {"role": "user", "content": "I'd like to show off how chat templating works!"},
-]
-
-tokenizer.apply_chat_template(chat, tokenize=False)
-
 model = AutoModelForCausalLM.from_pretrained("FreedomIntelligence/Apollo-7B", device_map="auto")  # to("cuda:0")
 terminators = [
     tokenizer.eos_token_id,
@@ -66,24 +58,38 @@ terminators = [
 ]
 
 @spaces.GPU(duration=120)
-def chat_llama3_8b(message: str,
-                   history: list,
+def chat_llama3_8b(conversation_data: dict,
                    temperature: float,
                    max_new_tokens: int
                    ) -> str:
     """
     Generate a streaming response using the llama3-8b model.
     Args:
-        message (str): The input message.
-        history (list): The conversation history used by ChatInterface.
+        conversation_data (dict): A dictionary containing 'text' and 'history'.
         temperature (float): The temperature for generating the response.
        max_new_tokens (int): The maximum number of new tokens to generate.
     Returns:
         str: The generated response.
     """
+    message = conversation_data.get("text", "")
+    history_str = conversation_data.get("history", "")
+
+    # Process the conversation history
     conversation = []
-    for user, assistant in history:
-        conversation.extend([{"role": "user", "content": user}, {"role": "assistant", "content": assistant}])
+    if history_str:
+        # The history is assumed to be a string stored in some format; parse it according to the actual format
+        # Here it is assumed to be newline-separated user and assistant messages: even lines are user, odd lines are assistant
+        lines = history_str.strip().split('\n')
+        for i in range(0, len(lines), 2):
+            if i + 1 < len(lines):
+                user_msg = lines[i]
+                assistant_msg = lines[i + 1]
+                conversation.extend([
+                    {"role": "user", "content": user_msg},
+                    {"role": "assistant", "content": assistant_msg}
+                ])
+
+    # Append the latest user message
     conversation.append({"role": "user", "content": message})
 
     input_ids = tokenizer.apply_chat_template(conversation, return_tensors="pt").to(model.device)
@@ -140,7 +146,7 @@ with gr.Blocks(fill_height=True, css=css) as demo:
     ],
     examples=[
         ['How to setup a human base on Mars? Give short answer.'],
-        ['Explain theory of relativity to me like Im 8 years old.'],
+        ['Explain theory of relativity to me like I\'m 8 years old.'],
         ['What is 9,000 * 9,000?'],
         ['Write a pun-filled happy birthday message to my friend Alex.'],
         ['Justify why a penguin might make a good king of the jungle.']
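
For context, a minimal sketch of the input contract this commit introduces: chat_llama3_8b now receives a single conversation_data dict instead of Gradio's usual (message, history) pair, with the history serialized as a newline-separated string of alternating user/assistant turns. The dict keys ("text", "history") and the parsing convention come from the diff above; the concrete message contents below are hypothetical examples.

    # Sketch of the new conversation_data contract (keys and format per the
    # diff above; the messages themselves are hypothetical).
    conversation_data = {
        "text": "What is 9,000 * 9,000?",  # latest user message
        "history": (
            "Hello, how are you?\n"                       # even line: user
            "I'm doing great. How can I help you today?"  # odd line: assistant
        ),
    }

    # Replaying the parsing logic from the diff shows the structure that gets
    # handed to tokenizer.apply_chat_template:
    conversation = []
    history_str = conversation_data.get("history", "")
    if history_str:
        lines = history_str.strip().split('\n')
        for i in range(0, len(lines), 2):
            if i + 1 < len(lines):
                conversation.extend([
                    {"role": "user", "content": lines[i]},
                    {"role": "assistant", "content": lines[i + 1]},
                ])
    conversation.append({"role": "user", "content": conversation_data.get("text", "")})

    # conversation ==
    # [{"role": "user", "content": "Hello, how are you?"},
    #  {"role": "assistant", "content": "I'm doing great. How can I help you today?"},
    #  {"role": "user", "content": "What is 9,000 * 9,000?"}]

Note that the old signature matched gr.ChatInterface's (message, history) callback convention, so callers elsewhere in app.py (not shown in this diff) would need to build this combined dict instead of passing message and history separately.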