Spaces:

Zenithwang
/

infly-OpenCoder-8B-Instruct

Sleeping

App Files Community

Zenithwang committed on Nov 13, 2024

Commit

dde58dc

verified ·

1 Parent(s): d646671

Update app.py

Browse files

Files changed (1) hide show

app.py +5 -12

app.py CHANGED Viewed

@@ -33,26 +33,20 @@ sft_start_token =  "<|im_start|>"
 sft_end_token = "<|im_end|>"
 ct_end_token = "<|endoftext|>"
-system_prompt= \
-'You are an AI assistant named Sailor created by Sea AI Lab. \
-Your answer should be friendly, unbiased, faithful, informative and detailed.'
-system_prompt = f"<|im_start|>{system_role}\n{system_prompt}<|im_end|>"
 # Function to generate model predictions.
 @spaces.GPU()
 def predict(message, history):
-    # history = []
-    # history_transformer_format = history + [[message, ""]]
     try:
         stop = StopOnTokens()
-        # Formatting the input for the model.
-        # messages =  system_prompt + sft_end_token.join([sft_end_token.join([f"\n{sft_start_token}{user_role}\n" + item[0], f"\n{sft_start_token}{assistant_role}\n" + item[1]])
-        #                     for item in history_transformer_format])
         model_messages = []
         print(f'history: {history}')
         for i, item in enumerate(history):
             model_messages.append({"role": user_role, "content": item[0]})
             model_messages.append({"role": assistant_role, "content": item[1]})
@@ -70,8 +64,7 @@ def predict(message, history):
             input_ids=model_inputs,
             streamer=streamer,
             max_new_tokens=1024,
-            do_sample=False,
-            stopping_criteria=StoppingCriteriaList([stop])
         )
         t = Thread(target=model.generate, kwargs=generate_kwargs)
         t.start()  # Starting the generation in a separate thread.

 sft_end_token = "<|im_end|>"
 ct_end_token = "<|endoftext|>"
+system_prompt= 'You are a CodeLLM developed by INF.'
 # Function to generate model predictions.
 @spaces.GPU()
 def predict(message, history):
     try:
         stop = StopOnTokens()
         model_messages = []
         print(f'history: {history}')
+        model_messages.append({"role": system_role, "content": system_prompt})
         for i, item in enumerate(history):
             model_messages.append({"role": user_role, "content": item[0]})
             model_messages.append({"role": assistant_role, "content": item[1]})
             input_ids=model_inputs,
             streamer=streamer,
             max_new_tokens=1024,
+            do_sample=False)
         )
         t = Thread(target=model.generate, kwargs=generate_kwargs)
         t.start()  # Starting the generation in a separate thread.