ranWang committed on
Commit
2155881
·
1 Parent(s): a801b76

Adjust the error return value and markdown margin style

Browse files
Files changed (1) hide show
  1. app.py +44 -38
app.py CHANGED
@@ -1,31 +1,19 @@
1
  import gradio as gr
2
- from huggingface_hub import login
3
 
4
- import re
5
-
6
- # from vllm import LLM, SamplingParams
7
- import pandas as pd
8
  from dataclasses import dataclass
9
  from concurrent.futures import ThreadPoolExecutor, TimeoutError
10
- import os
11
- from typing import Dict, Any, List
12
 
13
- # code execution
14
  import os
15
  import re
16
- import signal
17
  import subprocess
18
  import tempfile
19
- from contextlib import contextmanager
20
- from typing import Tuple
21
- from tqdm import tqdm
22
- import time
23
  from sympy import N, simplify
24
  from sympy.parsing.latex import parse_latex
25
- import random
26
- from pathlib import Path
27
  from openai import OpenAI
28
 
 
29
  client = OpenAI(
30
  base_url=os.environ.get("SERVER_URL"),
31
  api_key=os.environ.get("HF_TOKEN"),
@@ -442,15 +430,6 @@ def validate_answer_is_numeric(x: str | int | float) -> int:
442
  return x
443
 
444
 
445
- def get_majority_vote(responses: List[int]) -> int:
446
- if len(responses) < 1:
447
- return 0
448
- else:
449
- c = Counter(responses)
450
- value, count = c.most_common()[0]
451
- return value
452
-
453
-
454
  def filter_answers(answers: List[str]) -> List[int]:
455
  formatted_answers = [validate_answer_is_numeric(a) for a in answers]
456
 
@@ -540,17 +519,45 @@ print(f"=== Running submission with config ===\n\n{config}")
540
 
541
 
542
  def generate(message, temperature):
543
- chat_completion = client.chat.completions.create(
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
544
  model="tgi",
545
  messages=message,
546
  stream=True,
547
- max_tokens=1024,
548
  stop=["```output\n"],
549
  temperature=temperature,
550
  )
551
 
552
- for message in chat_completion:
553
- yield message.choices[0].delta.content
 
 
 
 
 
 
 
 
 
 
 
 
 
554
 
555
 
556
  def get_majority_text(data):
@@ -645,10 +652,6 @@ def process_code(
645
  return example
646
 
647
 
648
- # load the vllm instance and set sampling parameters
649
- # vllm = build_vllm(config)
650
-
651
-
652
  def solve_problem(problem, temperature, progress=gr.Progress()):
653
  problem = apply_template({"prompt": problem}, prompt=config.system_prompt)
654
  print(f"Problem: {problem}")
@@ -656,8 +659,8 @@ def solve_problem(problem, temperature, progress=gr.Progress()):
656
  sample = {
657
  "problem": problem, # not used for the submission TODO Remove
658
  "ground_truth": "unknown", # not used for the submission TODO Remove
659
- "text": "### Solution:\n",
660
- "gen_texts": "### Solution:\n", # used to store all the generated text
661
  "should_prune": False,
662
  "problem_index": -1, # not used for the submission TODO Remove
663
  "model_answers": "-1",
@@ -676,11 +679,14 @@ def solve_problem(problem, temperature, progress=gr.Progress()):
676
  {"role": "assistant", "content": sample["gen_texts"]},
677
  ]
678
 
679
- for reponse_message in generate(messages, temperature):
680
  if reponse_message is not None:
681
  step_reponse += reponse_message
682
  yield step_reponse
683
 
 
 
 
684
  sample["gen_texts"] = step_reponse
685
 
686
  # TODO: Maybe it should just return the result of running the code
@@ -705,13 +711,13 @@ def solve_problem(problem, temperature, progress=gr.Progress()):
705
  yield sample["gen_texts"]
706
 
707
 
708
- with gr.Blocks() as demo:
709
  with gr.Row():
710
  inp = gr.Textbox(placeholder="Problem", label="Problem", lines=5)
711
  with gr.Accordion("Advanced Options", open=False):
712
- temperature = gr.Slider(minimum=0.0, maximum=1.0, value=0.8, step=0.1, label="Temperature")
713
  with gr.Row():
714
- out = gr.Markdown()
715
 
716
  btn = gr.Button("Run")
717
 
 
1
  import gradio as gr
 
2
 
 
 
 
 
3
  from dataclasses import dataclass
4
  from concurrent.futures import ThreadPoolExecutor, TimeoutError
 
 
5
 
 
6
  import os
7
  import re
 
8
  import subprocess
9
  import tempfile
10
+ import json
11
+ from typing import Tuple, Dict, Any, List
 
 
12
  from sympy import N, simplify
13
  from sympy.parsing.latex import parse_latex
 
 
14
  from openai import OpenAI
15
 
16
+
17
  client = OpenAI(
18
  base_url=os.environ.get("SERVER_URL"),
19
  api_key=os.environ.get("HF_TOKEN"),
 
430
  return x
431
 
432
 
 
 
 
 
 
 
 
 
 
433
  def filter_answers(answers: List[str]) -> List[int]:
434
  formatted_answers = [validate_answer_is_numeric(a) for a in answers]
435
 
 
519
 
520
 
def _stringify_error(error):
    """Return a displayable string for an error field that may be a str, a dict, or other JSON."""
    if isinstance(error, str):
        return error
    if isinstance(error, dict) and isinstance(error.get("message"), str):
        return error["message"]
    return json.dumps(error, ensure_ascii=False)


def _delta_content(payload):
    """Return the text carried by one completion chunk, or None when it carries none."""
    choices = payload.get("choices") or []
    if not choices:
        return None
    return (choices[0].get("delta") or {}).get("content")


def generate(message, temperature):
    """
    Stream a chat completion from the model server, yielding text piece by piece.

    The raw HTTP response is read instead of iterating the client's stream object,
    because a failing server may answer with a plain JSON body (or an error event)
    rather than a well-formed completion stream.

    Parameters:
        message: The chat messages forwarded as ``messages=`` to the completion
            endpoint (the caller in this file passes a list of role/content dicts).
        temperature (float): The sampling temperature passed to the endpoint.

    Yields:
        tuple: ``(content, is_error)``. For normal output ``is_error`` is False and
        ``content`` is the next piece of generated text. On failure a single
        ``(error_message, True)`` pair is yielded and the generator stops.
    """
    stream = client.chat.completions.create(
        model="tgi",
        messages=message,
        stream=True,
        max_tokens=1200,
        stop=["```output\n"],
        temperature=temperature,
    )

    response = stream.response
    # Lines that are not `data:` events; a non-streamed (error) body ends up here.
    plain_lines = []

    try:
        # Iterate decoded lines rather than raw byte chunks: a byte chunk may hold
        # several events, half an event, or a split multi-byte character.
        for line in response.iter_lines():
            line = line.strip()
            # Blank separators, SSE comments and non-data SSE fields carry no payload.
            if not line or line.startswith((":", "event:", "id:", "retry:")):
                continue

            if not line.startswith("data:"):
                plain_lines.append(line)
                continue

            # Strip only the leading field name so "data:" inside the text survives.
            data = line[len("data:"):].strip()
            if data == "[DONE]":
                break

            try:
                payload = json.loads(data)
            except json.JSONDecodeError:
                yield f"Malformed stream event: {data}", True
                return

            if not isinstance(payload, dict):
                continue

            error = payload.get("error")
            if error:
                yield _stringify_error(error), True
                return

            content = _delta_content(payload)
            if content is not None:
                yield content, False

        if plain_lines:
            # The server did not stream: report its body (or its error field).
            body = "\n".join(plain_lines)
            try:
                payload = json.loads(body)
            except json.JSONDecodeError:
                payload = None
            error = payload.get("error") if isinstance(payload, dict) else None
            yield (_stringify_error(error) if error else body), True
    finally:
        # Release the connection even when the consumer stops iterating early.
        response.close()
561
 
562
 
563
  def get_majority_text(data):
 
652
  return example
653
 
654
 
 
 
 
 
655
  def solve_problem(problem, temperature, progress=gr.Progress()):
656
  problem = apply_template({"prompt": problem}, prompt=config.system_prompt)
657
  print(f"Problem: {problem}")
 
659
  sample = {
660
  "problem": problem, # not used for the submission TODO Remove
661
  "ground_truth": "unknown", # not used for the submission TODO Remove
662
+ "text": "## Solution:\n",
663
+ "gen_texts": "## Solution:\n", # used to store all the generated text
664
  "should_prune": False,
665
  "problem_index": -1, # not used for the submission TODO Remove
666
  "model_answers": "-1",
 
679
  {"role": "assistant", "content": sample["gen_texts"]},
680
  ]
681
 
682
+ for reponse_message, error in generate(messages, temperature):
683
  if reponse_message is not None:
684
  step_reponse += reponse_message
685
  yield step_reponse
686
 
687
+ if error:
688
+ return
689
+
690
  sample["gen_texts"] = step_reponse
691
 
692
  # TODO: Maybe it should just return the result of running the code
 
711
  yield sample["gen_texts"]
712
 
713
 
714
+ with gr.Blocks(css=".top-margin { margin-top: 20px; }") as demo:
715
  with gr.Row():
716
  inp = gr.Textbox(placeholder="Problem", label="Problem", lines=5)
717
  with gr.Accordion("Advanced Options", open=False):
718
+ temperature = gr.Slider(minimum=0.0, maximum=1.0, value=0.1, step=0.1, label="Temperature")
719
  with gr.Row():
720
+ out = gr.Markdown(elem_classes=["top-margin"])
721
 
722
  btn = gr.Button("Run")
723