Commit 973519b by Maharshi Gor
Parent(s): 193db9d

Enhance model provider detection and add repository management script. Add support for multi-step agents.
Files changed:
- app.py (+51, -2)
- check_repos.py (+27, -0)
- src/components/quizbowl/bonus.py (+7, -3)
- src/components/quizbowl/tossup.py (+9, -1)
- src/llms.py (+129, -0)
- src/utils.py (+3, -1)
- src/workflows/qb/multi_step_agent.py (+192, -0)
- src/workflows/qb/simple_agent.py (+0, -8)
app.py CHANGED
@@ -1,14 +1,60 @@
 import datasets
 import gradio as gr
+from apscheduler.schedulers.background import BackgroundScheduler
+from huggingface_hub import snapshot_download
 
 from components.quizbowl.bonus import BonusInterface
 from components.quizbowl.tossup import TossupInterface
 from display.custom_css import css_pipeline, css_tossup
 
 # Constants
-from envs import
+from src.envs import (
+    API,
+    AVAILABLE_MODELS,
+    DEFAULT_SELECTIONS,
+    EVAL_REQUESTS_PATH,
+    EVAL_RESULTS_PATH,
+    PLAYGROUND_DATASET_NAMES,
+    QUEUE_REPO,
+    REPO_ID,
+    RESULTS_REPO,
+    THEME,
+    TOKEN,
+)
 from workflows import factory
 
+
+def restart_space():
+    API.restart_space(repo_id=REPO_ID)
+
+
+### Space initialisation
+try:
+    print(EVAL_REQUESTS_PATH)
+    snapshot_download(
+        repo_id=QUEUE_REPO,
+        local_dir=EVAL_REQUESTS_PATH,
+        repo_type="dataset",
+        tqdm_class=None,
+        etag_timeout=30,
+        token=TOKEN,
+    )
+except Exception:
+    restart_space()
+try:
+    print(EVAL_RESULTS_PATH)
+    snapshot_download(
+        repo_id=RESULTS_REPO,
+        local_dir=EVAL_RESULTS_PATH,
+        repo_type="dataset",
+        tqdm_class=None,
+        etag_timeout=30,
+        token=TOKEN,
+    )
+except Exception:
+    restart_space()
+
+
 js_preamble = """
 <link href="https://fonts.cdnfonts.com/css/roboto-mono" rel="stylesheet">
 
@@ -118,8 +164,11 @@ def main():
     }
     bonus_interface = BonusInterface(app, bonus_ds, AVAILABLE_MODELS, defaults)
 
-    app.queue(
+    app.queue(default_concurrency_limit=40).launch()
 
 
 if __name__ == "__main__":
+    scheduler = BackgroundScheduler()
+    scheduler.add_job(restart_space, "interval", seconds=1800)
+    scheduler.start()
     main()
check_repos.py ADDED
@@ -0,0 +1,27 @@
+from huggingface_hub import HfApi
+
+from src.envs import QUEUE_REPO, RESULTS_REPO, TOKEN
+
+
+def check_and_create_repos():
+    api = HfApi(token=TOKEN)
+
+    # Check and create queue repo
+    try:
+        api.repo_info(repo_id=QUEUE_REPO, repo_type="dataset")
+        print(f"Queue repository {QUEUE_REPO} exists")
+    except Exception:
+        print(f"Creating queue repository {QUEUE_REPO}")
+        api.create_repo(repo_id=QUEUE_REPO, repo_type="dataset", exist_ok=True, private=False)
+
+    # Check and create results repo
+    try:
+        api.repo_info(repo_id=RESULTS_REPO, repo_type="dataset")
+        print(f"Results repository {RESULTS_REPO} exists")
+    except Exception:
+        print(f"Creating results repository {RESULTS_REPO}")
+        api.create_repo(repo_id=RESULTS_REPO, repo_type="dataset", exist_ok=True, private=False)
+
+
+if __name__ == "__main__":
+    check_and_create_repos()
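The queue and results blocks above are identical except for the repository they target, and `exist_ok=True` already makes creation idempotent. A possible consolidation, sketched with a hypothetical ensure_repo helper that is not part of this commit:

from huggingface_hub import HfApi

from src.envs import QUEUE_REPO, RESULTS_REPO, TOKEN


def ensure_repo(api: HfApi, label: str, repo_id: str) -> None:
    # Create the dataset repo only if it does not already exist.
    try:
        api.repo_info(repo_id=repo_id, repo_type="dataset")
        print(f"{label} repository {repo_id} exists")
    except Exception:
        print(f"Creating {label} repository {repo_id}")
        api.create_repo(repo_id=repo_id, repo_type="dataset", exist_ok=True, private=False)


if __name__ == "__main__":
    api = HfApi(token=TOKEN)
    ensure_repo(api, "Queue", QUEUE_REPO)
    ensure_repo(api, "Results", RESULTS_REPO)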
src/components/quizbowl/bonus.py CHANGED
@@ -10,7 +10,7 @@ from datasets import Dataset
 
 from components.model_pipeline.model_pipeline import PipelineInterface, PipelineState
 from submission import submit
-from workflows import
+from workflows.qb.multi_step_agent import MultiStepBonusAgent
 from workflows.qb.simple_agent import SimpleBonusAgent
 from workflows.structs import ModelStep, Workflow
 
@@ -255,9 +255,13 @@ class BonusInterface:
         """Get the model outputs for a given question ID."""
         outputs = []
         leadin = example["leadin"]
+        workflow = pipeline_state.workflow
+        if len(workflow.steps) > 1:
+            agent = MultiStepBonusAgent(workflow)
+        else:
+            agent = SimpleBonusAgent(workflow)
 
         for i, part in enumerate(example["parts"]):
-            agent = SimpleBonusAgent(workflow=pipeline_state.workflow)
             # Run model for each part
             part_output = agent.run(leadin, part["part"])
 
@@ -384,7 +388,7 @@ class BonusInterface:
         )
 
         self.submit_btn.click(
-            fn=self.
+            fn=self.submit_model,
             inputs=[
                 self.model_name_input,
                 self.description_input,
src/components/quizbowl/tossup.py CHANGED
@@ -9,6 +9,7 @@ from datasets import Dataset
 
 from components.model_pipeline.model_pipeline import PipelineInterface, PipelineState
 from submission import submit
+from workflows.qb.multi_step_agent import MultiStepTossupAgent
 from workflows.qb.simple_agent import SimpleTossupAgent
 from workflows.structs import ModelStep, Workflow
 
@@ -21,6 +22,9 @@ from .plotting import (
     update_plot,
 )
 
+# TODO: Error handling on run tossup and evaluate tossup and show correct messages
+# TODO: ^^ Same for Bonus
+
 
 def add_model_scores(model_outputs: list[dict], clean_answers: list[str], run_indices: list[int]) -> list[dict]:
     """Add model scores to the model outputs."""
@@ -291,7 +295,11 @@ class TossupInterface:
         for run_idx in example["run_indices"]:
             question_runs.append(" ".join(tokens[: run_idx + 1]))
 
-        agent = SimpleTossupAgent(workflow=pipeline_state.workflow, buzz_threshold=buzz_threshold)
+        workflow = pipeline_state.workflow
+        if len(workflow.steps) > 1:
+            agent = MultiStepTossupAgent(workflow, buzz_threshold)
+        else:
+            agent = SimpleTossupAgent(workflow, buzz_threshold)
         outputs = list(agent.run(question_runs, early_stop=early_stop))
         outputs = add_model_scores(outputs, example["clean_answers"], example["run_indices"])
         return outputs
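The step-count dispatch above now appears in both bonus.py and tossup.py. One way it could be factored out, sketched as a hypothetical create_tossup_agent helper that is not part of this commit:

from workflows.qb.multi_step_agent import MultiStepTossupAgent
from workflows.qb.simple_agent import SimpleTossupAgent
from workflows.structs import Workflow


def create_tossup_agent(workflow: Workflow, buzz_threshold: float):
    # Multi-step workflows get the multi-step agent; single-step ones keep the simple agent.
    if len(workflow.steps) > 1:
        return MultiStepTossupAgent(workflow, buzz_threshold)
    return SimpleTossupAgent(workflow, buzz_threshold)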
src/llms.py ADDED
@@ -0,0 +1,129 @@
+# %%
+import json
+import os
+from typing import Optional
+
+import cohere
+import json_repair
+import numpy as np
+from anthropic import Anthropic
+from langchain_anthropic import ChatAnthropic
+from langchain_cohere import ChatCohere
+from langchain_openai import ChatOpenAI
+from openai import OpenAI
+from pydantic import BaseModel, Field
+from rich import print as rprint
+
+import utils
+from envs import AVAILABLE_MODELS
+
+
+class LLMOutput(BaseModel):
+    content: str = Field(description="The content of the response")
+    logprob: Optional[float] = Field(None, description="The log probability of the response")
+
+
+def completion(model: str, system: str, prompt: str, response_format, logprobs: bool = False) -> str:
+    """
+    Generate a completion from an LLM provider with structured output.
+
+    Args:
+        model (str): Provider and model name in format "provider/model" (e.g. "OpenAI/gpt-4")
+        system (str): System prompt/instructions for the model
+        prompt (str): User prompt/input
+        response_format: Pydantic model defining the expected response structure
+        logprobs (bool, optional): Whether to return log probabilities. Defaults to False.
+            Note: Not supported by Anthropic models.
+
+    Returns:
+        dict: Contains:
+            - output: The structured response matching response_format
+            - logprob: (optional) Sum of log probabilities if logprobs=True
+            - prob: (optional) Exponential of logprob if logprobs=True
+
+    Raises:
+        ValueError: If logprobs=True with Anthropic models
+    """
+    if model not in AVAILABLE_MODELS:
+        raise ValueError(f"Model {model} not supported")
+    model_name = AVAILABLE_MODELS[model]["model"]
+    provider = model.split("/")[0]
+    if provider == "Cohere":
+        return _cohere_completion(model_name, system, prompt, response_format, logprobs)
+    elif provider == "OpenAI":
+        return _openai_completion(model_name, system, prompt, response_format, logprobs)
+    elif provider == "Anthropic":
+        if logprobs:
+            raise ValueError("Anthropic does not support logprobs")
+        return _anthropic_completion(model_name, system, prompt, response_format)
+    else:
+        raise ValueError(f"Provider {provider} not supported")
+
+
+def _cohere_completion(model: str, system: str, prompt: str, response_model, logprobs: bool = True) -> str:
+    messages = [
+        {"role": "system", "content": system},
+        {"role": "user", "content": prompt},
+    ]
+    client = cohere.ClientV2(api_key=os.getenv("COHERE_API_KEY"))
+    response = client.chat(
+        model=model,
+        messages=messages,
+        response_format={"type": "json_schema", "json_schema": response_model.model_json_schema()},
+        logprobs=logprobs,
+    )
+    output = {}
+    output["content"] = response.message.content[0].text
+    output["output"] = response_model.model_validate_json(response.message.content[0].text).model_dump()
+    if logprobs:
+        output["logprob"] = sum(lp.logprobs[0] for lp in response.logprobs)
+        output["prob"] = np.exp(output["logprob"])
+    return output
+
+
+def _openai_completion(model: str, system: str, prompt: str, response_model, logprobs: bool = True) -> str:
+    messages = [
+        {"role": "system", "content": system},
+        {"role": "user", "content": prompt},
+    ]
+    client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
+    response = client.beta.chat.completions.parse(
+        model=model,
+        messages=messages,
+        response_format=response_model,
+        logprobs=logprobs,
+    )
+    output = {}
+    output["content"] = response.choices[0].message.content
+    output["output"] = response.choices[0].message.parsed.model_dump()
+    if logprobs:
+        output["logprob"] = sum(lp.logprob for lp in response.choices[0].logprobs.content)
+        output["prob"] = np.exp(output["logprob"])
+    return output
+
+
+def _anthropic_completion(model: str, system: str, prompt: str, response_model) -> str:
+    llm = ChatAnthropic(model=model).with_structured_output(response_model, include_raw=True)
+    output = llm.invoke([("system", system), ("human", prompt)])
+    return {"content": output.raw, "output": output.parsed.model_dump()}
+
+
+if __name__ == "__main__":
+
+    class ExplainedAnswer(BaseModel):
+        """
+        The answer to the question and a terse explanation of the answer.
+        """
+
+        answer: str = Field(description="The short answer to the question")
+        explanation: str = Field(description="5 words terse best explanation of the answer.")
+
+    model = "Anthropic/claude-3-5-sonnet-20240620"
+    system = "You are an accurate and concise explainer of scientific concepts."
+    prompt = "Which planet is closest to the sun in the Milky Way galaxy? Answer directly, no explanation needed."
+
+    # response = _cohere_completion("command-r", system, prompt, ExplainedAnswer, logprobs=True)
+    response = completion(model, system, prompt, ExplainedAnswer, logprobs=False)
+    rprint(response)
+
+# %%
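The `__main__` block exercises the Anthropic path with logprobs off. A companion sketch for the logprobs path, assuming an "OpenAI/gpt-4o-mini" key exists in AVAILABLE_MODELS (the actual keys live in envs.py, so this name is illustrative only):

from pydantic import BaseModel, Field

from llms import completion


class ShortAnswer(BaseModel):
    answer: str = Field(description="The short answer to the question")


# "OpenAI/gpt-4o-mini" is an assumed AVAILABLE_MODELS key; completion() resolves it to a provider model.
result = completion(
    model="OpenAI/gpt-4o-mini",
    system="Answer tersely.",
    prompt="What is the capital of France?",
    response_format=ShortAnswer,
    logprobs=True,
)
print(result["output"]["answer"], result["prob"])  # parsed answer plus exp(sum of token logprobs)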
src/utils.py CHANGED
@@ -8,8 +8,10 @@ def guess_model_provider(model_name: str):
     model_name = model_name.lower()
     if model_name.startswith("gpt-"):
         return "OpenAI"
-    if "sonnet" in model_name or "claude" in model_name:
+    if "sonnet" in model_name or "claude" in model_name or "haiku" in model_name:
        return "Anthropic"
+    if "command" in model_name:
+        return "Cohere"
     raise ValueError(f"Model `{model_name}` not yet supported")
 
 
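A quick check of the broadened detection, assuming the module is importable as src.utils (model names below are illustrative):

from src.utils import guess_model_provider

assert guess_model_provider("gpt-4o") == "OpenAI"
assert guess_model_provider("claude-3-5-haiku-latest") == "Anthropic"  # "haiku" now matches
assert guess_model_provider("command-r-plus") == "Cohere"  # new Cohere branch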
src/workflows/qb/multi_step_agent.py ADDED
@@ -0,0 +1,192 @@
+import time
+from typing import Any, Iterable
+
+from workflows.executors import execute_workflow
+from workflows.structs import Workflow
+
+
+def _get_workflow_response(workflow: Workflow, available_vars: dict[str, Any]) -> tuple[dict[str, Any], str, float]:
+    """Get response from executing a complete workflow."""
+    start_time = time.time()
+    response, content = execute_workflow(workflow, available_vars, return_full_content=True)
+    response_time = time.time() - start_time
+    return response, content, response_time
+
+
+class MultiStepTossupAgent:
+    """Agent for handling tossup questions with multiple steps in the workflow."""
+
+    external_input_variable = "question_text"
+    output_variables = ["answer", "confidence"]
+
+    def __init__(self, workflow: Workflow, buzz_threshold: float):
+        """Initialize the multi-step tossup agent.
+
+        Args:
+            workflow: The workflow containing multiple steps
+            buzz_threshold: Confidence threshold for buzzing
+        """
+        self.workflow = workflow
+        self.buzz_threshold = buzz_threshold
+        self.output_variables = list(workflow.outputs.keys())
+
+        # Validate input variables
+        if self.external_input_variable not in workflow.inputs:
+            raise ValueError(f"External input variable {self.external_input_variable} not found in workflow inputs")
+
+        # Validate output variables
+        for out_var in self.output_variables:
+            if out_var not in workflow.outputs:
+                raise ValueError(f"Output variable {out_var} not found in workflow outputs")
+
+    def run(self, question_runs: list[str], early_stop: bool = True) -> Iterable[dict]:
+        """Process a tossup question and decide when to buzz based on confidence.
+
+        Args:
+            question_runs: Progressive reveals of the question text
+            early_stop: Whether to stop after the first buzz
+
+        Yields:
+            Dict containing:
+                - answer: The model's answer
+                - confidence: Confidence score
+                - buzz: Whether to buzz
+                - question_fragment: Current question text
+                - position: Current position in question
+                - full_response: Complete model response
+                - response_time: Time taken for response
+                - step_outputs: Outputs from each step
+        """
+        for i, question_text in enumerate(question_runs):
+            # Execute the complete workflow
+            response, content, response_time = _get_workflow_response(
+                self.workflow, {self.external_input_variable: question_text}
+            )
+
+            buzz = response["confidence"] >= self.buzz_threshold
+            result = {
+                "answer": response["answer"],
+                "confidence": response["confidence"],
+                "buzz": buzz,
+                "question_fragment": question_text,
+                "position": i + 1,
+                "full_response": content,
+                "response_time": response_time,
+                "step_outputs": response.get("step_outputs", {}),  # Include intermediate step outputs
+            }
+
+            yield result
+
+            # If we've reached the confidence threshold, buzz and stop
+            if early_stop and buzz:
+                return
+
+
+class MultiStepBonusAgent:
+    """Agent for handling bonus questions with multiple steps in the workflow."""
+
+    external_input_variables = ["leadin", "part"]
+    output_variables = ["answer", "confidence", "explanation"]
+
+    def __init__(self, workflow: Workflow):
+        """Initialize the multi-step bonus agent.
+
+        Args:
+            workflow: The workflow containing multiple steps
+        """
+        self.workflow = workflow
+        self.output_variables = list(workflow.outputs.keys())
+
+        # Validate input variables
+        for input_var in self.external_input_variables:
+            if input_var not in workflow.inputs:
+                raise ValueError(f"External input variable {input_var} not found in workflow inputs")
+
+        # Validate output variables
+        for out_var in self.output_variables:
+            if out_var not in workflow.outputs:
+                raise ValueError(f"Output variable {out_var} not found in workflow outputs")
+
+    def run(self, leadin: str, part: str) -> dict:
+        """Process a bonus part with the given leadin.
+
+        Args:
+            leadin: The leadin text for the bonus question
+            part: The specific part text to answer
+
+        Returns:
+            Dict containing:
+                - answer: The model's answer
+                - confidence: Confidence score
+                - explanation: Explanation for the answer
+                - full_response: Complete model response
+                - response_time: Time taken for response
+                - step_outputs: Outputs from each step
+        """
+        response, content, response_time = _get_workflow_response(
+            self.workflow,
+            {
+                "leadin": leadin,
+                "part": part,
+            },
+        )
+
+        return {
+            "answer": response["answer"],
+            "confidence": response["confidence"],
+            "explanation": response["explanation"],
+            "full_response": content,
+            "response_time": response_time,
+            "step_outputs": response.get("step_outputs", {}),  # Include intermediate step outputs
+        }
+
+
+# Example usage
+if __name__ == "__main__":
+    # Load the Quizbowl dataset
+    from datasets import load_dataset
+
+    from workflows.factory import create_quizbowl_bonus_workflow, create_quizbowl_tossup_workflow
+
+    ds_name = "umdclip/leaderboard_co_set"
+    ds = load_dataset(ds_name, split="train")
+
+    # Create the agents with multi-step workflows
+    tossup_workflow = create_quizbowl_tossup_workflow()
+    tossup_agent = MultiStepTossupAgent(workflow=tossup_workflow, buzz_threshold=0.9)
+
+    bonus_workflow = create_quizbowl_bonus_workflow()
+    bonus_agent = MultiStepBonusAgent(workflow=bonus_workflow)
+
+    # Example for tossup mode
+    print("\n=== TOSSUP MODE EXAMPLE ===")
+    sample_question = ds[30]
+    print(sample_question["question_runs"][-1])
+    print(sample_question["gold_label"])
+    print()
+    question_runs = sample_question["question_runs"]
+
+    results = tossup_agent.run(question_runs, early_stop=True)
+    for result in results:
+        print(result["full_response"])
+        print(f"Guess at position {result['position']}: {result['answer']}")
+        print(f"Confidence: {result['confidence']}")
+        print("Step outputs:", result["step_outputs"])
+        if result["buzz"]:
+            print("Buzzed!\n")
+
+    # Example for bonus mode
+    print("\n=== BONUS MODE EXAMPLE ===")
+    sample_bonus = ds[31]  # Assuming this is a bonus question
+    leadin = sample_bonus["leadin"]
+    parts = sample_bonus["parts"]
+
+    print(f"Leadin: {leadin}")
+    for i, part in enumerate(parts):
+        print(f"\nPart {i + 1}: {part['part']}")
+        result = bonus_agent.run(leadin, part["part"])
+        print(f"Answer: {result['answer']}")
+        print(f"Confidence: {result['confidence']}")
+        print(f"Explanation: {result['explanation']}")
+        print(f"Response time: {result['response_time']:.2f}s")
+        print("Step outputs:", result["step_outputs"])
src/workflows/qb/simple_agent.py CHANGED
@@ -33,14 +33,6 @@ def _get_model_step_response(
     return response, content, response_time
 
 
-def _get_workflow_response(workflow: Workflow, available_vars: dict[str, Any]) -> tuple[dict[str, Any], str, float]:
-    """Get response from the LLM model."""
-    start_time = time.time()
-    response, content = execute_workflow(workflow, available_vars, return_full_content=True)
-    response_time = time.time() - start_time
-    return response, content, response_time
-
-
 class SimpleTossupAgent:
     external_input_variable = "question_text"
     output_variables = ["answer", "confidence"]