Spaces:

qanta-challenge
/

quizbowl-submission

Running

quizbowl-submission / src /workflows /executors.py

Maharshi Gor

First Working commit

193db9d about 1 month ago

17.6 kB

	# %%
	import json
	from typing import Any

	import pydantic

	from llms import completion
	from workflows.errors import WorkflowError
	from workflows.structs import InputField, ModelStep, OutputField, Workflow
	from workflows.utils import create_dependency_graph, topological_sort

	"""
	Core workflow execution functionality.

	This module handles the execution of defined workflows, including input processing,
	dependency-based execution order, model calling, and output collection. It integrates
	with the litellm library to handle model interactions.

	Key components:
	- Utility functions for input/output transformation
	- Input processing and validation
	- Model step execution
	- Complete workflow execution with dependency resolution

	The module orchestrates the execution of steps in the correct order based on their
	dependencies and manages the flow of data between steps.
	"""


	def upper(x):
	if isinstance(x, str):
	return x.upper()
	return x


	def lower(x):
	if isinstance(x, str):
	return x.lower()
	return x


	TYPE_MAP = {
	"str": str,
	"int": int,
	"float": float,
	"bool": bool,
	}

	FUNCTION_MAP = {
	"upper": upper,
	"lower": lower,
	"len": len,
	"split": str.split,
	}


	def get_type(type_str: str) -> type:
	return TYPE_MAP.get(type_str, eval(type_str))


	def create_processed_inputs(model_step: ModelStep, available_vars: dict[str, Any]) -> dict[str, Any]:
	"""
	Creates processed inputs for a model step.

	This function extracts and processes the required inputs for a model step based on
	its input field definitions. It retrieves values from the available variables dictionary
	and applies any specified transformations.

	Args:
	model_step (ModelStep): The model step for which to create processed inputs.
	available_vars (dict[str, Any]): Dictionary of variables available for use as inputs.
	Keys are variable names, values are the variable values.

	Returns:
	dict[str, Any]: A dictionary of processed inputs ready for use by the model step.
	Keys are input field names, values are the processed input values.

	Raises:
	WorkflowError: If a required variable is not found in available_vars,
	or if a specified transformation function is not available.

	Example:
	>>> available_vars = {"step1.output": "Hello World"}
	>>> create_processed_inputs(model_step, available_vars)
	{"input_field_name": "HELLO WORLD"} # If upper transformation was specified
	"""
	processed_inputs: dict[str, Any] = {}
	for input_field in model_step.input_fields:
	var = input_field.variable
	value = available_vars[var]
	if input_field.func is not None:
	func = FUNCTION_MAP.get(input_field.func)
	func = func or eval(input_field.func)
	value = func(value)
	processed_inputs[input_field.name] = value
	return processed_inputs


	# %%
	def execute_model_step(
	model_step: ModelStep, available_vars: dict[str, Any], return_full_content: bool = False
	) -> dict[str, Any] \| tuple[dict[str, Any], str]:
	"""
	Executes a model step using the provided available variables.

	This function handles the complete execution of a model step, including:
	1. Processing inputs using variable references and transformations
	2. Constructing the appropriate prompt for the model
	3. Calling the model via litellm with structured output
	4. Processing and validating the model's response
	5. Applying any output transformations

	The function supports different providers and model types through the litellm
	integration, allowing for a consistent interface regardless of the underlying model.

	Args:
	model_step (ModelStep): The model step to execute, containing model details,
	input/output specifications, and system prompt.
	available_vars (dict[str, Any]): A dictionary of all variables available to this step,
	including outputs from previous steps and external inputs.

	Returns:
	dict[str, Any]: A dictionary of processed outputs from the model step,
	with keys matching the output field names.

	Raises:
	WorkflowError: If there's an error in input processing, model execution,
	or output validation.

	Example:
	>>> step = ModelStep(
	... id="summarize",
	... model="gpt-3.5-turbo",
	... provider="openai",
	... call_type="llm",
	... system_prompt="Summarize the text",
	... input_fields=[InputField(name="text", variable="input_text", description="Text to summarize")],
	... output_fields=[OutputField(name="summary", type="str", description="Summary of the text")]
	... )
	>>> execute_model_step(step, {"input_text": "Long text to be summarized..."})
	{"summary": "A concise summary of the text."}
	"""
	# Ensure inputs are processed using the specified functions in input_fields.
	processed_inputs = create_processed_inputs(model_step, available_vars)

	# Construct the input prompt for the model
	input_str = ", ".join(f"{k}={v}" for k, v in processed_inputs.items())
	step_result = f"{model_step.system_prompt} \| Inputs: {input_str}"

	# Define the expected output fields and their types
	fields = {
	field.name: (get_type(field.type), pydantic.Field(..., description=field.description))
	for field in model_step.output_fields
	}
	ModelResponse = pydantic.create_model("ModelResponse", **fields)

	# Execute the model step using litellm
	api_response = completion(
	model=f"{model_step.provider}/{model_step.model}",
	system=model_step.system_prompt,
	prompt=step_result,
	response_format=ModelResponse,
	)
	# api_response = litellm.completion(
	# model=model_step.model,
	# messages=[{"role": "user", "content": step_result}],
	# response_format=ModelResponse,
	# )

	# Extract and parse the model response
	# model_response_content = api_response["choices"][0]["message"]["content"]
	# model_response = json.loads(model_response_content)
	model_response = api_response["output"]
	# Map the parsed response to the output fields
	outputs = {field.name: model_response[field.name] for field in model_step.output_fields}
	if return_full_content:
	return outputs, api_response["content"]
	return outputs


	# Example usage
	if __name__ == "__main__":
	# Define a simple model step
	model_step = ModelStep(
	id="step1",
	model="gpt-4o-mini",
	provider="OpenAI",
	call_type="llm",
	system_prompt="You are a simple NLP tool that takes a string, and a number N, and return the first N entities in the string, and the total count of entities in the string.",
	input_fields=[
	InputField(name="sentence", description="The sentence to process", variable="sentence", func=None),
	InputField(name="n", description="The number of entities to return", variable="n", func=None),
	],
	output_fields=[
	OutputField(
	name="entities",
	description="The first N entities in the string as a list of strings",
	type="list[str]",
	func=None,
	),
	OutputField(name="count", description="The total count of entities in the string", type="int", func=None),
	],
	)

	# Define processed inputs
	processed_inputs = {"sentence": "Abdul Akbar is a good person, but Jesus is the son of God.", "n": 3}

	# Execute the model step
	outputs = execute_model_step(model_step, processed_inputs)
	print(outputs)


	# %%
	def execute_workflow(
	workflow: Workflow, input_values: dict[str, Any], return_full_content: bool = False
	) -> dict[str, Any] \| tuple[dict[str, Any], str]:
	"""
	Execute the given workflow as a computational graph.

	This function orchestrates the complete execution of a workflow by:

	1. Validating and populating initial values using the provided external inputs
	2. Building a dependency graph between workflow steps
	3. Determining a valid execution order using topological sorting
	4. Executing each step in the correct order, with inputs from previous steps
	5. Collecting and returning the final outputs

	The execution process ensures that all dependencies are satisfied before a step
	is executed, and that the data flows correctly between steps according to the
	variable references defined in each step's input fields.

	Args:
	workflow (Workflow): The workflow to execute, containing steps, their
	dependencies, and input/output specifications.
	input_values (dict[str, Any]): External input values to be used by the workflow.
	Keys should match the required workflow.inputs.

	Returns:
	dict[str, Any]: A dictionary of the workflow's outputs, with keys matching
	the variables defined in workflow.outputs.

	Raises:
	UnknownVariableError: If an input_field references a variable that is not
	provided externally nor produced by any step.
	CyclicDependencyError: If the workflow contains a circular dependency that
	prevents a valid execution order.
	FunctionNotFoundError: If a transformation function specified in input_fields.func
	or output_fields.func is not available.
	WorkflowError: For any other workflow-related errors, such as missing required inputs.

	Example:
	>>> workflow = Workflow(
	... steps={
	... "extract": ModelStep(...), # A step that extracts entities
	... "analyze": ModelStep(...) # A step that analyzes the entities
	... },
	... inputs=["text"],
	... outputs=["analyze.sentiment", "extract.entities"]
	... )
	>>> result = execute_workflow(workflow, {"text": "Apple is launching a new product tomorrow."})
	>>> print(result["analyze.sentiment"])
	"positive"
	>>> print(result["extract.entities"])
	["Apple", "product"]
	"""
	# Step 1: Pre-populate computed values with external workflow inputs.
	computed_values: dict[str, Any] = {}
	for var in workflow.inputs:
	if var not in input_values:
	raise WorkflowError(f"Missing required workflow input: {var}")
	computed_values[var] = input_values[var]

	# Step 2: Build dependency graph among model steps.
	# For each step, examine its input_fields. If an input is not in the pre-populated external inputs,
	# then it is expected to be produced by some step. Otherwise, raise an error.
	dependencies = create_dependency_graph(workflow, input_values)

	# Step 3: Determine the execution order of the steps using topological sort.
	# Raises an error if a cycle is detected.
	execution_order = topological_sort(dependencies)

	# Step 4: Execute steps in topological order.
	for step_id in execution_order:
	step = workflow.steps[step_id]

	# Execute the step
	outputs = execute_model_step(step, computed_values)
	outputs = {f"{step_id}.{k}": v for k, v in outputs.items()}
	computed_values.update(outputs)

	# Step 5: Gather and return workflow outputs.
	final_outputs: dict[str, Any] = {}
	for target, var in workflow.outputs.items():
	if var not in computed_values:
	raise WorkflowError(f"Workflow output variable {var} was not produced")
	final_outputs[target] = computed_values[var]

	return final_outputs


	def run_examples():
	"""
	Runs three example workflows demonstrating:
	1. A successful (linear) workflow execution.
	2. A cyclic dependency error.
	3. An unknown variable dependency error.
	"""
	print("Example 1: Successful Workflow Execution")
	# Example 1: Simple linear workflow.
	# External input "input.value" is provided. Two steps:
	# - step1 takes "input.value" and produces "step1.result".
	# - step2 uses "step1.result" and produces "step2.final".
	from workflows.structs import ModelStep, Workflow

	workflow_success = Workflow(
	steps={
	"step1": ModelStep(
	id="step1",
	model="gpt-4o-mini",
	provider="OpenAI",
	call_type="llm",
	system_prompt="Step1 processing",
	input_fields=[InputField(name="value", description="Input value", variable="input.value")],
	output_fields=[OutputField(name="result", description="Processed result", type="str", func="upper")],
	),
	"step2": ModelStep(
	id="step2",
	model="gpt-4o-mini",
	provider="OpenAI",
	call_type="llm",
	system_prompt="Step2 processing",
	input_fields=[InputField(name="result", description="Result from step1", variable="step1.result")],
	output_fields=[OutputField(name="final", description="Final output", type="str", func="lower")],
	),
	},
	inputs=["input.value"],
	outputs={"final": "step2.final"},
	)
	input_values_success = {"input.value": "Hello, World!"}
	try:
	outputs = execute_workflow(workflow_success, input_values_success)
	print("Workflow outputs:", outputs)
	except WorkflowError as e:
	print("Workflow failed with error:", e)

	print("\nExample 2: Cyclic Dependency Workflow")
	# Example 2: Cyclic dependency.
	# stepA depends on an output from stepB and vice versa.
	workflow_cycle = Workflow(
	steps={
	"stepA": ModelStep(
	id="stepA",
	model="gpt-4o-mini",
	provider="OpenAI",
	call_type="llm",
	system_prompt="StepA processing",
	input_fields=[
	InputField(name="input", description="Input from stepB", variable="stepB.output", func="identity")
	],
	output_fields=[OutputField(name="output", description="Output from A", type="str", func="upper")],
	),
	"stepB": ModelStep(
	id="stepB",
	model="gpt-4o-mini",
	provider="OpenAI",
	call_type="llm",
	system_prompt="StepB processing",
	input_fields=[
	InputField(name="input", description="Input from stepA", variable="stepA.output", func="identity")
	],
	output_fields=[OutputField(name="output", description="Output from B", type="str", func="upper")],
	),
	},
	inputs=[], # no external inputs
	outputs={"output": "stepB.output"},
	)
	try:
	outputs = execute_workflow(workflow_cycle, {})
	print("Workflow outputs:", outputs)
	except WorkflowError as e:
	print("Workflow failed with error:", e)

	print("\nExample 3: Unknown Variable Dependency Workflow")
	# Example 3: A workflow that references a variable not provided as an input or produced by any step.
	workflow_unknown = Workflow(
	steps={
	"stepX": ModelStep(
	id="stepX",
	model="gpt-4o-mini",
	provider="OpenAI",
	call_type="llm",
	system_prompt="StepX processing",
	input_fields=[
	InputField(
	name="input", description="Non-existent input", variable="nonexistent.value", func="identity"
	)
	],
	output_fields=[OutputField(name="output", description="Output from X", type="str", func="upper")],
	)
	},
	inputs=[], # no external inputs
	outputs={"output": "stepX.output"},
	)
	try:
	outputs = execute_workflow(workflow_unknown, {})
	print("Workflow outputs:", outputs)
	except WorkflowError as e:
	print("Workflow failed with error:", e)


	if __name__ == "__main__":
	# create example of model_step
	model_step = ModelStep(
	id="step1",
	model="gpt-4o-mini",
	provider="OpenAI",
	call_type="llm",
	system_prompt="You are a simple NLP tool that takes a string, and a number N, and return the first N entities in the string, and the total count of entities in the string.",
	input_fields=[
	InputField(name="sentence", description="The sentence to process", variable="sentence", func=None),
	InputField(name="n", description="The number of entities to return", variable="n", func=None),
	],
	output_fields=[
	OutputField(
	name="entities",
	description="The first N entities in the string as a list of strings",
	type="list[str]",
	func=None,
	),
	OutputField(name="count", description="The total count of entities in the string", type="int", func=None),
	],
	)

	processed_inputs = {"sentence": "Abdul Akbar is a good person, but Jesus is the son of God.", "n": 3}
	processed_inputs = create_processed_inputs(model_step, processed_inputs)
	print(processed_inputs)

	run_examples()

	# %%