Spaces:

qanta-challenge
/

quizbowl-submission

Running

quizbowl-submission / tests /test_executors.py

Maharshi Gor

Refactored single step and multi step qb agents into one module as QB Agents.

38e3800 3 months ago

12.8 kB

	import json
	from unittest.mock import patch

	import pytest

	from workflows.errors import CyclicDependencyError, WorkflowError
	from workflows.executors import (
	create_processed_inputs,
	execute_model_step,
	execute_workflow,
	lower,
	upper,
	)
	from workflows.structs import InputField, ModelStep, OutputField, Workflow

	# Tests for utility functions


	def test_upper():
	    """Check that upper() upper-cases strings and returns other values unchanged."""
	    string_cases = [
	        ("hello", "HELLO"),
	        ("Hello World", "HELLO WORLD"),
	        ("", ""),
	    ]
	    for given, expected in string_cases:
	        assert upper(given) == expected

	    # Values that are not strings are expected to come back as they went in.
	    passthrough_cases = [
	        (123, 123),
	        ([1, 2, 3], [1, 2, 3]),
	    ]
	    for given, expected in passthrough_cases:
	        assert upper(given) == expected
	    assert upper(None) is None


	def test_lower():
	    """Check that lower() lower-cases strings and returns other values unchanged."""
	    string_cases = [
	        ("HELLO", "hello"),
	        ("Hello World", "hello world"),
	        ("", ""),
	    ]
	    for given, expected in string_cases:
	        assert lower(given) == expected

	    # Values that are not strings are expected to come back as they went in.
	    passthrough_cases = [
	        (123, 123),
	        ([1, 2, 3], [1, 2, 3]),
	    ]
	    for given, expected in passthrough_cases:
	        assert lower(given) == expected
	    assert lower(None) is None


	# Tests for create_processed_inputs


	def test_create_processed_inputs_basic():
	    """Verify that a field without a func receives its variable's value as-is."""
	    text_field = InputField(name="text", description="Input text", variable="input_text")
	    step = ModelStep(
	        id="test_step",
	        name="Test Step",
	        model="gpt-4",
	        provider="openai",
	        call_type="llm",
	        system_prompt="Test prompt",
	        input_fields=[text_field],
	        output_fields=[],
	    )

	    processed = create_processed_inputs(step, {"input_text": "Hello World"})

	    # The result is keyed by the field name, not by the variable name.
	    assert processed == {"text": "Hello World"}


	def test_create_processed_inputs_with_transformation():
	    """Verify that the "upper" and "lower" funcs are applied to their fields' values."""
	    transformed_fields = [
	        InputField(name="upper_text", description="Uppercase text", variable="input_text", func="upper"),
	        InputField(name="lower_text", description="Lowercase text", variable="input_caps", func="lower"),
	    ]
	    step = ModelStep(
	        id="test_step",
	        name="Test Step",
	        model="gpt-4",
	        provider="openai",
	        call_type="llm",
	        system_prompt="Test prompt",
	        input_fields=transformed_fields,
	        output_fields=[],
	    )
	    supplied = {"input_text": "hello", "input_caps": "WORLD"}

	    processed = create_processed_inputs(step, supplied)

	    assert processed == {"upper_text": "HELLO", "lower_text": "world"}


	def test_create_processed_inputs_missing_var():
	    """Verify that a KeyError is raised when a field's variable is not available."""
	    unresolved_field = InputField(name="text", description="Input text", variable="missing_var")
	    step = ModelStep(
	        id="test_step",
	        name="Test Step",
	        model="gpt-4",
	        provider="openai",
	        call_type="llm",
	        system_prompt="Test prompt",
	        input_fields=[unresolved_field],
	        output_fields=[],
	    )
	    # Only "input_text" is supplied, so "missing_var" has nothing to resolve to.
	    supplied = {"input_text": "Hello World"}

	    with pytest.raises(KeyError):
	        create_processed_inputs(step, supplied)


	def test_create_processed_inputs_unknown_func():
	    """Verify that an error is raised when a field names a func that does not exist."""
	    bad_func_field = InputField(
	        name="text",
	        description="Input text",
	        variable="input_text",
	        func="unknown_func",
	    )
	    step = ModelStep(
	        id="test_step",
	        name="Test Step",
	        model="gpt-4",
	        provider="openai",
	        call_type="llm",
	        system_prompt="Test prompt",
	        input_fields=[bad_func_field],
	        output_fields=[],
	    )
	    supplied = {"input_text": "Hello World"}

	    # NOTE(review): Exception is deliberately broad here; the concrete error type
	    # raised for an unknown func is not visible from this test -- narrow once confirmed.
	    with pytest.raises(Exception):
	        create_processed_inputs(step, supplied)


	# Tests for execute_model_step


	@patch("workflows.executors.completion")
	def test_execute_model_step_success(mock_completion):
	    """Verify a model step returns the mocked completion's output and calls it once."""
	    expected_output = {"summary": "This is a summary"}
	    # The patched completion returns both the raw JSON content and the parsed output.
	    mock_completion.return_value = {
	        "content": json.dumps(expected_output),
	        "output": dict(expected_output),
	    }

	    text_field = InputField(name="text", description="Text to summarize", variable="input_text")
	    summary_field = OutputField(name="summary", description="Summary of the text", type="str")
	    step = ModelStep(
	        id="summarize",
	        name="Summarize Text",
	        model="gpt-3.5-turbo",
	        provider="OpenAI",
	        call_type="llm",
	        system_prompt="Summarize the text",
	        input_fields=[text_field],
	        output_fields=[summary_field],
	    )

	    outputs = execute_model_step(step, {"input_text": "Long text to be summarized..."})

	    assert outputs == expected_output

	    # Exactly one completion call, addressed as "<provider>/<model>" with the
	    # step's system prompt included in the "system" keyword argument.
	    mock_completion.assert_called_once()
	    _, call_kwargs = mock_completion.call_args
	    assert call_kwargs["model"] == "OpenAI/gpt-3.5-turbo"
	    assert "Summarize the text" in call_kwargs["system"]


	@patch("workflows.executors.completion")
	def test_execute_model_step_error(mock_completion):
	    """Verify that a failure inside the patched completion call surfaces as an exception."""
	    # Every call to the patched completion raises.
	    mock_completion.side_effect = Exception("API Error")

	    text_field = InputField(name="text", description="Text to summarize", variable="input_text")
	    summary_field = OutputField(name="summary", description="Summary of the text", type="str")
	    step = ModelStep(
	        id="summarize",
	        name="Summarize Text",
	        model="gpt-3.5-turbo",
	        provider="openai",
	        call_type="llm",
	        system_prompt="Summarize the text",
	        input_fields=[text_field],
	        output_fields=[summary_field],
	    )
	    supplied = {"input_text": "Long text to be summarized..."}

	    with pytest.raises(Exception):
	        execute_model_step(step, supplied)


	# Tests for execute_workflow


	@patch("workflows.executors.execute_model_step")
	def test_execute_workflow_simple(mock_execute_step):
	    """Verify the outputs and computed values of a one-step workflow."""
	    # The patched step executor hands back a fixed output mapping.
	    mock_execute_step.return_value = {"summary": "This is a summary"}

	    text_field = InputField(name="text", description="Text to summarize", variable="input_text")
	    summary_field = OutputField(name="summary", description="Summary of the text", type="str")
	    step = ModelStep(
	        id="summarize",
	        name="Summarize Text",
	        model="gpt-3.5-turbo",
	        provider="openai",
	        call_type="llm",
	        system_prompt="Summarize the text",
	        input_fields=[text_field],
	        output_fields=[summary_field],
	    )
	    workflow = Workflow(
	        steps={"summarize": step},
	        inputs=["input_text"],
	        outputs={"summary": "summarize.summary"},
	    )
	    workflow_inputs = {"input_text": "Long text to be summarized..."}

	    final_outputs, computed_values, step_contents = execute_workflow(workflow, workflow_inputs)

	    assert final_outputs == {"summary": "This is a summary"}
	    # Computed values hold the workflow input plus the step output under "<step id>.<field>".
	    assert computed_values == {
	        "input_text": "Long text to be summarized...",
	        "summarize.summary": "This is a summary",
	    }
	    # return_full_content was not requested, so no step contents are expected.
	    assert step_contents == {}

	    mock_execute_step.assert_called_once()


	@patch("workflows.executors.execute_model_step")
	def test_execute_workflow_multi_step(mock_execute_step):
	    """Verify a two-step workflow where the second step consumes the first step's output."""

	    def fake_execute_step(step, available_vars, return_full_content=False):
	        # Canned outputs per step id; any other step yields an empty mapping.
	        canned_outputs = {
	            "extract": {"entities": ["Apple", "product"]},
	            "analyze": {"sentiment": "positive"},
	        }
	        return canned_outputs.get(step.id, {})

	    mock_execute_step.side_effect = fake_execute_step

	    # First step: reads the workflow input and produces "entities".
	    extract_step = ModelStep(
	        id="extract",
	        name="Extract Entities",
	        model="gpt-3.5-turbo",
	        provider="openai",
	        call_type="llm",
	        system_prompt="Extract entities",
	        input_fields=[InputField(name="text", description="Text to analyze", variable="input_text")],
	        output_fields=[OutputField(name="entities", description="Extracted entities", type="list[str]")],
	    )

	    # Second step: its input variable refers to the first step's output.
	    analyze_step = ModelStep(
	        id="analyze",
	        name="Analyze Sentiment",
	        model="gpt-4",
	        provider="openai",
	        call_type="llm",
	        system_prompt="Analyze sentiment",
	        input_fields=[InputField(name="entities", description="Entities to analyze", variable="extract.entities")],
	        output_fields=[OutputField(name="sentiment", description="Sentiment analysis", type="str")],
	    )

	    workflow = Workflow(
	        steps={"extract": extract_step, "analyze": analyze_step},
	        inputs=["input_text"],
	        outputs={"entities": "extract.entities", "sentiment": "analyze.sentiment"},
	    )
	    workflow_inputs = {"input_text": "Apple is launching a new product tomorrow."}

	    final_outputs, computed_values, step_contents = execute_workflow(workflow, workflow_inputs)

	    assert final_outputs == {"entities": ["Apple", "product"], "sentiment": "positive"}
	    assert computed_values == {
	        "input_text": "Apple is launching a new product tomorrow.",
	        "extract.entities": ["Apple", "product"],
	        "analyze.sentiment": "positive",
	    }
	    assert step_contents == {}

	    # One call to the step executor per workflow step.
	    assert mock_execute_step.call_count == 2


	def test_execute_workflow_missing_input():
	    """Verify that running a workflow without its declared input raises WorkflowError."""
	    text_field = InputField(name="text", description="Text to summarize", variable="input_text")
	    summary_field = OutputField(name="summary", description="Summary of the text", type="str")
	    step = ModelStep(
	        id="summarize",
	        name="Summarize Text",
	        model="gpt-3.5-turbo",
	        provider="openai",
	        call_type="llm",
	        system_prompt="Summarize the text",
	        input_fields=[text_field],
	        output_fields=[summary_field],
	    )
	    workflow = Workflow(
	        steps={"summarize": step},
	        inputs=["input_text"],
	        outputs={"summary": "summarize.summary"},
	    )

	    # The workflow declares "input_text" but an empty input mapping is supplied.
	    no_inputs = {}
	    with pytest.raises(WorkflowError, match="Missing required workflow input"):
	        execute_workflow(workflow, no_inputs)


	@patch("workflows.executors.create_dependency_graph")
	def test_execute_workflow_cyclic_dependency(mock_dependency_graph):
	    """Test that a CyclicDependencyError from dependency-graph creation propagates.

	    ``create_dependency_graph`` is patched to raise ``CyclicDependencyError``, so the
	    test checks that ``execute_workflow`` lets that error reach the caller.
	    """
	    # Make create_dependency_graph raise a CyclicDependencyError
	    mock_dependency_graph.side_effect = CyclicDependencyError()

	    step = ModelStep(
	        id="test",
	        name="Test Step",
	        model="gpt-3.5-turbo",
	        provider="openai",
	        call_type="llm",
	        system_prompt="Test",
	        input_fields=[],
	        output_fields=[],
	    )

	    # Steps are passed as a mapping keyed by step id, the same shape the other
	    # workflow tests in this file use when building a Workflow.
	    workflow = Workflow(steps={"test": step}, inputs=[], outputs={})

	    # This should propagate the CyclicDependencyError
	    with pytest.raises(CyclicDependencyError):
	        execute_workflow(workflow, {})


	@patch("workflows.executors.execute_model_step")
	def test_execute_workflow_with_full_content(mock_execute_step):
	    """Test execution of a workflow with return_full_content=True.

	    The patched step executor returns an ``(outputs, content)`` pair, and the test
	    checks that the content is reported per step id in the third returned value.
	    """
	    # Configure mock to return expected outputs and content
	    mock_execute_step.return_value = ({"summary": "This is a summary"}, "Full model response content")

	    # Create a simple workflow
	    step = ModelStep(
	        id="summarize",
	        name="Summarize Text",
	        model="gpt-3.5-turbo",
	        provider="openai",
	        call_type="llm",
	        system_prompt="Summarize the text",
	        input_fields=[InputField(name="text", description="Text to summarize", variable="input_text")],
	        output_fields=[OutputField(name="summary", description="Summary of the text", type="str")],
	    )

	    # Steps are passed as a mapping keyed by step id, the same shape the other
	    # workflow tests in this file use when building a Workflow.
	    workflow = Workflow(steps={"summarize": step}, inputs=["input_text"], outputs={"summary": "summarize.summary"})

	    # Execute the workflow with return_full_content=True
	    final_outputs, computed_values, step_contents = execute_workflow(
	        workflow, {"input_text": "Long text to be summarized..."}, return_full_content=True
	    )

	    # Verify the results
	    assert final_outputs == {"summary": "This is a summary"}
	    assert computed_values == {"input_text": "Long text to be summarized...", "summarize.summary": "This is a summary"}
	    assert step_contents == {"summarize": "Full model response content"}

	    # Verify execute_model_step was called correctly with return_full_content=True.
	    # NOTE(review): computed_values is the post-run mapping (it already contains
	    # "summarize.summary"); this comparison presumably relies on the executor passing
	    # that same dict object to the step -- confirm against execute_workflow.
	    mock_execute_step.assert_called_once_with(step, computed_values, return_full_content=True)