# flake8: noqa
import os
import warnings
from typing import Any, Dict, List, Optional

from langchain_core.agents import AgentAction, AgentFinish
from langchain_core.callbacks import BaseCallbackHandler
from langchain_core.outputs import LLMResult


class DeepEvalCallbackHandler(BaseCallbackHandler):
    """Callback handler that logs results to deepeval.

    Args:
        implementation_name: Name of the `implementation` in deepeval.
        metrics: A list of deepeval metrics to evaluate against.

    Raises:
        ImportError: if the `deepeval` package is not installed.

    Examples:
        >>> from langchain_community.llms import OpenAI
        >>> from langchain_community.callbacks import DeepEvalCallbackHandler
        >>> from deepeval.metrics import AnswerRelevancy
        >>> metric = AnswerRelevancy(minimum_score=0.3)
        >>> deepeval_callback = DeepEvalCallbackHandler(
        ...     implementation_name="exampleImplementation",
        ...     metrics=[metric],
        ... )
        >>> llm = OpenAI(
        ...     temperature=0,
        ...     callbacks=[deepeval_callback],
        ...     verbose=True,
        ...     openai_api_key="API_KEY_HERE",
        ... )
        >>> llm.generate([
        ...     "What is the best evaluation tool out there? (no bias at all)",
        ... ])
        "Deepeval, no doubt about it."
    """
    REPO_URL: str = "https://github.com/confident-ai/deepeval"
    ISSUES_URL: str = f"{REPO_URL}/issues"
    BLOG_URL: str = "https://docs.confident-ai.com"

    def __init__(
        self,
        metrics: List[Any],
        implementation_name: Optional[str] = None,
    ) -> None:
        """Initialize the `DeepEvalCallbackHandler`.

        Args:
            metrics: A list of deepeval metrics to evaluate against.
            implementation_name: Name of the implementation in deepeval.

        Raises:
            ImportError: if the `deepeval` package is not installed.
            ConnectionError: if the connection to deepeval fails.
        """
        super().__init__()

        # Import deepeval directly (not via `import_deepeval`) so IDEs keep
        # type hints for the package.
        try:
            import deepeval  # noqa: F401,I001
        except ImportError:
            raise ImportError(
                """To use the deepeval callback manager you need to have the
                `deepeval` Python package installed. Please install it with
                `pip install deepeval`"""
            )

        if os.path.exists(".deepeval"):
            warnings.warn(
                """You are currently not logging anything to the dashboard; we
                recommend using `deepeval login`."""
            )

        # Set the deepeval variables
        self.implementation_name = implementation_name
        self.metrics = metrics

        warnings.warn(
            (
                "The `DeepEvalCallbackHandler` is currently in beta and is subject to"
                " change based on updates to `langchain`. Please report any issues to"
                f" {self.ISSUES_URL} as an `integration` issue."
            ),
        )

    def on_llm_start(
        self, serialized: Dict[str, Any], prompts: List[str], **kwargs: Any
    ) -> None:
        """Store the prompts so they can be paired with generations later."""
        self.prompts = prompts

    def on_llm_new_token(self, token: str, **kwargs: Any) -> None:
        """Do nothing when a new token is generated."""
        pass

    def on_llm_end(self, response: LLMResult, **kwargs: Any) -> None:
        """Log records to deepeval when an LLM ends."""
        from deepeval.metrics.answer_relevancy import AnswerRelevancy
        from deepeval.metrics.bias_classifier import UnBiasedMetric
        from deepeval.metrics.metric import Metric
        from deepeval.metrics.toxic_classifier import NonToxicMetric

        for metric in self.metrics:
            for i, generation in enumerate(response.generations):
                # `response.generations[i]` pairs positionally with the
                # prompt stored at `self.prompts[i]` in `on_llm_start`.
                # Here, we only measure the first generation's output.
                output = generation[0].text
                query = self.prompts[i]
                if isinstance(metric, AnswerRelevancy):
                    result = metric.measure(
                        output=output,
                        query=query,
                    )
                    print(f"Answer Relevancy: {result}")  # noqa: T201
                elif isinstance(metric, UnBiasedMetric):
                    score = metric.measure(output)
                    print(f"Bias Score: {score}")  # noqa: T201
                elif isinstance(metric, NonToxicMetric):
                    score = metric.measure(output)
                    print(f"Toxic Score: {score}")  # noqa: T201
                else:
                    raise ValueError(
                        f"Metric {metric.__class__.__name__} is not supported by"
                        " deepeval callbacks."
                    )
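
    # Illustrative note: the dispatch above is equivalent to calling, for
    # each (prompt, output) pair, e.g.
    #
    #     AnswerRelevancy(minimum_score=0.3).measure(
    #         output=output,
    #         query=prompt,
    #     )
    #
    # (the exact `measure` signature follows the deepeval version this
    # integration was written against). Metrics are matched by concrete
    # deepeval class; anything else raises `ValueError` rather than being
    # silently skipped.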

    def on_llm_error(self, error: BaseException, **kwargs: Any) -> None:
        """Do nothing when the LLM outputs an error."""
        pass

    def on_chain_start(
        self, serialized: Dict[str, Any], inputs: Dict[str, Any], **kwargs: Any
    ) -> None:
        """Do nothing when a chain starts."""
        pass

    def on_chain_end(self, outputs: Dict[str, Any], **kwargs: Any) -> None:
        """Do nothing when a chain ends."""
        pass

    def on_chain_error(self, error: BaseException, **kwargs: Any) -> None:
        """Do nothing when an LLM chain outputs an error."""
        pass

    def on_tool_start(
        self,
        serialized: Dict[str, Any],
        input_str: str,
        **kwargs: Any,
    ) -> None:
        """Do nothing when a tool starts."""
        pass

    def on_agent_action(self, action: AgentAction, **kwargs: Any) -> Any:
        """Do nothing when an agent takes a specific action."""
        pass

    def on_tool_end(
        self,
        output: Any,
        observation_prefix: Optional[str] = None,
        llm_prefix: Optional[str] = None,
        **kwargs: Any,
    ) -> None:
        """Do nothing when a tool ends."""
        pass

    def on_tool_error(self, error: BaseException, **kwargs: Any) -> None:
        """Do nothing when a tool outputs an error."""
        pass

    def on_text(self, text: str, **kwargs: Any) -> None:
        """Do nothing when text is received."""
        pass

    def on_agent_finish(self, finish: AgentFinish, **kwargs: Any) -> None:
        """Do nothing when an agent finishes."""
        pass
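

# --- Usage sketch (illustrative only, not part of the library) -------------
# A minimal end-to-end example mirroring the class docstring above; assumes
# `deepeval` is installed and an OpenAI API key is configured.
#
#     from langchain_community.llms import OpenAI
#     from deepeval.metrics import AnswerRelevancy
#
#     metric = AnswerRelevancy(minimum_score=0.3)
#     handler = DeepEvalCallbackHandler(
#         metrics=[metric],
#         implementation_name="exampleImplementation",
#     )
#     llm = OpenAI(temperature=0, callbacks=[handler])
#     llm.generate(["What is the best evaluation tool out there?"])
#     # -> on_llm_end prints "Answer Relevancy: <score>" per prompt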