from typing import List
import tiktoken
from langchain_core.messages import BaseMessage, ToolMessage, HumanMessage, AIMessage, SystemMessage, trim_messages
def str_token_counter(text: str, encoding_name: str = "o200k_base") -> int:
    """Return the number of tokens in *text* under a tiktoken encoding.

    Args:
        text: The string to tokenize.
        encoding_name: Name of the tiktoken encoding to use. Defaults to
            "o200k_base" (the encoding used by GPT-4o-family models), which
            preserves the original behavior.

    Returns:
        The token count of *text* under the chosen encoding.
    """
    # tiktoken caches Encoding objects internally, so repeated calls are cheap.
    enc = tiktoken.get_encoding(encoding_name)
    return len(enc.encode(text))
def tiktoken_counter(messages: List[BaseMessage]) -> int:
    """Approximately reproduce https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb

    For simplicity only supports str Message.contents.
    """
    # Ordered (class, role) pairs: checked top-to-bottom with isinstance,
    # matching the original elif chain's priority.
    dispatch = (
        (HumanMessage, "user"),
        (AIMessage, "assistant"),
        (ToolMessage, "tool"),
        (SystemMessage, "system"),
    )
    tokens_per_message = 3
    tokens_per_name = 1
    total = 3  # every reply is primed with <|start|>assistant<|message|>
    for msg in messages:
        for msg_cls, role in dispatch:
            if isinstance(msg, msg_cls):
                break
        else:
            raise ValueError(f"Unsupported messages type {msg.__class__}")
        total += tokens_per_message + str_token_counter(role) + str_token_counter(msg.content)
        if msg.name:
            total += tokens_per_name + str_token_counter(msg.name)
    return total
def convert_to_openai_messages(messages: List[BaseMessage]) -> List[dict]:
    """Convert LangChain messages to OpenAI chat-completions format.

    Args:
        messages: LangChain message objects (Human/AI/System/Tool).

    Returns:
        A list of dicts with "role" and "content" keys, plus "name" when the
        source message carries one and "tool_call_id" for tool messages.

    Raises:
        ValueError: If a message is not one of the four supported types.
    """
    openai_messages = []

    for msg in messages:
        message_dict = {"content": msg.content}

        if isinstance(msg, HumanMessage):
            message_dict["role"] = "user"
        elif isinstance(msg, AIMessage):
            message_dict["role"] = "assistant"
        elif isinstance(msg, SystemMessage):
            message_dict["role"] = "system"
        elif isinstance(msg, ToolMessage):
            message_dict["role"] = "tool"
            # OpenAI requires tool_call_id on role="tool" messages; without it
            # the API rejects the payload. ToolMessage always carries one.
            message_dict["tool_call_id"] = msg.tool_call_id
        else:
            raise ValueError(f"Unsupported message type: {msg.__class__}")

        if msg.name:
            message_dict["name"] = msg.name

        openai_messages.append(message_dict)

    return openai_messages
def trim_messages_openai(messages: List[BaseMessage], max_tokens: int = 45) -> List[dict]:
    """Trim LangChain messages to a token budget and convert to OpenAI format.

    Keeps the most recent messages ("last" strategy) within *max_tokens*,
    always retaining the system message, starting the kept window on a human
    message and ending it on a human or tool message, so the result is a
    valid conversation prefix for the chat API.

    Args:
        messages: Full LangChain conversation history.
        max_tokens: Token budget for the trimmed history (counted with
            tiktoken_counter). Defaults to 45, the original hard-coded value.

    Returns:
        The trimmed history as OpenAI-format message dicts.
    """
    trimmed_messages = trim_messages(
        messages,
        token_counter=tiktoken_counter,
        strategy="last",
        max_tokens=max_tokens,
        start_on="human",
        end_on=("human", "tool"),
        include_system=True,
    )
    return convert_to_openai_messages(trimmed_messages)
# Test
# messages = [SystemMessage(content="You are a helpful assistant."), HumanMessage(query)]
# openai_format_messages = trim_messages_openai(messages)
