"""Log chat messages and feedback to a Hugging Face dataset repository."""

from typing import Optional, Sequence, Tuple, Union
import os
import tempfile
import uuid

import huggingface_hub
import pandas as pd
import ujson

LOGS_DATSET_PATH = "logikon/benjamin-logs"


def _metadata_items(metadata: Optional[dict]) -> list:
    """Return ``metadata`` as a list of ``(key, value)`` pairs ([] if falsy)."""
    return list(metadata.items()) if metadata else []


def _upload_json(data: dict, conversation_id: str, filename: str) -> None:
    """Serialize ``data`` to JSON and upload it to the logs dataset.

    The record is stored at ``data/<today>/<conversation_id>/<filename>``
    inside the ``LOGS_DATSET_PATH`` dataset repository.

    Raises:
        KeyError: if the ``HF_DATASETS_TOKEN`` environment variable is unset.
    """
    # Hand the payload over as bytes. Writing to a temporary file and then
    # passing its underlying buffer leaves the stream positioned at EOF,
    # so the uploader would not reliably see the content.
    payload = ujson.dumps(data).encode("utf-8")

    # Repository paths always use forward slashes, independent of the host
    # OS, so build the path explicitly rather than with os.path.join.
    path_in_repo = "/".join(
        (
            "data",
            pd.Timestamp.now().date().isoformat(),
            conversation_id,
            filename,
        )
    )

    api = huggingface_hub.HfApi()
    # NOTE(review): this call is not awaited and looks synchronous, so it
    # presumably blocks the event loop while uploading - confirm whether it
    # should be moved to a worker thread.
    api.upload_file(
        path_or_fileobj=payload,
        path_in_repo=path_in_repo,
        repo_id=LOGS_DATSET_PATH,
        repo_type="dataset",
        token=os.environ["HF_DATASETS_TOKEN"],
    )


async def log_messages(
    messages: Tuple[str, str],
    conversation_id: str,
    step: int,
    metadata: Optional[dict] = None,
):
    """Upload one human/AI message pair as ``step_<step>.json``.

    Args:
        messages: ``(human_message, ai_message)`` pair.
        conversation_id: Identifier used as the folder name for the record.
        step: Step number, stored in the record and used in the file name.
        metadata: Optional extra key/value pairs, stored as a list of pairs.

    Raises:
        KeyError: if the ``HF_DATASETS_TOKEN`` environment variable is unset.
    """
    data = {
        "conversation_id": conversation_id,
        "step": step,
        "human": messages[0],
        "ai": messages[1],
        "metadata": _metadata_items(metadata),
    }
    _upload_json(data, conversation_id, f"step_{step}.json")


async def log_feedback(
    liked: bool,
    conversation_id: str,
    step: Union[int, Sequence[int]],
    metadata: Optional[dict] = None,
):
    """Upload a like/dislike record as ``feedback_<step>_<uuid4>.json``.

    Args:
        liked: Whether the user liked the response.
        conversation_id: Identifier used as the folder name for the record.
        step: Either a plain step number or an index sequence whose first
            element is the step number. It is stored in the record unchanged.
        metadata: Optional extra key/value pairs, stored as a list of pairs.

    Raises:
        KeyError: if the ``HF_DATASETS_TOKEN`` environment variable is unset.
    """
    data = {
        "conversation_id": conversation_id,
        "step": step,
        "liked": liked,
        "metadata": _metadata_items(metadata),
    }
    # Indexing unconditionally fails for a plain int, which is what the
    # original annotation promised; accept both forms.
    step_index = step[0] if isinstance(step, (list, tuple)) else step
    # The random suffix keeps repeated feedback on one step from colliding.
    _upload_json(
        data,
        conversation_id,
        f"feedback_{step_index}_{uuid.uuid4()}.json",
    )