benjamin-chat / backend /logging.py
Gregor Betz
First code commit
8bd8cd0
raw
history blame
1.8 kB
"log chat messages and feedbacks to a dataset"
from typing import Tuple
import os
import tempfile
import ujson
import uuid
import huggingface_hub
import pandas as pd
LOGS_DATSET_PATH = "logikon/benjamin-logs"
async def log_messages(
messages: Tuple[str, str],
conversation_id: str,
step: int,
metadata: dict = None
):
data = {
"conversation_id": conversation_id,
"step": step,
"human": messages[0],
"ai": messages[1],
"metadata": list(metadata.items()) if metadata else []
}
with tempfile.TemporaryFile(mode="w+") as f:
ujson.dump(data, f)
f.flush()
api = huggingface_hub.HfApi()
api.upload_file(
path_or_fileobj=f.buffer,
path_in_repo=os.path.join("data", pd.Timestamp.now().date().isoformat(), conversation_id, f"step_{step}.json"),
repo_id=LOGS_DATSET_PATH,
repo_type="dataset",
token=os.environ["HF_DATASETS_TOKEN"]
)
async def log_feedback(
liked: bool,
conversation_id: str,
step: int,
metadata: dict = None
):
data = {
"conversation_id": conversation_id,
"step": step,
"liked": liked,
"metadata": list(metadata.items()) if metadata else []
}
with tempfile.TemporaryFile(mode="w+") as f:
ujson.dump(data, f)
f.flush()
api = huggingface_hub.HfApi()
api.upload_file(
path_or_fileobj=f.buffer,
path_in_repo=os.path.join("data", pd.Timestamp.now().date().isoformat(), conversation_id, f"feedback_{step[0]}_{str(uuid.uuid4())}.json"),
repo_id=LOGS_DATSET_PATH,
repo_type="dataset",
token=os.environ["HF_DATASETS_TOKEN"]
)