Spaces:
Runtime error
Runtime error
langchain-qa-bot
/
docs
/langchain
/libs
/community
/langchain_community
/document_loaders
/chatgpt.py
import datetime | |
import json | |
from typing import List | |
from langchain_core.documents import Document | |
from langchain_community.document_loaders.base import BaseLoader | |
def concatenate_rows(message: dict, title: str) -> str:
    """Combine message information in a readable format ready to be used.

    Missing pieces of the message are tolerated rather than raising: an
    absent author is reported as ``"unknown"``, absent or empty content
    parts yield empty text, and an absent ``create_time`` is reported as
    ``"unknown date"``.

    Args:
        message: Message to be concatenated. May be empty or ``None``.
        title: Title of the conversation

    Returns:
        Concatenated message, or an empty string when ``message`` is falsy.
    """
    if not message:
        return ""

    author = message.get("author")
    sender = author.get("role", "unknown") if author else "unknown"

    # Only the first content part is used. Fall back to empty text when the
    # message has no "content", no "parts", or an empty "parts" list.
    parts = (message.get("content") or {}).get("parts") or [""]
    text = parts[0]

    # Compare against None (not truthiness) so a timestamp of 0 still formats.
    create_time = message.get("create_time")
    if create_time is None:
        date = "unknown date"
    else:
        # fromtimestamp() converts to the local timezone of the host.
        date = datetime.datetime.fromtimestamp(create_time).strftime(
            "%Y-%m-%d %H:%M:%S"
        )

    return f"{title} - {sender} on {date}: {text}\n\n"
class ChatGPTLoader(BaseLoader):
    """Load conversations from exported `ChatGPT` data."""

    def __init__(self, log_file: str, num_logs: int = -1):
        """Initialize a class object.

        Args:
            log_file: Path to the log file
            num_logs: Maximum number of logs to load, counted from the start
                of the file. If 0 or negative (the default), load all logs.
        """
        self.log_file = log_file
        self.num_logs = num_logs

    def load(self) -> List[Document]:
        """Read the log file and build one Document per conversation.

        Each conversation's messages are rendered with ``concatenate_rows``
        and joined into a single page. A system message in the first
        position of a conversation is skipped.

        Returns:
            One Document per loaded conversation, with the log file path
            stored under the ``"source"`` metadata key.
        """
        with open(self.log_file, encoding="utf8") as f:
            data = json.load(f)

        # Only a positive limit truncates. Slicing with the default of -1
        # would silently drop the last conversation.
        if self.num_logs and self.num_logs > 0:
            data = data[: self.num_logs]

        documents = []
        for d in data:
            title = d["title"]
            messages = d["mapping"]

            rows = []
            for idx, node in enumerate(messages.values()):
                message = node.get("message")
                # The first node may carry no message at all, so check it
                # exists before inspecting its author.
                if idx == 0 and message:
                    author = message.get("author") or {}
                    if author.get("role") == "system":
                        continue
                rows.append(concatenate_rows(message, title))

            text = "".join(rows)
            metadata = {"source": str(self.log_file)}
            documents.append(Document(page_content=text, metadata=metadata))

        return documents