### RAG code

```python
# Embedding model builder
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core import Settings, SimpleDirectoryReader, VectorStoreIndex
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.postprocessor import SimilarityPostprocessor


def set_embed_model(model_name: str,
                    chunk_size: int = 256,
                    chunk_overlap: int = 25) -> None:
    # Disable the default LLM: the index is used for retrieval only;
    # generation happens in the Chainlit layer via the Ollama server.
    Settings.llm = None
    Settings.embed_model = HuggingFaceEmbedding(model_name=model_name)
    Settings.chunk_size = chunk_size
    Settings.chunk_overlap = chunk_overlap


class RAGModule:
    def __init__(self,
                 llm_model: str = "MarcoAland/llama3.1-rag-indo",
                 embedding_model: str = "MarcoAland/Indo-bge-m3",
                 docs_path: str = "data",
                 top_k: int = 3,
                 similarity_cutoff: float = 0.4):
        # Define the embedding model
        set_embed_model(model_name=embedding_model)

        # Build the vector index over the documents directory
        documents = SimpleDirectoryReader(docs_path).load_data()
        index = VectorStoreIndex.from_documents(documents)
        retriever = VectorIndexRetriever(
            index=index,
            similarity_top_k=top_k,
        )

        self.top_k = top_k
        self.query_engine = RetrieverQueryEngine(
            retriever=retriever,
            node_postprocessors=[SimilarityPostprocessor(similarity_cutoff=similarity_cutoff)],
        )

    def format_context(self, response) -> str:
        # "Jawab dengan akurat" = "Answer accurately"
        context = "Jawab dengan akurat\n\nContext:\n"
        # Iterate over the nodes actually returned: the similarity cutoff can
        # leave fewer than top_k nodes, so indexing range(self.top_k) could
        # raise an IndexError.
        for node in response.source_nodes:
            context += node.text + "\n\n"
        return context

    def query(self, query: str) -> str:
        try:
            response = self.query_engine.query(query)
            return self.format_context(response)
        except Exception:
            # Fall back to an empty context if retrieval fails
            return ""

    def prompt(self, context: str, instruction: str) -> str:
        # "### Instruksi:" = "### Instruction:", the prompt format
        # the fine-tuned model expects
        return f"{context}\n ### Instruksi:\n {instruction}"

    def main(self, instruction: str) -> str:
        context = self.query(query=instruction)
        return self.prompt(context=context, instruction=instruction)
```
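
For reference, a minimal standalone sketch of the module. The `data/` directory contents and the example question are assumptions for illustration, not part of the original app:

```python
# Hypothetical standalone usage: assumes a "data/" folder containing documents
rag = RAGModule(docs_path="data", top_k=3, similarity_cutoff=0.4)

# Example Indonesian question ("What services does the company provide?")
prompt = rag.main("Layanan apa saja yang disediakan perusahaan?")
print(prompt)  # retrieved context, then "### Instruksi:" and the question
```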
### Chainlit code

```python
import chainlit as cl
from openai import AsyncOpenAI

RAG_Trwira = RAGModule()

# Configure the async OpenAI client to point at the Ollama server's
# OpenAI-compatible endpoint. Ollama does not validate the API key,
# so any non-empty placeholder string works.
client = AsyncOpenAI(api_key="ollama", base_url="http://34.69.9.203:11434/v1")

settings = {
    "model": "MarcoAland/llama3.1-rag-indo",
    "temperature": 0.3,
    "max_tokens": 2048,
}


@cl.on_chat_start
async def start_chat():
    # Display a welcome title in the UI using Markdown
    # ("Hi, my name is Mitrakara. Welcome! Ready to be your partner
    # in the professional world.")
    await cl.Message(
        content="# Hai, namaku Mitrakara👋\n\n## Selamat datang!\n\n"
                "Siap menjadi partner dalam berkarya di dunia profesional😊"
    ).send()


@cl.on_message
async def main(message: cl.Message):
    text = message.content
    if text.lower().startswith(("document:", "documents:")):
        # Strip the "document(s):" prefix and build a prompt with
        # retrieved context
        instruction = text.split(":", 1)[1].strip()
        prompt = RAG_Trwira.main(instruction)
    else:
        # Without documents context, pass the message through unchanged
        prompt = text

    # Format the prompt as a list of message dictionaries
    message_formatted = [
        {"role": "user", "content": prompt}
    ]

    # Create an initial empty message to stream tokens into
    msg = cl.Message(content="")
    await msg.send()

    # Use streaming to handle partial responses
    stream = await client.chat.completions.create(
        messages=message_formatted, stream=True, **settings
    )
    async for part in stream:
        if token := part.choices[0].delta.content or "":
            await msg.stream_token(token)

    # Update the message after streaming completes
    await msg.update()
```
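
Chainlit discovers the `@cl.on_chat_start` and `@cl.on_message` handlers when the app is launched, e.g. with `chainlit run app.py` (assuming the code above is saved as `app.py`). A message prefixed with `document:` or `documents:` is routed through the RAG pipeline; any other message goes straight to the model.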