### RAG code

```python
# Embedding model builder
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core import Settings, SimpleDirectoryReader, VectorStoreIndex
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.postprocessor import SimilarityPostprocessor


def set_embed_model(model_name: str,
                    chunk_size: int = 256,
                    chunk_overlap: int = 25) -> None:
    # Disable the default LLM: the index is used for retrieval only;
    # generation happens in the Chainlit layer via the Ollama server.
    Settings.llm = None
    Settings.embed_model = HuggingFaceEmbedding(model_name=model_name)
    Settings.chunk_size = chunk_size
    Settings.chunk_overlap = chunk_overlap


class RAGModule:
    def __init__(self,
                 llm_model: str = "MarcoAland/llama3.1-rag-indo",
                 embedding_model: str = "MarcoAland/Indo-bge-m3",
                 docs_path: str = "data",
                 top_k: int = 3,
                 similarity_cutoff: float = 0.4):
        # Define the embedding model
        set_embed_model(model_name=embedding_model)

        # Build the vector index over the documents directory
        documents = SimpleDirectoryReader(docs_path).load_data()
        index = VectorStoreIndex.from_documents(documents)
        retriever = VectorIndexRetriever(
            index=index,
            similarity_top_k=top_k,
        )

        self.top_k = top_k
        self.query_engine = RetrieverQueryEngine(
            retriever=retriever,
            node_postprocessors=[SimilarityPostprocessor(similarity_cutoff=similarity_cutoff)],
        )

    def format_context(self, response) -> str:
        # "Jawab dengan akurat" = "Answer accurately"
        context = "Jawab dengan akurat\n\nContext:\n"
        # Iterate over the nodes actually returned: the similarity cutoff can
        # leave fewer than top_k nodes, so indexing range(self.top_k) could
        # raise an IndexError.
        for node in response.source_nodes:
            context += node.text + "\n\n"
        return context

    def query(self, query: str) -> str:
        try:
            response = self.query_engine.query(query)
            return self.format_context(response)
        except Exception:
            # Fall back to an empty context if retrieval fails
            return ""

    def prompt(self, context: str, instruction: str) -> str:
        # "### Instruksi:" = "### Instruction:", the prompt format
        # the fine-tuned model expects
        return f"{context}\n ### Instruksi:\n {instruction}"

    def main(self, instruction: str) -> str:
        context = self.query(query=instruction)
        return self.prompt(context=context, instruction=instruction)
```
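
For reference, a minimal standalone sketch of the module. The `data/` directory contents and the example question are assumptions for illustration, not part of the original app:

```python
# Hypothetical standalone usage: assumes a "data/" folder containing documents
rag = RAGModule(docs_path="data", top_k=3, similarity_cutoff=0.4)

# Example Indonesian question ("What services does the company provide?")
prompt = rag.main("Layanan apa saja yang disediakan perusahaan?")
print(prompt)  # retrieved context, then "### Instruksi:" and the question
```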
### Chainlit code

```python
import chainlit as cl
from openai import AsyncOpenAI

RAG_Trwira = RAGModule()

# Configure the async OpenAI client to point at the Ollama server's
# OpenAI-compatible endpoint. Ollama does not validate the API key,
# so any non-empty placeholder string works.
client = AsyncOpenAI(api_key="ollama", base_url="http://34.69.9.203:11434/v1")

settings = {
    "model": "MarcoAland/llama3.1-rag-indo",
    "temperature": 0.3,
    "max_tokens": 2048,
}


@cl.on_chat_start
async def start_chat():
    # Display a welcome title in the UI using Markdown
    # ("Hi, my name is Mitrakara. Welcome! Ready to be your partner
    # in the professional world.")
    await cl.Message(
        content="# Hai, namaku Mitrakara👋\n\n## Selamat datang!\n\n"
                "Siap menjadi partner dalam berkarya di dunia profesional😊"
    ).send()


@cl.on_message
async def main(message: cl.Message):
    text = message.content
    if text.lower().startswith(("document:", "documents:")):
        # Strip the "document(s):" prefix and build a prompt with
        # retrieved context
        instruction = text.split(":", 1)[1].strip()
        prompt = RAG_Trwira.main(instruction)
    else:
        # Without documents context, pass the message through unchanged
        prompt = text

    # Format the prompt as a list of message dictionaries
    message_formatted = [
        {"role": "user", "content": prompt}
    ]

    # Create an initial empty message to stream tokens into
    msg = cl.Message(content="")
    await msg.send()

    # Use streaming to handle partial responses
    stream = await client.chat.completions.create(
        messages=message_formatted, stream=True, **settings
    )
    async for part in stream:
        if token := part.choices[0].delta.content or "":
            await msg.stream_token(token)

    # Update the message after streaming completes
    await msg.update()
```
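
Chainlit discovers the `@cl.on_chat_start` and `@cl.on_message` handlers when the app is launched, e.g. with `chainlit run app.py` (assuming the code above is saved as `app.py`). A message prefixed with `document:` or `documents:` is routed through the RAG pipeline; any other message goes straight to the model.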