### RAG code
# Imports for the embedding model and the LlamaIndex retrieval pipeline
from llama_index.core import Settings, SimpleDirectoryReader, VectorStoreIndex
from llama_index.core.postprocessor import SimilarityPostprocessor
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

def set_embed_model(model_name: str,
                    chunk_size: int = 256,
                    chunk_overlap: int = 25) -> None:
    """Configure the global LlamaIndex embedding model and chunking settings."""
    Settings.llm = None  # no LLM for synthesis; only the retrieved nodes are used
    Settings.embed_model = HuggingFaceEmbedding(model_name=model_name)
    Settings.chunk_size = chunk_size
    Settings.chunk_overlap = chunk_overlap
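
# Note: set_embed_model mutates the global Settings object, so it affects
# every index built afterwards in this process. A quick sanity check is
# sketched below (hypothetical, not part of the app flow):
#
#   set_embed_model("MarcoAland/Indo-bge-m3")
#   vec = Settings.embed_model.get_text_embedding("halo dunia")
#   print(len(vec))  # prints the embedding dimension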

class RAGModule:
    def __init__(self,
                 llm_model: str = "MarcoAland/llama3.1-rag-indo",
                 embedding_model: str = "MarcoAland/Indo-bge-m3",
                 docs_path: str = "data",
                 top_k: int = 3,
                 similarity_cutoff: float = 0.4):
        
        # Define embedding model
        set_embed_model(model_name=embedding_model)

        # Load documents and build an in-memory vector index
        documents = SimpleDirectoryReader(docs_path).load_data()
        index = VectorStoreIndex.from_documents(documents)
        retriever = VectorIndexRetriever(
            index=index,
            similarity_top_k=top_k,
        )

        self.top_k = top_k
        self.query_engine = RetrieverQueryEngine(
            retriever=retriever,
            node_postprocessors=[SimilarityPostprocessor(similarity_cutoff=similarity_cutoff)]
        )

    def format_context(self, response):
        # "Jawab dengan akurat" is Indonesian for "Answer accurately"
        context = "Jawab dengan akurat\n\nContext:\n"
        # The similarity cutoff can leave fewer than top_k nodes, so iterate
        # over the nodes actually returned instead of indexing up to top_k.
        for node in response.source_nodes[:self.top_k]:
            context += node.text + "\n\n"
        return context

    def query(self, query: str):
        try:
            response = self.query_engine.query(query)
            return self.format_context(response)
        except Exception:
            # Fall back to an empty context if retrieval fails
            return ""

    def prompt(self, context: str, instruction: str):
        # "Instruksi" is Indonesian for "Instruction"
        return f"{context}\n ### Instruksi:\n {instruction}"

    def main(self, instruction: str):
        context = self.query(query=instruction)
        prompt = self.prompt(context=context, instruction=instruction)
        return prompt
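
# Minimal usage sketch for RAGModule (hypothetical: assumes a "data/" folder
# containing documents and access to the Hugging Face models above). Not
# executed here, since Chainlit drives the module below.
#
#   rag = RAGModule()
#   print(rag.main("Apa itu Mitrakara?"))  # hypothetical instruction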


### Chainlit code
import chainlit as cl
from openai import AsyncOpenAI

# Build the RAG module once at startup (loads documents and builds the index)
RAG_Trwira = RAGModule()

# Configure the async OpenAI client against the Ollama server's
# OpenAI-compatible endpoint (port 11434). Ollama ignores the API key,
# but the client requires a non-empty value, so a placeholder is used.
client = AsyncOpenAI(api_key="ollama", base_url="http://34.69.9.203:11434/v1")

settings = {
    "model": "MarcoAland/llama3.1-rag-indo",
    "temperature":0.3,
    "max_tokens": 2048,
}

@cl.on_chat_start
async def start_chat():
    # Greet the user with a Markdown-formatted title
    # (Indonesian: "Hi, my name is Mitrakara. Welcome! Ready to be your
    # partner in the professional world.")
    await cl.Message(content="# Hai, namaku Mitrakara👋\n\n## Selamat datang!\n\nSiap menjadi partner dalam berkarya di dunia profesional😊").send()

@cl.on_message
async def main(message: cl.Message):
    if "document:" in message.content.lower() or "documents:" in message.content.lower():
        # Prepare the message with documents context
        prompt = RAG_Trwira.main(message.content[10:]) # slice the "documents" command
    else:
        # Without documents context
        prompt = message.content
    
    # Format the prompt as a list of chat message dictionaries
    formatted_messages = [
        {"role": "user", "content": prompt}
    ]

    # Create an initial empty message to send back to the user
    msg = cl.Message(content="")
    await msg.send()

    # Use streaming to handle partial responses
    stream = await client.chat.completions.create(messages=formatted_messages, stream=True, **settings)

    async for part in stream:
        if token := part.choices[0].delta.content or "":
            await msg.stream_token(token)

    # Update the message after streaming completion
    await msg.update()
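
# To launch the app locally (assuming this file is saved as app.py):
#   chainlit run app.py -w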