import streamlit as st
from llama_index.llms.ollama import Ollama
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, ChatPromptTemplate, Settings, set_global_tokenizer
from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
from transformers import AutoTokenizer
from datetime import datetime
from llama_index.core.memory import ChatMemoryBuffer
import time
# Define the data directory for loading documents
DATA_DIR = "docs"
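# Files placed in this folder are indexed at startup; supported formats follow
# SimpleDirectoryReader's defaults (PDF, TXT, DOCX, Markdown, etc.)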
# Check whether 'is_initialized' already exists in session state
if 'is_initialized' not in st.session_state:
    st.session_state.is_initialized = False
# Initialization that runs only once, on the first page load
if not st.session_state.is_initialized:
    st.session_state.is_initialized = True
    Settings.llm = HuggingFaceInferenceAPI(
        model_name="HuggingFaceH4/zephyr-7b-beta",
        tokenizer_name="HuggingFaceH4/zephyr-7b-beta",
        context_window=3000,
        max_new_tokens=2048,
        generate_kwargs={"temperature": 0.1},
        # stream=True
    )
    Settings.embed_model = HuggingFaceEmbedding(
        model_name="BAAI/bge-small-en-v1.5"
    )
    # Set the global tokenizer to use the tokenizer from HuggingFace for encoding inputs
    set_global_tokenizer(
        AutoTokenizer.from_pretrained("NousResearch/Llama-2-7b-chat-hf").encode
    )
    print("# load data", datetime.now())
    # Load documents from the data directory into the Vector Store Index
    documents = SimpleDirectoryReader(DATA_DIR).load_data()
    # Create Vector Store Index with HuggingFace Embedding
    index = VectorStoreIndex.from_documents(documents)
    # Create Prompt Template for Text-based Q&A
    chat_text_qa_msgs = [
        (
            "user",
            """You are a Q&A assistant. Your main goal is to provide answers as accurately as possible, based on the instructions and context you have been given. If a question does not match the provided context or is outside the scope of the document, kindly advise the user to ask questions within the context of the document.
            Context:
            {context_str}
            Question:
            {query_str}
            """
        )
    ]
    text_qa_template = ChatPromptTemplate.from_messages(chat_text_qa_msgs)
    # Initialize Chat Memory Buffer for Conversation Memory
    memory = ChatMemoryBuffer.from_defaults(token_limit=3900)
    # Create Query Engine with LLM and Template
    query_engine = index.as_query_engine(
        text_qa_template=text_qa_template,
        # streaming=True,
        memory=memory
    )
    # Cache the query engine in session state so the index is not rebuilt on every rerun
    if 'query_engine' not in st.session_state:
        st.session_state.query_engine = query_engine
    print("# loaded", datetime.now())
# Function to handle queries
def handle_query(query):
    # Query the cached engine; llama-index normally returns a Response object whose text is in `.response`
    answer = st.session_state.query_engine.query(query)
    if hasattr(answer, 'response'):
        return answer.response
    elif isinstance(answer, dict) and 'response' in answer:
        return answer['response']
    else:
        return "Sorry, I couldn't find an answer."
# print("-- check", datetime.now())
# ============== Streamlit App ===============
st.title("POC LLM RAG ✅")
st.markdown("Retrieval-Augmented Generation (RAG) with Large Language Model (LLM) using llama-index library and Ollama.")
st.markdown("start chat ...🚀")
if 'messages' not in st.session_state:
    st.session_state.messages = [{'role': 'assistant', "content": 'Hello! Ask me anything about the documents.'}]
# Sidebar to list documents
with st.sidebar:
st.title("Documents:")
docs = SimpleDirectoryReader(DATA_DIR).list_resources()
for d in docs:
file_name = str(d).split('/')[-1]
st.info(file_name)
# for message in st.session_state.messages:
# with st.chat_message(message["role"]):
# st.markdown(message["content"])
# if prompt := st.chat_input("Ask me anything about the documents"):
# st.session_state.messages.append({"role": "user", "content": prompt})
# with st.chat_message("user"):
# st.markdown(prompt)
# with st.chat_message("assistant"):
# stream = handle_query(prompt)
# response = st.write_stream(stream)
# st.session_state.messages.append({"role": "assistant", "content": response})
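# Chat flow: take the user's prompt, append it to the history, query the engine,
# then render the full message history below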
user_prompt = st.chat_input("Ask me anything about the content of the PDF:")
if user_prompt:
    st.session_state.messages.append({'role': 'user', "content": user_prompt})
    response = handle_query(user_prompt)
    st.session_state.messages.append({'role': 'assistant', "content": response})
for message in st.session_state.messages:
    with st.chat_message(message['role']):
        st.write(message['content'])
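# To run this app locally (assuming the file is saved as app.py and dependencies are installed):
#   streamlit run app.py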