import streamlit as st
from datetime import datetime

from transformers import AutoTokenizer

from llama_index.core import (
    ChatPromptTemplate,
    Settings,
    SimpleDirectoryReader,
    VectorStoreIndex,
    set_global_tokenizer,
)
from llama_index.core.memory import ChatMemoryBuffer
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
from llama_index.llms.ollama import Ollama  # optional local backend (see note below)

# Directory the documents are loaded from
DATA_DIR = "docs"
# Guard one-time initialization with session state
if 'is_initialized' not in st.session_state:
    st.session_state.is_initialized = False

# Heavy setup (LLM, embeddings, index) runs only on the first load
if not st.session_state.is_initialized:
    st.session_state.is_initialized = True
    # Remote LLM served through the Hugging Face Inference API
    Settings.llm = HuggingFaceInferenceAPI(
        model_name="HuggingFaceH4/zephyr-7b-beta",
        tokenizer_name="HuggingFaceH4/zephyr-7b-beta",
        context_window=3000,
        max_new_tokens=512,
        generate_kwargs={"temperature": 0.1},
        # stream=True
    )
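    # The UI mentions Ollama; a local backend can be swapped in for the hosted
    # Inference API. A minimal sketch, assuming an Ollama server is running
    # locally with the model already pulled:
    # Settings.llm = Ollama(model="zephyr", request_timeout=120.0)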
    # Local embedding model used to vectorize the documents
    Settings.embed_model = HuggingFaceEmbedding(
        model_name="BAAI/bge-small-en-v1.5"
    )
    # Use a HuggingFace tokenizer for llama-index's internal token counting
    set_global_tokenizer(
        AutoTokenizer.from_pretrained("NousResearch/Llama-2-7b-chat-hf").encode
    )
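    # Note: this tokenizer (Llama-2's) does not match the zephyr-7b-beta LLM above,
    # so token counts are only approximate. Matching them is likely more accurate,
    # e.g. (an assumption, not what the original app does):
    # set_global_tokenizer(AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-beta").encode)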
print("# load data", datetime.now())
# Load documents from the data directory into the Vector Store Index
documents = SimpleDirectoryReader(DATA_DIR).load_data()
# Create Vector Store Index with HuggingFace Embedding
index = VectorStoreIndex.from_documents(documents)
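    # Optional (not in the original app): persist the index so documents are not
    # re-embedded on every cold start. A sketch, assuming a writable "storage" dir:
    # from llama_index.core import StorageContext, load_index_from_storage
    # index.storage_context.persist(persist_dir="storage")
    # index = load_index_from_storage(StorageContext.from_defaults(persist_dir="storage"))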
    # Prompt template for text-based Q&A
    chat_text_qa_msgs = [
        (
            "user",
            """You are a Q&A assistant. Your main goal is to answer questions as accurately as possible, based on the instructions and context you have been given. If a question does not match the provided context or is outside the scope of the document, kindly advise the user to ask questions within the context of the document.
Context:
{context_str}
Question:
{query_str}
""",
        )
    ]
    text_qa_template = ChatPromptTemplate.from_messages(chat_text_qa_msgs)
    # Conversation memory buffer (capped by token count)
    memory = ChatMemoryBuffer.from_defaults(token_limit=3900)
    # Build the query engine with the LLM and template
    query_engine = index.as_query_engine(
        text_qa_template=text_qa_template,
        # streaming=True,
        memory=memory,
    )
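    # Note: llama-index query engines are stateless, so the memory buffer is not
    # actually consulted here; chat engines are the usual route for multi-turn
    # memory. An alternative sketch (an assumption, not what this app wires up):
    # chat_engine = index.as_chat_engine(chat_mode="context", memory=memory)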
    # Cache the query engine across Streamlit reruns
    if 'query_engine' not in st.session_state:
        st.session_state.query_engine = query_engine

    print("# loaded", datetime.now())
# Route a user query through the cached query engine
def handle_query(query):
    return st.session_state.query_engine.query(query)
    # Streaming variant (requires streaming=True on the query engine above):
    # streaming_response = st.session_state.query_engine.query(query)
    # for text in streaming_response.response_gen:
    #     yield text
print("-- check", datetime.now())
# ============== Streamlit App ===============
st.title("POC LLM RAG ✅")
st.markdown("Retrieval-Augmented Generation (RAG) with Large Language Model (LLM) using llama-index library and Ollama.")
st.markdown("start chat ...🚀")
if 'messages' not in st.session_state:
    st.session_state.messages = [{'role': 'assistant', 'content': 'Hello! Ask me anything about the documents.'}]
# Sidebar listing the loaded documents
with st.sidebar:
    st.title("Documents:")
    docs = SimpleDirectoryReader(DATA_DIR).list_resources()
    for d in docs:
        file_name = str(d).split('/')[-1]
        st.info(file_name)
# Streaming UI variant (pairs with the streaming query engine above):
# for message in st.session_state.messages:
#     with st.chat_message(message["role"]):
#         st.markdown(message["content"])
# if prompt := st.chat_input("Ask me anything about the documents"):
#     st.session_state.messages.append({"role": "user", "content": prompt})
#     with st.chat_message("user"):
#         st.markdown(prompt)
#     with st.chat_message("assistant"):
#         stream = handle_query(prompt)
#         response = st.write_stream(stream)
#     st.session_state.messages.append({"role": "assistant", "content": response})
user_prompt = st.chat_input("Ask me anything about the content of the PDF:")
if user_prompt:
    st.session_state.messages.append({'role': 'user', 'content': user_prompt})
    response = handle_query(user_prompt)
    # Store the response as plain text (query() returns a Response object)
    st.session_state.messages.append({'role': 'assistant', 'content': str(response)})

# Re-render the full conversation on every rerun
for message in st.session_state.messages:
    with st.chat_message(message['role']):
        st.write(message['content'])