|
import os |
|
from dotenv import load_dotenv |
|
import gradio as gr |
|
from llama_index.core import StorageContext, load_index_from_storage, VectorStoreIndex, SimpleDirectoryReader, ChatPromptTemplate |
|
from llama_index.llms.huggingface import HuggingFaceInferenceAPI |
|
from llama_index.embeddings.huggingface import HuggingFaceEmbedding |
|
from sentence_transformers import SentenceTransformer |
|
from llama_index.core import StorageContext, load_index_from_storage, VectorStoreIndex, SimpleDirectoryReader, ChatPromptTemplate, Settings |
|
# Read a local .env file (python-dotenv) before any environment lookups below.
load_dotenv()

# The same Hugging Face repo id is used for the model and for its tokenizer.
_LLM_REPO_ID = "google/gemma-1.1-7b-it"

# Global LlamaIndex LLM: remote inference through the Hugging Face API.
# HF_TOKEN is read from the environment; it is None when the variable is unset.
Settings.llm = HuggingFaceInferenceAPI(
    model_name=_LLM_REPO_ID,
    tokenizer_name=_LLM_REPO_ID,
    token=os.environ.get("HF_TOKEN"),
    context_window=3000,
    max_new_tokens=512,
    generate_kwargs={"temperature": 0.1},
)

# Global LlamaIndex embedding model.
Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

# Where the vector index is persisted, and where source documents are read from.
PERSIST_DIR = "db"
PDF_DIRECTORY = "data"

# Make sure both directories exist (documents first, then index storage).
for _directory in (PDF_DIRECTORY, PERSIST_DIR):
    os.makedirs(_directory, exist_ok=True)
|
|
|
def data_ingestion_from_directory():
    """Index the files found in ``PDF_DIRECTORY`` and persist the result.

    Reads the directory with ``SimpleDirectoryReader``, builds a
    ``VectorStoreIndex`` from the loaded documents, and writes the index's
    storage context to ``PERSIST_DIR`` so it can be reloaded later.

    Returns:
        None. The only result is the persisted index on disk.

    NOTE(review): ``SimpleDirectoryReader`` appears to raise when the directory
    contains no readable files; that error is deliberately not caught here, so
    an empty ``PDF_DIRECTORY`` would surface at the call site -- confirm.
    """
    documents = SimpleDirectoryReader(PDF_DIRECTORY).load_data()

    # No explicit StorageContext is passed: the index is built with its own
    # storage context, which is the one persisted below. (The original created
    # a separate, unused StorageContext here.)
    index = VectorStoreIndex.from_documents(documents)
    index.storage_context.persist(persist_dir=PERSIST_DIR)
|
|
|
def handle_query(query):
    """Answer ``query`` from the index persisted in ``PERSIST_DIR``.

    Builds a single-message chat prompt with ``{context_str}`` and
    ``{query_str}`` placeholders, reloads the index from ``PERSIST_DIR``,
    and runs the query through a query engine that uses that prompt.

    Args:
        query: The user's question. ``None`` or a blank/whitespace-only
            string is rejected up front without touching the index.

    Returns:
        The ``response`` attribute of the query result when it has one, the
        ``"response"`` entry when the result is a dict, a fixed apology
        message when neither is present, or a prompt to enter a question
        when ``query`` is blank.
    """
    # A blank question has nothing to retrieve against; answer locally rather
    # than loading the index and spending an LLM call on it.
    if query is None or (isinstance(query, str) and not query.strip()):
        return "Please enter a question."

    # Prompt assembled from explicit pieces so its content does not depend on
    # source-file indentation.
    prompt_text = (
        "\n"
        "You are a Q&A assistant named RedfernsTech, created by the RedfernsTech team. "
        "You have been designed to provide accurate answers based on the context provided.\n"
        "Context:\n"
        "{context_str}\n"
        "Question:\n"
        "{query_str}\n"
    )
    text_qa_template = ChatPromptTemplate.from_messages([("user", prompt_text)])

    # The index is reloaded from disk on every call, so each query sees
    # whatever is currently persisted in PERSIST_DIR.
    storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
    index = load_index_from_storage(storage_context)

    query_engine = index.as_query_engine(text_qa_template=text_qa_template)
    answer = query_engine.query(query)

    # Accept either an object exposing ``.response`` or a plain dict result.
    if hasattr(answer, "response"):
        return answer.response
    if isinstance(answer, dict) and "response" in answer:
        return answer["response"]
    return "Sorry, I couldn't find an answer."
|
|
|
|
|
|
|
|
|
# Build and persist the index at startup so handle_query() has something to load.
print("Processing PDF ingestion from directory:", PDF_DIRECTORY)
data_ingestion_from_directory()

# One sample question, run end to end and echoed to the console. The question
# is printed before the call so it is visible even if the query fails.
query = "How do I use the RedfernsTech Q&A assistant?"
print("Query:", query)
response = handle_query(query)
print("Answer:", response)
|
|
|
|
|
|
|
|
|
|
|
# Single-line question box (no label, hint text only) and a plain output box.
input_component = gr.Textbox(
    placeholder="Ask me anything about the document...",
    show_label=False,
)
output_component = gr.Textbox()

# Wire the textbox pair to handle_query: each submit runs one query.
interface = gr.Interface(
    handle_query,
    input_component,
    output_component,
    title="RedfernsTech Q&A Chatbot",
    description="Ask me anything about the uploaded document.",
)

# Serve on port 7861 with Gradio's share option enabled.
interface.launch(share=True, server_port=7861)
|
|