# Hugging Face Space (status when captured: Sleeping)
import gradio as gr | |
from huggingface_hub import InferenceClient | |
from pathlib import Path | |
from transformers import RagTokenForGeneration, RagTokenizer | |
import faiss | |
from typing import List | |
from pdfplumber import open as open_pdf | |
""" | |
For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference | |
""" | |
# Remote chat model used to generate the final answer.
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")

# Load the PDF file.
# extract_text() can return None for a page without a text layer (e.g. a
# scanned image) in some pdfplumber versions; fall back to "" so that
# str.join does not raise TypeError on such pages.
pdf_path = Path("apexcustoms.pdf")
with open_pdf(pdf_path) as pdf:
    text = "\n".join(page.extract_text() or "" for page in pdf.pages)

# Split the PDF text into fixed-size character chunks (the last chunk may be
# shorter). Chunks are cut purely by character count, so a chunk can start
# or end in the middle of a word.
chunk_size = 1000  # Adjust this value based on your needs
text_chunks: List[str] = [
    text[start:start + chunk_size] for start in range(0, len(text), chunk_size)
]
# Load the RAG model and tokenizer for retrieval. In this file the model is
# only used through its question encoder, to embed chunks and queries.
rag_tokenizer = RagTokenizer.from_pretrained("facebook/rag-token-nq")
rag_model = RagTokenForGeneration.from_pretrained("facebook/rag-token-nq")

# Create an in-memory index using the text chunks.
# truncation=True keeps unusually token-dense chunks within the encoder's
# maximum input length instead of failing inside the model.
chunk_inputs = rag_tokenizer(
    text_chunks, padding=True, truncation=True, return_tensors="pt"
)
# The question encoder returns a model-output object rather than a tensor;
# the per-input embedding vectors are exposed as `pooler_output`.
# NOTE(review): all chunks are encoded in one batch -- confirm this fits in
# memory for the real PDF.
embeddings = rag_model.question_encoder(
    chunk_inputs["input_ids"],
    attention_mask=chunk_inputs["attention_mask"],
).pooler_output
index = faiss.IndexFlatL2(embeddings.size(-1))
index.add(embeddings.detach().numpy())
# Custom retriever class
class CustomRetriever:
    """Look up the text chunks closest to a query embedding via an index."""

    def __init__(self, documents, embeddings, index):
        """Store the corpus together with its search index.

        Args:
            documents: Indexable collection of text chunks. Position ``i``
                is returned for index hit ``i``, so it must line up with
                the rows that were added to ``index``.
            embeddings: Embeddings of ``documents``. Only stored on the
                instance; the search itself does not read them.
            index: Object with a ``search(queries, k)`` method returning a
                ``(scores, indices)`` pair, each indexable as ``[query][rank]``.
        """
        self.documents = documents
        self.embeddings = embeddings
        self.index = index

    def get_relevant_docs(self, query_embeddings, top_k=4):
        """Return up to ``top_k`` ``(document, score)`` pairs for one query.

        Only the first row of ``query_embeddings`` is considered. The score
        is whatever the index reports; for an L2 index that is a distance,
        so smaller means closer.

        Args:
            query_embeddings: Object supporting ``.detach().numpy()`` (for
                example a torch tensor) holding the query vector(s).
            top_k: Maximum number of neighbours to request.

        Returns:
            A list of ``(document, score)`` tuples in the order returned by
            the index. It is shorter than ``top_k`` when the index holds
            fewer entries than requested.
        """
        scores, doc_indices = self.index.search(
            query_embeddings.detach().numpy(), top_k
        )
        # FAISS fills missing neighbours with index -1. Without this filter
        # a -1 would wrap around and silently return the last document as a
        # bogus hit.
        return [
            (self.documents[doc_idx], score)
            for doc_idx, score in zip(doc_indices[0], scores[0])
            if doc_idx >= 0
        ]
# Module-wide retriever over the chunked PDF text, its embeddings and the
# FAISS index built from them.
retriever = CustomRetriever(
    documents=text_chunks,
    embeddings=embeddings,
    index=index,
)
def _build_messages(system_message, history, message, context):
    """Assemble the chat message list, folding retrieved context into the system prompt."""
    system_content = system_message
    if context:
        system_content = f"{system_message}\n\nContext:\n{context}"

    messages = [{"role": "system", "content": system_content}]
    for turn in history:
        user_turn, assistant_turn = turn[0], turn[1]
        if user_turn:
            messages.append({"role": "user", "content": user_turn})
        if assistant_turn:
            messages.append({"role": "assistant", "content": assistant_turn})
    messages.append({"role": "user", "content": message})
    return messages


def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
):
    """Stream an answer to ``message`` grounded in retrieved PDF chunks.

    Args:
        message: The user's latest chat message.
        history: Earlier ``(user, assistant)`` turns; empty entries in a
            turn are skipped.
        system_message: System prompt for the chat model.
        max_tokens: Maximum number of new tokens to generate.

    Yields:
        The response text accumulated so far, once per streamed token that
        carries content.
    """
    # Retrieve relevant chunks using the custom retriever. The question
    # encoder returns a model-output object; the embedding tensor that the
    # retriever needs is its `pooler_output`.
    query_inputs = rag_tokenizer(message, truncation=True, return_tensors="pt")
    query_embeddings = rag_model.question_encoder(
        query_inputs["input_ids"],
        attention_mask=query_inputs["attention_mask"],
    ).pooler_output
    relevant_docs = retriever.get_relevant_docs(query_embeddings)
    retrieved_text = "\n".join(doc for doc, _ in relevant_docs)

    # chat_completion() has no `files` parameter, so the retrieved text is
    # handed to the model as part of the system prompt instead.
    messages = _build_messages(system_message, history, message, retrieved_text)

    # Generate the response using the zephyr model.
    response = ""
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
    ):
        if not chunk.choices:
            continue
        token = chunk.choices[0].delta.content
        # Stream chunks may carry no content (None); skip those rather than
        # failing on `str += None`.
        if token:
            response += token
            yield response
""" | |
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface | |
""" | |
# Extra controls for the chat UI. They are listed in the same order as the
# `system_message` and `max_tokens` parameters of respond().
_system_message_box = gr.Textbox(
    value="You are a helpful car configuration assistant, specifically you are the assistant for Apex Customs (https://www.apexcustoms.com/). Given the user's input, provide suggestions for car models, colors, and customization options. Be conversational in your responses. You should remember the user car model and tailor your answers accordingly. You limit yourself to answering the given question and maybe propose a suggestion but not write the next question of the user. \n\nUser: ",
    label="System message",
)
_max_tokens_slider = gr.Slider(
    minimum=1,
    maximum=2048,
    value=512,
    step=1,
    label="Max new tokens",
)

# Chat interface driven by respond().
demo = gr.ChatInterface(
    respond,
    additional_inputs=[_system_message_box, _max_tokens_slider],
)

# Start the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()