Spaces:

Moha782
/

GenAIEfrei

Sleeping

App Files Files Community

GenAIEfrei / app.py

Moha782

Update app.py

8cba427 verified 7 months ago

raw

history blame

3.77 kB

	import gradio as gr
	from huggingface_hub import InferenceClient
	from pathlib import Path
	from transformers import RagTokenForGeneration, RagTokenizer
	import faiss
	from typing import List
	from pdfplumber import open as open_pdf

	"""
	For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
	"""
	# Remote chat model used to generate the final answer.
	client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")

	# Load the PDF file and concatenate the text of every page.
	pdf_path = Path("apexcustoms.pdf")
	with open_pdf(pdf_path) as pdf:
	    # A page without extractable text may yield None instead of a string
	    # (depends on the pdfplumber version); fall back to "" so that the join
	    # below cannot fail with a TypeError.
	    text = "\n".join(page.extract_text() or "" for page in pdf.pages)

	# Split the PDF text into fixed-size character chunks
	chunk_size = 1000  # Adjust this value based on your needs
	text_chunks: List[str] = [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]

	# Fail early with a clear message: an empty chunk list cannot be embedded
	# or indexed further down in the script.
	if not text_chunks:
	    raise ValueError(f"No text could be extracted from {pdf_path}")

	# Load the RAG model and tokenizer for retrieval
	rag_tokenizer = RagTokenizer.from_pretrained("facebook/rag-token-nq")
	rag_model = RagTokenForGeneration.from_pretrained("facebook/rag-token-nq")

	# Create an in-memory index using the text chunks.
	# truncation=True keeps each 1000-character chunk within the encoder's
	# maximum input length; the attention mask is forwarded so that padding
	# tokens do not influence the embeddings.
	chunk_inputs = rag_tokenizer(text_chunks, padding=True, truncation=True, return_tensors="pt")
	# The question encoder returns a model-output object rather than a bare
	# tensor; element 0 is the pooled embedding, one row per chunk.
	embeddings = rag_model.question_encoder(
	    chunk_inputs["input_ids"],
	    attention_mask=chunk_inputs["attention_mask"],
	)[0]
	index = faiss.IndexFlatL2(embeddings.size(-1))
	index.add(embeddings.detach().numpy())

	# Custom retriever class
	class CustomRetriever:
	    """Look up the stored documents closest to a query embedding.

	    Args:
	        documents: Sequence of documents; position ``i`` must correspond to
	            vector ``i`` stored in ``index``.
	        embeddings: Embeddings of ``documents`` (stored, not used for search).
	        index: Object exposing ``search(queries, k)`` that returns a
	            ``(scores, indices)`` pair, e.g. a FAISS index.
	    """

	    def __init__(self, documents, embeddings, index):
	        self.documents = documents
	        self.embeddings = embeddings
	        self.index = index

	    def get_relevant_docs(self, query_embeddings, top_k=4):
	        """Return up to ``top_k`` ``(document, score)`` pairs for the first query.

	        Args:
	            query_embeddings: Object supporting ``.detach().numpy()`` (such as
	                a torch tensor) holding the query vectors; only the results
	                for the first row are returned.
	            top_k: Maximum number of neighbours requested from the index.

	        Returns:
	            A list of ``(document, score)`` tuples in the order given by the
	            index. It may be shorter than ``top_k``.
	        """
	        scores, doc_indices = self.index.search(query_embeddings.detach().numpy(), top_k)
	        # FAISS pads the result with index -1 when fewer than top_k vectors
	        # are available; documents[-1] would silently return the last
	        # document as a bogus hit, so those slots are dropped.
	        return [
	            (self.documents[doc_idx], score)
	            for doc_idx, score in zip(doc_indices[0], scores[0])
	            if doc_idx >= 0
	        ]

	# Module-wide retriever backed by the PDF chunks and their FAISS index
	retriever = CustomRetriever(
	    documents=text_chunks,
	    embeddings=embeddings,
	    index=index,
	)

	def respond(
	    message,
	    history: list[tuple[str, str]],
	    system_message,
	    max_tokens,
	):
	    """Stream a chat reply to ``message``, grounded in retrieved PDF chunks.

	    Args:
	        message: The user's latest message.
	        history: Earlier ``(user, assistant)`` turns; empty entries are skipped.
	        system_message: System prompt sent to the chat model.
	        max_tokens: Maximum number of tokens to generate.

	    Yields:
	        The reply accumulated so far, once per streamed chunk.
	    """
	    # Retrieve relevant chunks using the custom retriever. The question
	    # encoder returns a model-output object; element 0 is the embedding
	    # tensor the retriever expects.
	    rag_inputs = rag_tokenizer(message, truncation=True, return_tensors="pt")
	    query_embeddings = rag_model.question_encoder(rag_inputs["input_ids"])[0]
	    relevant_docs = retriever.get_relevant_docs(query_embeddings)
	    retrieved_text = "\n".join(doc for doc, _ in relevant_docs)

	    # chat_completion has no parameter for attaching files or context, so the
	    # retrieved text is supplied to the model through the system prompt.
	    system_content = system_message
	    if retrieved_text:
	        system_content = f"Context:\n{retrieved_text}\n\n{system_message}"
	    messages = [{"role": "system", "content": system_content}]

	    for val in history:
	        if val[0]:
	            messages.append({"role": "user", "content": val[0]})
	        if val[1]:
	            messages.append({"role": "assistant", "content": val[1]})

	    messages.append({"role": "user", "content": message})

	    # Generate the response using the zephyr model
	    response = ""
	    for chunk in client.chat_completion(
	        messages,
	        max_tokens=max_tokens,
	        stream=True,
	    ):
	        # Some streamed chunks carry no text (content is None).
	        token = chunk.choices[0].delta.content
	        response += token or ""
	        yield response

	"""
	For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
	"""
	# Extra controls of the chat UI; in order, they line up with the
	# `system_message` and `max_tokens` parameters of `respond`.
	system_message_box = gr.Textbox(
	    label="System message",
	    value="You are a helpful car configuration assistant, specifically you are the assistant for Apex Customs (https://www.apexcustoms.com/). Given the user's input, provide suggestions for car models, colors, and customization options. Be conversational in your responses. You should remember the user car model and tailor your answers accordingly. You limit yourself to answering the given question and maybe propose a suggestion but not write the next question of the user. \n\nUser: ",
	)
	max_tokens_slider = gr.Slider(
	    label="Max new tokens",
	    minimum=1,
	    maximum=2048,
	    step=1,
	    value=512,
	)

	# Chat interface that streams the replies produced by `respond`
	demo = gr.ChatInterface(
	    respond,
	    additional_inputs=[system_message_box, max_tokens_slider],
	)

	# Launch the app only when this file is run as a script
	if __name__ == "__main__":
	    demo.launch()