Spaces: Runtime error
# Import the required libraries
import os
import gradio as gr
import random
import requests
from getpass import getpass
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA
from langchain.chains.conversation.memory import ConversationBufferMemory
# Set the API key
OPENAI_API_KEY = "PUT-YOUR-API-KEY-HERE"
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY
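# Note: on Hugging Face Spaces the key is usually stored as a repository secret
# and read from the environment instead of being hardcoded in app.py
# (this assumes a secret named OPENAI_API_KEY has been configured):
# OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")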
# List of URLs for the PDF documents
urls = [
    # Add your URLs here
]
# Download and load the PDF documents
pdfdoc = []
for i, url in enumerate(urls):
    response = requests.get(url)
    filename = f'documento{i+1}.pdf'
    with open(filename, "wb") as f:
        f.write(response.content)
    print(f"{filename} was downloaded successfully.")
    loader = PyPDFLoader(filename)
    data = loader.load()
    pdfdoc.extend(data)
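# (Optional) calling response.raise_for_status() right after requests.get() would
# make a failed download fail loudly instead of writing an invalid PDF to disk.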
# Split the documents into chunks
text_splitter = RecursiveCharacterTextSplitter(chunk_size=400, chunk_overlap=200, length_function=len)
documents = text_splitter.split_documents(pdfdoc)
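# chunk_size and chunk_overlap are measured in characters here (length_function=len);
# consecutive chunks share up to 200 characters so an answer is less likely to be
# cut off at a chunk boundary.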
# Build the vector store
embeddings = OpenAIEmbeddings(model="text-embedding-ada-002")
vectorstore = Chroma.from_documents(documents=documents, embedding=embeddings)
retriever = vectorstore.as_retriever(search_kwargs={"k": 2})
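# search_kwargs={"k": 2} returns only the two most similar chunks per query;
# raising k gives the model more context at the cost of a larger prompt.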
# Create the chat model and the retrieval chain
chat = ChatOpenAI(openai_api_key=OPENAI_API_KEY, model_name="gpt-3.5-turbo", temperature=0.0)
memory = ConversationBufferMemory()
qa_chain = RetrievalQA.from_chain_type(llm=chat, chain_type="stuff", retriever=retriever, memory=memory)
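# chain_type="stuff" concatenates the retrieved chunks into a single prompt; it is
# the simplest option and works as long as the chunks fit in the model's context window.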
# Function to chat with the model
def chat_with_model(message, history):
    response = generate_response_from_documents(message, documents)
    if response == "No relevant information was found in the documents.":
        response = qa_chain.run(message)
    return response
# Function to generate answers directly from the PDF chunks
def generate_response_from_documents(message, documents):
    user_tokens = message.split()
    found_responses = []
    for document in documents:
        page_content = document.page_content
        if any(keyword in page_content for keyword in user_tokens):
            found_responses.append({
                "source": document.metadata.get("source", "unknown"),
                "page": document.metadata.get("page", "?"),
                "content": page_content,
            })
    if found_responses:
        response = random.choice(found_responses)
        return f"I found information in {response['source']}, page {response['page']}:\n{response['content']}"
    else:
        return "No relevant information was found in the documents."
# Create the Gradio interface
gr.ChatInterface(
    chat_with_model,
    chatbot=gr.Chatbot(height=200),
    textbox=gr.Textbox(placeholder="Ask me a question", container=False, scale=7),
    title="Welcome to LechuzoBot",
    description="How can I help you today?",
    theme="soft",
    examples=["Que es la Universidad Tecnologica de Tecamac?", "Numero telefonico donde puedo comunicarme"],
    cache_examples=True,
    retry_btn=None,
    undo_btn="Delete previous response",
    clear_btn="Clear",
).launch()
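# Note: cache_examples=True runs each example through chat_with_model when the app
# starts, so a valid API key and at least one downloaded PDF are required at launch.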