# Import the required libraries
import os
import gradio as gr
import random
import requests
from getpass import getpass
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA
from langchain.chains.conversation.memory import ConversationBufferMemory
# Set the OpenAI API key
OPENAI_API_KEY = "PUT-YOUR-API-KEY-HERE"
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY
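# Alternatively, prompt for the key at runtime instead of hardcoding it
# (a minimal sketch using the getpass import above):
# OPENAI_API_KEY = getpass("OpenAI API key: ")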
# List of PDF document URLs
urls = [
    # Add your URLs here
]
# Download the PDF files and load their pages
pdfdoc = []
for i, url in enumerate(urls):
    response = requests.get(url)
    response.raise_for_status()  # fail early on HTTP errors
    filename = f'documento{i+1}.pdf'
    with open(filename, "wb") as f:
        f.write(response.content)
    print(f"{filename} was downloaded successfully.")
    loader = PyPDFLoader(filename)
    data = loader.load()  # one Document per page, with source/page metadata
    pdfdoc.extend(data)
# Split the documents into overlapping chunks
text_splitter = RecursiveCharacterTextSplitter(chunk_size=400, chunk_overlap=200, length_function=len)
documents = text_splitter.split_documents(pdfdoc)
# Build the vector store and a retriever over it
embeddings = OpenAIEmbeddings(model="text-embedding-ada-002")
vectorstore = Chroma.from_documents(documents=documents, embedding=embeddings)
retriever = vectorstore.as_retriever(search_kwargs={"k": 2})
# Create the chat model and the retrieval QA chain
chat = ChatOpenAI(openai_api_key=OPENAI_API_KEY, model_name="gpt-3.5-turbo", temperature=0.0)
memory = ConversationBufferMemory()
qa_chain = RetrievalQA.from_chain_type(llm=chat, chain_type="stuff", retriever=retriever, memory=memory)
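# Note: RetrievalQA's default "stuff" prompt has no history variable, so the
# buffer memory records the conversation but does not feed it back into the
# prompt on later turns.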
# Chat handler: try a simple keyword lookup over the PDF chunks first,
# then fall back to the retrieval QA chain
NO_INFO_MSG = "No relevant information was found in the documents."

def chat_with_model(message, history):
    response = generate_response_from_documents(message, documents)
    if response == NO_INFO_MSG:
        response = qa_chain.run(message)
    return response
# Generate an answer directly from the PDF chunks via naive keyword matching.
# Each item in `documents` is a chunk (a LangChain Document), so we match
# against its page_content and report the source/page from its metadata.
def generate_response_from_documents(message, documents):
    user_tokens = message.split()
    found_responses = []
    for document in documents:
        content = document.page_content
        if any(keyword in content for keyword in user_tokens):
            found_responses.append({
                "source": document.metadata.get("source", "unknown"),
                "page": document.metadata.get("page", "?"),
                "content": content,
            })
    if found_responses:
        response = random.choice(found_responses)
        return (f"I found information in {response['source']}, "
                f"page {response['page']}:\n{response['content']}")
    return NO_INFO_MSG
# Build the Gradio interface
gr.ChatInterface(
    chat_with_model,
    chatbot=gr.Chatbot(height=200),
    textbox=gr.Textbox(placeholder="Ask me a question", container=False, scale=7),
    title="Welcome to LechuzoBot",
    description="How can I help you today?",
    theme="soft",
    examples=["What is the Universidad Tecnologica de Tecamac?", "What phone number can I call?"],
    cache_examples=True,
    retry_btn=None,
    undo_btn="Delete previous response",
    clear_btn="Clear",
).launch()
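# launch() serves the app locally (default http://127.0.0.1:7860);
# pass share=True to launch() for a temporary public URL.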