Spaces:
Running
Running
File size: 3,285 Bytes
70595a6 efbe976 70595a6 efbe976 70595a6 efbe976 70595a6 691efb9 70595a6 691efb9 efbe976 691efb9 70595a6 efbe976 691efb9 70595a6 691efb9 70595a6 691efb9 70595a6 691efb9 efbe976 691efb9 efbe976 691efb9 70595a6 efbe976 691efb9 efbe976 691efb9 efbe976 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 |
import streamlit as st
import os
from PyPDF2 import PdfReader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceHubEmbeddings
from langchain.vectorstores import FAISS
from langchain.llms import HuggingFaceHub
from langchain.chains.question_answering import load_qa_chain
from langchain.prompts import PromptTemplate
# Page chrome: browser-tab title plus the visible heading.
st.set_page_config(page_title="preguntaDOC")
st.header("Pregunta a tu PDF")

# The Hugging Face token is mandatory because the embeddings are computed
# through the Hugging Face Hub; the input is rendered as a password field.
huggingface_api_token = st.text_input(
    label="Hugging Face API Token (requerido)",
    type="password",
)

# Any change to the uploaded file clears every st.cache_resource entry, so a
# previously built index is not reused for a different document.
pdf_obj = st.file_uploader(
    label="Carga tu documento",
    type="pdf",
    on_change=st.cache_resource.clear,
)
@st.cache_resource
def create_embeddings(pdf, api_token):
    """Build a FAISS similarity index from the text of a PDF.

    The PDF text is extracted page by page, split into overlapping chunks
    (800 characters with a 100-character overlap, measured with ``len``) and
    embedded with the ``paraphrase-multilingual-MiniLM-L12-v2`` model through
    ``HuggingFaceHubEmbeddings``. The result is memoized by
    ``st.cache_resource``.

    Args:
        pdf: The PDF to index; anything ``PdfReader`` accepts (here, the
            object returned by ``st.file_uploader``).
        api_token: Hugging Face API token used for the embedding calls. It is
            also exported as ``HUGGINGFACEHUB_API_TOKEN``.

    Returns:
        The FAISS vector store, or ``None`` when the token is missing or no
        text could be extracted from the PDF (an error is shown in the UI in
        both cases).
    """
    if not api_token:
        st.error("Se requiere un token de API de Hugging Face")
        return None

    os.environ["HUGGINGFACEHUB_API_TOKEN"] = api_token

    pdf_reader = PdfReader(pdf)
    # `or ""` guards against a page that yields no text at all, and join
    # avoids repeated string concatenation on long documents.
    text = "".join(page.extract_text() or "" for page in pdf_reader.pages)

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=800,
        chunk_overlap=100,
        length_function=len,
    )
    chunks = text_splitter.split_text(text)

    # A scanned or empty PDF produces no chunks; an index cannot be built from
    # zero texts, so report the problem instead of failing inside FAISS.
    if not chunks:
        st.error(
            "No se pudo extraer texto del PDF. "
            "Verifica que no sea un documento escaneado o vacío."
        )
        return None

    # HuggingFaceHubEmbeddings is used instead of HuggingFaceEmbeddings so
    # that sentence-transformers does not need to be installed locally.
    embeddings = HuggingFaceHubEmbeddings(
        repo_id="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
        huggingfacehub_api_token=api_token,
    )
    return FAISS.from_texts(chunks, embeddings)
# Main flow: once a PDF and a token are available, index the document, then
# answer the user's question from the most similar chunks.
if pdf_obj and huggingface_api_token:
    # Building the index parses a user-supplied file and uses a user-supplied
    # token, so failures are reported in the UI rather than as a traceback.
    try:
        knowledge_base = create_embeddings(pdf_obj, huggingface_api_token)
    except Exception as e:
        st.error(f"Error al procesar el PDF: {str(e)}")
        knowledge_base = None

    if knowledge_base:
        user_question = st.text_input("Haz una pregunta sobre tu PDF:")
        if user_question:
            # Use a free model hosted on Hugging Face.
            llm = HuggingFaceHub(
                repo_id="google/flan-t5-large",
                huggingfacehub_api_token=huggingface_api_token,
                model_kwargs={"temperature": 0.5, "max_length": 512},
            )
            prompt_template = (
                "\n"
                "Responde a la siguiente pregunta basándote únicamente en el contexto proporcionado.\n"
                "Contexto: {context}\n"
                "Pregunta: {question}\n"
                "Respuesta:\n"
            )
            PROMPT = PromptTemplate(
                template=prompt_template,
                input_variables=["context", "question"],
            )
            # "stuff" puts all retrieved chunks into a single prompt.
            chain = load_qa_chain(llm, chain_type="stuff", prompt=PROMPT)
            with st.spinner("Procesando tu pregunta..."):
                try:
                    # Retrieval sits inside the try as well: it embeds the
                    # question with the index's embedding model and can fail
                    # just like the LLM call.
                    docs = knowledge_base.similarity_search(user_question, 3)
                    respuesta = chain.run(
                        input_documents=docs, question=user_question
                    )
                    st.write(respuesta)
                except Exception as e:
                    st.error(f"Error al procesar tu pregunta: {str(e)}")
elif not huggingface_api_token and pdf_obj:
    st.warning("Por favor, ingresa tu token de API de Hugging Face para continuar.")
|