Cesar42 committed on
Commit
efbe976
·
verified ·
1 Parent(s): e9644d6

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +52 -23
  2. requirements.txt +0 -2
app.py CHANGED
@@ -2,22 +2,27 @@ import streamlit as st
2
  import os
3
  from PyPDF2 import PdfReader
4
  from langchain.text_splitter import RecursiveCharacterTextSplitter
5
- from langchain.embeddings import HuggingFaceHubEmbeddings # Cambiado desde HuggingFaceEmbeddings
6
  from langchain.vectorstores import FAISS
7
- from langchain.chat_models import ChatOpenAI
8
  from langchain.chains.question_answering import load_qa_chain
 
9
 
10
  st.set_page_config(page_title='preguntaDOC')
11
  st.header("Pregunta a tu PDF")
12
 
13
- OPENAI_API_KEY = st.text_input('OpenAI API Key', type='password')
14
- HUGGINGFACE_API_KEY = st.text_input('Hugging Face API Key', type='password') # Añadido para la API de Hugging Face
15
 
16
  pdf_obj = st.file_uploader("Carga tu documento", type="pdf", on_change=st.cache_resource.clear)
17
 
18
  @st.cache_resource
19
- def create_embeddings(pdf, hf_api_key):
20
- os.environ["HUGGINGFACEHUB_API_TOKEN"] = hf_api_key # Configurar token de HF
 
 
 
 
21
 
22
  pdf_reader = PdfReader(pdf)
23
  text = ""
@@ -32,29 +37,53 @@ def create_embeddings(pdf, hf_api_key):
32
  chunks = text_splitter.split_text(text)
33
 
34
  # Usar HuggingFaceHubEmbeddings en lugar de HuggingFaceEmbeddings
 
35
  embeddings = HuggingFaceHubEmbeddings(
36
  repo_id="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
37
- huggingfacehub_api_token=hf_api_key
38
  )
39
 
40
  knowledge_base = FAISS.from_texts(chunks, embeddings)
41
  return knowledge_base
42
 
43
- if pdf_obj and HUGGINGFACE_API_KEY:
44
- knowledge_base = create_embeddings(pdf_obj, HUGGINGFACE_API_KEY)
45
- user_question = st.text_input("Haz una pregunta sobre tu PDF:")
46
 
47
- if user_question and OPENAI_API_KEY:
48
- os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY
49
- docs = knowledge_base.similarity_search(user_question, 3)
50
- llm = ChatOpenAI(model_name='gpt-3.5-turbo')
51
- chain = load_qa_chain(llm, chain_type="stuff")
52
 
53
- with st.spinner("Procesando tu pregunta..."):
54
- try:
55
- respuesta = chain.run(input_documents=docs, question=user_question)
56
- st.write(respuesta)
57
- except Exception as e:
58
- st.error(f"Error: {str(e)}")
59
- elif pdf_obj and not HUGGINGFACE_API_KEY:
60
- st.warning("Por favor, introduce una clave API de Hugging Face para procesar el documento.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  import os
3
  from PyPDF2 import PdfReader
4
  from langchain.text_splitter import RecursiveCharacterTextSplitter
5
+ from langchain.embeddings import HuggingFaceHubEmbeddings
6
  from langchain.vectorstores import FAISS
7
+ from langchain.llms import HuggingFaceHub
8
  from langchain.chains.question_answering import load_qa_chain
9
+ from langchain.prompts import PromptTemplate
10
 
11
  st.set_page_config(page_title='preguntaDOC')
12
  st.header("Pregunta a tu PDF")
13
 
14
+ # Campo para el token de Hugging Face (ahora requerido para los embeddings)
15
+ huggingface_api_token = st.text_input('Hugging Face API Token (requerido)', type='password')
16
 
17
  pdf_obj = st.file_uploader("Carga tu documento", type="pdf", on_change=st.cache_resource.clear)
18
 
19
  @st.cache_resource
20
+ def create_embeddings(pdf, api_token):
21
+ if not api_token:
22
+ st.error("Se requiere un token de API de Hugging Face")
23
+ return None
24
+
25
+ os.environ["HUGGINGFACEHUB_API_TOKEN"] = api_token
26
 
27
  pdf_reader = PdfReader(pdf)
28
  text = ""
 
37
  chunks = text_splitter.split_text(text)
38
 
39
  # Usar HuggingFaceHubEmbeddings en lugar de HuggingFaceEmbeddings
40
+ # Este enfoque no requiere sentence-transformers instalado localmente
41
  embeddings = HuggingFaceHubEmbeddings(
42
  repo_id="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
43
+ huggingfacehub_api_token=api_token
44
  )
45
 
46
  knowledge_base = FAISS.from_texts(chunks, embeddings)
47
  return knowledge_base
48
 
49
+ if pdf_obj and huggingface_api_token:
50
+ knowledge_base = create_embeddings(pdf_obj, huggingface_api_token)
 
51
 
52
+ if knowledge_base:
53
+ user_question = st.text_input("Haz una pregunta sobre tu PDF:")
 
 
 
54
 
55
+ if user_question:
56
+ docs = knowledge_base.similarity_search(user_question, 3)
57
+
58
+ # Usar un modelo gratuito de Hugging Face
59
+ llm = HuggingFaceHub(
60
+ repo_id="google/flan-t5-large",
61
+ huggingfacehub_api_token=huggingface_api_token,
62
+ model_kwargs={"temperature": 0.5, "max_length": 512}
63
+ )
64
+
65
+ prompt_template = """
66
+ Responde a la siguiente pregunta basándote únicamente en el contexto proporcionado.
67
+
68
+ Contexto: {context}
69
+
70
+ Pregunta: {question}
71
+
72
+ Respuesta:
73
+ """
74
+
75
+ PROMPT = PromptTemplate(
76
+ template=prompt_template,
77
+ input_variables=["context", "question"]
78
+ )
79
+
80
+ chain = load_qa_chain(llm, chain_type="stuff", prompt=PROMPT)
81
+
82
+ with st.spinner("Procesando tu pregunta..."):
83
+ try:
84
+ respuesta = chain.run(input_documents=docs, question=user_question)
85
+ st.write(respuesta)
86
+ except Exception as e:
87
+ st.error(f"Error al procesar tu pregunta: {str(e)}")
88
+ elif not huggingface_api_token and pdf_obj:
89
+ st.warning("Por favor, ingresa tu token de API de Hugging Face para continuar.")
requirements.txt CHANGED
@@ -9,5 +9,3 @@ accelerate==0.20.3
9
  einops==0.6.1
10
  protobuf==3.20.3
11
  tiktoken==0.4.0
12
- openai==0.28.1
13
-
 
9
  einops==0.6.1
10
  protobuf==3.20.3
11
  tiktoken==0.4.0