sandrocalzada committed on
Commit
43b6956
1 Parent(s): b649592

Update app.py

Files changed (1)
  1. app.py +105 -0
app.py CHANGED
@@ -0,0 +1,105 @@
+ import streamlit as st
+ from llama_index.core import StorageContext, load_index_from_storage, VectorStoreIndex, SimpleDirectoryReader, ChatPromptTemplate
+ from llama_index.llms.huggingface import HuggingFaceInferenceAPI
+ from dotenv import load_dotenv
+ from llama_index.embeddings.huggingface import HuggingFaceEmbedding
+ from llama_index.core import Settings
+ import os
+ import base64
+
+ # Load environment variables
+ load_dotenv()
+
+ # Configure the Llama index settings
+ Settings.llm = HuggingFaceInferenceAPI(
+     model_name="google/gemma-1.1-7b-it",
+     tokenizer_name="google/gemma-1.1-7b-it",
+     context_window=3000,
+     token=os.getenv("HF_TOKEN"),
+     max_new_tokens=512,
+     generate_kwargs={"temperature": 0.1},
+ )
+ Settings.embed_model = HuggingFaceEmbedding(
+     model_name="BAAI/bge-small-en-v1.5"
+ )
+
+ # Define the directories for persistent storage and data
+ PERSIST_DIR = "./db"
+ DATA_DIR = "data"
+
+ # Ensure the data and storage directories exist
+ os.makedirs(DATA_DIR, exist_ok=True)
+ os.makedirs(PERSIST_DIR, exist_ok=True)
+
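+ # Render an uploaded PDF inline by embedding it as a base64-encoded iframe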
+ def displayPDF(file):
+     with open(file, "rb") as f:
+         base64_pdf = base64.b64encode(f.read()).decode('utf-8')
+     pdf_display = f'<iframe src="data:application/pdf;base64,{base64_pdf}" width="100%" height="600" type="application/pdf"></iframe>'
+     st.markdown(pdf_display, unsafe_allow_html=True)
+
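+ # Build a vector index over every document in DATA_DIR and persist it to disk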
+ def data_ingestion():
+     documents = SimpleDirectoryReader(DATA_DIR).load_data()
+     index = VectorStoreIndex.from_documents(documents)
+     index.storage_context.persist(persist_dir=PERSIST_DIR)
+
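+ # Load the persisted index and answer a query with a Spanish-language QA prompt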
+ def handle_query(query):
+     storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
+     index = load_index_from_storage(storage_context)
+     chat_text_qa_msgs = [
+         (
+             "user",
+             """You are a Q&A assistant. Your main goal is to provide answers as accurately as possible, based on the instructions and context you have been given. If a question does not match the provided context or is outside the scope of the document, kindly advise the user to ask questions within the context of the document. Provide the answers in Spanish and cite the page where the answer was found.
+             Context:
+             {context_str}
+             Question:
+             {query_str}
+             """
+         )
+     ]
+     text_qa_template = ChatPromptTemplate.from_messages(chat_text_qa_msgs)
+
+     query_engine = index.as_query_engine(text_qa_template=text_qa_template)
+     answer = query_engine.query(query)
+
+     if hasattr(answer, 'response'):
+         return answer.response
+     elif isinstance(answer, dict) and 'response' in answer:
+         return answer['response']
+     else:
+         return "Disculpa, no pude encontrar una respuesta."
+
+
+ # Streamlit app initialization
+ st.title("(PDF) Chat con documentos🗞️")
+ st.markdown("Retrieval-Augmented Generation")
+ st.markdown("iniciar chat ...🚀")
+
+ if 'messages' not in st.session_state:
+     st.session_state.messages = [{'role': 'assistant', 'content': '¡Hola! Selecciona un pdf para cargar, y hazme una pregunta.'}]
+
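+ # Sidebar: upload a PDF, save it to DATA_DIR, and (re)build the index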
+ with st.sidebar:
+     st.title("Menu:")
+     uploaded_file = st.file_uploader("Sube un archivo PDF y dale click al botón enviar y procesar.")
+     if st.button("Enviar y Procesar") and uploaded_file is not None:
+         with st.spinner("Procesando..."):
+             filepath = os.path.join(DATA_DIR, "saved_pdf.pdf")
+             with open(filepath, "wb") as f:
+                 f.write(uploaded_file.getbuffer())
+             # displayPDF(filepath)  # Display the uploaded PDF
+             data_ingestion()  # Process PDF every time a new file is uploaded
+             st.success("Done")
+
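+ # Chat loop: record the question, run it through the index, and render the history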
+ user_prompt = st.chat_input("Pregunta acerca del contenido en el archivo PDF:")
+ if user_prompt:
+     st.session_state.messages.append({'role': 'user', 'content': user_prompt})
+     response = handle_query(user_prompt)
+     st.session_state.messages.append({'role': 'assistant', 'content': response})
+
+ for message in st.session_state.messages:
+     with st.chat_message(message['role']):
+         st.write(message['content'])