sandrocalzada committed on
Commit
43b6956
1 Parent(s): b649592

Update app.py

Files changed (1)
  1. app.py +105 -0
app.py CHANGED
@@ -0,0 +1,105 @@
+ import streamlit as st
+ from llama_index.core import StorageContext, load_index_from_storage, VectorStoreIndex, SimpleDirectoryReader, ChatPromptTemplate
+ from llama_index.llms.huggingface import HuggingFaceInferenceAPI
+ from dotenv import load_dotenv
+ from llama_index.embeddings.huggingface import HuggingFaceEmbedding
+ from llama_index.core import Settings
+ import os
+ import base64
+
+ # Load environment variables
+ load_dotenv()
+
+ # Configure the Llama index settings
+ Settings.llm = HuggingFaceInferenceAPI(
+     model_name="google/gemma-1.1-7b-it",
+     tokenizer_name="google/gemma-1.1-7b-it",
+     context_window=3000,
+     token=os.getenv("HF_TOKEN"),
+     max_new_tokens=512,
+     generate_kwargs={"temperature": 0.1},
+ )
+ Settings.embed_model = HuggingFaceEmbedding(
+     model_name="BAAI/bge-small-en-v1.5"
+ )
+
+ # Define the directories for persistent storage and data
+ PERSIST_DIR = "./db"
+ DATA_DIR = "data"
+
+ # Ensure the data and storage directories exist
+ os.makedirs(DATA_DIR, exist_ok=True)
+ os.makedirs(PERSIST_DIR, exist_ok=True)
+
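+ # Render an uploaded PDF inline by embedding it as a base64-encoded iframe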
+ def displayPDF(file):
+     with open(file, "rb") as f:
+         base64_pdf = base64.b64encode(f.read()).decode('utf-8')
+     pdf_display = f'<iframe src="data:application/pdf;base64,{base64_pdf}" width="100%" height="600" type="application/pdf"></iframe>'
+     st.markdown(pdf_display, unsafe_allow_html=True)
+
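+ # Build a vector index over every document in DATA_DIR and persist it to disk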
+ def data_ingestion():
+     documents = SimpleDirectoryReader(DATA_DIR).load_data()
+     index = VectorStoreIndex.from_documents(documents)
+     index.storage_context.persist(persist_dir=PERSIST_DIR)
+
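+ # Load the persisted index and answer a query with a Spanish-language QA prompt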
+ def handle_query(query):
+     storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
+     index = load_index_from_storage(storage_context)
+     chat_text_qa_msgs = [
+         (
+             "user",
+             """You are a Q&A assistant. Your main goal is to provide answers as accurately as possible, based on the instructions and context you have been given. If a question does not match the provided context or is outside the scope of the document, kindly advise the user to ask questions within the context of the document. Provide the answers in Spanish and cite the page where the answer was found.
+             Context:
+             {context_str}
+             Question:
+             {query_str}
+             """
+         )
+     ]
+     text_qa_template = ChatPromptTemplate.from_messages(chat_text_qa_msgs)
+
+     query_engine = index.as_query_engine(text_qa_template=text_qa_template)
+     answer = query_engine.query(query)
+
+     if hasattr(answer, 'response'):
+         return answer.response
+     elif isinstance(answer, dict) and 'response' in answer:
+         return answer['response']
+     else:
+         return "Disculpa, no pude encontrar una respuesta."
+
+
+ # Streamlit app initialization
+ st.title("(PDF) Chat con documentos🗞️")
+ st.markdown("Retrieval-Augmented Generation")
+ st.markdown("iniciar chat ...🚀")
+
+ if 'messages' not in st.session_state:
+     st.session_state.messages = [{'role': 'assistant', 'content': '¡Hola! Selecciona un pdf para cargar, y hazme una pregunta.'}]
+
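+ # Sidebar: upload a PDF, save it to DATA_DIR, and (re)build the index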
+ with st.sidebar:
+     st.title("Menu:")
+     uploaded_file = st.file_uploader("Sube un archivo PDF y dale click al botón enviar y procesar.")
+     if st.button("Enviar y Procesar") and uploaded_file is not None:
+         with st.spinner("Procesando..."):
+             filepath = os.path.join(DATA_DIR, "saved_pdf.pdf")
+             with open(filepath, "wb") as f:
+                 f.write(uploaded_file.getbuffer())
+             # displayPDF(filepath)  # Display the uploaded PDF
+             data_ingestion()  # Process PDF every time a new file is uploaded
+             st.success("Done")
+
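+ # Chat loop: record the question, run it through the index, and render the history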
+ user_prompt = st.chat_input("Pregunta acerca del contenido en el archivo PDF:")
+ if user_prompt:
+     st.session_state.messages.append({'role': 'user', 'content': user_prompt})
+     response = handle_query(user_prompt)
+     st.session_state.messages.append({'role': 'assistant', 'content': response})
+
+ for message in st.session_state.messages:
+     with st.chat_message(message['role']):
+         st.write(message['content'])