DHEIVER committed on
Commit
41b022e
·
verified ·
1 Parent(s): ff20866

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -6
app.py CHANGED
@@ -40,12 +40,21 @@ def initialize_retriever(file_objs, persist_directory="chroma_db"):
40
 
41
  documents = []
42
  for file_obj in file_objs:
43
- loader = PyPDFLoader(file_obj.name)
44
- raw_docs = loader.load()
45
- for doc in raw_docs:
46
- doc.page_content = preprocess_text(doc.page_content)
47
- doc.metadata.update({"source": os.path.basename(file_obj.name)})
48
- documents.extend(raw_docs)
 
 
 
 
 
 
 
 
 
49
 
50
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=2048, chunk_overlap=128)
51
  splits = text_splitter.split_documents(documents)
@@ -130,6 +139,7 @@ def create_demo():
130
  gr.Textbox(label="System Prompt", placeholder="Digite um prompt de sistema (opcional)", value=None)
131
  ],
132
  title="",
 
133
  )
134
 
135
  process_btn.click(
 
40
 
41
  documents = []
42
  for file_obj in file_objs:
43
+ # Validar se é um PDF
44
+ if not file_obj.name.lower().endswith('.pdf'):
45
+ return f"Erro: O arquivo '{file_obj.name}' não é um PDF válido. Apenas arquivos .pdf são aceitos."
46
+ try:
47
+ loader = PyPDFLoader(file_obj.name)
48
+ raw_docs = loader.load()
49
+ for doc in raw_docs:
50
+ doc.page_content = preprocess_text(doc.page_content)
51
+ doc.metadata.update({"source": os.path.basename(file_obj.name)})
52
+ documents.extend(raw_docs)
53
+ except Exception as e:
54
+ return f"Erro ao processar '{file_obj.name}': {str(e)}"
55
+
56
+ if not documents:
57
+ return "Nenhum conteúdo válido foi extraído dos PDFs."
58
 
59
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=2048, chunk_overlap=128)
60
  splits = text_splitter.split_documents(documents)
 
139
  gr.Textbox(label="System Prompt", placeholder="Digite um prompt de sistema (opcional)", value=None)
140
  ],
141
  title="",
142
+ chatbot=gr.Chatbot(type="messages") # Atualizar para o formato 'messages'
143
  )
144
 
145
  process_btn.click(