Spaces:

muradkhan
/

indemo

Paused

App Files Community

muradkhan committed on Jul 24

Commit

86d3138

•

1 Parent(s): ac61621

Update app.py

Browse files

Files changed (1) hide show

app.py +6 -32

app.py CHANGED Viewed

@@ -1,11 +1,9 @@
 from pprint import pprint
-from haystack import Document, Pipeline
-from haystack.nodes import BM25Retriever
-from haystack.document_stores import InMemoryDocumentStore
-from haystack.nodes import PromptTemplate, PromptNode
 from PyPDF2 import PdfReader
 import gradio as gr
 import os
 # Function to read PDF file content directly
 def read_pdf(pdf_path):
@@ -19,38 +17,14 @@ def read_pdf(pdf_path):
 def process_invoice(file, hf_token, questions):
  # Read the PDF content directly
  pdf_content = read_pdf(file.name)
- document = Document(content=pdf_content)
- docs = [document]
- document_store = InMemoryDocumentStore(use_bm25=True)
- document_store.write_documents(docs)
- retriever = BM25Retriever(document_store, top_k=2)
- qa_template = PromptTemplate(prompt=
- """ Using exclusively the information contained in the context, answer only the question asked without adding
- suggestions for possible questions, and respond exclusively in English. If the answer cannot be deduced from the
- context, Don't add anything from the references if it is not asked explicitly. Do not repeat the same information twice
- respond: "Not sure because not relevant to the context.
- Context: {join(documents)};
- Question: {query}
- """)
- prompt_node = PromptNode(
- model_name_or_path='mistralai/Mixtral-8x7B-Instruct-v0.1',
- api_key=hf_token,
- default_prompt_template=qa_template,
- max_length=500,
- model_kwargs={"model_max_length": 5000}
- )
- rag_pipeline = Pipeline()
- rag_pipeline.add_node(component=retriever, name="retriever", inputs=["Query"])
- rag_pipeline.add_node(component=prompt_node, name="prompt_node", inputs=["retriever"])
  answers = {}
  for question in questions.split(','):
- result = rag_pipeline.run(query=question.strip())
- answers[question] = result["results"][0].strip()
  return answers

 from pprint import pprint
+from getpass import getpass
 from PyPDF2 import PdfReader
 import gradio as gr
 import os
+from transformers import pipeline
 # Function to read PDF file content directly
 def read_pdf(pdf_path):
 def process_invoice(file, hf_token, questions):
  # Read the PDF content directly
  pdf_content = read_pdf(file.name)
+ # Initialize the Hugging Face pipeline
+ qa_pipeline = pipeline("question-answering", model="mistralai/Mixtral-8x7B-Instruct-v0.1", token=hf_token)
  answers = {}
  for question in questions.split(','):
+ result = qa_pipeline(question=question.strip(), context=pdf_content)
+ answers[question] = result['answer']
  return answers