muradkhan committed on
Commit
86d3138
1 Parent(s): ac61621

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -32
app.py CHANGED
@@ -1,11 +1,9 @@
1
  from pprint import pprint
2
- from haystack import Document, Pipeline
3
- from haystack.nodes import BM25Retriever
4
- from haystack.document_stores import InMemoryDocumentStore
5
- from haystack.nodes import PromptTemplate, PromptNode
6
  from PyPDF2 import PdfReader
7
  import gradio as gr
8
  import os
 
9
 
10
  # Function to read PDF file content directly
11
  def read_pdf(pdf_path):
@@ -19,38 +17,14 @@ def read_pdf(pdf_path):
19
  def process_invoice(file, hf_token, questions):
20
  # Read the PDF content directly
21
  pdf_content = read_pdf(file.name)
22
- document = Document(content=pdf_content)
23
- docs = [document]
24
 
25
- document_store = InMemoryDocumentStore(use_bm25=True)
26
- document_store.write_documents(docs)
27
- retriever = BM25Retriever(document_store, top_k=2)
28
-
29
- qa_template = PromptTemplate(prompt=
30
- """ Using exclusively the information contained in the context, answer only the question asked without adding
31
- suggestions for possible questions, and respond exclusively in English. If the answer cannot be deduced from the
32
- context, Don't add anything from the references if it is not asked explicitly. Do not repeat the same information twice
33
- respond: "Not sure because not relevant to the context.
34
- Context: {join(documents)};
35
- Question: {query}
36
- """)
37
-
38
- prompt_node = PromptNode(
39
- model_name_or_path='mistralai/Mixtral-8x7B-Instruct-v0.1',
40
- api_key=hf_token,
41
- default_prompt_template=qa_template,
42
- max_length=500,
43
- model_kwargs={"model_max_length": 5000}
44
- )
45
-
46
- rag_pipeline = Pipeline()
47
- rag_pipeline.add_node(component=retriever, name="retriever", inputs=["Query"])
48
- rag_pipeline.add_node(component=prompt_node, name="prompt_node", inputs=["retriever"])
49
 
50
  answers = {}
51
  for question in questions.split(','):
52
- result = rag_pipeline.run(query=question.strip())
53
- answers[question] = result["results"][0].strip()
54
 
55
  return answers
56
 
 
1
  from pprint import pprint
2
+ from getpass import getpass
 
 
 
3
  from PyPDF2 import PdfReader
4
  import gradio as gr
5
  import os
6
+ from transformers import pipeline
7
 
8
  # Function to read PDF file content directly
9
  def read_pdf(pdf_path):
 
17
  def process_invoice(file, hf_token, questions):
18
  # Read the PDF content directly
19
  pdf_content = read_pdf(file.name)
 
 
20
 
21
+ # Initialize the Hugging Face pipeline
22
+ qa_pipeline = pipeline("question-answering", model="mistralai/Mixtral-8x7B-Instruct-v0.1", token=hf_token)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
 
24
  answers = {}
25
  for question in questions.split(','):
26
+ result = qa_pipeline(question=question.strip(), context=pdf_content)
27
+ answers[question] = result['answer']
28
 
29
  return answers
30