root committed on
Commit
09c3d85
1 Parent(s): 1aa4f12

add custom pipeline

Browse files
__pycache__/pipeline.cpython-310.pyc CHANGED
Binary files a/__pycache__/pipeline.cpython-310.pyc and b/__pycache__/pipeline.cpython-310.pyc differ
 
pipeline.py CHANGED
@@ -3,10 +3,11 @@ from transformers import pipeline
3
  import holidays
4
  import PIL.Image
5
  import io
 
6
 
7
  class PreTrainedPipeline():
8
  def __init__(self, model_path="PrimWong/layout_qa_hparam_tuning"):
9
- # Initialize the document-question-answering pipeline with the specified model
10
  self.pipeline = pipeline("document-question-answering", model=model_path)
11
  self.holidays = holidays.US()
12
 
@@ -15,19 +16,29 @@ class PreTrainedPipeline():
15
  Process input data for document question answering with optional holiday checking.
16
 
17
  Args:
18
- data (Dict[str, Any]): Input data containing a 'text' field possibly along with 'image',
19
  and optionally a 'date' field.
20
 
21
  Returns:
22
- str: The answer or processed information based on the text, or a holiday message if applicable.
23
  """
24
- text = data.get("inputs")
25
  date = data.get("date")
26
 
27
- # Check if the date is a holiday
28
  if date and date in self.holidays:
29
  return "Today is a holiday!"
30
 
31
- # Run prediction using only the text input
32
- prediction = self.pipeline(question=text, image="What information do you need?")
 
 
 
 
 
 
 
 
33
  return prediction["answer"] # Adjust based on actual output format of the model
 
 
 
3
  import holidays
4
  import PIL.Image
5
  import io
6
+ import pytesseract
7
 
8
  class PreTrainedPipeline():
9
  def __init__(self, model_path="PrimWong/layout_qa_hparam_tuning"):
10
+ # Initializing the document-question-answering pipeline with the specified model
11
  self.pipeline = pipeline("document-question-answering", model=model_path)
12
  self.holidays = holidays.US()
13
 
 
16
  Process input data for document question answering with optional holiday checking.
17
 
18
  Args:
19
+ data (Dict[str, Any]): Input data containing an 'inputs' field with 'image' and 'question',
20
  and optionally a 'date' field.
21
 
22
  Returns:
23
+ str: The answer to the question or a holiday message if applicable.
24
  """
25
+ inputs = data.get('inputs', {})
26
  date = data.get("date")
27
 
28
+ # Check if date is provided and if it's a holiday
29
  if date and date in self.holidays:
30
  return "Today is a holiday!"
31
 
32
+ # Process the image and question for document question answering
33
+ image_path = inputs.get("image")
34
+ question = inputs.get("question")
35
+
36
+ # Load and process an image
37
+ image = PIL.Image.open(image_path)
38
+ image_text = pytesseract.image_to_string(image) # Use OCR to extract text
39
+
40
+ # Run prediction (Note: this now uses the extracted text, not the image directly)
41
+ prediction = self.pipeline(question=question, context=image_text)
42
  return prediction["answer"] # Adjust based on actual output format of the model
43
+
44
+ # Note: This script assumes the use of pytesseract for OCR to process images. Ensure pytesseract is configured properly.
requirements.txt CHANGED
@@ -1 +1,3 @@
1
  holidays
 
 
 
1
  holidays
2
+ holidays
3
+ holidays