Spaces:
Runtime error
Runtime error
Fix package issues
Browse files- app.py +28 -24
- requirements.txt +2 -1
app.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1 |
import json
|
2 |
import os
|
|
|
3 |
|
4 |
import google.generativeai as genai
|
5 |
import gradio as gr
|
@@ -7,27 +8,25 @@ import pandas as pd
|
|
7 |
from gradio_pdf import PDF
|
8 |
from pdf2image import convert_from_path
|
9 |
from pypdf import PdfReader
|
10 |
-
from pathlib import Path
|
11 |
-
dir_ = Path(__file__).parent
|
12 |
|
13 |
genai.configure(api_key=os.environ.get("GOOGLE_API_KEY"))
|
14 |
-
headers=[
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
|
32 |
|
33 |
inputs = [PDF(label="Document")]
|
@@ -78,7 +77,6 @@ def get_content_between_curly_braces(text):
|
|
78 |
return None
|
79 |
|
80 |
|
81 |
-
|
82 |
def parse_utility_bill(filepath):
|
83 |
print("FOUND PDF!")
|
84 |
reader = PdfReader(filepath)
|
@@ -116,7 +114,7 @@ def parse_utility_bill(filepath):
|
|
116 |
print(respone_dict)
|
117 |
rectified_dict = {}
|
118 |
for target_key in headers:
|
119 |
-
|
120 |
for key, value in respone_dict.items():
|
121 |
if key == target_key:
|
122 |
rectified_dict[key] = value
|
@@ -125,15 +123,21 @@ def parse_utility_bill(filepath):
|
|
125 |
rectified_dict[target_key] = None
|
126 |
print(rectified_dict)
|
127 |
example_data = [rectified_dict]
|
128 |
-
|
129 |
|
130 |
return pd.DataFrame(example_data)
|
131 |
|
|
|
132 |
gr.Interface(
|
133 |
fn=parse_utility_bill,
|
134 |
inputs=inputs,
|
135 |
outputs=outputs,
|
136 |
-
examples=[
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
137 |
title="🌏⚡💧🔥PDF Utitlity Bill Parser",
|
138 |
).launch()
|
139 |
-
|
|
|
1 |
import json
|
2 |
import os
|
3 |
+
from pathlib import Path
|
4 |
|
5 |
import google.generativeai as genai
|
6 |
import gradio as gr
|
|
|
8 |
from gradio_pdf import PDF
|
9 |
from pdf2image import convert_from_path
|
10 |
from pypdf import PdfReader
|
|
|
|
|
11 |
|
12 |
genai.configure(api_key=os.environ.get("GOOGLE_API_KEY"))
|
13 |
+
headers = [
|
14 |
+
"DUE DATE",
|
15 |
+
"SERVICE ADDRESS",
|
16 |
+
"SERVICE PERIOD",
|
17 |
+
"ELECTRICITY USAGE (KWH)",
|
18 |
+
"ELECTRICITY SPEND ($)",
|
19 |
+
"GAS USAGE (THERMS)",
|
20 |
+
"GAS SPEND ($)",
|
21 |
+
"WATER USAGE (CCF)",
|
22 |
+
"WATER SPEND ($)",
|
23 |
+
"SEWER ($)",
|
24 |
+
"REFUSE ($)",
|
25 |
+
"STORM DRAIN ($)",
|
26 |
+
"UTILITY USERS TAX ($)",
|
27 |
+
"TOTAL CURRENT CHARGES ($)",
|
28 |
+
"TOTAL AMOUNT DUE",
|
29 |
+
]
|
30 |
|
31 |
|
32 |
inputs = [PDF(label="Document")]
|
|
|
77 |
return None
|
78 |
|
79 |
|
|
|
80 |
def parse_utility_bill(filepath):
|
81 |
print("FOUND PDF!")
|
82 |
reader = PdfReader(filepath)
|
|
|
114 |
print(respone_dict)
|
115 |
rectified_dict = {}
|
116 |
for target_key in headers:
|
117 |
+
|
118 |
for key, value in respone_dict.items():
|
119 |
if key == target_key:
|
120 |
rectified_dict[key] = value
|
|
|
123 |
rectified_dict[target_key] = None
|
124 |
print(rectified_dict)
|
125 |
example_data = [rectified_dict]
|
|
|
126 |
|
127 |
return pd.DataFrame(example_data)
|
128 |
|
129 |
+
|
130 |
gr.Interface(
|
131 |
fn=parse_utility_bill,
|
132 |
inputs=inputs,
|
133 |
outputs=outputs,
|
134 |
+
examples=[
|
135 |
+
"utl-bill-sample.pdf",
|
136 |
+
"nem-2-utility-bill-sample.pdf",
|
137 |
+
"Sample_Utility_Bill.pdf",
|
138 |
+
"Water Bill Sample.pdf",
|
139 |
+
"canada.pdf",
|
140 |
+
"water.pdf",
|
141 |
+
],
|
142 |
title="🌏⚡💧🔥PDF Utitlity Bill Parser",
|
143 |
).launch()
|
|
requirements.txt
CHANGED
@@ -1,4 +1,5 @@
|
|
1 |
pypdf
|
2 |
pytesseract
|
3 |
gradio_pdf
|
4 |
-
google-generativeai
|
|
|
|
1 |
pypdf
|
2 |
pytesseract
|
3 |
gradio_pdf
|
4 |
+
google-generativeai
|
5 |
+
pdf2image
|