Kuldip2411 committed on
Commit
6f984e1
·
verified ·
1 Parent(s): 5c9f913

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -10
app.py CHANGED
@@ -8,6 +8,7 @@ from pydantic import BaseModel, Field
8
  import fitz
9
  import json
10
  from PIL import Image
 
11
  ocr = PaddleOCR(use_angle_cls=True, lang='es')
12
 
13
  st.set_page_config(layout="wide")
@@ -57,26 +58,41 @@ st.title("Vehicle Information Extractor")
57
  st.write("Upload a PDF file to extract vehicle information.")
58
 
59
  uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
 
61
- if uploaded_file is not None:
62
- with open("temp.pdf", "wb") as f:
63
- f.write(uploaded_file.read())
64
-
65
  col1, col2 = st.columns(2)
66
 
67
  with col1:
68
- doc = fitz.open("temp.pdf")
69
- st.write("Uploaded PDF:")
70
  for page_num in range(len(doc)):
71
  page = doc.load_page(page_num)
72
  pix = page.get_pixmap()
73
  img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
74
- st.image(img, caption=f"Page {page_num+1}", use_column_width=True)
75
 
76
- content = ocr.ocr("temp.pdf")
 
77
 
78
  extracted_text = []
79
- for page in content:
80
  for result in page:
81
  text = result[1][0]
82
  extracted_text.append(text)
@@ -104,4 +120,4 @@ if uploaded_file is not None:
104
 
105
  with col2:
106
  st.write("Extracted Vehicle Information (Table):")
107
- st.table(output)
 
8
  import fitz
9
  import json
10
  from PIL import Image
11
+
12
  ocr = PaddleOCR(use_angle_cls=True, lang='es')
13
 
14
  st.set_page_config(layout="wide")
 
58
  st.write("Upload a PDF file to extract vehicle information.")
59
 
60
  uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")
61
+ use_default = st.checkbox("Use Default Pdf")
62
+
63
+ doc = None
64
+
65
+ if use_default:
66
+ default_pdf_path = "pedido V.O.pdf"
67
+ if os.path.exists(default_pdf_path):
68
+ print("Present")
69
+ doc = fitz.open(default_pdf_path)
70
+ st.write("Using default PDF:")
71
+ else:
72
+ st.error("Default PDF not found.")
73
+
74
+ else:
75
+ if uploaded_file is not None:
76
+ with open("temp.pdf", "wb") as f:
77
+ f.write(uploaded_file.read())
78
+ doc = fitz.open("temp.pdf")
79
+ st.write("Uploaded PDF:")
80
 
81
+ if doc:
 
 
 
82
  col1, col2 = st.columns(2)
83
 
84
  with col1:
 
 
85
  for page_num in range(len(doc)):
86
  page = doc.load_page(page_num)
87
  pix = page.get_pixmap()
88
  img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
89
+ st.image(img, caption=f"Page {page_num + 1}", use_column_width=True)
90
 
91
+ # Perform OCR
92
+ ocr_result = ocr.ocr(default_pdf_path if use_default else "temp.pdf")
93
 
94
  extracted_text = []
95
+ for page in ocr_result:
96
  for result in page:
97
  text = result[1][0]
98
  extracted_text.append(text)
 
120
 
121
  with col2:
122
  st.write("Extracted Vehicle Information (Table):")
123
+ st.table(output)