Tonic committed on
Commit
fa528ee
·
unverified ·
1 Parent(s): f63ea6a

improve interface logic with image editor and parse res

Browse files
Files changed (1) hide show
  1. app.py +58 -19
app.py CHANGED
@@ -24,11 +24,6 @@ model = AutoModel.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True,
24
  model = model.eval().cuda()
25
  model.config.pad_token_id = tokenizer.eos_token_id
26
 
27
- def image_to_base64(image):
28
- buffered = io.BytesIO()
29
- image.save(buffered, format="PNG")
30
- return base64.b64encode(buffered.getvalue()).decode()
31
-
32
  UPLOAD_FOLDER = "./uploads"
33
  RESULTS_FOLDER = "./results"
34
 
@@ -36,6 +31,13 @@ for folder in [UPLOAD_FOLDER, RESULTS_FOLDER]:
36
  if not os.path.exists(folder):
37
  os.makedirs(folder)
38
 
 
 
 
 
 
 
 
39
  @spaces.GPU()
40
  def process_image(image, task, ocr_type=None, ocr_box=None, ocr_color=None):
41
  if image is None:
@@ -45,9 +47,25 @@ def process_image(image, task, ocr_type=None, ocr_box=None, ocr_color=None):
45
  image_path = os.path.join(UPLOAD_FOLDER, f"{unique_id}.png")
46
  result_path = os.path.join(RESULTS_FOLDER, f"{unique_id}.html")
47
 
48
- shutil.copy(image, image_path)
49
-
50
  try:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
  if task == "Plain Text OCR":
52
  res = model.chat(tokenizer, image_path, ocr_type='ocr')
53
  return res, None, unique_id
@@ -74,21 +92,34 @@ def process_image(image, task, ocr_type=None, ocr_box=None, ocr_color=None):
74
  finally:
75
  if os.path.exists(image_path):
76
  os.remove(image_path)
77
-
 
 
 
 
 
78
  def update_inputs(task):
79
  if task in ["Plain Text OCR", "Format Text OCR", "Multi-crop OCR", "Render Formatted OCR"]:
80
- return [gr.update(visible=False)] * 3
81
  elif task == "Fine-grained OCR (Box)":
82
  return [
83
  gr.update(visible=True, choices=["ocr", "format"]),
84
  gr.update(visible=True),
85
  gr.update(visible=False),
 
 
 
 
86
  ]
87
  elif task == "Fine-grained OCR (Color)":
88
  return [
89
  gr.update(visible=True, choices=["ocr", "format"]),
90
  gr.update(visible=False),
91
  gr.update(visible=True, choices=["red", "green", "blue"]),
 
 
 
 
92
  ]
93
 
94
  def ocr_demo(image, task, ocr_type, ocr_box, ocr_color):
@@ -96,7 +127,9 @@ def ocr_demo(image, task, ocr_type, ocr_box, ocr_color):
96
 
97
  if res.startswith("Error:"):
98
  return res, None
99
-
 
 
100
  res = f"$$ {res} $$"
101
 
102
  if html_content:
@@ -118,18 +151,11 @@ with gr.Blocks() as demo:
118
  with gr.Row():
119
  gr.Markdown(title)
120
 
121
- with gr.Row():
122
- with gr.Column(scale=1):
123
- gr.Markdown(description)
124
- with gr.Column(scale=1):
125
- with gr.Group():
126
- gr.Markdown(modelinfor)
127
- gr.Markdown(joinus)
128
-
129
  with gr.Row():
130
  with gr.Column(scale=1):
131
  with gr.Group():
132
  image_input = gr.Image(type="filepath", label="Input Image")
 
133
  task_dropdown = gr.Dropdown(
134
  choices=[
135
  "Plain Text OCR",
@@ -158,6 +184,7 @@ with gr.Blocks() as demo:
158
  visible=False
159
  )
160
  submit_button = gr.Button("Process")
 
161
 
162
  with gr.Column(scale=1):
163
  with gr.Group():
@@ -167,7 +194,13 @@ with gr.Blocks() as demo:
167
  task_dropdown.change(
168
  update_inputs,
169
  inputs=[task_dropdown],
170
- outputs=[ocr_type_dropdown, ocr_box_input, ocr_color_dropdown]
 
 
 
 
 
 
171
  )
172
 
173
  submit_button.click(
@@ -176,6 +209,12 @@ with gr.Blocks() as demo:
176
  outputs=[output_markdown, output_html]
177
  )
178
 
 
 
 
 
 
 
179
  if __name__ == "__main__":
180
  cleanup_old_files()
181
  demo.launch()
 
24
  model = model.eval().cuda()
25
  model.config.pad_token_id = tokenizer.eos_token_id
26
 
 
 
 
 
 
27
  UPLOAD_FOLDER = "./uploads"
28
  RESULTS_FOLDER = "./results"
29
 
 
31
  if not os.path.exists(folder):
32
  os.makedirs(folder)
33
 
34
+ def image_to_base64(image):
35
+ buffered = io.BytesIO()
36
+ image.save(buffered, format="PNG")
37
+ return base64.b64encode(buffered.getvalue()).decode()
38
+
39
+
40
+
41
  @spaces.GPU()
42
  def process_image(image, task, ocr_type=None, ocr_box=None, ocr_color=None):
43
  if image is None:
 
47
  image_path = os.path.join(UPLOAD_FOLDER, f"{unique_id}.png")
48
  result_path = os.path.join(RESULTS_FOLDER, f"{unique_id}.html")
49
 
 
 
50
  try:
51
+ if isinstance(image, dict): # If image is from ImageEditor
52
+ composite_image = image.get("composite")
53
+ if composite_image is not None:
54
+ if isinstance(composite_image, np.ndarray):
55
+ Image.fromarray(composite_image).save(image_path)
56
+ elif isinstance(composite_image, str):
57
+ shutil.copy(composite_image, image_path)
58
+ else:
59
+ return "Error: Unsupported image format from ImageEditor", None, None
60
+ else:
61
+ return "Error: No composite image found in ImageEditor output", None, None
62
+ elif isinstance(image, np.ndarray):
63
+ Image.fromarray(image).save(image_path)
64
+ elif isinstance(image, str):
65
+ shutil.copy(image, image_path)
66
+ else:
67
+ return "Error: Unsupported image format", None, None
68
+
69
  if task == "Plain Text OCR":
70
  res = model.chat(tokenizer, image_path, ocr_type='ocr')
71
  return res, None, unique_id
 
92
  finally:
93
  if os.path.exists(image_path):
94
  os.remove(image_path)
95
+ def update_image_input(task):
96
+ if task == "Fine-grained OCR (Color)":
97
+ return gr.update(visible=False), gr.update(visible=True), gr.update(visible=True)
98
+ else:
99
+ return gr.update(visible=True), gr.update(visible=False), gr.update(visible=False)
100
+
101
  def update_inputs(task):
102
  if task in ["Plain Text OCR", "Format Text OCR", "Multi-crop OCR", "Render Formatted OCR"]:
103
+ return [gr.update(visible=False)] * 5 + [gr.update(visible=True), gr.update(visible=False)]
104
  elif task == "Fine-grained OCR (Box)":
105
  return [
106
  gr.update(visible=True, choices=["ocr", "format"]),
107
  gr.update(visible=True),
108
  gr.update(visible=False),
109
+ gr.update(visible=False),
110
+ gr.update(visible=False),
111
+ gr.update(visible=True),
112
+ gr.update(visible=False)
113
  ]
114
  elif task == "Fine-grained OCR (Color)":
115
  return [
116
  gr.update(visible=True, choices=["ocr", "format"]),
117
  gr.update(visible=False),
118
  gr.update(visible=True, choices=["red", "green", "blue"]),
119
+ gr.update(visible=False),
120
+ gr.update(visible=True),
121
+ gr.update(visible=False),
122
+ gr.update(visible=True)
123
  ]
124
 
125
  def ocr_demo(image, task, ocr_type, ocr_box, ocr_color):
 
127
 
128
  if res.startswith("Error:"):
129
  return res, None
130
+
131
+ res = res.replace("\\title", "\\title ")
132
+
133
  res = f"$$ {res} $$"
134
 
135
  if html_content:
 
151
  with gr.Row():
152
  gr.Markdown(title)
153
 
 
 
 
 
 
 
 
 
154
  with gr.Row():
155
  with gr.Column(scale=1):
156
  with gr.Group():
157
  image_input = gr.Image(type="filepath", label="Input Image")
158
+ image_editor = gr.ImageEditor(label="Image Editor", type="pil", visible=False)
159
  task_dropdown = gr.Dropdown(
160
  choices=[
161
  "Plain Text OCR",
 
184
  visible=False
185
  )
186
  submit_button = gr.Button("Process")
187
+ editor_submit_button = gr.Button("Process Edited Image", visible=False)
188
 
189
  with gr.Column(scale=1):
190
  with gr.Group():
 
194
  task_dropdown.change(
195
  update_inputs,
196
  inputs=[task_dropdown],
197
+ outputs=[ocr_type_dropdown, ocr_box_input, ocr_color_dropdown, image_input, image_editor, submit_button, editor_submit_button]
198
+ )
199
+
200
+ task_dropdown.change(
201
+ update_image_input,
202
+ inputs=[task_dropdown],
203
+ outputs=[image_input, image_editor, editor_submit_button]
204
  )
205
 
206
  submit_button.click(
 
209
  outputs=[output_markdown, output_html]
210
  )
211
 
212
+ editor_submit_button.click(
213
+ ocr_demo,
214
+ inputs=[image_editor, task_dropdown, ocr_type_dropdown, ocr_box_input, ocr_color_dropdown],
215
+ outputs=[output_markdown, output_html]
216
+ )
217
+
218
  if __name__ == "__main__":
219
  cleanup_old_files()
220
  demo.launch()