Tonic committed on
Commit
a197908
·
verified ·
1 Parent(s): 73597ae

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -12
app.py CHANGED
@@ -42,7 +42,7 @@ def image_to_base64(image):
42
 
43
 
44
  @spaces.GPU()
45
- def process_image(image, task, max_new_tokens, no_repeat_ngram_size, ocr_type=None, ocr_box=None, ocr_color=None):
46
  if image is None:
47
  return "Error: No image provided", None, None
48
 
@@ -70,19 +70,19 @@ def process_image(image, task, max_new_tokens, no_repeat_ngram_size, ocr_type=No
70
  return "Error: Unsupported image format", None, None
71
 
72
  if task == "Plain Text OCR":
73
- res = model.chat(tokenizer, image_path, ocr_type='ocr', max_new_tokens=max_new_tokens, no_repeat_ngram_size=no_repeat_ngram_size)
74
  return res, None, unique_id
75
  else:
76
  if task == "Format Text OCR":
77
- res = model.chat(tokenizer, image_path, ocr_type='format', render=True, save_render_file=result_path, max_new_tokens=max_new_tokens, no_repeat_ngram_size=no_repeat_ngram_size)
78
  elif task == "Fine-grained OCR (Box)":
79
- res = model.chat(tokenizer, image_path, ocr_type=ocr_type, ocr_box=ocr_box, render=True, save_render_file=result_path, max_new_tokens=max_new_tokens, no_repeat_ngram_size=no_repeat_ngram_size)
80
  elif task == "Fine-grained OCR (Color)":
81
- res = model.chat(tokenizer, image_path, ocr_type=ocr_type, ocr_color=ocr_color, render=True, save_render_file=result_path, max_new_tokens=max_new_tokens, no_repeat_ngram_size=no_repeat_ngram_size)
82
  elif task == "Multi-crop OCR":
83
- res = model.chat_crop(tokenizer, image_path, ocr_type='format', render=True, save_render_file=result_path, max_new_tokens=max_new_tokens, no_repeat_ngram_size=no_repeat_ngram_size)
84
  elif task == "Render Formatted OCR":
85
- res = model.chat(tokenizer, image_path, ocr_type='format', render=True, save_render_file=result_path, max_new_tokens=max_new_tokens, no_repeat_ngram_size=no_repeat_ngram_size)
86
 
87
  if os.path.exists(result_path):
88
  with open(result_path, 'r') as f:
@@ -173,8 +173,9 @@ def parse_latex_output(res):
173
 
174
  return '$$\\$$\n'.join(parsed_lines)
175
 
176
- def ocr_demo(image, task, ocr_type, ocr_box, ocr_color, max_new_tokens, no_repeat_ngram_size):
177
- res, html_content, unique_id = process_image(image, task, max_new_tokens, no_repeat_ngram_size, ocr_type, ocr_box, ocr_color)
 
178
 
179
  if isinstance(res, str) and res.startswith("Error:"):
180
  return res, None
@@ -197,7 +198,6 @@ def cleanup_old_files():
197
  if current_time - file_path.stat().st_mtime > 3600: # 1 hour
198
  file_path.unlink()
199
 
200
-
201
  with gr.Blocks(theme=gr.themes.Base()) as demo:
202
  with gr.Row():
203
  gr.Markdown(title)
@@ -253,7 +253,7 @@ with gr.Blocks(theme=gr.themes.Base()) as demo:
253
  visible=False
254
  )
255
  with gr.Row():
256
- max_new_tokens_slider = gr.Slider(50, 500, step=10, value=150, label="Max New Tokens")
257
  no_repeat_ngram_size_slider = gr.Slider(1, 10, step=1, value=2, label="No Repeat N-gram Size")
258
 
259
  submit_button = gr.Button("Process")
@@ -278,7 +278,7 @@ with gr.Blocks(theme=gr.themes.Base()) as demo:
278
 
279
  submit_button.click(
280
  ocr_demo,
281
- inputs=[image_input, task_dropdown, ocr_type_dropdown, ocr_box_input, ocr_color_dropdown, max_new_tokens_slider, no_repeat_ngram_size_slider],
282
  outputs=[output_markdown, output_html]
283
  )
284
  editor_submit_button.click(
 
42
 
43
 
44
  @spaces.GPU()
45
+ def process_image(image, task, no_repeat_ngram_size, ocr_type=None, ocr_box=None, ocr_color=None):
46
  if image is None:
47
  return "Error: No image provided", None, None
48
 
 
70
  return "Error: Unsupported image format", None, None
71
 
72
  if task == "Plain Text OCR":
73
+ res = model.chat(tokenizer, image_path, ocr_type='ocr', no_repeat_ngram_size=no_repeat_ngram_size)
74
  return res, None, unique_id
75
  else:
76
  if task == "Format Text OCR":
77
+ res = model.chat(tokenizer, image_path, ocr_type='format', render=True, save_render_file=result_path, no_repeat_ngram_size=no_repeat_ngram_size)
78
  elif task == "Fine-grained OCR (Box)":
79
+ res = model.chat(tokenizer, image_path, ocr_type=ocr_type, ocr_box=ocr_box, render=True, save_render_file=result_path, no_repeat_ngram_size=no_repeat_ngram_size)
80
  elif task == "Fine-grained OCR (Color)":
81
+ res = model.chat(tokenizer, image_path, ocr_type=ocr_type, ocr_color=ocr_color, render=True, save_render_file=result_path, no_repeat_ngram_size=no_repeat_ngram_size)
82
  elif task == "Multi-crop OCR":
83
+ res = model.chat_crop(tokenizer, image_path, ocr_type='format', render=True, save_render_file=result_path, no_repeat_ngram_size=no_repeat_ngram_size)
84
  elif task == "Render Formatted OCR":
85
+ res = model.chat(tokenizer, image_path, ocr_type='format', render=True, save_render_file=result_path, no_repeat_ngram_size=no_repeat_ngram_size)
86
 
87
  if os.path.exists(result_path):
88
  with open(result_path, 'r') as f:
 
173
 
174
  return '$$\\$$\n'.join(parsed_lines)
175
 
176
+
177
+ def ocr_demo(image, task, ocr_type, ocr_box, ocr_color, no_repeat_ngram_size):
178
+ res, html_content, unique_id = process_image(image, task, no_repeat_ngram_size, ocr_type, ocr_box, ocr_color)
179
 
180
  if isinstance(res, str) and res.startswith("Error:"):
181
  return res, None
 
198
  if current_time - file_path.stat().st_mtime > 3600: # 1 hour
199
  file_path.unlink()
200
 
 
201
  with gr.Blocks(theme=gr.themes.Base()) as demo:
202
  with gr.Row():
203
  gr.Markdown(title)
 
253
  visible=False
254
  )
255
  with gr.Row():
256
+ # max_new_tokens_slider = gr.Slider(50, 500, step=10, value=150, label="Max New Tokens")
257
  no_repeat_ngram_size_slider = gr.Slider(1, 10, step=1, value=2, label="No Repeat N-gram Size")
258
 
259
  submit_button = gr.Button("Process")
 
278
 
279
  submit_button.click(
280
  ocr_demo,
281
+ inputs=[image_input, task_dropdown, ocr_type_dropdown, ocr_box_input, ocr_color_dropdown, no_repeat_ngram_size_slider],
282
  outputs=[output_markdown, output_html]
283
  )
284
  editor_submit_button.click(