Spaces:
Running
on
Zero
Running
on
Zero
improve latex parsing
Browse files
app.py
CHANGED
@@ -133,34 +133,43 @@ def update_inputs(task):
|
|
133 |
]
|
134 |
|
135 |
def parse_latex_output(res):
|
136 |
-
|
|
|
137 |
parsed_lines = []
|
138 |
in_latex = False
|
139 |
-
|
|
|
140 |
for line in lines:
|
141 |
-
line
|
142 |
-
|
|
|
|
|
|
|
143 |
continue
|
|
|
|
|
144 |
|
145 |
-
latex_patterns = [r'\{', r'\}', r'\[', r'\]', r'\\', r'\$', r'_', r'^']
|
146 |
contains_latex = any(re.search(pattern, line) for pattern in latex_patterns)
|
147 |
|
148 |
-
|
149 |
-
|
150 |
-
if contains_latex or is_key_value:
|
151 |
if not in_latex:
|
152 |
-
parsed_lines.append('$$')
|
153 |
in_latex = True
|
154 |
-
|
|
|
155 |
else:
|
156 |
if in_latex:
|
157 |
-
|
|
|
158 |
in_latex = False
|
|
|
159 |
parsed_lines.append(line)
|
|
|
160 |
if in_latex:
|
161 |
-
|
162 |
-
|
163 |
-
|
|
|
164 |
|
165 |
def ocr_demo(image, task, ocr_type, ocr_box, ocr_color):
|
166 |
res, html_content, unique_id = process_image(image, task, ocr_type, ocr_box, ocr_color)
|
|
|
133 |
]
|
134 |
|
135 |
def parse_latex_output(res):
    """Wrap the LaTeX-looking parts of *res* in ``$$`` display-math delimiters.

    *res* is split on existing ``$$ ... $$`` spans (the spans themselves are
    kept as segments; the split is NOT line-based).  Each segment is stripped
    and classified:

    * a segment containing any of ``{ } [ ] \\ $ _ ^ "`` is treated as LaTeX
      and collected into a run that is bracketed by ``'$$'`` entries;
    * any other segment closes the open run (if any) and is emitted as-is.

    All resulting entries are finally joined with ``'$$\\n$$'``.

    Args:
        res: Raw text to post-process.  ``None`` or an empty string yields
            an empty string.

    Returns:
        The joined string described above.
    """
    if not res:
        return ''

    # One character class for all LaTeX markers.  The caret must be a literal
    # here: as a standalone pattern, `^` is the start-of-string anchor and
    # matches *every* segment, which previously made the plain-text branch
    # below unreachable.
    latex_marker = re.compile(r'[{}\[\]\\$_^"]')

    # Capturing group keeps the `$$ ... $$` spans in the result; DOTALL lets a
    # span extend over several lines.
    segments = re.split(r'(\$\$.*?\$\$)', res, flags=re.DOTALL)

    parsed_lines = []
    latex_buffer = []
    in_latex = False

    for segment in segments:
        # A segment that is exactly one newline is passed through unstripped,
        # into the open LaTeX run if there is one.
        if segment == '\n':
            if in_latex:
                latex_buffer.append(segment)
            else:
                parsed_lines.append(segment)
            continue

        segment = segment.strip()

        if latex_marker.search(segment):
            if not in_latex:
                # Open a new run with its leading delimiter entry.
                in_latex = True
                latex_buffer = ['$$']
            latex_buffer.append(segment)
        else:
            if in_latex:
                # Plain text ends the run: close it and flush it to the output.
                latex_buffer.append('$$')
                parsed_lines.extend(latex_buffer)
                in_latex = False
                latex_buffer = []
            parsed_lines.append(segment)

    # Flush a run that is still open at end of input.
    if in_latex:
        latex_buffer.append('$$')
        parsed_lines.extend(latex_buffer)

    # NOTE(review): the separator is applied between *all* entries, so plain
    # entries adjacent to a LaTeX run also receive `$$` delimiters.  Kept
    # unchanged so LaTeX-only output stays byte-identical; confirm whether
    # mixed plain/LaTeX input should be joined differently.
    return '$$\n$$'.join(parsed_lines)
|
173 |
|
174 |
def ocr_demo(image, task, ocr_type, ocr_box, ocr_color):
|
175 |
res, html_content, unique_id = process_image(image, task, ocr_type, ocr_box, ocr_color)
|