viklofg committed on
Commit
c34a169
·
1 Parent(s): c1fbcc2

Add support for IIIF manifests

Browse files
Files changed (1) hide show
  1. app/tabs/submit.py +172 -43
app/tabs/submit.py CHANGED
@@ -1,9 +1,11 @@
1
  import logging
2
  import os
 
3
  import time
4
 
5
  import gradio as gr
6
  import spaces
 
7
  import yaml
8
  from gradio_modal import Modal
9
  from htrflow.pipeline.pipeline import Pipeline
@@ -149,33 +151,129 @@ def get_selected_example_image(event: gr.SelectData) -> str:
149
  return [event.value["image"]["path"]]
150
 
151
 
152
def get_selected_example_pipeline(event: gr.SelectData) -> str | None:
    """
    Look up which pipeline the selected example image belongs to.

    Returns the name of the first pipeline in PIPELINES whose "examples"
    list contains the image's original file name, or None if no pipeline
    lists it.
    """
    matching_names = (
        name
        for name, details in PIPELINES.items()
        if event.value["image"]["orig_name"] in details.get("examples", [])
    )
    return next(matching_names, None)
 
159
 
160
 
161
def get_image_from_image_url(input_value):
    """
    Turn the text box input into a list of image URLs.

    Input starting with "http" is treated as a ready-made image URL and is
    returned as a single-element list. Anything else is treated as a
    comma-separated list of Riksarkivet image IDs, each of which is
    expanded to its IIIF URL.
    """
    if not input_value.startswith("http"):
        image_ids = (part.strip() for part in input_value.split(","))
        return [
            f"https://lbiiif.riksarkivet.se/arkis!{image_id}/full/max/0/default.jpg"
            for image_id in image_ids
        ]
    return [input_value]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
179
 
180
 
181
  with gr.Blocks() as submit:
@@ -214,13 +312,9 @@ with gr.Blocks() as submit:
214
  # with Modal(visible=False) as edit_pipeline_modal:
215
 
216
  image_iiif_url = gr.Textbox(
217
- label="Upload by image ID",
218
- info=(
219
- "Use any image from our digitized archives by pasting its image ID found in the "
220
- "<a href='https://sok.riksarkivet.se/bildvisning/R0002231_00005' target='_blank'>image viewer</a>. "
221
- "Press enter to submit."
222
- ),
223
- placeholder="R0002231_00005, R0002231_00006",
224
  )
225
 
226
  with gr.Column(variant="panel", elem_classes="pipeline-panel"):
@@ -284,27 +378,62 @@ with gr.Blocks() as submit:
284
  with gr.Row():
285
  run_button = gr.Button("Transcribe", variant="primary", scale=0, min_width=200)
286
 
287
@batch_image_gallery.upload(inputs=batch_image_gallery, outputs=[batch_image_gallery])
def validate_images(images):
    """
    Reject uploads that contain more than MAX_IMAGES images.

    An acceptable upload is passed through unchanged. Otherwise a warning
    is shown and the gallery value is reset to None.
    """
    if len(images) <= MAX_IMAGES:
        return images
    gr.Warning(f"Maximum images you can upload is set to: {MAX_IMAGES}")
    return gr.update(value=None)
296
-
297
- image_iiif_url.submit(
298
- fn=get_image_from_image_url, inputs=image_iiif_url, outputs=batch_image_gallery
299
- ).then(fn=lambda: "Swedish - Spreads", outputs=pipeline_dropdown)
300
-
301
- run_button.click(
302
- fn=run_htrflow,
303
- inputs=[custom_template_yaml, batch_image_gallery],
304
- outputs=[collection_submit_state, batch_image_gallery],
305
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
306
 
307
- examples.select(get_selected_example_image, None, batch_image_gallery)
308
  examples.select(get_selected_example_pipeline, None, pipeline_dropdown)
309
 
310
  edit_pipeline_button.click(lambda: Modal(visible=True), None, edit_pipeline_modal)
 
1
  import logging
2
  import os
3
+ import re
4
  import time
5
 
6
  import gradio as gr
7
  import spaces
8
+ import requests
9
  import yaml
10
  from gradio_modal import Modal
11
  from htrflow.pipeline.pipeline import Pipeline
 
151
  return [event.value["image"]["path"]]
152
 
153
 
154
def get_selected_example_pipeline(event: gr.SelectData) -> str:
    """
    Look up which pipeline the selected example image belongs to.

    Returns the name of the first pipeline in PIPELINES whose "examples"
    list contains the image's original file name. If no pipeline lists the
    image, the first pipeline in PIPELINES is returned instead.
    """
    matching_names = (
        name
        for name, details in PIPELINES.items()
        if event.value["image"]["orig_name"] in details.get("examples", [])
    )
    for name in matching_names:
        return name

    # No pipeline claims this image: fall back to the first configured one.
    return list(PIPELINES)[0]
163
 
164
 
165
def is_image_id(input_value: str) -> bool:
    """
    Check if `input_value` is a valid Riksarkivet image ID.

    An ID consists of eight word characters, an underscore and five more
    word characters (e.g. A0068688_00123). The whole string has to match:
    input that merely starts with an ID-like prefix, such as the file name
    "my_image_00001.jpg", is not an image ID.
    """
    return re.fullmatch(r"\w{8}_\w{5}", input_value) is not None
170
 
 
 
 
 
171
 
172
def get_images_from_iiif_manifest(manifest: dict, height: int = 1600):
    """
    Read all images from a v2/v3 IIIF manifest.

    Arguments:
        manifest: IIIF manifest
        height: Height in pixels to request for the returned images.

    Returns:
        A sorted list of unique IIIF image URLs, each requesting the full
        region, the given height, no rotation and the default quality, in
        the format the image had in the manifest.
    """
    # Hacky solution to get all images regardless of API version - treat
    # the manifest as a string and match everything that looks like an IIIF
    # image URL.
    manifest = str(manifest)
    pattern = r"(?P<identifier>https?://\S*)/(?P<region>\S*?)/(?P<size>\S*?)/(?P<rotation>!?\d*?)/(?P<quality>\S*?)\.(?P<format>jpg|tif|png|gif|jp2|pdf|webp)"

    # A set eliminates duplicates (e.g. thumbnails and fullsize images of the
    # same identifier). The IIIF size parameter is "w,h": the height goes
    # after the comma (",h"), whereas "h," would be interpreted as a width.
    images = {
        f"{match['identifier']}/full/,{height}/0/default.{match['format']}"
        for match in re.finditer(pattern, manifest)
    }
    return sorted(images)
191
+
192
+
193
def handle_url_input(input_value: str) -> list[str]:
    """
    Get images from a string input.

    Arguments:
        input_value: A string, which is any of the following:
            - A Riksarkivet image ID (e.g. A0068688_00123)
            - A IIIF manifest URI
            - An image URL

    Returns:
        A list of image URLs. The list is empty if the input is blank.
    """
    input_value = (input_value or "").strip()
    if not input_value:
        return []

    # Does it look like an image ID? => Fetch the image from Riksarkivet's IIIF.
    if is_image_id(input_value):
        return [f"https://lbiiif.riksarkivet.se/arkis!{input_value}/full/max/0/default.jpg"]

    # Does the URL return JSON? => Treat it like a IIIF manifest.
    try:
        manifest = requests.get(input_value, timeout=10).json()
    except (requests.RequestException, ValueError):
        # The request failed (missing scheme, connection error, timeout, ...)
        # or the response body is not JSON. requests.RequestException covers
        # all of requests' own errors; ValueError covers JSON decode errors
        # raised by older requests versions. Either way the input is not a
        # usable manifest, so treat it as an image URL.
        return [input_value]

    return get_images_from_iiif_manifest(manifest)
219
+
220
+
221
def select_uploaded_image(selected_images, event: gr.SelectData):
    """
    Select an uploaded image.

    Adds the clicked image from the uploaded image gallery to the selected
    image gallery unless it is already there, and returns at most the
    MAX_IMAGES most recently selected images.
    """
    if not selected_images:
        selected_images = []

    # Gallery values arrive as (path, caption) pairs.
    already_selected = {path for path, _ in selected_images}
    clicked_path = event.value["image"]["path"]
    if clicked_path not in already_selected:
        selected_images.append(clicked_path)

    return selected_images[-MAX_IMAGES:]
233
+
234
+
235
def deselect_selected_image(selected_images, event: gr.SelectData):
    """
    Deselect (remove) a previously selected image.

    Arguments:
        selected_images: Current value of the selected image gallery, a
            list of (path, caption) pairs, or None if the gallery is empty.
        event: Select event carrying the clicked image.

    Returns:
        A gallery update with the clicked image removed and the selection
        marker cleared.
    """
    clicked_path = event.value["image"]["path"]

    # Build a new list instead of calling `remove` while looping over the
    # same list: removing during iteration makes the loop skip the element
    # that follows each removed one.
    remaining_images = [
        image for image in (selected_images or []) if image[0] != clicked_path
    ]

    # When an image is removed from the gallery, the selected index is not updated, which
    # leaves a blue box around the new image at the deleted image's index. Example:
    # you have three images, img0, img1 and img2 and you click img1 to delete it:
    #
    #   img0 [img1] img2
    #
    # When img1 is removed, the new gallery looks like this:
    #
    #   img0 [img2]
    #
    # Which means that img2 is "selected", and if you try to delete (i.e. click/select) it,
    # nothing will happen. To avoid it, we need to update/remove the selected index too. For
    # some reason it doesn't work to set the selected index to None, but setting it to a
    # sufficiently large index achieves the wanted effect.
    return gr.update(value=remaining_images, selected_index=1000)
260
+
261
+
262
def open_image_selector_modal(uploaded_images):
    """
    Show the image selector modal only when the user has to pick a subset,
    i.e. when more than MAX_IMAGES images have been uploaded.
    """
    if uploaded_images is None:
        return Modal(visible=False)
    return Modal(visible=len(uploaded_images) > MAX_IMAGES)
268
+
269
+
270
def move_uploaded_to_selected_if_possible(uploaded_images):
    """
    Pass the uploaded images through as the selection when they fit within
    MAX_IMAGES (or when there are none); otherwise return an empty selection.
    """
    exceeds_limit = uploaded_images is not None and len(uploaded_images) > MAX_IMAGES
    return [] if exceeds_limit else uploaded_images
277
 
278
 
279
  with gr.Blocks() as submit:
 
312
  # with Modal(visible=False) as edit_pipeline_modal:
313
 
314
  image_iiif_url = gr.Textbox(
315
+ label="From the web",
316
+ info="Paste an image URL, IIIF manifest or Riksarkivet image ID and press enter to submit.",
317
+ placeholder="www.example.com/image.jpg",
 
 
 
 
318
  )
319
 
320
  with gr.Column(variant="panel", elem_classes="pipeline-panel"):
 
378
  with gr.Row():
379
  run_button = gr.Button("Transcribe", variant="primary", scale=0, min_width=200)
380
 
381
+ with Modal(visible=False, allow_user_close=False) as image_selector_modal:
382
+ gr.Markdown("# Select images")
383
+ gr.Markdown(f"The number of images exceeds the app's limit of {MAX_IMAGES} images. Please select up to {MAX_IMAGES} images to continue.")
384
+
385
+ uploaded_images_gallery = gr.Gallery(
386
+ file_types=["image"],
387
+ object_fit="scale-down",
388
+ allow_preview=False,
389
+ interactive=False,
390
+ columns=6,
391
+ label="Uploaded images",
392
+ height="100%"
393
+ )
394
+ selected_images_gallery = gr.Gallery(
395
+ file_types=["image"],
396
+ object_fit="scale-down",
397
+ allow_preview=False,
398
+ interactive=False,
399
+ columns=MAX_IMAGES,
400
+ height="100%",
401
+ label="Selected images",
402
+ visible=False
403
+ )
404
+
405
+ with gr.Row():
406
+ cancel_button = gr.Button("Cancel", variant="secondary")
407
+ ok_button = gr.Button("Continue", variant="primary")
408
+
409
+ # All images, regardless of source, are put in `uploaded_images_gallery`
410
+ batch_image_gallery.upload(lambda images: images, batch_image_gallery, uploaded_images_gallery)
411
+ image_iiif_url.submit(handle_url_input, image_iiif_url, uploaded_images_gallery)
412
+ examples.select(get_selected_example_image, None, batch_image_gallery).then(get_selected_example_image, None, uploaded_images_gallery)
413
+
414
+ # When `uploaded_images_gallery` changes, check if we need to open the modal to let the
415
+ # user select a subset of the uploaded images.
416
+ uploaded_images_gallery.change(open_image_selector_modal, uploaded_images_gallery, image_selector_modal)
417
+ uploaded_images_gallery.change(move_uploaded_to_selected_if_possible, uploaded_images_gallery, selected_images_gallery)
418
+ uploaded_images_gallery.change(move_uploaded_to_selected_if_possible, uploaded_images_gallery, batch_image_gallery)
419
+
420
+ # Image selector modal logic
421
+ uploaded_images_gallery.select(select_uploaded_image, selected_images_gallery, selected_images_gallery)
422
+ selected_images_gallery.select(deselect_selected_image, selected_images_gallery, selected_images_gallery)
423
+ selected_images_gallery.change(lambda images: gr.update(visible=bool(images)), selected_images_gallery, selected_images_gallery)
424
+ selected_images_gallery.change(lambda images: gr.update(interactive=bool(images)), selected_images_gallery, ok_button)
425
+
426
+ # Image selector modal buttons
427
+ cancel_button.click(lambda: Modal(visible=False), None, image_selector_modal)
428
+ cancel_button.click(lambda: gr.update(value=None), None, selected_images_gallery)
429
+ cancel_button.click(lambda: gr.update(value=None), None, batch_image_gallery)
430
+ cancel_button.click(lambda: gr.update(value=None), None, uploaded_images_gallery)
431
+ ok_button.click(lambda: Modal(visible=False), None, image_selector_modal)
432
+ ok_button.click(lambda images: images, selected_images_gallery, batch_image_gallery)
433
+
434
+ # Run HTRflow on selected images
435
+ run_button.click(fn=run_htrflow, inputs=[custom_template_yaml, selected_images_gallery], outputs=[collection_submit_state, batch_image_gallery])
436
 
 
437
  examples.select(get_selected_example_pipeline, None, pipeline_dropdown)
438
 
439
  edit_pipeline_button.click(lambda: Modal(visible=True), None, edit_pipeline_modal)