viklofg committed on
Commit
71a6d4c
·
1 Parent(s): 5862706

Add support for IIIF manifests

Browse files

- URL input can now take a IIIF manifest link
- Added new modal to let the user select which images they want
to transcribe, whenever the number of uploaded images exceeds the
app's limit
- Add joint handling of "too many images" to both IIIF-sourced
images and uploaded images

Files changed (1) hide show
  1. app/tabs/submit.py +174 -27
app/tabs/submit.py CHANGED
@@ -1,9 +1,11 @@
1
  import logging
2
  import os
 
3
  import time
4
 
5
  import gradio as gr
6
  import spaces
 
7
  import yaml
8
  from gradio_modal import Modal
9
  from htrflow.pipeline.pipeline import Pipeline
@@ -109,7 +111,7 @@ def run_htrflow(custom_template_yaml, batch_image_gallery, progress=gr.Progress(
109
  time.sleep(2)
110
  gr.Info("Completed succesfully ✨")
111
 
112
- yield collection, gr.skip()
113
 
114
 
115
  def get_pipeline_description(pipeline: str) -> str:
@@ -178,6 +180,120 @@ def get_image_from_image_url(input_value):
178
  ]
179
 
180
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
181
  with gr.Blocks() as submit:
182
  with gr.Row():
183
  with gr.Column():
@@ -214,13 +330,9 @@ with gr.Blocks() as submit:
214
  # with Modal(visible=False) as edit_pipeline_modal:
215
 
216
  image_iiif_url = gr.Textbox(
217
- label="Upload by image ID",
218
- info=(
219
- "Use any image from our digitized archives by pasting its image ID found in the "
220
- "<a href='https://sok.riksarkivet.se/bildvisning/R0002231_00005' target='_blank'>image viewer</a>. "
221
- "Press enter to submit."
222
- ),
223
- placeholder="R0002231_00005, R0002231_00006",
224
  )
225
 
226
  with gr.Column(variant="panel", elem_classes="pipeline-panel"):
@@ -284,27 +396,62 @@ with gr.Blocks() as submit:
284
  with gr.Row():
285
  run_button = gr.Button("Transcribe", variant="primary", scale=0, min_width=200)
286
 
287
- @batch_image_gallery.upload(
288
- inputs=batch_image_gallery,
289
- outputs=[batch_image_gallery],
290
- )
291
- def validate_images(images):
292
- if len(images) > MAX_IMAGES:
293
- gr.Warning(f"Maximum images you can upload is set to: {MAX_IMAGES}")
294
- return gr.update(value=None)
295
- return images
296
-
297
- image_iiif_url.submit(
298
- fn=get_image_from_image_url, inputs=image_iiif_url, outputs=batch_image_gallery
299
- ).then(fn=lambda: "Swedish - Spreads", outputs=pipeline_dropdown)
300
-
301
- run_button.click(
302
- fn=run_htrflow,
303
- inputs=[custom_template_yaml, batch_image_gallery],
304
- outputs=[collection_submit_state, batch_image_gallery],
305
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
306
 
307
  examples.select(get_selected_example_image, None, batch_image_gallery)
 
308
  examples.select(get_selected_example_pipeline, None, pipeline_dropdown)
309
 
310
  edit_pipeline_button.click(lambda: Modal(visible=True), None, edit_pipeline_modal)
 
1
  import logging
2
  import os
3
+ import re
4
  import time
5
 
6
  import gradio as gr
7
  import spaces
8
+ import requests
9
  import yaml
10
  from gradio_modal import Modal
11
  from htrflow.pipeline.pipeline import Pipeline
 
111
  time.sleep(2)
112
  gr.Info("Completed succesfully ✨")
113
 
114
+ yield collection
115
 
116
 
117
  def get_pipeline_description(pipeline: str) -> str:
 
180
  ]
181
 
182
 
183
def is_image_id(input_value: str) -> bool:
    """
    Check if `input_value` is a valid Riksarkivet image ID.

    An image ID is exactly eight word characters, an underscore and five
    more word characters (e.g. A0068688_00123). The whole string must have
    that shape: input that merely starts with an ID (a comma-separated list
    of IDs, an ID followed by extra characters) is rejected, so that it is
    never spliced verbatim into an image URL.

    Arguments:
        input_value: The string to check.

    Returns:
        True if the entire string is shaped like an image ID, else False.
    """
    return re.fullmatch(r"\w{8}_\w{5}", input_value) is not None
188
+
189
+
190
def get_images_from_iiif_manifest(manifest: dict, height: int = 1600) -> list[str]:
    """
    Read all images from a v2/v3 IIIF manifest.

    Arguments:
        manifest: IIIF manifest
        height: Height (in pixels) requested for the returned images.

    Returns:
        A list of IIIF image URLs, one per distinct image, in the order in
        which the images first appear in the manifest.
    """
    # Hacky solution to get all images regardless of API version - treat
    # the manifest as a string and match everything that looks like an IIIF
    # image URL.
    pattern = r"(?P<identifier>https?://\S*)/(?P<region>\S*?)/(?P<size>\S*?)/(?P<rotation>!?\d*?)/(?P<quality>\S*?)\.(?P<format>jpg|tif|png|gif|jp2|pdf|webp)"

    # A dict is used as an ordered set: it eliminates duplicates (e.g.
    # thumbnails and fullsize images of the same page) while keeping the
    # manifest's own page order, which sorting the URLs would not
    # (".../page10/..." sorts before ".../page2/...").
    images = {}
    for match in re.finditer(pattern, str(manifest)):
        # In the IIIF size parameter ",h" constrains the height, whereas
        # "w," constrains the width.
        url = f"{match['identifier']}/full/,{height}/0/default.{match['format']}"
        images[url] = None
    return list(images)
209
+
210
+
211
def handle_url_input(input_value: str) -> list[str]:
    """
    Get images from a string input.

    Arguments:
        input_value: A string, which is any of the following:
            - A Riksarkivet image ID (e.g. A0068688_00123)
            - A IIIF manifest URI
            - An image URL
            Surrounding whitespace is ignored.

    Returns:
        A list of image URLs. Empty if the input is blank.
    """
    input_value = input_value.strip()
    if not input_value:
        return []

    # Does it look like an image ID? => Fetch the image from Riksarkivet's IIIF.
    if is_image_id(input_value):
        return [f"https://lbiiif.riksarkivet.se/arkis!{input_value}/full/max/0/default.jpg"]

    # Does the URL return JSON? => Treat it like a IIIF manifest.
    try:
        # A timeout keeps an unresponsive server from blocking the handler
        # indefinitely.
        response = requests.get(input_value, timeout=30)
        response.raise_for_status()
        manifest = response.json()
    except (requests.RequestException, ValueError):
        # Covers connection problems, malformed or scheme-less URLs, HTTP
        # error statuses and bodies that are not JSON.
        # Else treat it as an image URL.
        return [input_value]

    return get_images_from_iiif_manifest(manifest)
237
+
238
+
239
def select_uploaded_image(selected_images, event: gr.SelectData):
    """
    Add the clicked uploaded image to the selection.

    The path of the clicked image is appended to `selected_images` unless
    an entry with the same path is already there. Only the last MAX_IMAGES
    entries are returned.
    """
    chosen = selected_images or []
    clicked_path = event.value["image"]["path"]

    # Gallery entries arrive as (path, caption) pairs.
    known_paths = [path for path, _ in chosen]
    if clicked_path in known_paths:
        return chosen[-MAX_IMAGES:]

    chosen.append(clicked_path)
    return chosen[-MAX_IMAGES:]
251
+
252
+
253
def deselect_selected_image(selected_images, event: gr.SelectData):
    """
    Deselect (remove) a previously selected image.

    Arguments:
        selected_images: Current content of the selected-images gallery, a
            list of (path, caption) pairs, or None when empty.
        event: The gallery select event; the path of the clicked image is
            read from `event.value["image"]["path"]`.

    Returns:
        A gallery update holding the remaining images and a reset selected
        index.
    """
    clicked_path = event.value["image"]["path"]

    # Build a new list instead of removing items from the list that is being
    # iterated over, which would skip the element following each removal.
    remaining = [image for image in (selected_images or []) if image[0] != clicked_path]

    # When an image is removed from the gallery, the selected index is not updated, which
    # leaves a blue box around the new image at the deleted image's index. Example:
    # you have three images, img0, img1 and img2 and you click img1 to delete it:
    #
    #   img0 [img1] img2
    #
    # When img1 is removed, the new gallery looks like this:
    #
    #   img0 [img2]
    #
    # Which means that img2 is "selected", and if you try to delete (i.e. click/select) it,
    # nothing will happen. To avoid it, we need to update/remove the selected index too. For
    # some reason it doesn't work to set the selected index to None, but setting it to a
    # sufficiently large index achieves the wanted effect.
    return gr.update(value=remaining, selected_index=1000)
278
+
279
+
280
def open_image_selector_modal(uploaded_images):
    """
    Show the image selector modal only when there are more uploaded images
    than MAX_IMAGES; otherwise keep it hidden.
    """
    if uploaded_images is None:
        return Modal(visible=False)
    too_many = len(uploaded_images) > MAX_IMAGES
    return Modal(visible=too_many)
286
+
287
+
288
def move_uploaded_to_selected_if_possible(uploaded_images):
    """
    Pass the uploaded images through as the selection when they fit within
    MAX_IMAGES; otherwise (or when there are none) return an empty list.
    """
    if uploaded_images is None or len(uploaded_images) > MAX_IMAGES:
        return []
    return uploaded_images
295
+
296
+
297
  with gr.Blocks() as submit:
298
  with gr.Row():
299
  with gr.Column():
 
330
  # with Modal(visible=False) as edit_pipeline_modal:
331
 
332
  image_iiif_url = gr.Textbox(
333
+ label="From the web",
334
+ info="Paste an image URL, IIIF manifest or Riksarkivet image ID and press enter to submit.",
335
+ placeholder="www.example.com/image.jpg",
 
 
 
 
336
  )
337
 
338
  with gr.Column(variant="panel", elem_classes="pipeline-panel"):
 
396
  with gr.Row():
397
  run_button = gr.Button("Transcribe", variant="primary", scale=0, min_width=200)
398
 
399
+ with Modal(visible=False, allow_user_close=False) as image_selector_modal:
400
+ gr.Markdown("# Select images")
401
+ gr.Markdown(f"The number of images exceeds the app's limit of {MAX_IMAGES} images. Please select up to {MAX_IMAGES} images to continue.")
402
+
403
+ uploaded_images_gallery = gr.Gallery(
404
+ file_types=["image"],
405
+ object_fit="scale-down",
406
+ allow_preview=False,
407
+ interactive=False,
408
+ columns=6,
409
+ label="Uploaded images",
410
+ height="100%"
411
+ )
412
+ selected_images_gallery = gr.Gallery(
413
+ file_types=["image"],
414
+ object_fit="scale-down",
415
+ allow_preview=False,
416
+ interactive=False,
417
+ columns=MAX_IMAGES,
418
+ height="100%",
419
+ label="Selected images",
420
+ visible=False
421
+ )
422
+
423
+ with gr.Row():
424
+ cancel_button = gr.Button("Cancel", variant="secondary")
425
+ ok_button = gr.Button("Continue", variant="primary")
426
+
427
+ # All incoming images (uploaded or URL) are put in `uploaded_images_gallery`
428
+ batch_image_gallery.upload(lambda images: images, batch_image_gallery, uploaded_images_gallery)
429
+ image_iiif_url.submit(handle_url_input, image_iiif_url, uploaded_images_gallery).then(fn=lambda: "Swedish - Spreads", outputs=pipeline_dropdown)
430
+ image_iiif_url.submit(open_image_selector_modal, uploaded_images_gallery, image_selector_modal)
431
+
432
+ # Move uploaded images to `selected_images`, opening the modal if needed
433
+ uploaded_images_gallery.change(open_image_selector_modal, uploaded_images_gallery, image_selector_modal)
434
+ uploaded_images_gallery.change(move_uploaded_to_selected_if_possible, uploaded_images_gallery, selected_images_gallery)
435
+
436
+ # Image selector modal logic
437
+ uploaded_images_gallery.select(select_uploaded_image, selected_images_gallery, selected_images_gallery)
438
+ selected_images_gallery.select(deselect_selected_image, selected_images_gallery, selected_images_gallery)
439
+ selected_images_gallery.change(lambda images: gr.update(visible=bool(images)), selected_images_gallery, selected_images_gallery)
440
+ selected_images_gallery.change(lambda images: gr.update(interactive=bool(images)), selected_images_gallery, ok_button)
441
+
442
+ # Image selector modal buttons
443
+ cancel_button.click(lambda: Modal(visible=False), None, image_selector_modal)
444
+ cancel_button.click(lambda: [], None, selected_images_gallery)
445
+ cancel_button.click(lambda: gr.update(value=None), None, batch_image_gallery)
446
+ ok_button.click(lambda: Modal(visible=False), None, image_selector_modal)
447
+ ok_button.click(lambda x: x, selected_images_gallery, batch_image_gallery)
448
+
449
+ # Run HTRflow on selected images
450
+ run_button.click(fn=run_htrflow, inputs=[custom_template_yaml, selected_images_gallery], outputs=collection_submit_state)
451
+ run_button.click(lambda: [], None, )
452
 
453
  examples.select(get_selected_example_image, None, batch_image_gallery)
454
+ examples.select(get_selected_example_image, None, uploaded_images_gallery)
455
  examples.select(get_selected_example_pipeline, None, pipeline_dropdown)
456
 
457
  edit_pipeline_button.click(lambda: Modal(visible=True), None, edit_pipeline_modal)