viklofg committed on
Commit
cb30282
·
1 Parent(s): c34a169

Revert "Add support for IIIF manifests"

Browse files

This reverts commit 71a6d4ceebdf711c422fa57d420c9c8861944e5d.

Files changed (1) hide show
  1. app/tabs/submit.py +43 -172
app/tabs/submit.py CHANGED
@@ -1,11 +1,9 @@
1
  import logging
2
  import os
3
- import re
4
  import time
5
 
6
  import gradio as gr
7
  import spaces
8
- import requests
9
  import yaml
10
  from gradio_modal import Modal
11
  from htrflow.pipeline.pipeline import Pipeline
@@ -151,129 +149,33 @@ def get_selected_example_image(event: gr.SelectData) -> str:
151
  return [event.value["image"]["path"]]
152
 
153
 
154
- def get_selected_example_pipeline(event: gr.SelectData) -> str:
155
  """
156
- Get the name of the pipeline that corresponds to the selected image,
157
- return the first pipeline if no image is selected.
158
  """
159
  for name, details in PIPELINES.items():
160
  if event.value["image"]["orig_name"] in details.get("examples", []):
161
  return name
162
- return list(PIPELINES)[0]
163
 
164
 
165
- def is_image_id(input_value: str) -> bool:
166
  """
167
- Check if `input_value` is a valid Riksarkivet image ID
 
 
168
  """
169
- return bool(re.match(r"\w{8}_\w{5}", input_value))
170
 
 
 
 
 
171
 
172
- def get_images_from_iiif_manifest(manifest: dict, height: int=1600):
173
- """
174
- Read all images from a v2/v3 IIIF manifest.
175
-
176
- Arguments:
177
- manifest: IIIF manifest
178
- height: Max height of returned images.
179
- """
180
- # Hacky solution to get all images regardless of API version - treat
181
- # the manifest as a string and match everything that looks like an IIIF
182
- # image URL.
183
- manifest = str(manifest)
184
- pattern = r"(?P<identifier>https?://\S*)/(?P<region>\S*?)/(?P<size>\S*?)/(?P<rotation>!?\d*?)/(?P<quality>\S*?)\.(?P<format>jpg|tif|png|gif|jp2|pdf|webp)"
185
-
186
- images = set() # create a set to eliminate duplicates (e.g. thumbnails and fullsize images)
187
- for match in re.findall(pattern, manifest):
188
- identifier, _, _, _, _, format_ = match
189
- images.add(f"{identifier}/full/{height},/0/default.{format_}")
190
- return sorted(images)
191
-
192
-
193
- def handle_url_input(input_value: str) -> list[str]:
194
- """
195
- Get images from a string input.
196
-
197
- Arguments:
198
- input_value: A string, which is any of the following:
199
- - A Riksarkivet image ID (e.g. A0068688_00123)
200
- - A IIIF manifest URI
201
- - An image URL
202
-
203
- Returns:
204
- A list of image URLs.
205
- """
206
- # Does it look like an image ID? => Fetch the image from Riksarkivet's IIIF.
207
- if is_image_id(input_value):
208
- return [f"https://lbiiif.riksarkivet.se/arkis!{input_value}/full/max/0/default.jpg"]
209
-
210
- # Does the URL return JSON? => Treat it like a IIIF manifest.
211
- try:
212
- manifest = requests.get(input_value, timeout=10).json()
213
- return get_images_from_iiif_manifest(manifest)
214
- except (requests.HTTPError, requests.JSONDecodeError):
215
- pass
216
-
217
- # Else treat it as an image URL.
218
- return [input_value]
219
-
220
-
221
- def select_uploaded_image(selected_images, event: gr.SelectData):
222
- """
223
- Select an uploaded image.
224
-
225
- Move the selected (clicked) image from the uploaded image gallery
226
- to the selected image gallery.
227
- """
228
- selected_images = selected_images or []
229
- paths = [path for path, _ in selected_images]
230
- if event.value["image"]["path"] not in paths:
231
- selected_images.append(event.value["image"]["path"])
232
- return selected_images[-MAX_IMAGES:]
233
-
234
-
235
- def deselect_selected_image(selected_images, event: gr.SelectData):
236
- """
237
- Deselect (remove) a previously selected image.
238
- """
239
- selected_images = selected_images or []
240
- for image in selected_images:
241
- path, _ = image
242
- if path == event.value["image"]["path"]:
243
- selected_images.remove(image)
244
-
245
- # When an image is removed from the gallery, the selected index is not updated, which
246
- # leaves a blue box around the new image at the deleted image's index. Example:
247
- # you have three images, img0, img1 and img2 and you click img1 to delete it:
248
- #
249
- # img0 [img1] img2
250
- #
251
- # When img1 is removed, the new gallery looks like this:
252
- #
253
- # img0 [img2]
254
- #
255
- # Which means that img2 is "selected", and if you try to delete (i.e. click/select) it,
256
- # nothing will happen. To avoid it, we need to update/remove the selected index too. For
257
- # some reason it doesn't work to set the selected index to None, but setting it to a
258
- # sufficiently large index achieves the wanted effect.
259
- return gr.update(value=selected_images, selected_index=1000)
260
-
261
-
262
- def open_image_selector_modal(uploaded_images):
263
- """
264
- Open image selector modal if needed (i.e. len(uploaded_images) > MAX_IMAGES)
265
- """
266
- visible = uploaded_images is not None and len(uploaded_images) > MAX_IMAGES
267
- return Modal(visible=visible)
268
-
269
-
270
- def move_uploaded_to_selected_if_possible(uploaded_images):
271
- """
272
- Select all uploaded images if len(uploaded_images) <= MAX_IMAGES
273
- """
274
- if uploaded_images is None or len(uploaded_images) <= MAX_IMAGES:
275
- return uploaded_images
276
- return []
277
 
278
 
279
  with gr.Blocks() as submit:
@@ -312,9 +214,13 @@ with gr.Blocks() as submit:
312
  # with Modal(visible=False) as edit_pipeline_modal:
313
 
314
  image_iiif_url = gr.Textbox(
315
- label="From the web",
316
- info="Paste an image URL, IIIF manifest or Riksarkivet image ID and press enter to submit.",
317
- placeholder="www.example.com/image.jpg",
 
 
 
 
318
  )
319
 
320
  with gr.Column(variant="panel", elem_classes="pipeline-panel"):
@@ -378,62 +284,27 @@ with gr.Blocks() as submit:
378
  with gr.Row():
379
  run_button = gr.Button("Transcribe", variant="primary", scale=0, min_width=200)
380
 
381
- with Modal(visible=False, allow_user_close=False) as image_selector_modal:
382
- gr.Markdown("# Select images")
383
- gr.Markdown(f"The number of images exceeds the app's limit of {MAX_IMAGES} images. Please select up to {MAX_IMAGES} images to continue.")
384
-
385
- uploaded_images_gallery = gr.Gallery(
386
- file_types=["image"],
387
- object_fit="scale-down",
388
- allow_preview=False,
389
- interactive=False,
390
- columns=6,
391
- label="Uploaded images",
392
- height="100%"
393
- )
394
- selected_images_gallery = gr.Gallery(
395
- file_types=["image"],
396
- object_fit="scale-down",
397
- allow_preview=False,
398
- interactive=False,
399
- columns=MAX_IMAGES,
400
- height="100%",
401
- label="Selected images",
402
- visible=False
403
- )
404
-
405
- with gr.Row():
406
- cancel_button = gr.Button("Cancel", variant="secondary")
407
- ok_button = gr.Button("Continue", variant="primary")
408
-
409
- # All images, regardless of source, are put in `uploaded_images_gallery`
410
- batch_image_gallery.upload(lambda images: images, batch_image_gallery, uploaded_images_gallery)
411
- image_iiif_url.submit(handle_url_input, image_iiif_url, uploaded_images_gallery)
412
- examples.select(get_selected_example_image, None, batch_image_gallery).then(get_selected_example_image, None, uploaded_images_gallery)
413
-
414
- # When `uploaded_images_gallery` changes, check if we need to open the modal to let the
415
- # user select a subset of the uploaded images.
416
- uploaded_images_gallery.change(open_image_selector_modal, uploaded_images_gallery, image_selector_modal)
417
- uploaded_images_gallery.change(move_uploaded_to_selected_if_possible, uploaded_images_gallery, selected_images_gallery)
418
- uploaded_images_gallery.change(move_uploaded_to_selected_if_possible, uploaded_images_gallery, batch_image_gallery)
419
-
420
- # Image selector modal logic
421
- uploaded_images_gallery.select(select_uploaded_image, selected_images_gallery, selected_images_gallery)
422
- selected_images_gallery.select(deselect_selected_image, selected_images_gallery, selected_images_gallery)
423
- selected_images_gallery.change(lambda images: gr.update(visible=bool(images)), selected_images_gallery, selected_images_gallery)
424
- selected_images_gallery.change(lambda images: gr.update(interactive=bool(images)), selected_images_gallery, ok_button)
425
-
426
- # Image selector modal buttons
427
- cancel_button.click(lambda: Modal(visible=False), None, image_selector_modal)
428
- cancel_button.click(lambda: gr.update(value=None), None, selected_images_gallery)
429
- cancel_button.click(lambda: gr.update(value=None), None, batch_image_gallery)
430
- cancel_button.click(lambda: gr.update(value=None), None, uploaded_images_gallery)
431
- ok_button.click(lambda: Modal(visible=False), None, image_selector_modal)
432
- ok_button.click(lambda images: images, selected_images_gallery, batch_image_gallery)
433
-
434
- # Run HTRflow on selected images
435
- run_button.click(fn=run_htrflow, inputs=[custom_template_yaml, selected_images_gallery], outputs=[collection_submit_state, batch_image_gallery])
436
 
 
437
  examples.select(get_selected_example_pipeline, None, pipeline_dropdown)
438
 
439
  edit_pipeline_button.click(lambda: Modal(visible=True), None, edit_pipeline_modal)
 
1
  import logging
2
  import os
 
3
  import time
4
 
5
  import gradio as gr
6
  import spaces
 
7
  import yaml
8
  from gradio_modal import Modal
9
  from htrflow.pipeline.pipeline import Pipeline
 
149
  return [event.value["image"]["path"]]
150
 
151
 
152
def get_selected_example_pipeline(event: gr.SelectData) -> str | None:
    """
    Look up the pipeline that lists the selected example image.

    Arguments:
        event: Selection event; the image's original file name is read
            from `event.value["image"]["orig_name"]`.

    Returns:
        The name of the first entry in `PIPELINES` whose "examples" list
        contains the selected image's original file name, or None if no
        entry lists it.
    """
    matching_names = (
        pipeline_name
        for pipeline_name, config in PIPELINES.items()
        if event.value["image"]["orig_name"] in config.get("examples", [])
    )
    # First match wins; None signals that no pipeline claims this example.
    return next(matching_names, None)
 
159
 
160
 
161
def get_image_from_image_url(input_value: str) -> list[str]:
    """
    Turn the text box input into a list of image URLs.

    Arguments:
        input_value: Either an image URL (anything starting with "http"),
            which is returned as-is apart from surrounding whitespace, or
            one or more image IDs separated by commas
            (e.g. "R0002231_00005, R0002231_00006"), each of which is
            expanded to a full-size IIIF image URL on lbiiif.riksarkivet.se.

    Returns:
        A list of image URLs. The list is empty if the input contains no
        URL and no non-blank image ID.
    """
    # Tolerate a missing value and stray whitespace around pasted input, so
    # that " https://..." is still recognised as a URL.
    input_value = (input_value or "").strip()

    if input_value.startswith("http"):
        return [input_value]

    # Skip blank segments (empty input, trailing or doubled commas); they
    # would otherwise yield URLs with an empty image ID.
    image_ids = (item.strip() for item in input_value.split(","))
    return [
        f"https://lbiiif.riksarkivet.se/arkis!{image_id}/full/max/0/default.jpg"
        for image_id in image_ids
        if image_id
    ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
179
 
180
 
181
  with gr.Blocks() as submit:
 
214
  # with Modal(visible=False) as edit_pipeline_modal:
215
 
216
  image_iiif_url = gr.Textbox(
217
+ label="Upload by image ID",
218
+ info=(
219
+ "Use any image from our digitized archives by pasting its image ID found in the "
220
+ "<a href='https://sok.riksarkivet.se/bildvisning/R0002231_00005' target='_blank'>image viewer</a>. "
221
+ "Press enter to submit."
222
+ ),
223
+ placeholder="R0002231_00005, R0002231_00006",
224
  )
225
 
226
  with gr.Column(variant="panel", elem_classes="pipeline-panel"):
 
284
  with gr.Row():
285
  run_button = gr.Button("Transcribe", variant="primary", scale=0, min_width=200)
286
 
287
+ @batch_image_gallery.upload(
288
+ inputs=batch_image_gallery,
289
+ outputs=[batch_image_gallery],
290
+ )
291
+ def validate_images(images):
292
+ if len(images) > MAX_IMAGES:
293
+ gr.Warning(f"Maximum images you can upload is set to: {MAX_IMAGES}")
294
+ return gr.update(value=None)
295
+ return images
296
+
297
+ image_iiif_url.submit(
298
+ fn=get_image_from_image_url, inputs=image_iiif_url, outputs=batch_image_gallery
299
+ ).then(fn=lambda: "Swedish - Spreads", outputs=pipeline_dropdown)
300
+
301
+ run_button.click(
302
+ fn=run_htrflow,
303
+ inputs=[custom_template_yaml, batch_image_gallery],
304
+ outputs=[collection_submit_state, batch_image_gallery],
305
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
306
 
307
+ examples.select(get_selected_example_image, None, batch_image_gallery)
308
  examples.select(get_selected_example_pipeline, None, pipeline_dropdown)
309
 
310
  edit_pipeline_button.click(lambda: Modal(visible=True), None, edit_pipeline_modal)