Spaces:

wjbmattingly
/

medieval-htr

Sleeping

App Files Files Community

wjbmattingly committed on Aug 9, 2024

Commit

0c1fe20

verified ·

1 Parent(s): ea32023

Update app.py

Browse files

Files changed (1) hide show

app.py +35 -28

app.py CHANGED Viewed

@@ -1,7 +1,6 @@
 import gradio as gr
 from transformers import TrOCRProcessor, VisionEncoderDecoderModel
-import requests
-from PIL import Image
 # Dictionary of model names and their corresponding HuggingFace model IDs
 MODEL_OPTIONS = {
@@ -18,49 +17,57 @@ MODEL_OPTIONS = {
     "Medieval Print": "medieval-data/trocr-medieval-print"
 }
-# Load image examples
-urls = [
-    'https://huggingface.co/medieval-data/trocr-medieval-base/resolve/main/images/caroline-1.png'
-]
-for idx, url in enumerate(urls):
-    image = Image.open(requests.get(url, stream=True).raw)
-    image.save(f"image_{idx}.png")
 def load_model(model_name):
-    model_id = MODEL_OPTIONS[model_name]
-    processor = TrOCRProcessor.from_pretrained(model_id)
-    model = VisionEncoderDecoderModel.from_pretrained(model_id)
-    return processor, model
 def process_image(image, model_name):
     processor, model = load_model(model_name)
-    # prepare image
     pixel_values = processor(image, return_tensors="pt").pixel_values
-    # generate (no beam search)
-    generated_ids = model.generate(pixel_values)
-    # decode
     generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
     return generated_text
-title = "Interactive demo: TrOCR Model Switcher"
-description = "Demo for the Medieval TrOCR HTR Models."
 iface = gr.Interface(
     fn=process_image,
     inputs=[
-        gr.Image(type="pil"),
-        gr.Dropdown(choices=list(MODEL_OPTIONS.keys()), label="Select Model")
     ],
-    outputs=gr.Textbox(),
-    title=title,
-    description=description,
     examples=[
-        ["image_0.png", "Medieval Latin Caroline"]
     ]
 )
-iface.launch(debug=True)

 import gradio as gr
 from transformers import TrOCRProcessor, VisionEncoderDecoderModel
+import torch
 # Dictionary of model names and their corresponding HuggingFace model IDs
 MODEL_OPTIONS = {
     "Medieval Print": "medieval-data/trocr-medieval-print"
 }
+# Single-entry cache: the most recently loaded model, its processor, and the
+# MODEL_OPTIONS key they were loaded for. All three stay None until the first
+# successful load_model() call.
+current_model = None
+current_processor = None
+current_model_name = None
 def load_model(model_name):
+    """Return the ``(processor, model)`` pair for ``model_name``, loading on demand.
+
+    The pair is cached in module globals, so repeated calls with the same
+    name reuse the already-loaded objects; a different name replaces the
+    cached pair.
+
+    Args:
+        model_name: A key of ``MODEL_OPTIONS``.
+
+    Returns:
+        A ``(TrOCRProcessor, VisionEncoderDecoderModel)`` tuple. The model is
+        moved to CUDA when ``torch.cuda.is_available()`` is true.
+
+    Raises:
+        KeyError: If ``model_name`` is not a key of ``MODEL_OPTIONS``.
+    """
+    global current_model, current_processor, current_model_name
+    if model_name != current_model_name:
+        model_id = MODEL_OPTIONS[model_name]
+        # Load into locals first: if any step below raises, the cache must
+        # not be left holding a processor and a model from different
+        # checkpoints under a stale name.
+        processor = TrOCRProcessor.from_pretrained(model_id)
+        model = VisionEncoderDecoderModel.from_pretrained(model_id)
+        # Move model to GPU if available
+        if torch.cuda.is_available():
+            model = model.to('cuda')
+        # Everything succeeded; publish the new pair together.
+        current_processor = processor
+        current_model = model
+        current_model_name = model_name
+    return current_processor, current_model
 def process_image(image, model_name):
+    """Transcribe the text in ``image`` with the model selected by ``model_name``.
+
+    Args:
+        image: The input image (the interface supplies a PIL image), or
+            ``None`` when nothing was provided.
+        model_name: A key of ``MODEL_OPTIONS``; forwarded to ``load_model``.
+
+    Returns:
+        The decoded text of the first (only) generated sequence.
+
+    Raises:
+        gr.Error: If ``image`` is ``None``.
+    """
+    # An empty image input arrives as None; fail with a readable message
+    # instead of an opaque error from inside the processor.
+    if image is None:
+        raise gr.Error("Please upload an image before submitting.")
     processor, model = load_model(model_name)
+    # Prepare image
     pixel_values = processor(image, return_tensors="pt").pixel_values
+    # Put the input on whatever device the model lives on (CPU or GPU)
+    pixel_values = pixel_values.to(model.device)
+    # Generate (no beam search)
+    with torch.no_grad():
+        generated_ids = model.generate(pixel_values)
+    # Decode
     generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
     return generated_text
+# Build the Gradio UI: an image upload plus a model picker in, one textbox out.
 iface = gr.Interface(
     fn=process_image,
     inputs=[
+        gr.Image(type="pil", label="Input Image"),
+        # NOTE(review): the default should be one of the MODEL_OPTIONS keys;
+        # the full dict is not visible here, so confirm "Medieval Base" exists.
+        gr.Dropdown(
+            choices=list(MODEL_OPTIONS),
+            label="Select Model",
+            value="Medieval Base",
+        ),
     ],
+    outputs=gr.Textbox(label="Transcription"),
+    title="Medieval TrOCR Model Switcher",
+    description="Upload an image of medieval text and select a model to transcribe it.",
+    # Each example row supplies one value per input, in order: image, model name.
     examples=[
+        [
+            "https://huggingface.co/medieval-data/trocr-medieval-base/resolve/main/images/caroline-1.png",
+            "Medieval Latin Caroline",
+        ],
     ]
 )
+# Start the app with Gradio's default launch settings.
+iface.launch()