# Hugging Face Space (status when captured: Sleeping)
import gradio as gr
import spaces
import torch
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
# Display name -> Hugging Face Hub model ID.
# Keys are the user-facing labels; values are the checkpoint IDs handed to
# ``from_pretrained``. Insertion order is the order the labels are listed in.
MODEL_OPTIONS = {
    "Microsoft Handwritten": "microsoft/trocr-base-handwritten",
    "Medieval Base": "medieval-data/trocr-medieval-base",
    "Medieval Latin Caroline": "medieval-data/trocr-medieval-latin-caroline",
    "Medieval Castilian Hybrida": "medieval-data/trocr-medieval-castilian-hybrida",
    "Medieval Humanistica": "medieval-data/trocr-medieval-humanistica",
    "Medieval Textualis": "medieval-data/trocr-medieval-textualis",
    "Medieval Cursiva": "medieval-data/trocr-medieval-cursiva",
    "Medieval Semitextualis": "medieval-data/trocr-medieval-semitextualis",
    "Medieval Praegothica": "medieval-data/trocr-medieval-praegothica",
    "Medieval Semihybrida": "medieval-data/trocr-medieval-semihybrida",
    "Medieval Print": "medieval-data/trocr-medieval-print",
}
# Module-level cache of the most recently loaded checkpoint: the model, its
# processor, and the MODEL_OPTIONS key they were loaded for. All three start
# empty and are filled in by load_model() on first use.
current_model = None
current_processor = None
current_model_name = None
def load_model(model_name):
    """Return the processor and model for *model_name*, loading them on demand.

    Only one checkpoint is kept at a time in the module-level
    ``current_*`` globals. Asking for the checkpoint that is already cached
    returns it immediately; asking for a different one replaces the cache.

    Args:
        model_name: A key of ``MODEL_OPTIONS``.

    Returns:
        A ``(processor, model)`` tuple. The model is placed on the GPU when
        CUDA is available and on the CPU otherwise.

    Raises:
        KeyError: If ``model_name`` is not a key of ``MODEL_OPTIONS``.
    """
    global current_model, current_processor, current_model_name

    if current_model is None or model_name != current_model_name:
        # Resolve the ID first so an unknown name raises before the cache is
        # touched.
        model_id = MODEL_OPTIONS[model_name]

        # Invalidate the cache for the duration of the swap. If anything
        # below raises, the next call reloads from scratch instead of pairing
        # a new processor with the old model or handing back a model that
        # never reached its target device.
        current_model_name = None
        # Drop the previous weights before the new ones are moved to the
        # device, so both checkpoints are not resident there at once.
        current_model = None

        device = "cuda" if torch.cuda.is_available() else "cpu"
        processor = TrOCRProcessor.from_pretrained(model_id)
        model = VisionEncoderDecoderModel.from_pretrained(model_id).to(device)

        # Commit only once everything succeeded.
        current_processor = processor
        current_model = model
        current_model_name = model_name

    return current_processor, current_model
def process_image(image, model_name):
    """Transcribe the text in *image* with the model selected by *model_name*.

    Args:
        image: The input image (the UI supplies a PIL image), or ``None``
            when nothing has been uploaded.
        model_name: A key of ``MODEL_OPTIONS`` selecting the checkpoint.

    Returns:
        The decoded transcription of the first (only) item in the batch.

    Raises:
        gr.Error: If no image was provided.
    """
    if image is None:
        # Surface a readable message in the UI instead of an opaque
        # processor failure.
        raise gr.Error("Please upload an image before transcribing.")

    processor, model = load_model(model_name)

    # Normalise PIL inputs to three-channel RGB so grayscale or RGBA uploads
    # are accepted; objects without ``convert`` are passed through unchanged.
    if hasattr(image, "convert"):
        image = image.convert("RGB")

    # Prepare the image and place it on whatever device the model lives on.
    pixel_values = processor(image, return_tensors="pt").pixel_values
    pixel_values = pixel_values.to(model.device)

    # Generate with the checkpoint's default generation settings; gradients
    # are not needed for inference.
    with torch.no_grad():
        generated_ids = model.generate(pixel_values)

    # Decode the single sequence in the batch.
    return processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
# Base URL under which the example images are hosted.
base_url = "https://huggingface.co/medieval-data/trocr-medieval-base/resolve/main/images/"

# Example rows: [image URL, MODEL_OPTIONS display name to use with it].
examples = [
    [f"{base_url}caroline-1.png", "Medieval Latin Caroline"],
    [f"{base_url}caroline-2.png", "Medieval Latin Caroline"],
    [f"{base_url}cursiva-1.png", "Medieval Cursiva"],
    [f"{base_url}cursiva-2.png", "Medieval Cursiva"],
    [f"{base_url}cursiva-3.png", "Medieval Cursiva"],
    [f"{base_url}humanistica-1.png", "Medieval Humanistica"],
    [f"{base_url}humanistica-2.png", "Medieval Humanistica"],
    [f"{base_url}humanistica-3.png", "Medieval Humanistica"],
    [f"{base_url}hybrida-1.png", "Medieval Castilian Hybrida"],
    [f"{base_url}hybrida-2.png", "Medieval Castilian Hybrida"],
    [f"{base_url}hybrida-3.png", "Medieval Castilian Hybrida"],
    [f"{base_url}praegothica-1.png", "Medieval Praegothica"],
    [f"{base_url}praegothica-2.png", "Medieval Praegothica"],
    [f"{base_url}praegothica-3.png", "Medieval Praegothica"],
    [f"{base_url}print-1.png", "Medieval Print"],
    [f"{base_url}print-2.png", "Medieval Print"],
    [f"{base_url}print-3.png", "Medieval Print"],
    [f"{base_url}semihybrida-1.png", "Medieval Semihybrida"],
    [f"{base_url}semihybrida-2.png", "Medieval Semihybrida"],
    [f"{base_url}semihybrida-3.png", "Medieval Semihybrida"],
    [f"{base_url}semitextualis-1.png", "Medieval Semitextualis"],
    [f"{base_url}semitextualis-2.png", "Medieval Semitextualis"],
    [f"{base_url}semitextualis-3.png", "Medieval Semitextualis"],
    [f"{base_url}textualis-1.png", "Medieval Textualis"],
    [f"{base_url}textualis-2.png", "Medieval Textualis"],
    [f"{base_url}textualis-3.png", "Medieval Textualis"],
]
# Custom CSS that stretches the element with id "image_upload", its first
# child container, and any <img> inside it to the full available width.
custom_css = """
#image_upload {
    max-width: 100% !important;
    width: 100% !important;
    height: auto !important;
}
#image_upload > div:first-child {
    width: 100% !important;
}
#image_upload img {
    max-width: 100% !important;
    width: 100% !important;
    height: auto !important;
}
"""
# Gradio interface: image upload + model picker -> transcription text box.
with gr.Blocks(css=custom_css) as iface:
    gr.Markdown("# Medieval TrOCR Model Switcher")
    gr.Markdown("Upload an image of medieval text and select a model to transcribe it. Note: This tool is designed to work on a single line of text at a time for optimal results.")

    # NOTE(review): the captured source lost its indentation, so which
    # components belong to this Row cannot be recovered from it. The Row is
    # assumed to wrap only the image - confirm against the deployed layout.
    with gr.Row():
        # elem_id ties this component to the "#image_upload" CSS rules.
        input_image = gr.Image(type="pil", label="Input Image", elem_id="image_upload")

    model_dropdown = gr.Dropdown(choices=list(MODEL_OPTIONS), label="Select Model", value="Medieval Base")
    transcription_output = gr.Textbox(label="Transcription")
    submit_button = gr.Button("Transcribe")

    # Run the transcription when the button is pressed.
    submit_button.click(fn=process_image, inputs=[input_image, model_dropdown], outputs=transcription_output)

    # Each example row supplies an image and the matching model selection.
    gr.Examples(examples, inputs=[input_image, model_dropdown], outputs=transcription_output)

iface.launch()