"""Gradio demo: OCR an image with Tesseract and suggest subjects with Annif."""

import html
import os

import cv2
import gradio as gr
import numpy as np
import pytesseract
from annif_client import AnnifClient

# Index of the project that is pre-selected in the dropdown when it exists.
DEFAULT_PROJECT_INDEX = 2


def get_annif_projects():
    """Create an Annif client and list the projects it offers.

    Returns:
        A tuple ``(client, project_ids, project_names)``. If the client cannot
        be created or reports no projects, the error is printed and
        ``(None, [], [])`` is returned so the UI can still be built.
    """
    try:
        annif = AnnifClient()
        projects = annif.projects
        if not projects:
            raise ValueError("No projects found from Annif client")
        proj_ids = [project["project_id"] for project in projects]
        proj_names = [project["name"] for project in projects]
        return annif, proj_ids, proj_names
    except Exception as e:
        # Best effort: the app must still start when Annif is unreachable.
        print(f"Error initializing Annif client: {str(e)}")
        return None, [], []


annif, proj_ids, proj_names = get_annif_projects()


def _load_bgr_image(image):
    """Return *image* as a BGR array, accepting a file path or an RGB array.

    Raises:
        ValueError: if the path cannot be read or the type is unsupported
            (this includes ``None``, i.e. no image supplied).
    """
    if isinstance(image, str):
        img = cv2.imread(image)
        if img is None:
            raise ValueError(f"Unable to read image from path: {image}")
        return img
    if isinstance(image, np.ndarray):
        return cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    raise ValueError("Unsupported image type")


def _extract_text(img, lang):
    """Binarize *img* with Otsu thresholding and run Tesseract OCR on it."""
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    threshold_img = cv2.threshold(
        gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
    )[1]
    return pytesseract.image_to_string(threshold_img, lang=lang)


def _render_suggestions_html(results):
    """Render suggestions as an HTML list, highest score first.

    The markup uses the ids/classes the stylesheet below already defines
    (``#suggestions-wrapper``, ``#suggestions``, ``.list-group-item``,
    ``meter``).
    """
    ranked = sorted(results, key=lambda x: x['score'], reverse=True)
    items = []
    for result in ranked:
        # NOTE(review): assumes each suggestion is a dict that may carry
        # "label" and "uri" next to "score" - confirm against annif-client.
        label = html.escape(str(result.get("label", "")))
        uri = html.escape(str(result.get("uri", "")), quote=True)
        score = float(result["score"])
        link = f'<a href="{uri}" target="_blank">{label}</a>' if uri else label
        items.append(
            '<li class="list-group-item">'
            f'<meter value="{score:.4f}" min="0" max="1"></meter>'
            f"{link}</li>"
        )
    return (
        '<div id="suggestions-wrapper">\n'
        '<h2 id="suggestions">Suggested subjects</h2>\n'
        '<ul class="list-group" id="results">\n'
        + "\n".join(items)
        + "\n</ul>\n</div>\n"
    )


def process(image, project_num: int, lang: str = "eng"):
    """OCR *image* and ask Annif for subject suggestions for the text.

    Args:
        image: Path to an image file, or an RGB ``numpy`` array.
        project_num: Index into the list of Annif projects.
        lang: Tesseract language code used for OCR.

    Returns:
        ``(text, html)``: the extracted text and an HTML fragment listing the
        suggested subjects. On any failure the error message is returned as
        the text and the HTML is empty, so the UI shows the problem instead
        of crashing.
    """
    try:
        if not proj_ids:
            raise ValueError("No Annif projects available")

        img = _load_bgr_image(image)
        text = _extract_text(img, lang)

        # A path input is removed once it has been read (pre-existing
        # behavior, kept as is).
        if isinstance(image, str):
            os.remove(image)

        results = annif.suggest(project_id=proj_ids[project_num], text=text)
        return text, _render_suggestions_html(results)
    except Exception as e:
        return str(e), ""


langs = ("eng", "fin", "swe")

# The stylesheet is assembled from adjacent literals; the resulting string is
# identical to the former single-line literal (leading/trailing space included).
css = (
    " .gradio-container, .gradio-container * { color: #343260 !important;"
    " background-color: #f3f3f6; color: #343260; font-family: Jost, sans-serif;"
    " font-weight: 400; font-size: 1.2rem; line-height: 1.2; }"
    " body, .mygrclass { background-color: #f3f3f6; color: #343260;"
    " font-family: Jost, sans-serif; font-weight: 400; font-size: 1.2rem;"
    " line-height: 1.2; }"
    " .container { max-width: 1140px; margin: 0 auto; padding: 0 15px; }"
    " h1, h1 a { font-weight: 500; font-size: 2rem; text-align: center; }"
    " h2 { font-weight: 500; font-size: 1.5rem; padding: 0.5rem 0; }"
    " #form { background: linear-gradient(to bottom, #ffffff 0%, #d9dfe3 1%,"
    " #f3f3f6 1%, #f3f3f6 99%, #d9dfe3 99%, #ffffff 100%); padding: 2rem 0; }"
    " .form-control { border-radius: 0px; }"
    " #get-suggestions { margin: 2rem 0; background-color: #6280dc;"
    " color: white; border: none; border-radius: 0px; padding-right: 3rem;"
    " background-image: url('static/img/arrow-white.svg');"
    " background-position: 97% center; background-repeat: no-repeat; }"
    " #suggestions-wrapper { background-color: #f3f3f6; padding: 1rem; }"
    " #suggestions { border-top: 1px solid #343260; padding-top: 0.5rem;"
    " text-transform: uppercase; font-size: 1.1rem; }"
    " .list-group-item { display: flex; align-items: center; padding: 5px 0;"
    " border-bottom: 1px solid #e0e0e0; }"
    " meter { width: 24px; margin-right: 10px; }"
    " meter::-webkit-meter-bar { background-color: #ccc; }"
    " meter::-webkit-meter-optimum-value { background: #6280dc; }"
    " "
)

# Pre-select the usual default project, but fall back gracefully when fewer
# projects (or none, e.g. Annif unreachable) are available instead of raising
# IndexError while the UI is being built.
if len(proj_names) > DEFAULT_PROJECT_INDEX:
    default_project = proj_names[DEFAULT_PROJECT_INDEX]
elif proj_names:
    default_project = proj_names[0]
else:
    default_project = None

with gr.Blocks(theme=gr.themes.Default(radius_size="none"), css=css) as demo:
    gr.HTML("\n\nAnnif demo with image/camera input and OCR\n\n")
    with gr.Row():
        with gr.Column(scale=3):
            image_input = gr.Image(
                type="numpy", label="Input Image", elem_classes="mygrclass"
            )
        with gr.Column(scale=1):
            # type="index" makes the callback receive the position of the
            # chosen name, which process() uses to index proj_ids.
            project = gr.Dropdown(
                choices=proj_names,
                label="Project (vocabulary and language)",
                type="index",
                elem_classes="mygrclass",
                value=default_project,
            )
            lang = gr.Dropdown(
                choices=langs,
                label="Select Language for OCR",
                type="value",
                value="eng",
                elem_classes="mygrclass",
            )
            submit_btn = gr.Button(
                "Get suggestions",
                elem_id="get-suggestions",
                elem_classes="mygrclass",
            )
    with gr.Row():
        with gr.Column(scale=3):
            text_output = gr.Textbox(
                label="Extracted Text", elem_classes="mygrclass"
            )
        with gr.Column(scale=1):
            html_output = gr.HTML()
    submit_btn.click(
        process,
        inputs=[image_input, project, lang],
        outputs=[text_output, html_output],
    )

demo.launch()