Spaces:

vrindagopinath
/

mal

Sleeping

mal

File size: 2,970 Bytes

edb0289

import gradio as gr
import google.generativeai as genai
from PIL import Image

def extract_malayalam_text(image, api_key):
    """
    Extract handwritten Malayalam text from an image using Gemini 2.0 Flash

    Args:
        image (PIL.Image): Uploaded image, or None when nothing was uploaded
        api_key (str): Google AI Studio API Key; surrounding whitespace is ignored

    Returns:
        str: Text returned by the model, or a human-readable message when an
        input is missing or the request fails (this function does not raise)
    """
    # Validate inputs
    if image is None:
        return "Please upload an image first."

    # Keys pasted into the UI often carry stray spaces or a trailing newline.
    # Strip them so a blank-looking key is rejected here instead of being sent
    # to the API, and so a valid key is not corrupted by the extra characters.
    if isinstance(api_key, str):
        api_key = api_key.strip()

    if not api_key:
        return "Please provide a valid API key."

    # Instruction sent to the model together with the image
    prompt = (
        "You are an expert in extracting handwritten Malayalam text from grocery lists. "
        "Carefully transcribe each item in Malayalam script. "
        "Provide a clear, accurate list of items. "
        "If the text is unclear, mention potential uncertainties."
    )

    try:
        # Configure the Gemini API directly with the provided key.
        # NOTE(review): configure() presumably stores the key in library-wide
        # state, so concurrent users with different keys could interfere with
        # each other -- confirm against the google-generativeai docs.
        genai.configure(api_key=api_key)

        # Use Gemini 2.0 Flash model
        model = genai.GenerativeModel('gemini-2.0-flash')

        # Generate content from the image
        response = model.generate_content(
            [prompt, image],
            generation_config=genai.types.GenerationConfig(
                temperature=0.2,  # Low temperature for precise extraction
                max_output_tokens=300  # Adjust based on expected list length
            )
        )

        # Return the extracted text
        return response.text

    except Exception as e:
        # UI boundary: report the failure as text in the output box rather
        # than letting the exception propagate to the caller.
        return f"An error occurred: {str(e)}"

def create_malayalam_ocr_interface():
    """
    Build the Gradio Blocks UI for the Malayalam OCR demo.

    Returns:
        The assembled gr.Blocks object; it is not launched here.
    """
    with gr.Blocks() as ocr_app:
        # Heading plus a one-line usage hint
        gr.Markdown("# Malayalam Handwritten Text Extractor")
        gr.Markdown("Upload a handwritten Malayalam grocery list image for text extraction.")

        # Masked field for the user's own API key
        key_field = gr.Textbox(
            label="Google AI Studio API Key", type="password", placeholder="Enter your Gemini API key"
        )

        # Image is handed to the callback as a PIL object
        picture_field = gr.Image(type="pil", label="Upload Malayalam Grocery List Image")

        # Trigger for the extraction
        run_button = gr.Button("Extract Text")

        # Multi-line box that receives the callback's return value
        result_field = gr.Textbox(label="Extracted Malayalam Text", lines=10)

        # Wire the button: (image, key) -> extract_malayalam_text -> result box
        callback_inputs = [picture_field, key_field]
        run_button.click(
            fn=extract_malayalam_text,
            inputs=callback_inputs,
            outputs=result_field,
        )

    return ocr_app

def main():
    """Build the OCR interface and launch it."""
    launch_options = {
        "share": True,  # Create a public link
        "debug": True,  # Show detailed errors
    }
    create_malayalam_ocr_interface().launch(**launch_options)

# Run the app only when this file is executed as a script, not when imported
if __name__ == "__main__":
    main()