# Source: Hugging Face Space application file (2,970 bytes, revision edb0289).
import gradio as gr
import google.generativeai as genai
from PIL import Image
def extract_malayalam_text(image, api_key):
    """
    Extract handwritten Malayalam text from an image using Gemini 2.0 Flash.

    Args:
        image (PIL.Image): Uploaded image; ``None`` when nothing was uploaded.
        api_key (str): Google AI Studio API key. Leading and trailing
            whitespace is stripped before use, and a key that is empty after
            stripping is treated as missing.

    Returns:
        str: The text returned by the model, or a human-readable message when
        an input is missing or the request raises an exception.
    """
    # Validate inputs
    if image is None:
        return "Please upload an image first."

    # A key pasted into the textbox may carry stray spaces or a newline;
    # strip it so a blank key is rejected here instead of failing at the API.
    if isinstance(api_key, str):
        api_key = api_key.strip()
    if not api_key:
        return "Please provide a valid API key."

    try:
        # Configure the Gemini API directly with the provided key
        genai.configure(api_key=api_key)

        # Use Gemini 2.0 Flash model
        model = genai.GenerativeModel('gemini-2.0-flash')

        # Generate content from the image: the instruction prompt comes first,
        # followed by the image to transcribe
        response = model.generate_content(
            [
                "You are an expert in extracting handwritten Malayalam text from grocery lists. "
                "Carefully transcribe each item in Malayalam script. "
                "Provide a clear, accurate list of items. "
                "If the text is unclear, mention potential uncertainties.",
                image
            ],
            generation_config=genai.types.GenerationConfig(
                temperature=0.2,  # Low temperature for precise extraction
                max_output_tokens=300  # Adjust based on expected list length
            )
        )

        # Return the extracted text
        return response.text
    except Exception as e:
        # This function feeds a UI textbox, so failures are reported as text
        # rather than propagated to the caller.
        return f"An error occurred: {str(e)}"
def create_malayalam_ocr_interface():
    """
    Build the Gradio Blocks app for the Malayalam OCR tool.

    Components are created in this order: a heading and a description, a
    password-type textbox for the API key, an image upload, an
    "Extract Text" button, and a ten-line output textbox. The button's click
    event is wired to ``extract_malayalam_text`` with the image and the key
    as inputs and the output textbox as its output.

    Returns:
        The assembled ``gr.Blocks`` object; it is not launched here.
    """
    header_lines = (
        "# Malayalam Handwritten Text Extractor",
        "Upload a handwritten Malayalam grocery list image for text extraction.",
    )

    with gr.Blocks() as app:
        # Title first, then the description
        for markdown_source in header_lines:
            gr.Markdown(markdown_source)

        # Password-type field for the user's own API key
        key_box = gr.Textbox(
            placeholder="Enter your Gemini API key",
            type="password",
            label="Google AI Studio API Key",
        )

        # Image upload, requested in "pil" form
        picture = gr.Image(
            label="Upload Malayalam Grocery List Image",
            type="pil",
        )

        # Button that starts the extraction
        run_button = gr.Button("Extract Text")

        # Area where the returned string is shown
        result_box = gr.Textbox(
            lines=10,
            label="Extracted Malayalam Text",
        )

        # Input order must match the handler's (image, api_key) signature
        run_button.click(
            fn=extract_malayalam_text,
            inputs=[picture, key_box],
            outputs=result_box,
        )

    return app
def main():
    """Build the OCR interface and launch it with sharing and debug enabled."""
    launch_options = {
        "share": True,  # Create a public link
        "debug": True,  # Show detailed errors
    }
    create_malayalam_ocr_interface().launch(**launch_options)
# Run the app only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()