mal / app.py
vrindagopinath's picture
Create app.py
edb0289 verified
import gradio as gr
import google.generativeai as genai
from PIL import Image
def extract_malayalam_text(image, api_key):
    """Transcribe handwritten Malayalam text from an image with Gemini 2.0 Flash.

    Args:
        image: The uploaded image (the original docstring specifies a
            PIL.Image), or None when nothing has been uploaded.
        api_key: Google AI Studio API key. Surrounding whitespace is
            ignored, so a key pasted with a stray space or newline is
            still usable, and a blank key is rejected up front.

    Returns:
        str: The text returned by the model on success. Otherwise a
        human-readable message: a prompt for the missing input, or
        "An error occurred: ..." when the API call raises.
    """
    # Validate inputs before touching the API.
    if image is None:
        return "Please upload an image first."

    # Keys copied from a browser often carry whitespace; strip it so the key
    # is sent clean and a whitespace-only value counts as "no key".
    if isinstance(api_key, str):
        api_key = api_key.strip()
    if not api_key:
        return "Please provide a valid API key."

    # Adjacent literals are concatenated into a single instruction string.
    prompt = (
        "You are an expert in extracting handwritten Malayalam text from grocery lists. "
        "Carefully transcribe each item in Malayalam script. "
        "Provide a clear, accurate list of items. "
        "If the text is unclear, mention potential uncertainties."
    )

    try:
        # NOTE(review): genai.configure appears to set the key process-wide;
        # with several concurrent users, confirm that requests cannot pick up
        # each other's key.
        genai.configure(api_key=api_key)
        model = genai.GenerativeModel('gemini-2.0-flash')
        response = model.generate_content(
            [prompt, image],
            generation_config=genai.types.GenerationConfig(
                temperature=0.2,  # Low temperature for precise extraction
                max_output_tokens=300,  # Adjust based on expected list length
            ),
        )
        return response.text
    except Exception as e:
        # UI boundary: report any failure as text in the output box instead
        # of letting it propagate.
        return f"An error occurred: {str(e)}"
def create_malayalam_ocr_interface():
    """Build the Gradio Blocks UI for the Malayalam OCR demo.

    Components are created in this order: a heading and a description,
    a password-type textbox for the API key, an image upload that yields
    PIL images, an "Extract Text" button, and a ten-line output textbox.
    Clicking the button calls extract_malayalam_text with the image and
    the key and writes the result to the output textbox.

    Returns:
        The assembled gr.Blocks object; launching it is left to the caller.
    """
    with gr.Blocks() as ui:
        # Heading and short usage hint
        gr.Markdown("# Malayalam Handwritten Text Extractor")
        gr.Markdown("Upload a handwritten Malayalam grocery list image for text extraction.")

        # Masked field for the user's own key
        key_box = gr.Textbox(
            type="password",
            label="Google AI Studio API Key",
            placeholder="Enter your Gemini API key",
        )

        # Uploaded picture is handed to the callback as a PIL image
        picture = gr.Image(label="Upload Malayalam Grocery List Image", type="pil")

        run_button = gr.Button("Extract Text")

        result_box = gr.Textbox(lines=10, label="Extracted Malayalam Text")

        # Input order must match extract_malayalam_text(image, api_key).
        run_button.click(
            fn=extract_malayalam_text,
            inputs=[picture, key_box],
            outputs=result_box,
        )

    return ui
def main():
    """Build the OCR interface and launch it with sharing and debug enabled."""
    launch_options = {
        "share": True,  # Create a public link
        "debug": True,  # Show detailed errors
    }
    create_malayalam_ocr_interface().launch(**launch_options)
# Start the app only when this file is executed as a script, so that
# importing the module does not launch the server.
if __name__ == "__main__":
    main()