texstral / app.py
Francesco Capuano
fix: minor
2dd2156
raw
history blame
3.21 kB
import os
import gradio as gr
from mistralai import Mistral
from PIL import Image
import io
from dotenv import load_dotenv
import base64
load_dotenv()
# Initialize Mistral client
def get_mistral_client():
api_key = os.environ.get("MISTRAL_API_KEY")
if not api_key:
raise ValueError("MISTRAL_API_KEY environment variable not set")
return Mistral(api_key=api_key)
def process_image(image):
"""Convert PIL Image to base64 string for Mistral"""
# Convert image to bytes
img_byte_arr = io.BytesIO()
image.save(img_byte_arr, format='jpeg')
img_byte_arr = img_byte_arr.getvalue()
return base64.b64encode(img_byte_arr).decode('utf-8')
def transcribe_image(image, client):
"""Send image to Mistral and get LaTeX transcription"""
base64_image = process_image(image)
# Construct the prompt for the Mistral agent
messages = [
{
"role": "user",
"content": [
{
"type": "text",
"text": "Please transcribe this handwritten mathematical notation into LaTeX. Only provide the LaTeX code, no explanations."
},
{
"type": "image_url",
"image_url": f"data:image/jpeg;base64,{base64_image}"
}
]
}
]
# Get response from Mistral
response = client.agents.complete(
agent_id="ag:d40a8e90:20241105:mvastral:610ba98d",
messages=messages
)
return response.choices[0].message.content
def transcribe_and_edit(image):
"""Main function for Gradio interface"""
try:
client = get_mistral_client()
# First transcription
latex_content = transcribe_image(image, client)
return latex_content, latex_content
except Exception as e:
return f"Error: {str(e)}", ""
# Create Gradio interface
with gr.Blocks(title="TexStral: Turn handwriting into LaTeX ✨") as demo:
gr.Markdown("""
# Handwriting to LaTeX Converter
Upload an image containing mathematical handwriting and get it parsed into LateX code. Uses Mistral AI's Pixtral model!
""")
with gr.Row():
# Left column for image upload
with gr.Column():
image_input = gr.Image(
label="Upload Image",
type="pil",
height=400
)
convert_btn = gr.Button("Convert to LaTeX", variant="primary")
# Right column for LaTeX output
with gr.Column():
# Raw LaTeX output
latex_output = gr.Code(
label="Generated LaTeX",
language="markdown",
lines=10
)
# Editable LaTeX
latex_editor = gr.Textbox(
label="Edit LaTeX",
lines=10,
max_lines=20,
show_copy_button=True
)
# Button to trigger conversion
convert_btn.click(
fn=transcribe_and_edit,
inputs=[image_input],
outputs=[latex_output, latex_editor]
)
# Launch the app
if __name__ == "__main__":
demo.launch()