Francesco Capuano committed on
Commit
ef0674c
1 Parent(s): 507730e

add: initial commit

Browse files
Files changed (1) hide show
  1. app.py +110 -0
app.py ADDED
@@ -0,0 +1,110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gradio as gr
3
+ from mistralai import Mistral
4
+ from PIL import Image
5
+ import io
6
+ from dotenv import load_dotenv
7
+ # Convert to base64 string
8
+ import base64
9
+
10
+ load_dotenv()
11
+
12
+ # Initialize Mistral client
13
def get_mistral_client():
    """Build a Mistral client from the MISTRAL_API_KEY environment variable.

    Returns:
        A ``Mistral`` client constructed with the configured API key.

    Raises:
        ValueError: If MISTRAL_API_KEY is unset, empty, or only whitespace.
    """
    # Strip surrounding whitespace so a key pasted with a trailing newline or
    # spaces is either cleaned up or rejected here, instead of failing later
    # as an authentication error from the API.
    api_key = os.environ.get("MISTRAL_API_KEY", "").strip()
    if not api_key:
        raise ValueError("MISTRAL_API_KEY environment variable not set")
    return Mistral(api_key=api_key)
18
+
19
def process_image(image):
    """Encode a PIL image as a base64 JPEG string.

    Args:
        image: A PIL ``Image`` (anything exposing ``mode``, ``convert`` and
            ``save`` in the PIL style).

    Returns:
        The JPEG-encoded image as a base64 ASCII string (no data-URL prefix).
    """
    # JPEG cannot store alpha or palette data; saving e.g. an RGBA PNG upload
    # directly raises an error. Convert such images to RGB first. Note that
    # this discards any transparency information.
    if image.mode not in ("RGB", "L"):
        image = image.convert("RGB")

    buffer = io.BytesIO()
    image.save(buffer, format="jpeg")
    return base64.b64encode(buffer.getvalue()).decode("utf-8")
28
+
29
def transcribe_image(image, client, agent_id="ag:d40a8e90:20241105:mvastral:610ba98d"):
    """Send an image to a Mistral agent and return its LaTeX transcription.

    Args:
        image: PIL image to transcribe; it is encoded via ``process_image``.
        client: Mistral client exposing ``agents.complete``.
        agent_id: Identifier of the Mistral agent to query. Defaults to the
            agent this app was originally wired to.

    Returns:
        The ``content`` of the first choice's message in the agent response.

    Raises:
        RuntimeError: If the response contains no choices.
    """
    base64_image = process_image(image)

    # Single user turn: the instruction text plus the image as a JPEG data URL.
    messages = [
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": "Please transcribe this handwritten mathematical notation into LaTeX. Only provide the LaTeX code, no explanations.",
                },
                {
                    "type": "image_url",
                    "image_url": f"data:image/jpeg;base64,{base64_image}",
                },
            ],
        }
    ]

    response = client.agents.complete(agent_id=agent_id, messages=messages)

    # Fail with a readable message rather than an IndexError/TypeError when
    # the response carries no choices.
    if not response.choices:
        raise RuntimeError("Mistral returned no completion choices")
    return response.choices[0].message.content
57
+
58
def transcribe_and_edit(image):
    """Gradio callback: transcribe the uploaded image into LaTeX.

    Args:
        image: The PIL image from the upload component, or ``None`` when the
            user clicked the button without providing an image.

    Returns:
        A ``(generated, editable)`` pair of strings. On success both hold the
        transcription; on failure the first holds an ``"Error: ..."`` message
        and the second is empty.
    """
    # No upload yet: report that directly instead of creating a client and
    # surfacing an AttributeError about NoneType from the encoding step.
    if image is None:
        return "Error: please upload an image before converting", ""

    try:
        client = get_mistral_client()
        latex_content = transcribe_image(image, client)
    except Exception as e:
        # UI boundary: show the failure in the output box rather than crash.
        return f"Error: {str(e)}", ""
    return latex_content, latex_content
67
+
68
+ # Create Gradio interface
69
with gr.Blocks(title="TexStral: Turn handwriting into LaTeX ✨") as demo:
    gr.Markdown("""
    # Handwriting to LaTeX Converter
    Upload an image containing mathematical handwriting and get it parsed into LaTeX code. Uses Mistral AI's Pixtral model!
    """)

    with gr.Row():
        # Left column: image upload and the trigger button.
        with gr.Column():
            image_input = gr.Image(
                label="Upload Image",
                type="pil",
                height=400,
            )
            convert_btn = gr.Button("Convert to LaTeX", variant="primary")

        # Right column: the transcription, shown twice.
        with gr.Column():
            # Raw LaTeX output as returned by the callback.
            latex_output = gr.Code(
                label="Generated LaTeX",
                language="markdown",
                lines=10,
            )
            # Same text in a textbox so the user can edit and copy it.
            latex_editor = gr.Textbox(
                label="Edit LaTeX",
                lines=10,
                max_lines=20,
                show_copy_button=True,
            )

    # Clicking the button runs the transcription and fills both outputs.
    convert_btn.click(
        fn=transcribe_and_edit,
        inputs=[image_input],
        outputs=[latex_output, latex_editor],
    )

# Launch the app only when run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()