import gradio as gr
import os
import tempfile
from pathlib import Path
import secrets
import dashscope
from dashscope import MultiModalConversation, Generation
from PIL import Image

# API key setup
YOUR_API_TOKEN = os.getenv('YOUR_API_TOKEN')
dashscope.api_key = YOUR_API_TOKEN

# Global conversation state (note: module-level, so it is shared across Gradio sessions)
math_messages = []
image_descriptions = []


def process_image(image, shouldConvert=False):
    # Save the image into Gradio's temp directory so it can be passed to the API by file path
    uploaded_file_dir = os.environ.get("GRADIO_TEMP_DIR") or str(
        Path(tempfile.gettempdir()) / "gradio"
    )
    os.makedirs(uploaded_file_dir, exist_ok=True)

    name = f"tmp{secrets.token_hex(20)}.jpg"
    filename = os.path.join(uploaded_file_dir, name)

    if shouldConvert:
        # Sketchpad images are RGBA; composite them onto a white background before saving as JPEG
        new_img = Image.new('RGB', size=(image.width, image.height), color=(255, 255, 255))
        new_img.paste(image, (0, 0), mask=image)
        image = new_img
    image.save(filename)

    # Ask the vision-language model to transcribe the math content (including LaTeX) in the image
    messages = [{
        'role': 'system',
        'content': [{'text': 'You are a helpful assistant.'}]
    }, {
        'role': 'user',
        'content': [
            {'image': f'file://{filename}'},
            {'text': 'Please describe the math-related content in this image, ensuring that any LaTeX formulas are correctly transcribed. Non-mathematical details do not need to be described.'}
        ]
    }]
    response = MultiModalConversation.call(model='qwen-vl-max-0809', messages=messages)

    os.remove(filename)
    return response.output.choices[0]["message"]["content"]


def get_math_response(image_descriptions, user_question):
    global math_messages
    if not math_messages:
        math_messages.append({'role': 'system', 'content': 'You are a helpful math assistant.'})

    content = "Image descriptions:\n" + "\n".join(image_descriptions) if image_descriptions else ""
    content += f"\n\nUser question: {user_question}"
    math_messages.append({'role': 'user', 'content': content})

    # Stream the answer from the math model; each streamed message carries the full answer so far
    response = Generation.call(
        model="qwen2.5-math-72b-instruct",
        messages=math_messages,
        result_format='message',
        stream=True
    )
    answer = ""
    for resp in response:
        if resp.output is None:
            continue
        answer = resp.output.choices[0].message.content
        # Escape backslashes so LaTeX survives Gradio's Markdown rendering
        yield answer.replace("\\", "\\\\")
    math_messages.append({'role': 'assistant', 'content': answer})


def math_chat_bot(images, sketchpad, question, chat_history):
    global image_descriptions

    # Process new images (the upload component may return None when nothing is uploaded)
    for image in images or []:
        if image:
            description = process_image(image)
            image_descriptions.append(description)

    # Process sketchpad if present
    if sketchpad and sketchpad["composite"]:
        sketch_description = process_image(sketchpad["composite"], True)
        image_descriptions.append(sketch_description)

    # Generate response; each chunk is the cumulative answer so far, so replace rather than append
    response = ""
    for chunk in get_math_response(image_descriptions, question):
        response = chunk
        yield chat_history + [(question, response)]


css = """
#qwen-md .katex-display { display: inline; }
#qwen-md .katex-display>.katex { display: inline; }
#qwen-md .katex-display>.katex>.katex-html { display: inline; }
"""

# Create Gradio interface
with gr.Blocks(css=css) as demo:
    gr.HTML("""\
""" """