"""Gradio demo that asks Gemini to estimate the CO2 of activities in an image.

The uploaded picture is resized, sent to the Gemini API together with a
prompt, and the JSON answer is rendered as labelled bounding boxes plus a
table of per-activity estimates.
"""

import json
import os

import gradio as gr
import pandas as pd
from PIL import Image
from google import genai

# Client and prompt setup
client = genai.Client(api_key=os.getenv('GOOGLE_API_KEY'))
# Change to other models, but be careful as response might be with different structure
model_name = "gemini-2.0-flash-exp"

safety_settings = [
    genai.types.SafetySetting(
        category="HARM_CATEGORY_DANGEROUS_CONTENT",
        threshold="BLOCK_ONLY_HIGH",
    ),
]

# NOTE(review): the system instruction limits the answer to 5 objects while the
# prompt allows up to 10, and neither text names the JSON keys that
# parse_info() reads ("box_2d", "label", "co2_grams", "explanation").
# Confirm the intended limit/schema before editing these strings.
bounding_box_system_instructions = """Return bounding boxes as a JSON array with labels, CO2 estimate, and an explanation. Never return masks or code fencing. Limit to 5 objects."""

prompt = """Provide an estimation of how much CO2 is involved in all activities in this picture. Give CO2 in grams. As examples, think of transport, smoking, meat, and other similar emission activities. Do not provide actions that don't have CO2 emissions. Be comprehensive, but don't list more than 10 objects. Detect the 2D bounding boxes of these activities, including the label, the CO2 gram quantity, and a short explanation explaining the estimation for each activity. 
"""

# Column names of the DataFrame built by parse_info(); the Dataframe
# components below reuse them so the displayed headers match the data.
DATAFRAME_COLUMNS = ["label", "co2", "explanation"]


def parse_json(json_output):
    """Decode the JSON payload contained in a model response.

    If the text contains a Markdown code fence (a line consisting of
    "```json" or "```", ignoring case and surrounding whitespace), only the
    content between that line and the next "```" is decoded. Otherwise the
    whole text is decoded.

    Args:
        json_output: Raw response text.

    Returns:
        The decoded Python object.

    Raises:
        json.JSONDecodeError: If the selected text is not valid JSON.
    """
    # Based on https://github.com/google-gemini/cookbook/blob/main/gemini-2/spatial_understanding.ipynb
    lines = json_output.splitlines()
    for i, line in enumerate(lines):
        if line.strip().lower() in ("```json", "```"):
            fenced = "\n".join(lines[i + 1:])  # Drop everything up to the opening fence
            json_output = fenced.split("```")[0]  # Drop everything after the closing fence
            break
    return json.loads(json_output)


def _to_pixel_box(box_2d, width, height):
    """Convert a [y1, x1, y2, x2] box on a 0-1000 scale to pixel coordinates.

    Returns an ``(x1, y1, x2, y2)`` tuple ordered so that x1 <= x2 and
    y1 <= y2 and clamped to the image, or ``None`` when ``box_2d`` is not a
    sequence of four finite numbers.
    """
    if not isinstance(box_2d, (list, tuple)) or len(box_2d) != 4:
        return None
    try:
        y1, x1, y2, x2 = (float(value) for value in box_2d)
        abs_y1 = int(y1 / 1000 * height)
        abs_x1 = int(x1 / 1000 * width)
        abs_y2 = int(y2 / 1000 * height)
        abs_x2 = int(x2 / 1000 * width)
    except (TypeError, ValueError, OverflowError):
        # Non-numeric, NaN or infinite coordinates.
        return None

    def clamp(value, upper):
        return min(max(value, 0), upper)

    abs_x1, abs_x2 = sorted((clamp(abs_x1, width), clamp(abs_x2, width)))
    abs_y1, abs_y2 = sorted((clamp(abs_y1, height), clamp(abs_y2, height)))
    return abs_x1, abs_y1, abs_x2, abs_y2


def parse_info(image, json_data):
    """Turn the decoded model answer into annotations and a results table.

    Args:
        image: PIL image the boxes refer to; only ``image.size`` is read.
        json_data: Decoded model answer. Expected to be a list of dicts with
            the keys "box_2d", "label", "co2_grams" and "explanation"; a
            single dict is treated as a one-element list.

    Returns:
        A tuple ``(boxes_with_labels, dataframe)`` where ``boxes_with_labels``
        is a list of ``[(x1, y1, x2, y2), label]`` entries in pixels and
        ``dataframe`` has the columns listed in ``DATAFRAME_COLUMNS``.
        Entries that are not dicts, that have a missing/empty/zero field, or
        whose box is malformed are skipped.
    """
    width, height = image.size
    if isinstance(json_data, dict):
        json_data = [json_data]
    elif not isinstance(json_data, list):
        json_data = []

    rows = []
    boxes_with_labels = []
    # Iterate over each detected action
    for action in json_data:
        if not isinstance(action, dict):
            continue
        box_2d = action.get("box_2d")
        label = action.get("label")
        co2_grams = action.get("co2_grams")
        explanation = action.get("explanation")
        if not all([box_2d, label, co2_grams, explanation]):
            continue

        pixel_box = _to_pixel_box(box_2d, width, height)
        if pixel_box is None:
            continue

        boxes_with_labels.append([pixel_box, label])
        rows.append({
            "label": label,
            "co2": co2_grams,
            "explanation": explanation,
        })

    # Passing the columns explicitly keeps the headers when no row survived.
    return boxes_with_labels, pd.DataFrame(rows, columns=DATAFRAME_COLUMNS)


def estimate_co2(image):
    """Run the CO2 estimation for one image and format it for the UI.

    Args:
        image: PIL image from the input component, or ``None`` when the
            input has been cleared.

    Returns:
        A pair ``(annotated, dataframe)``: ``annotated`` is
        ``(resized_image, boxes_with_labels)`` for ``gr.AnnotatedImage`` and
        ``dataframe`` is the table for ``gr.Dataframe``. Both are ``None``
        when ``image`` is ``None``.

    Raises:
        gr.Error: If the model returns no text or text that is not valid JSON.
    """
    # The change event also fires when the image is cleared.
    if image is None:
        return None, None

    width, height = image.size
    # Resize to a width of 1024 keeping the aspect ratio; never let the
    # height collapse to 0 for extremely wide images.
    resized_image = image.resize(
        (1024, max(1, int(1024 * height / width))),
        Image.Resampling.LANCZOS,
    )

    # Get results from model
    response = client.models.generate_content(
        model=model_name,
        contents=[prompt, resized_image],
        config=genai.types.GenerateContentConfig(
            system_instruction=bounding_box_system_instructions,
            temperature=0.4,
            safety_settings=safety_settings,
        ),
    )

    response_text = response.text
    if not response_text:
        raise gr.Error(
            "The model returned no text (the request may have been blocked). "
            "Please try another image."
        )
    try:
        json_data = parse_json(response_text)
    except json.JSONDecodeError as err:
        raise gr.Error(
            "Could not parse the model response as JSON. Please try again."
        ) from err

    boxes_with_labels, data = parse_info(resized_image, json_data)
    return (resized_image, boxes_with_labels), data


# Simple single-function interface. It is not launched by this script (only
# `demo` below is); the name is kept for anything that imports it.
iface = gr.Interface(
    fn=estimate_co2,
    inputs=gr.Image(type="pil"),
    outputs=[
        gr.AnnotatedImage(),
        gr.Dataframe(
            label="CO2 Estimation Data",
            interactive=False,
            headers=DATAFRAME_COLUMNS,
        ),
    ],
    title="CO2 Estimation from Images",
    description="Upload an image and get an estimation of the CO2 involved in the activities depicted.",
    article="This is a very rough estimate, and can be misleading or factually inaccurate. Take this as a demo project and not as scientific/exact results.",
)

markdown = """# CO2 Estimation

Upload an image and get an **estimation** of the CO2 involved in the activities depicted.

This is a very rough estimate, and can be misleading or factually inaccurate. Take this as a demo project and not as scientific/exact results.

Powered by [the Gemini API](https://ai.google.dev/gemini-api/docs) and [AI Studio](https://aistudio.google.com/).
"""

with gr.Blocks() as demo:
    with gr.Row():
        gr.Markdown(markdown)

    with gr.Row():
        input_image = gr.Image(type="pil", label="Input Image")
        output_image = gr.AnnotatedImage(label="Output Image")

    with gr.Row():
        output_dataframe = gr.Dataframe(
            label="CO2 Estimated Data",
            interactive=False,
            headers=DATAFRAME_COLUMNS,
        )

    gr.Examples(
        examples=[
            "car_smoke.jpg",
            "grill.jpeg",
        ],
        inputs=input_image,
        label="Try these examples:",
    )

    # Re-run the estimation whenever the input image changes.
    input_image.change(
        fn=estimate_co2,
        inputs=input_image,
        outputs=[output_image, output_dataframe],
    )

demo.launch()