"""Batch-caption images with an Azure OpenAI GPT-4o deployment.

For every image file in ``images_path``, the image is encoded as a base64
``data:`` URL, sent to the deployed chat model together with a FLUX-style
captioning prompt, and the returned caption is written to a sibling
``<image-stem>.txt`` file in the same directory.
"""

import base64
import os
from mimetypes import guess_type

from openai import AzureOpenAI


def local_image_to_data_url(image_path):
    """Encode a local image file into a ``data:`` URL.

    Parameters
    ----------
    image_path : str
        Path to the image file on disk.

    Returns
    -------
    str
        A ``data:<mime>;base64,<payload>`` string accepted by the
        chat-completions image_url content part.
    """
    # Guess the MIME type of the image based on the file extension.
    mime_type, _ = guess_type(image_path)

    # If the MIME type is not found, set it explicitly for known cases.
    if mime_type is None or mime_type == 'application/octet-stream':
        if image_path.lower().endswith('.webp'):
            mime_type = 'image/webp'  # some platforms lack a .webp mapping
        else:
            mime_type = 'application/octet-stream'  # default when unknown

    # Read and base64-encode the raw image bytes.
    with open(image_path, "rb") as image_file:
        base64_encoded_data = base64.b64encode(image_file.read()).decode('utf-8')

    # Construct the data URL.
    return f"data:{mime_type};base64,{base64_encoded_data}"


# Directory holding the images to caption (captions are written here too).
images_path = "/eph/nvme0/azureml/cr/j/8569d5e3aa08485780b67a53d671e109/exe/wd/1_2M_Dataset"

# Only caption actual image files.  The original unfiltered os.listdir()
# would also pick up the .txt caption files this script writes into the
# same directory, so any re-run would try to caption text files.
_IMAGE_EXTS = ('.jpg', '.jpeg', '.png', '.webp', '.bmp', '.gif', '.tif', '.tiff')
imgs_list = [f for f in os.listdir(images_path) if f.lower().endswith(_IMAGE_EXTS)]

# Azure OpenAI credentials.
# SECURITY: never hard-code API keys in source control.  The key that used
# to live here as a literal must be rotated; supply the current key via the
# AZURE_OPENAI_API_KEY environment variable (the literal fallback below is
# kept only for backward compatibility until the rotation is done).
api_base = "https://allam-swn-gpt-01.openai.azure.com/"  # your endpoint should look like https://YOUR_RESOURCE_NAME.openai.azure.com/
api_key = os.environ.get("AZURE_OPENAI_API_KEY", "8af2cca79fb34601ab829b44b7fa6dcf")
deployment_name = "gpt-4o-900ptu"
api_version = "2024-02-15-preview"  # this might change in the future

# Define a client pointed at the specific deployment.
client = AzureOpenAI(
    api_key=api_key,
    api_version=api_version,
    base_url=f"{api_base}openai/deployments/{deployment_name}",
)

# Captioning instructions sent with every image ("punch" typo fixed to "bunch").
_CAPTION_PROMPT = """You are my captioning model, I will give you a bunch of images with their main subject, and I want you to write a detailed caption based on what you see in the images alone. Take these consideration when writing the caption: Order the terms in the caption and use commas. The order of the words in the caption directly corresponds to their weight when generating the final image, so a main subject should always be at the start of the prompt. If we want to add more details, do it in a "narrative style" and using commas to help separate the terms for the FLUX model to read. The tag of this image is: 1/2M cup"""


def main():
    """Caption every image in ``imgs_list`` and write each result to disk."""
    for img_name in imgs_list:
        # Full path of the image being captioned.
        img_path = os.path.join(images_path, img_name)

        # Derive the caption filename from the image stem.  splitext() is
        # used instead of split('.') so names containing extra dots (e.g.
        # "cup.v2.jpg") keep their full stem.
        txt_path = os.path.join(images_path, os.path.splitext(img_name)[0] + ".txt")

        # Convert the local image into a data URL accepted by the model.
        data_url = local_image_to_data_url(img_path)

        response = client.chat.completions.create(
            model=deployment_name,
            messages=[
                {"role": "system", "content": "You are an image captioning assistant."},
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": _CAPTION_PROMPT},
                        {"type": "image_url", "image_url": {"url": data_url}},
                    ],
                },
            ],
            max_tokens=2000,
        )

        # Persist the generated caption next to its image.
        with open(txt_path, "w") as f:
            f.write(response.choices[0].message.content)


if __name__ == "__main__":
    main()