"""Gradio app: extract structured shipping information from an email.

A 4-bit quantised Llama-3 model (loaded through Unsloth) is prompted with an
Alpaca-style template and asked to pull key/value shipping fields out of the
email text supplied in the UI.
"""

import os
from functools import lru_cache

import gradio as gr
import torch  # noqa: F401 -- unsloth requires torch/CUDA to be importable
from unsloth import FastLanguageModel

# Read the Hugging Face token from the environment so credentials never live
# in source control. May be None for public model repos.
auth_token = os.environ.get("HUGGING_FACE_HUB_TOKEN")

instruction = """
From the given email, extract the following key values. The keys are explained below:
* pickup_location: Street address of the origin location of goods.
* pickup_cap: Postal code or ZIP code of the pickup location.
* pickup_port: Port of pickup, often used in international shipping.
* pickup_state: Only Country of pickup location.
* delivery_location: Street address of the destination location of goods.
* delivery_cap: Postal code or ZIP code of delivery location.
* delivery_port: Port of delivery, similar to pickup port.
* delivery_state: State or region of delivery location.
* total_quantity: Overall quantity of shipped items (e.g., pieces, boxes). Calculate the total_quantity by summing the quantity of all packages.
* total_weight: Total weight of the shipment (e.g., kg, lbs). Calculate the total_weight by summing the weights of all packages.
* total_volume: Total volume of the shipment (e.g., cubic meters, cubic feet). Calculate the total_volume by summing the volumes of all packages.
* quantity: Individual Quantity of a specific item being shipped.
* package_type: Individual Type of packaging used (e.g., pallets, cartons).
* weight: Individual Weight of a specific package.
* measures: Individual Dimensions or measurements of a package.
* stackable: Indicates whether the shipment is stackable (True or False).
* volume: Individual Volume of a specific package.
* commodity: Type of goods or commodities being shipped.
* company: Name of the email sending company, also the shipping company or carrier.
* incoterms: Choose available options: EXW, FCA, FAS, FOB, CFR, CIF, CPT, CIP, DAP, DPU, DDP.
"""


@lru_cache(maxsize=2)
def _load_model(model_name):
    """Load and cache the (model, tokenizer) pair for *model_name*.

    The original code reloaded several GB of weights on every request; the
    cache makes repeat calls with the same model effectively free.
    """
    loaded_model, tokenizer = FastLanguageModel.from_pretrained(
        model_name=model_name,
        max_seq_length=2048,
        dtype=None,          # let unsloth auto-select (bf16/fp16 per GPU)
        load_in_4bit=True,   # 4-bit quantisation to fit consumer GPUs
        token=auth_token,    # required for gated/private repos; None is fine otherwise
    )
    FastLanguageModel.for_inference(loaded_model)  # enable native 2x faster inference
    return loaded_model, tokenizer


def generate_output(input_text, model):
    """Run the extraction prompt against *model* and return its reply.

    Parameters
    ----------
    input_text : str
        Raw email body to extract shipping fields from.
    model : str
        Hugging Face repo id of the model to use (from the dropdown).

    Returns
    -------
    str
        The decoded model answer (text after the "### Response:" marker),
        with special tokens removed.
    """
    llm, tokenizer = _load_model(model)

    alpaca_prompt = f"""
Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{instruction}

### Input:
{input_text}

### Response:
"""

    # Tokenize the prompt and generate on the GPU (unsloth requires CUDA).
    model_inputs = tokenizer([alpaca_prompt], return_tensors="pt").to("cuda")
    generated = llm.generate(**model_inputs, max_new_tokens=2048, use_cache=True)

    # batch_decode returns a list (batch of 1). Return a plain string and drop
    # the echoed prompt so the UI shows only the model's answer.
    decoded = tokenizer.batch_decode(generated, skip_special_tokens=True)[0]
    return decoded.split("### Response:")[-1].strip()


model_options = ["sxandie/llama_3_8b_4bitQ"]

# gr.inputs.* was removed in Gradio 4.x: use the top-level components, and
# `value=` (the replacement for the old `default=` keyword).
demo_inputs = [
    gr.Textbox(label="Input Text"),
    gr.Dropdown(label="Model", choices=model_options, value=model_options[0]),
]

# Create the Gradio interface.
iface = gr.Interface(
    fn=generate_output,
    inputs=demo_inputs,
    outputs="text",
    title="Email Information Extraction",
    description="Extract key information from the provided email.",
)

if __name__ == "__main__":
    # Guarding launch() keeps the module importable without starting a server.
    iface.launch()