import gradio as gr import torch import re import os auth_token = os.environ.get("HUGGING_FACE_HUB_TOKEN") from unsloth import FastLanguageModel instruction = """ From the given email, extract the following key values. The keys are explained below: * pickup_location: Street address of the origin location of goods. * pickup_cap: Postal code or ZIP code of the pickup location. * pickup_port: Port of pickup, often used in international shipping. * pickup_state: Only Country of pickup location. * delivery_location: Street address of the destination location of goods. * delivery_cap: Postal code or ZIP code of delivery location. * delivery_port: Port of delivery, similar to pickup port. * delivery_state: State or region of delivery location. * total_quantity: Overall quantity of shipped items (e.g., pieces, boxes). Calculate the total_quantity by summing the quantity of all packages. * total_weight: Total weight of the shipment (e.g., kg, lbs). Calculate the total_weight by summing the weights of all packages. * total_volume: Total volume of the shipment (e.g., cubic meters, cubic feet). Calculate the total_volume by summing the volumes of all packages. * quantity: Individual Quantity of a specific item being shipped. * package_type: Individual Type of packaging used (e.g., pallets, cartons). * weight: Individual Weight of a specific package. * measures: Individual Dimensions or measurements of a package. * stackable: Indicates whether the shipment is stackable (True or False). * volume: Individual Volume of a specific package. * commodity: Type of goods or commodities being shipped. * company: Name of the email sending company, also the shipping company or carrier. * incoterms: Choose available options: EXW, FCA, FAS, FOB, CFR, CIF, CPT, CIP, DAP, DPU, DDP. """ def process_output(output): """ Process the output to extract the response. """ # Define the regex pattern pattern = r'### Response:\n?(.*?)<\|endoftext\|>' # Search for the pattern in the output match = re.search(pattern, output, re.DOTALL) if match: # Extract the response response = match.group(1) # Remove specified symbols cleaned_str = re.sub(r'\\n|\\\\|\\\'', '', response) return cleaned_str else: return output # Define the function for generating output based on input def generate_output(input_text,model): # Prompt for the instruction output = "" # Initialize the FastLanguageModel model, tokenizer = FastLanguageModel.from_pretrained( model_name = "DataIntelligenceTeam/NER-Phi-3-mini-4k-instruct", max_seq_length = 2500, dtype = None, load_in_4bit = True, ) FastLanguageModel.for_inference(model) # Enable native 2x faster inference alpaca_prompt = f""" Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request. ### Instruction: {instruction} ### Input: {input_text} ### Response: """ # Tokenize the input text inputs = tokenizer([alpaca_prompt], return_tensors="pt").to("cuda") # Generate outputs outputs = model.generate(**inputs, max_new_tokens=2500, use_cache=True) output = tokenizer.batch_decode(outputs) cleaned_response = process_output(str(output)) return output,cleaned_response examples = [ ["""dubai", "Dear Team, ; Please quote SEA option for below inquiry ; POD :Jebel Ali . ; estimated packing details ; 3300x950x1350 mm ; 3800x1700x1350 mm ; 1900x2000x1200 mm ; 1900x2000x1200 mm ; 2600x1300x1200 mm ; 3900x1100x1350 mm ; 820x780x950 mm ; Total weight kg 5000 approx ; Not stackable ; Marrone Srl ; via Rui, 5 - Loc. Prà dei Risi ; I - 33080 Zoppola (PN) ; Warehouse: 08:00 – 12:00 / 13:00 – 16:30 ; Warehouse Handling + Free days w.e.f 01-01-2024 ; AED 250 + vat until 10 CBM - AED 350 + Vat 11 CBM on wards ; Free time applicable - 7 days only for General cargo from DO readiness & No free time for HAZ CARGO, post which standard storage tariff will apply. ; Important Notice ; : Considering the ongoing circumstances, please be informed after booking confirmation the rates and schedules may undergo changes without advance notice. ; Please also expect vessel delays and changes in transit time. ; CSS will not be liable for any claims that arise due to this. ; Deepak Unnikrishnan ; Sr Sales Coordinator ; Consolidated Shipping Service L.L.C. ; Office: +971 4 883 1303 | Ext: 1163| Toll Free: 800277 | Mobile: +971501513697| ; Dubai | United Arab Emirates ; nvo-import5@cssdubai.com | www.cssgroupsite.com ; Our Offices: Dubai | Abu Dhabi | Sharjah | Ras-Al-Khaimah | Bahrain | Oman | Qatar | Saudi Arabia | Kuwait | Iraq | Africa | Turkiye | India | Sri Lanka | ; Disclaimer: This email and any attachments are confidential and may also be privileged. If you are not the intended recipient, please delete all copies and notify the sender immediately. ; Please read our ; disclaimer."""], ["""I: CIC/2324/806 // Rates Ex Italy To Chennai Port", "Dear Ceriana , ; Please quote your best rate Ex-Works Cesena Italy, By Sea to Chennai Sea Port for below given details : ; Cargo is non stackable and non tiltable, send quote accordingly. ; ITEM - EQUIPMENT FOR PHYSICAL EXERCISE ; Total - 10 Packages ; Net Weight - 1838.62 Kgs ; Total Weight - 2291.85 Kgs ; Total Volume (m3) - 17.36 ; Refer attached Packing List for Dimensions, etc. ; Pick-Up Address : ; Technogym S.P.A ; Via Calcinaro, 2861 ; 47521 Cesena ( FC) ; Italy ; Thanks & Regards, ; RAKSHANA.R ; A Group of RADAR VENTURES PVT LTD. ; 6th Floor, Menon Eternity, St. Marys Road, ; Alwarpet, Chennai-600018 ; PH NO:7305888727 ; MAIL ID : ; rakshana.r@magikcargo.com"""], ["""I: KEELUNG", "Ciao Paolo, ; Per favore mi quoti per: ; Winery: Cantina Tollo ; pick up::66010 ; POD: Keelung ; Cargo size : 120x 100x190 cm/ 2 Pallets 1581/Kg ; 210 cases ,1581kg, 2Pallets about 4.56 CBM ; Grazie ; Cordiali saluti/ Best regards ; ________________________________________________________________________________ ; Franco Raiola – Area manager ; Cargo Compass spa ; Via Lavoria 56/L/M/N | Cenaia | Crespina Lorenzana | Pisa | 56040 | ITALY ; fraiola@cargocompass.it | O +39.050.643133 | F +39.050.644687 | C+39.335.1277205 | www.cargocompassworld.com"""]] model_options = ["DataIntelligenceTeam/NER-Phi-3-mini-4k-instruct"] #,"DataIntelligenceTeam/NER-gemma-7b-bnb-4bit","DataIntelligenceTeam/llama-3-8b-Instruct-bnb-4bit","DataIntelligenceTeam/mistral-7b-instruct-v0.2-bnb-4bit"] # Create Gradio interface iface = gr.Interface(fn=generate_output, inputs=[ gr.inputs.Textbox(label="Input Text"), #gr.inputs.Dropdown(label="Select the Fine-tuned Model", choices=["DataIntelligenceTeam/NER-Phi-3-mini-4k-instruct","DataIntelligenceTeam/llama-3-8b-Instruct-bnb-4bit"]) ], outputs=[ gr.outputs.Textbox(label="Original Output Text"), gr.outputs.Textbox(label="Formatted JSON") ], examples=examples, title="Email Information Extraction", description="Extract key information from the provided email.") iface.launch()