"""Gradio app that re-orders the three address columns of an Excel sheet.

Every data row is expected to carry three address fragments in columns 4, 5
and 6.  The fragments are sorted into three categories:

1. school / department / laboratory
2. university / polytechnic
3. state / district / country

Keyword and country-name matching is tried first; a zero-shot classifier is
used as a tie-breaker when the keywords are not conclusive.
"""

import tempfile

import gradio as gr
import openpyxl
import pycountry
from deep_translator import GoogleTranslator
from langdetect import detect
from langdetect.lang_detect_exception import LangDetectException
from transformers import pipeline

classifier = pipeline(
    "zero-shot-classification",
    model="LogicSpine/address-large-text-classifier",
)

# Substrings that mark an address as belonging to category 1 / category 2.
_FIRST_KEYWORDS = ("school", "laboratory", "department")
_SECOND_KEYWORDS = ("university",)

# Former or colloquial country names that pycountry does not list.
_OLD_COUNTRY_NAMES = (
    "burma", "ceylon", "persia", "zaire", "upper volta", "swaziland",
    "macedonia", "czech republic", "turkey", "holland", "kampuchea",
    "dahomey", "bechuanaland", "gold coast", "nyasaland", "korea",
    "russia", "usa", "uk",
)

# Built once at import time; check_for_third() is called many times per row.
_COUNTRY_NAMES = frozenset(
    country.name.lower() for country in pycountry.countries
) | frozenset(_OLD_COUNTRY_NAMES)

# Candidate labels handed to the zero-shot classifier for each category.
_CATEGORY_LABELS = {
    1: ["School", "Department", "Laboratory"],
    2: ["University", "Polytechnic"],
    3: ["State", "District", "Country"],
}

# 1-based spreadsheet columns that hold the three address fragments.
_ADDRESS_COLUMNS = (4, 5, 6)

# Processing stops once more than this many consecutive rows lack an address.
_MAX_INCOMPLETE_ROWS = 3

# Resolution rules for the case where the keyword pass put the same address
# into two slots.  Each entry is
#   (slot_a, slot_b, mid passed to compaire_two, slot if True, slot if False).
# NOTE(review): for the (1, 2) rule a True result ("the unassigned address has
# more category-1 priority") moves that address into slot 2, which is the
# opposite of the (1, 3) rule.  This mirrors the original behaviour; confirm
# whether mid=2 or swapped targets were intended.
_DUPLICATE_RULES = (
    (1, 2, 1, 2, 1),
    (1, 3, 1, 1, 3),
    (2, 3, 3, 3, 2),
)


def translate_text(text: str):
    """Return ``text`` translated to English.

    Args:
        text (str): Text in any language.

    Returns:
        str: The stripped text itself when it is empty, already English, or
        its language cannot be detected; otherwise the English translation.
    """
    text = text.strip()
    if not text:
        return text
    try:
        detected_lang = detect(text)
    except LangDetectException:
        # Raised for text without usable features (digits, punctuation only).
        return text
    if detected_lang == 'en':
        return text
    translated = GoogleTranslator(source='auto', target='en').translate(text)
    # Fall back to the input if the translator yields nothing.
    return translated if translated else text


def check_for_third(address: str) -> bool:
    """Tell whether any comma-separated part of ``address`` is a country name.

    Args:
        address (str): Address fragment to inspect.

    Returns:
        bool: True if one part equals a known (current or former) country
        name, ignoring case and surrounding whitespace.
    """
    return any(
        part.lower().strip() in _COUNTRY_NAMES for part in address.split(",")
    )


def check_for_first(address: str) -> bool:
    """Tell whether ``address`` contains a category-1 keyword.

    Args:
        address (str): Address fragment to inspect.

    Returns:
        bool: True if "school", "laboratory" or "department" occurs in it.
    """
    lowered = address.lower()
    return any(keyword in lowered for keyword in _FIRST_KEYWORDS)


def check_for_second(address: str) -> bool:
    """Tell whether ``address`` contains a category-2 keyword.

    Args:
        address (str): Address fragment to inspect.

    Returns:
        bool: True if "university" occurs in it.
    """
    lowered = address.lower().strip()
    return any(keyword in lowered for keyword in _SECOND_KEYWORDS)


# Keyword shortcut for each category, tried before the classifier is asked.
_KEYWORD_CHECKS = {
    1: check_for_first,
    2: check_for_second,
    3: check_for_third,
}


def _best_label_score(text, labels):
    """Return the highest classifier score of ``text`` over ``labels``.

    ``multi_label=True`` scores every label independently.  In the default
    single-label mode the scores of one call are normalised to sum to 1, so
    they cannot be compared between texts or between label groups.
    """
    result = classifier(text, labels, multi_label=True)
    return max(result["scores"])


def compaire_two(bigger: str, smaller: str, mid: int) -> bool:
    """Decide whether ``bigger`` has more priority than ``smaller``.

    Args:
        bigger (str): Address expected to have the higher priority.
        smaller (str): Address it is compared against.
        mid (int): Category to compare within: 1, 2 or 3.

    Raises:
        ValueError: If ``mid`` is not 1, 2 or 3.

    Returns:
        bool: True if ``bigger`` matches a keyword of the category, or scores
        strictly higher than ``smaller`` on the category labels.
    """
    if mid not in _CATEGORY_LABELS:
        raise ValueError(f"Invalid value passed in mid : {mid}")
    if _KEYWORD_CHECKS[mid](bigger):
        return True
    labels = _CATEGORY_LABELS[mid]
    return _best_label_score(bigger, labels) > _best_label_score(smaller, labels)


def get_ai_position(address: str) -> int:
    """Find the category of ``address``, using the classifier if needed.

    Args:
        address (str): Address fragment to classify.

    Returns:
        int: 1, 2 or 3 (the 4th, 5th and 6th sheet column respectively).
        Keyword matches win; otherwise the best-scoring category is returned,
        with ties going to the lower number.
    """
    for category, matches in _KEYWORD_CHECKS.items():
        if matches(address):
            return category
    scores = {
        category: _best_label_score(address, labels)
        for category, labels in _CATEGORY_LABELS.items()
    }
    # max() returns the first maximal item, so ties resolve to 1, then 2.
    return max((1, 2, 3), key=scores.__getitem__)


def compare_by_mid(bigger: int, smaller: int, address: str, threshold: float = 0.1) -> bool:
    """Decide whether ``address`` belongs to category ``bigger``.

    Args:
        bigger (int): Preferred category: 1 or 2; anything else means 3.
        smaller (int): Competing category: 1 or 2; anything else means 3.
        address (str): Address fragment to place.
        threshold (float): Minimum margin by which the score for ``bigger``
            must exceed the score for ``smaller``.

    Returns:
        bool: True if ``address`` matches a keyword of ``bigger`` or beats
        ``smaller`` by more than ``threshold``; False otherwise.
    """
    bigger = bigger if bigger in (1, 2) else 3
    smaller = smaller if smaller in (1, 2) else 3
    if _KEYWORD_CHECKS[bigger](address):
        return True
    score_bigger = _best_label_score(address, _CATEGORY_LABELS[bigger])
    score_smaller = _best_label_score(address, _CATEGORY_LABELS[smaller])
    # The original subtracted the other way round, which returned True when
    # the address looked like ``smaller`` and contradicted the keyword path.
    return score_bigger - score_smaller > threshold


def _missing_addresses(candidates, assigned):
    """Return the distinct ``candidates`` absent from ``assigned``, in order."""
    taken = set(assigned)
    missing = []
    for candidate in candidates:
        if candidate not in taken and candidate not in missing:
            missing.append(candidate)
    return missing


def find_missing_data(data1: str, data2: str, data3: str, var1: str, var2: str, var3: str) -> str:
    """Report which of the three addresses has not been assigned yet.

    Args:
        data1 (str): First address.
        data2 (str): Second address.
        data3 (str): Third address.
        var1 (str): Value currently held by the first slot.
        var2 (str): Value currently held by the second slot.
        var3 (str): Value currently held by the third slot.

    Returns:
        str: The unassigned addresses joined with ", " in input order, or
        "All data has been assigned correctly." when nothing is missing.
    """
    missing = _missing_addresses((data1, data2, data3), (var1, var2, var3))
    if missing:
        return ', '.join(missing)
    return "All data has been assigned correctly."


def _pop_first_unused(pool, used, fallback):
    """Remove and return the first item of ``pool`` not in ``used``.

    Returns ``fallback`` (leaving ``pool`` untouched) if every item is used.
    """
    for index, item in enumerate(pool):
        if item not in used:
            return pool.pop(index)
    return fallback


def swapper(i1, i2, i3):
    """Re-check three addresses by keyword after translating them to English.

    Args:
        i1: Address currently in the first slot.
        i2: Address currently in the second slot.
        i3: Address currently in the third slot.

    Returns:
        tuple: ``(first, second, third)``.  Slots that no keyword claims are
        filled with the inputs that were left over, in input order.
    """
    first, second, third = None, None, None
    first_candidates = []
    translations = {}

    def english(value):
        # Translation is a network call, so do it once per distinct value.
        if value not in translations:
            translations[value] = translate_text(value)
        return translations[value]

    for data in (i1, i2, i3):
        text = english(data)
        if check_for_third(text):
            if third is None:
                third = data
            else:
                # A later country match takes the third slot; the address it
                # displaces is re-evaluated for the other two slots.
                # NOTE(review): SOURCE indentation was lost; this re-check is
                # assumed to belong to the displacement branch.
                third, data = data, third
                text = english(data)
                if check_for_first(text):
                    first_candidates.append(data)
                elif check_for_second(text):
                    second = data
        elif check_for_first(text):
            first_candidates.append(data)
        elif check_for_second(text):
            # The latest match wins; an earlier one is recovered below.
            second = data

    if first_candidates:
        first = first_candidates[0]
        if len(first_candidates) > 1 and second is None:
            second = first_candidates[1]

    remaining_data = [i1, i2, i3]
    if first is None:
        first = _pop_first_unused(remaining_data, {first, second, third}, i1)
    if second is None:
        second = _pop_first_unused(remaining_data, {first, second, third}, i2)
    if third is None:
        third = _pop_first_unused(remaining_data, {first, second, third}, i3)
    return first, second, third


def _place_by_position(slots, addresses, address, position):
    """Apply one address's classifier position to the slot assignment.

    ``address`` takes the slot named by ``position`` if that slot is empty.
    If either of the two other slots is still unset, both are re-filled with
    the two addresses that do not occupy the pivot slot, ordered by
    ``compare_by_mid``.  ``slots`` is modified in place.
    """
    pivot = position if position in (2, 3) else 1
    if not slots[pivot]:
        slots[pivot] = address
    # NOTE(review): SOURCE indentation was lost; the re-fill below is assumed
    # to run whether or not the pivot slot was just filled.
    low, high = (category for category in (1, 2, 3) if category != pivot)
    if slots[low] is not None and slots[high] is not None:
        return
    for index, candidate in enumerate(addresses):
        if slots[pivot] == candidate:
            probe, other = (
                item for i, item in enumerate(addresses) if i != index
            )
            if compare_by_mid(low, high, probe):
                slots[low], slots[high] = probe, other
            else:
                slots[low], slots[high] = other, probe
            return


def settle_all_address(address_first: str, address_second: str, address_third: str):
    """Sort three address fragments into the three categories.

    Args:
        address_first (str): Value found in the first address column.
        address_second (str): Value found in the second address column.
        address_third (str): Value found in the third address column.

    Returns:
        tuple: ``(first, second, third)`` as produced by ``swapper``.
    """
    addresses = (address_first, address_second, address_third)

    # Keyword pass: each slot takes the first address matching its category.
    slots = {
        category: next((item for item in addresses if matches(item)), None)
        for category, matches in _KEYWORD_CHECKS.items()
    }

    if slots[1] == slots[2] == slots[3]:
        # One address claimed every slot (or none matched): start over.
        slots = dict.fromkeys(slots)
    else:
        for slot_a, slot_b, mid, if_true, if_false in _DUPLICATE_RULES:
            if slots[slot_a] is not None and slots[slot_a] == slots[slot_b]:
                missing = _missing_addresses(addresses, slots.values())
                # Only a real, still unassigned address may replace one of
                # the duplicates.  The first one in input order is used so
                # that two addresses are never merged into a single cell.
                if missing:
                    candidate = missing[0]
                    wins = compaire_two(candidate, slots[slot_a], mid)
                    slots[if_true if wins else if_false] = candidate
                break

    if any(value is None for value in slots.values()):
        positions = [get_ai_position(item) for item in addresses]
        for item, position in zip(addresses, positions):
            _place_by_position(slots, addresses, item, position)

    return swapper(slots[1], slots[2], slots[3])


def process_file(filepath: str):
    """Re-order the address columns of a workbook and save the result.

    Columns 4-6 of every row that has all three addresses are rewritten in
    category order; all other columns are copied unchanged.  Rows lacking an
    address keep their other columns but get empty address cells.

    Args:
        filepath (str): Path of the workbook to read.

    Returns:
        str: Path of a new temporary ``.xlsx`` file holding the result.
    """
    wb = openpyxl.load_workbook(filepath, data_only=True)
    ws = wb.active
    new_wb = openpyxl.Workbook()
    new_ws = new_wb.active

    for col in range(1, ws.max_column + 1):
        new_ws.cell(row=1, column=col).value = ws.cell(row=1, column=col).value

    incomplete_rows = 0
    for row in ws.iter_rows(min_row=2, max_row=ws.max_row):
        # A run of rows without addresses marks the end of the data.
        if incomplete_rows > _MAX_INCOMPLETE_ROWS:
            break
        row_num = row[0].row
        for col in range(1, ws.max_column + 1):
            if col not in _ADDRESS_COLUMNS:
                new_ws.cell(row=row_num, column=col).value = ws.cell(row=row_num, column=col).value
            else:
                new_ws.cell(row=row_num, column=col).value = None

        values = [ws.cell(row=row_num, column=col).value for col in _ADDRESS_COLUMNS]
        if any(value is None for value in values):
            incomplete_rows += 1
            continue
        # Count consecutive gaps only, so scattered incomplete rows do not
        # cut the output short.
        incomplete_rows = 0

        # Numeric or date cells would break the string handling downstream.
        ad1, ad2, ad3 = settle_all_address(*(str(value) for value in values))
        for col, value in zip(_ADDRESS_COLUMNS, (ad1, ad2, ad3)):
            new_ws.cell(row=row_num, column=col).value = value
        print(f"Adding : {ad1} | {ad2} | {ad3}")

    # Close the handle before saving; the file is kept (delete=False) so the
    # caller can serve it.
    with tempfile.NamedTemporaryFile(delete=False, suffix='.xlsx') as temp_file:
        temp_path = temp_file.name
    new_wb.save(temp_path)
    return temp_path


def gradio_process(file):
    """Gradio callback: process the uploaded workbook.

    Args:
        file: Uploaded file, either an object with a ``name`` attribute
            holding its path or the path itself.

    Raises:
        gr.Error: If no file was uploaded.

    Returns:
        str: Path of the processed workbook.
    """
    if file is None:
        raise gr.Error("Please upload an Excel file first.")
    file_path = getattr(file, "name", file)
    return process_file(file_path)


iface = gr.Interface(
    fn=gradio_process,
    inputs=gr.File(),
    outputs=gr.File(),
    title="AI Address Processor",
    description="Upload an Excel file, and the AI will process the addresses."
)

# Launched at import time, as in the original script.
iface.launch()