|
import json |
|
import os |
|
|
|
|
|
import joblib |
|
import numpy as np |
|
import pandas as pd |
|
|
|
|
|
|
|
|
|
# NOTE(review): presumably a height-to-width ratio; 1.294117647 equals
# 11 / 8.5, which would match a US Letter page — not used in the visible
# code, confirm against the code that consumes it.
const_HW = 1.294117647

# NOTE(review): presumably a reference width (pixels?) paired with const_HW —
# not used in the visible code, confirm against the code that consumes it.
const_W = 600
|
|
|
|
|
def bucket_sort(df, colmn, ymax_col="ymax", ymin_col="ymin"):
    """Group consecutive rows into numbered lines by vertical overlap.

    Rows are consumed in their existing order (``do_sorting`` in this file
    sorts by ``ymin``/``xmin`` before calling). The first row of a line fixes
    the reference ``ymax_col`` value for that line; each following row joins
    the line while its ``ymin_col`` value is strictly less than that
    reference. The first row that fails the check opens the next line.
    Line numbers start at 1.

    Args:
        df: DataFrame containing every column named in ``colmn``.
            Side effect: a ``line_number`` column set to 0 is added to (or
            overwritten on) this frame in place.
        colmn: List of column names to carry into the result; must include
            ``ymax_col`` and ``ymin_col``.
            Side effect: ``"line_number"`` is appended to this list if it is
            not already present.
        ymax_col: Name of the column compared as the line's reference value.
        ymin_col: Name of the column tested against that reference.

    Returns:
        A new DataFrame with a default index, the columns listed in
        ``colmn`` (including ``line_number``) and the rows in input order.
        All values pass through one NumPy array, so the columns share that
        array's dtype.

    Raises:
        ValueError: If ``ymax_col`` or ``ymin_col`` is not in ``colmn``.
        KeyError: If a name in ``colmn`` is not a column of ``df``.
    """
    df["line_number"] = 0
    # Appending unconditionally would create duplicate "line_number" columns
    # when a caller reuses the same list for a second call.
    if "line_number" not in colmn:
        colmn.append("line_number")

    # Explicit copy: the array is written to below, and `.values` may hand
    # back a read-only view when pandas Copy-on-Write is active.
    array_value = df[colmn].to_numpy(copy=True)

    ymax = colmn.index(ymax_col)
    ymin = colmn.index(ymin_col)
    line_no = colmn.index("line_number")

    total = len(array_value)
    start_index = 0
    line_counter = 0
    while start_index < total:
        line_counter += 1
        current_ymax = array_value[start_index][ymax]

        # The opening row always belongs to the line it opens. Without this,
        # a row whose ymax <= ymin would never be consumed and the outer
        # loop would spin forever.
        array_value[start_index][line_no] = line_counter

        next_index = start_index + 1
        while (
            next_index < total
            and current_ymax > array_value[next_index][ymin]
        ):
            array_value[next_index][line_no] = line_counter
            next_index += 1

        # The first row that did not fit opens the next line.
        start_index = next_index

    return pd.DataFrame(array_value, columns=colmn)
|
|
|
|
|
def do_sorting(df):
    """Order rows line by line, then by ``xmin`` within each line.

    The frame is sorted by ``ymin``/``xmin``, ``bucket_sort`` assigns each
    row a ``line_number``, and the rows are finally ordered by
    ``line_number`` and ``xmin``.

    Args:
        df: DataFrame with ``xmin``, ``ymin``, ``xmax`` and ``ymax`` columns.
            Side effects on this frame: it is sorted in place by
            ``ymin``/``xmin``, gains an ``idx`` column holding its index
            values, and loses any existing ``line_number`` column.

    Returns:
        A new DataFrame with the input's columns plus ``idx`` and
        ``line_number``, sorted by ``line_number`` then ``xmin``, with a
        fresh 0..n-1 index.
    """
    df.sort_values(["ymin", "xmin"], ascending=True, inplace=True)
    df["idx"] = df.index
    if "line_number" in df.columns:
        print("line number removed")
        df.drop("line_number", axis=1, inplace=True)

    req_colns = ["xmin", "ymin", "xmax", "ymax", "idx"]
    # bucket_sort writes a column into the frame it is given, so pass a copy.
    temp = bucket_sort(df.copy(), req_colns)

    # bucket_sort returns the rows in the order it received them, so the line
    # numbers are attached by position. Joining on "idx" instead would
    # multiply rows whenever the input index contains repeated labels.
    result = df.reset_index(drop=True)
    result["line_number"] = temp["line_number"].to_numpy()

    result.sort_values(["line_number", "xmin"], ascending=True, inplace=True)
    return result.reset_index(drop=True)
|
|