|
def get_span(orig, new, editType):
    """Locate the edited word span in the original and the edited sentence.

    Both sentences are tokenised with ``str.split(" ")`` and every index
    returned is a 0-based position in the corresponding token list.

    Args:
        orig: The original sentence.
        new: The edited sentence.
        editType: One of ``"deletion"``, ``"insertion"`` or
            ``"substitution"``.

    Returns:
        A tuple ``(orig_span, new_span)`` of two-element ``[start, end]``
        lists with inclusive bounds.

        * deletion: ``orig_span`` covers the removed words, where ``i`` is
          the first index at which the sentences differ; ``new_span`` is
          ``[i - 1, i]``.
        * insertion: the mirror image; ``new_span`` covers the added words
          and ``orig_span`` is ``[i - 1, i]``.
        * substitution: each span runs from the first differing word
          (scanning from the front) to the last differing word (scanning
          from the back) of its own sentence.

        For an edit at the very beginning the ``[i - 1, i]`` pair starts at
        ``-1``; for an edit at the very end its second element equals the
        length of the shorter token list.

    Raises:
        AssertionError: If the relative sentence lengths contradict
            ``editType`` (deletion/insertion), or if a substitution shows no
            differing word within the overlapping prefix.
        RuntimeError: If ``editType`` is unknown, or if a substitution has a
            differing word from the front but none from the back.

    Note:
        For deletion and insertion the edit is assumed to be one contiguous
        run of words; this is not verified.
    """
    orig_list = orig.split(" ")
    new_list = new.split(" ")

    if editType == "deletion":
        # Raised explicitly (rather than via ``assert``) so the check still
        # runs under ``python -O``; the exception type is unchanged.
        if len(orig_list) <= len(new_list):
            raise AssertionError(
                f"the edit type is deletion, but new is not shorter than original:\n"
                f" new: {new}\n orig: {orig}"
            )
        diff = len(orig_list) - len(new_list)
        # When ``new`` is a prefix of ``orig`` the helper returns
        # len(new_list), i.e. the removed words sit at the end.
        i = _first_mismatch(orig_list, new_list)
        return [i, i + diff - 1], [i - 1, i]

    if editType == "insertion":
        if len(orig_list) >= len(new_list):
            raise AssertionError(
                f"the edit type is insertion, but the new is not longer than the original:\n"
                f" new: {new}\n orig: {orig}"
            )
        diff = len(new_list) - len(orig_list)
        # When ``orig`` is a prefix of ``new`` the added words sit at the end.
        i = _first_mismatch(orig_list, new_list)
        return [i - 1, i], [i, i + diff - 1]

    if editType != "substitution":
        raise RuntimeError(f"editType unknown: {editType}")

    start = _first_mismatch(orig_list, new_list)
    if start == min(len(orig_list), len(new_list)):
        # No differing word in the overlapping prefix.
        raise AssertionError("new_span: [], orig_span: []")

    # Walk both sentences from the back to find where they start to agree
    # again; the two end indices differ when the sentence lengths differ.
    for j, (o, n) in enumerate(zip(reversed(orig_list), reversed(new_list))):
        if o != n:
            return (
                [start, len(orig_list) - j - 1],
                [start, len(new_list) - j - 1],
            )

    raise RuntimeError(
        f"wrong editing with the specified edit type:\n original: {orig}\n"
        f" new: {new}\n, editType: {editType}"
    )


def _first_mismatch(left, right):
    """Return the first index at which ``left`` and ``right`` differ.

    If one list is a prefix of the other (or they are equal), the length of
    the shorter list is returned.
    """
    for i, (a, b) in enumerate(zip(left, right)):
        if a != b:
            return i
    return min(len(left), len(right))