Spaces:
Build error
Build error
# -*- coding: utf-8 -*-
"""
Created on Mon Jun 15 11:24:45 2020
@author: luol2
"""
import io | |
def nest_overlap_entity(nest_list):
    """Prune a group of overlapping entity rows down to the best candidates.

    Each row is an indexable sequence where ``row[0]`` and ``row[1]`` are the
    start and end offsets (as strings), ``row[3]`` is the concept id (called
    ``hpoid`` in the original code) and ``row[4]`` is a score parseable by
    ``float``.

    Two passes are applied in order:

    1. For every concept id keep only the highest-scoring row.
    2. Among the survivors, for every identical ``start end`` span keep only
       the highest-scoring row.

    Ties keep the row seen first. The result preserves first-seen order of
    the surviving keys.

    Args:
        nest_list: list of entity rows belonging to one overlapping group.

    Returns:
        A new list holding the retained rows (the row objects themselves are
        not copied).
    """
    # Pass 1: same concept id -> retain the higher-scoring mention.
    best_by_id = {}
    for row in nest_list:
        concept_id = row[3]
        kept = best_by_id.get(concept_id)
        if kept is None or float(row[4]) > float(kept[4]):
            best_by_id[concept_id] = row

    # Pass 2: same span, different ids -> retain the higher-scoring concept.
    # The score must come from the row being considered here; the original
    # code indexed the unfiltered input list with the filtered list's index,
    # which compared the wrong row's score once pass 1 had dropped anything.
    best_by_span = {}
    for row in best_by_id.values():
        span_key = row[0] + ' ' + row[1]
        kept = best_by_span.get(span_key)
        if kept is None or float(row[4]) > float(kept[4]):
            best_by_span[span_key] = row

    return list(best_by_span.values())
def combine_ml_dict(dict_tsv, ml_tsv, nest=True):
    """Merge dictionary-based and ML-based entity rows, document by document.

    Both inputs are text in which documents are separated by a blank line.
    Within a document the first line is a header and every following line is
    a tab-separated entity row whose first two fields are integer start and
    end offsets. Documents are paired by position, so ``ml_tsv`` must contain
    at least as many documents as ``dict_tsv``.

    For each document the rows from both sources are de-duplicated by exact
    line text, sorted by ``(start, end)``, and swept left to right into groups
    of rows that overlap (a row joins the current group unless its start lies
    beyond the furthest end seen so far). A group of one row is emitted as is;
    a larger group is reduced by ``nest_overlap_entity``.

    Args:
        dict_tsv: dictionary-matching results as text.
        ml_tsv: ML results as text; its header line is the one written out.
        nest: accepted for interface compatibility; not used by this function.

    Returns:
        The merged text: for each document the header line, the retained
        rows, and a terminating blank line.
    """
    with io.StringIO(dict_tsv) as dic_stream:
        dic_docs = dic_stream.read().strip().split('\n\n')
    with io.StringIO(ml_tsv) as ml_stream:
        ml_docs = ml_stream.read().strip().split('\n\n')

    def resolve(cluster):
        # A lone row needs no arbitration; overlapping rows get pruned.
        if len(cluster) == 1:
            return cluster
        return nest_overlap_entity(cluster)

    pieces = []
    for doc_idx, dic_doc in enumerate(dic_docs):
        dic_lines = dic_doc.split('\n')
        ml_lines = ml_docs[doc_idx].split('\n')

        # Map each distinct row text to its [start, end] span. Dictionary
        # rows go in first, so they precede ML rows when spans are equal.
        spans = {}
        for row in dic_lines[1:] + ml_lines[1:]:
            fields = row.split('\t')
            spans[row] = [int(fields[0]), int(fields[1])]
        ordered_rows = sorted(spans, key=spans.get)

        resolved = []
        if ordered_rows:
            cluster = [ordered_rows[0].split('\t')]
            right_edge = int(cluster[0][1])
            for row in ordered_rows[1:]:
                fields = row.split('\t')
                if int(fields[0]) > right_edge:
                    # Row starts past everything seen so far: close the
                    # current group and open a new one with this row.
                    resolved.extend(resolve(cluster))
                    cluster = [fields]
                else:
                    cluster.append(fields)
                right_edge = max(right_edge, int(fields[1]))
            # The last group is always non-empty and still pending.
            resolved.extend(resolve(cluster))

        pieces.append(ml_lines[0] + '\n')
        pieces.extend('\t'.join(fields) + '\n' for fields in resolved)
        pieces.append('\n')

    return ''.join(pieces)