Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -7,6 +7,10 @@ import xlsxwriter
|
|
7 |
from io import BytesIO
|
8 |
from collections import defaultdict
|
9 |
|
|
|
|
|
|
|
|
|
10 |
def find_homorepeats(protein):
|
11 |
n = len(protein)
|
12 |
freq = defaultdict(int)
|
@@ -60,26 +64,32 @@ def process_protein_sequence(sequence, analysis_type, overlap=50):
|
|
60 |
fragments = fragment_protein_sequence(sequence)
|
61 |
final_repeats = defaultdict(int)
|
62 |
homo_repeats = {}
|
63 |
-
|
64 |
-
|
|
|
65 |
fragment_repeats = find_hetero_amino_acid_repeats(fragment)
|
66 |
for k, v in fragment_repeats.items():
|
67 |
final_repeats[k] += v
|
68 |
-
if analysis_type in ["Hetero", "Both"]:
|
69 |
final_repeats = check_boundary_repeats(fragments, final_repeats, overlap)
|
70 |
new_repeats = find_new_boundary_repeats(fragments, final_repeats, overlap)
|
71 |
for k, v in new_repeats.items():
|
72 |
final_repeats[k] += v
|
|
|
73 |
if analysis_type in ["Homo", "Both"]:
|
74 |
homo_repeats = find_homorepeats(sequence)
|
75 |
for k, v in homo_repeats.items():
|
76 |
final_repeats[k] += v
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
|
|
|
|
|
|
|
|
|
|
83 |
return final_repeats
|
84 |
|
85 |
def process_excel(excel_data, analysis_type):
|
@@ -124,6 +134,7 @@ def create_excel(sequences_data, repeats, filenames):
|
|
124 |
output.seek(0)
|
125 |
return output
|
126 |
|
|
|
127 |
st.title("Protein Repeat Analysis")
|
128 |
analysis_type = st.radio("Select analysis type:", ["Homo", "Hetero", "Both"], index=2)
|
129 |
uploaded_files = st.file_uploader("Upload Excel files", accept_multiple_files=True, type=["xlsx"])
|
|
|
7 |
from io import BytesIO
|
8 |
from collections import defaultdict
|
9 |
|
10 |
+
# Detect homo repeats like AA, AAA, etc.
|
11 |
+
def is_homo_repeat(s):
    """Return True when *s* is non-empty and consists of one repeated symbol.

    Args:
        s: The candidate repeat, e.g. ``"AAA"``. Any indexable, iterable
            sequence works; strings are the expected input.

    Returns:
        ``True`` if every element of *s* equals its first element,
        ``False`` otherwise. An empty sequence is never a homo repeat.
    """
    # all() over an empty iterable is vacuously True, so reject empty input
    # explicitly instead of reporting it as a homo repeat.
    if not s:
        return False
    first = s[0]
    return all(c == first for c in s)
|
13 |
+
|
14 |
def find_homorepeats(protein):
|
15 |
n = len(protein)
|
16 |
freq = defaultdict(int)
|
|
|
64 |
fragments = fragment_protein_sequence(sequence)
|
65 |
final_repeats = defaultdict(int)
|
66 |
homo_repeats = {}
|
67 |
+
|
68 |
+
if analysis_type in ["Hetero", "Both"]:
|
69 |
+
for fragment in fragments:
|
70 |
fragment_repeats = find_hetero_amino_acid_repeats(fragment)
|
71 |
for k, v in fragment_repeats.items():
|
72 |
final_repeats[k] += v
|
|
|
73 |
final_repeats = check_boundary_repeats(fragments, final_repeats, overlap)
|
74 |
new_repeats = find_new_boundary_repeats(fragments, final_repeats, overlap)
|
75 |
for k, v in new_repeats.items():
|
76 |
final_repeats[k] += v
|
77 |
+
|
78 |
if analysis_type in ["Homo", "Both"]:
|
79 |
homo_repeats = find_homorepeats(sequence)
|
80 |
for k, v in homo_repeats.items():
|
81 |
final_repeats[k] += v
|
82 |
+
|
83 |
+
# Remove homo from hetero-only results
|
84 |
+
if analysis_type == "Hetero":
|
85 |
+
for k in list(final_repeats.keys()):
|
86 |
+
if is_homo_repeat(k):
|
87 |
+
del final_repeats[k]
|
88 |
+
|
89 |
+
# Keep only homo repeats for homo-only results
|
90 |
+
if analysis_type == "Homo":
|
91 |
+
final_repeats = {k: v for k, v in final_repeats.items() if is_homo_repeat(k)}
|
92 |
+
|
93 |
return final_repeats
|
94 |
|
95 |
def process_excel(excel_data, analysis_type):
|
|
|
134 |
output.seek(0)
|
135 |
return output
|
136 |
|
137 |
+
# Streamlit UI
|
138 |
st.title("Protein Repeat Analysis")
|
139 |
analysis_type = st.radio("Select analysis type:", ["Homo", "Hetero", "Both"], index=2)
|
140 |
uploaded_files = st.file_uploader("Upload Excel files", accept_multiple_files=True, type=["xlsx"])
|