Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -7,7 +7,7 @@ import xlsxwriter
|
|
7 |
from io import BytesIO
|
8 |
from collections import defaultdict
|
9 |
|
10 |
-
#
|
11 |
def is_homo_repeat(s):
    """Return True when every character of ``s`` equals its first character.

    An empty ``s`` yields True, because no character differs from the first.
    """
    for ch in s:
        # Compare against the leading character; bail out on the first mismatch.
        if ch != s[0]:
            return False
    return True
|
13 |
|
@@ -63,9 +63,8 @@ def find_new_boundary_repeats(fragments, final_repeats, overlap=50):
|
|
63 |
def process_protein_sequence(sequence, analysis_type, overlap=50):
|
64 |
fragments = fragment_protein_sequence(sequence)
|
65 |
final_repeats = defaultdict(int)
|
66 |
-
homo_repeats = {}
|
67 |
|
68 |
-
if analysis_type
|
69 |
for fragment in fragments:
|
70 |
fragment_repeats = find_hetero_amino_acid_repeats(fragment)
|
71 |
for k, v in fragment_repeats.items():
|
@@ -74,21 +73,28 @@ def process_protein_sequence(sequence, analysis_type, overlap=50):
|
|
74 |
new_repeats = find_new_boundary_repeats(fragments, final_repeats, overlap)
|
75 |
for k, v in new_repeats.items():
|
76 |
final_repeats[k] += v
|
|
|
77 |
|
78 |
-
|
79 |
-
|
80 |
-
for k, v in homo_repeats.items():
|
81 |
-
final_repeats[k] += v
|
82 |
|
83 |
-
|
84 |
-
|
85 |
-
for
|
86 |
-
|
87 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
88 |
|
89 |
-
|
90 |
-
|
91 |
-
final_repeats =
|
|
|
|
|
92 |
|
93 |
return final_repeats
|
94 |
|
@@ -134,7 +140,6 @@ def create_excel(sequences_data, repeats, filenames):
|
|
134 |
output.seek(0)
|
135 |
return output
|
136 |
|
137 |
-
# Streamlit UI
|
138 |
st.title("Protein Repeat Analysis")
|
139 |
analysis_type = st.radio("Select analysis type:", ["Homo", "Hetero", "Both"], index=2)
|
140 |
uploaded_files = st.file_uploader("Upload Excel files", accept_multiple_files=True, type=["xlsx"])
|
|
|
7 |
from io import BytesIO
|
8 |
from collections import defaultdict
|
9 |
|
10 |
+
# Utility to check homo repeat
|
11 |
def is_homo_repeat(s):
    """Return True when every character of ``s`` equals its first character.

    An empty ``s`` yields True, because no character differs from the first.
    """
    for ch in s:
        # Compare against the leading character; bail out on the first mismatch.
        if ch != s[0]:
            return False
    return True
|
13 |
|
|
|
63 |
def process_protein_sequence(sequence, analysis_type, overlap=50):
|
64 |
fragments = fragment_protein_sequence(sequence)
|
65 |
final_repeats = defaultdict(int)
|
|
|
66 |
|
67 |
+
if analysis_type == "Hetero":
|
68 |
for fragment in fragments:
|
69 |
fragment_repeats = find_hetero_amino_acid_repeats(fragment)
|
70 |
for k, v in fragment_repeats.items():
|
|
|
73 |
new_repeats = find_new_boundary_repeats(fragments, final_repeats, overlap)
|
74 |
for k, v in new_repeats.items():
|
75 |
final_repeats[k] += v
|
76 |
+
final_repeats = {k: v for k, v in final_repeats.items() if not is_homo_repeat(k)}
|
77 |
|
78 |
+
elif analysis_type == "Homo":
|
79 |
+
final_repeats = find_homorepeats(sequence)
|
|
|
|
|
80 |
|
81 |
+
elif analysis_type == "Both":
|
82 |
+
hetero_repeats = defaultdict(int)
|
83 |
+
for fragment in fragments:
|
84 |
+
fragment_repeats = find_hetero_amino_acid_repeats(fragment)
|
85 |
+
for k, v in fragment_repeats.items():
|
86 |
+
hetero_repeats[k] += v
|
87 |
+
hetero_repeats = check_boundary_repeats(fragments, hetero_repeats, overlap)
|
88 |
+
new_repeats = find_new_boundary_repeats(fragments, hetero_repeats, overlap)
|
89 |
+
for k, v in new_repeats.items():
|
90 |
+
hetero_repeats[k] += v
|
91 |
+
hetero_repeats = {k: v for k, v in hetero_repeats.items() if not is_homo_repeat(k)}
|
92 |
|
93 |
+
homo_repeats = find_homorepeats(sequence)
|
94 |
+
|
95 |
+
final_repeats = homo_repeats.copy()
|
96 |
+
for k, v in hetero_repeats.items():
|
97 |
+
final_repeats[k] += v
|
98 |
|
99 |
return final_repeats
|
100 |
|
|
|
140 |
output.seek(0)
|
141 |
return output
|
142 |
|
|
|
143 |
st.title("Protein Repeat Analysis")
|
144 |
analysis_type = st.radio("Select analysis type:", ["Homo", "Hetero", "Both"], index=2)
|
145 |
uploaded_files = st.file_uploader("Upload Excel files", accept_multiple_files=True, type=["xlsx"])
|