Jayesh13 committed on
Commit
02632f9
·
verified ·
1 Parent(s): 5a986b1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -16
app.py CHANGED
@@ -7,7 +7,7 @@ import xlsxwriter
7
  from io import BytesIO
8
  from collections import defaultdict
9
 
10
- # Detect homo repeats like AA, AAA, etc.
11
  def is_homo_repeat(s):
12
  return all(c == s[0] for c in s)
13
 
@@ -63,9 +63,8 @@ def find_new_boundary_repeats(fragments, final_repeats, overlap=50):
63
  def process_protein_sequence(sequence, analysis_type, overlap=50):
64
  fragments = fragment_protein_sequence(sequence)
65
  final_repeats = defaultdict(int)
66
- homo_repeats = {}
67
 
68
- if analysis_type in ["Hetero", "Both"]:
69
  for fragment in fragments:
70
  fragment_repeats = find_hetero_amino_acid_repeats(fragment)
71
  for k, v in fragment_repeats.items():
@@ -74,21 +73,28 @@ def process_protein_sequence(sequence, analysis_type, overlap=50):
74
  new_repeats = find_new_boundary_repeats(fragments, final_repeats, overlap)
75
  for k, v in new_repeats.items():
76
  final_repeats[k] += v
 
77
 
78
- if analysis_type in ["Homo", "Both"]:
79
- homo_repeats = find_homorepeats(sequence)
80
- for k, v in homo_repeats.items():
81
- final_repeats[k] += v
82
 
83
- # Remove homo from hetero-only results
84
- if analysis_type == "Hetero":
85
- for k in list(final_repeats.keys()):
86
- if is_homo_repeat(k):
87
- del final_repeats[k]
 
 
 
 
 
 
88
 
89
- # Keep only homo repeats for homo-only results
90
- if analysis_type == "Homo":
91
- final_repeats = {k: v for k, v in final_repeats.items() if is_homo_repeat(k)}
 
 
92
 
93
  return final_repeats
94
 
@@ -134,7 +140,6 @@ def create_excel(sequences_data, repeats, filenames):
134
  output.seek(0)
135
  return output
136
 
137
- # Streamlit UI
138
  st.title("Protein Repeat Analysis")
139
  analysis_type = st.radio("Select analysis type:", ["Homo", "Hetero", "Both"], index=2)
140
  uploaded_files = st.file_uploader("Upload Excel files", accept_multiple_files=True, type=["xlsx"])
 
7
  from io import BytesIO
8
  from collections import defaultdict
9
 
10
+ # Utility to check homo repeat
11
  def is_homo_repeat(s):
12
  return all(c == s[0] for c in s)
13
 
 
63
  def process_protein_sequence(sequence, analysis_type, overlap=50):
64
  fragments = fragment_protein_sequence(sequence)
65
  final_repeats = defaultdict(int)
 
66
 
67
+ if analysis_type == "Hetero":
68
  for fragment in fragments:
69
  fragment_repeats = find_hetero_amino_acid_repeats(fragment)
70
  for k, v in fragment_repeats.items():
 
73
  new_repeats = find_new_boundary_repeats(fragments, final_repeats, overlap)
74
  for k, v in new_repeats.items():
75
  final_repeats[k] += v
76
+ final_repeats = {k: v for k, v in final_repeats.items() if not is_homo_repeat(k)}
77
 
78
+ elif analysis_type == "Homo":
79
+ final_repeats = find_homorepeats(sequence)
 
 
80
 
81
+ elif analysis_type == "Both":
82
+ hetero_repeats = defaultdict(int)
83
+ for fragment in fragments:
84
+ fragment_repeats = find_hetero_amino_acid_repeats(fragment)
85
+ for k, v in fragment_repeats.items():
86
+ hetero_repeats[k] += v
87
+ hetero_repeats = check_boundary_repeats(fragments, hetero_repeats, overlap)
88
+ new_repeats = find_new_boundary_repeats(fragments, hetero_repeats, overlap)
89
+ for k, v in new_repeats.items():
90
+ hetero_repeats[k] += v
91
+ hetero_repeats = {k: v for k, v in hetero_repeats.items() if not is_homo_repeat(k)}
92
 
93
+ homo_repeats = find_homorepeats(sequence)
94
+
95
+ final_repeats = homo_repeats.copy()
96
+ for k, v in hetero_repeats.items():
97
+ final_repeats[k] += v
98
 
99
  return final_repeats
100
 
 
140
  output.seek(0)
141
  return output
142
 
 
143
  st.title("Protein Repeat Analysis")
144
  analysis_type = st.radio("Select analysis type:", ["Homo", "Hetero", "Both"], index=2)
145
  uploaded_files = st.file_uploader("Upload Excel files", accept_multiple_files=True, type=["xlsx"])