Jayesh13 committed on
Commit
5a986b1
·
verified ·
1 Parent(s): 33d056e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -9
app.py CHANGED
@@ -7,6 +7,10 @@ import xlsxwriter
7
  from io import BytesIO
8
  from collections import defaultdict
9
 
 
 
 
 
10
  def find_homorepeats(protein):
11
  n = len(protein)
12
  freq = defaultdict(int)
@@ -60,26 +64,32 @@ def process_protein_sequence(sequence, analysis_type, overlap=50):
60
  fragments = fragment_protein_sequence(sequence)
61
  final_repeats = defaultdict(int)
62
  homo_repeats = {}
63
- for fragment in fragments:
64
- if analysis_type in ["Hetero", "Both"]:
 
65
  fragment_repeats = find_hetero_amino_acid_repeats(fragment)
66
  for k, v in fragment_repeats.items():
67
  final_repeats[k] += v
68
- if analysis_type in ["Hetero", "Both"]:
69
  final_repeats = check_boundary_repeats(fragments, final_repeats, overlap)
70
  new_repeats = find_new_boundary_repeats(fragments, final_repeats, overlap)
71
  for k, v in new_repeats.items():
72
  final_repeats[k] += v
 
73
  if analysis_type in ["Homo", "Both"]:
74
  homo_repeats = find_homorepeats(sequence)
75
  for k, v in homo_repeats.items():
76
  final_repeats[k] += v
77
- if analysis_type == "Hetero" or "Both":
78
- for k, v in homo_repeats.items():
79
- if k in final_repeats:
80
- final_repeats[k] -= v
81
- if final_repeats[k] <= 0:
82
- del final_repeats[k]
 
 
 
 
 
83
  return final_repeats
84
 
85
  def process_excel(excel_data, analysis_type):
@@ -124,6 +134,7 @@ def create_excel(sequences_data, repeats, filenames):
124
  output.seek(0)
125
  return output
126
 
 
127
  st.title("Protein Repeat Analysis")
128
  analysis_type = st.radio("Select analysis type:", ["Homo", "Hetero", "Both"], index=2)
129
  uploaded_files = st.file_uploader("Upload Excel files", accept_multiple_files=True, type=["xlsx"])
 
7
  from io import BytesIO
8
  from collections import defaultdict
9
 
10
+ # Detect homo repeats like AA, AAA, etc.
11
+ def is_homo_repeat(s):
12
+ return all(c == s[0] for c in s)
13
+
14
  def find_homorepeats(protein):
15
  n = len(protein)
16
  freq = defaultdict(int)
 
64
  fragments = fragment_protein_sequence(sequence)
65
  final_repeats = defaultdict(int)
66
  homo_repeats = {}
67
+
68
+ if analysis_type in ["Hetero", "Both"]:
69
+ for fragment in fragments:
70
  fragment_repeats = find_hetero_amino_acid_repeats(fragment)
71
  for k, v in fragment_repeats.items():
72
  final_repeats[k] += v
 
73
  final_repeats = check_boundary_repeats(fragments, final_repeats, overlap)
74
  new_repeats = find_new_boundary_repeats(fragments, final_repeats, overlap)
75
  for k, v in new_repeats.items():
76
  final_repeats[k] += v
77
+
78
  if analysis_type in ["Homo", "Both"]:
79
  homo_repeats = find_homorepeats(sequence)
80
  for k, v in homo_repeats.items():
81
  final_repeats[k] += v
82
+
83
+ # Remove homo from hetero-only results
84
+ if analysis_type == "Hetero":
85
+ for k in list(final_repeats.keys()):
86
+ if is_homo_repeat(k):
87
+ del final_repeats[k]
88
+
89
+ # Keep only homo repeats for homo-only results
90
+ if analysis_type == "Homo":
91
+ final_repeats = {k: v for k, v in final_repeats.items() if is_homo_repeat(k)}
92
+
93
  return final_repeats
94
 
95
  def process_excel(excel_data, analysis_type):
 
134
  output.seek(0)
135
  return output
136
 
137
+ # Streamlit UI
138
  st.title("Protein Repeat Analysis")
139
  analysis_type = st.radio("Select analysis type:", ["Homo", "Hetero", "Both"], index=2)
140
  uploaded_files = st.file_uploader("Upload Excel files", accept_multiple_files=True, type=["xlsx"])