Spaces:

Jayesh13
/

Homo_hetero

Sleeping

App Files Files Community

Jayesh13 committed 20 days ago

Commit

5a986b1

verified ·

1 Parent(s): 33d056e

Update app.py

Browse files

Files changed (1) hide show

app.py +20 -9

app.py CHANGED Viewed

@@ -7,6 +7,10 @@ import xlsxwriter
 from io import BytesIO
 from collections import defaultdict
 def find_homorepeats(protein):
     n = len(protein)
     freq = defaultdict(int)
@@ -60,26 +64,32 @@ def process_protein_sequence(sequence, analysis_type, overlap=50):
     fragments = fragment_protein_sequence(sequence)
     final_repeats = defaultdict(int)
     homo_repeats = {}
-    for fragment in fragments:
-        if analysis_type in ["Hetero", "Both"]:
             fragment_repeats = find_hetero_amino_acid_repeats(fragment)
             for k, v in fragment_repeats.items():
                 final_repeats[k] += v
-    if analysis_type in ["Hetero", "Both"]:
         final_repeats = check_boundary_repeats(fragments, final_repeats, overlap)
         new_repeats = find_new_boundary_repeats(fragments, final_repeats, overlap)
         for k, v in new_repeats.items():
             final_repeats[k] += v
     if analysis_type in ["Homo", "Both"]:
         homo_repeats = find_homorepeats(sequence)
         for k, v in homo_repeats.items():
             final_repeats[k] += v
-    if analysis_type == "Hetero" or "Both":
-        for k, v in homo_repeats.items():
-            if k in final_repeats:
-                final_repeats[k] -= v
-                if final_repeats[k] <= 0:
-                    del final_repeats[k]
     return final_repeats
 def process_excel(excel_data, analysis_type):
@@ -124,6 +134,7 @@ def create_excel(sequences_data, repeats, filenames):
     output.seek(0)
     return output
 st.title("Protein Repeat Analysis")
 analysis_type = st.radio("Select analysis type:", ["Homo", "Hetero", "Both"], index=2)
 uploaded_files = st.file_uploader("Upload Excel files", accept_multiple_files=True, type=["xlsx"])

 from io import BytesIO
 from collections import defaultdict
+# Detect homo repeats like AA, AAA, etc.
+def is_homo_repeat(s):
+    return all(c == s[0] for c in s)
 def find_homorepeats(protein):
     n = len(protein)
     freq = defaultdict(int)
     fragments = fragment_protein_sequence(sequence)
     final_repeats = defaultdict(int)
     homo_repeats = {}
+    if analysis_type in ["Hetero", "Both"]:
+        for fragment in fragments:
             fragment_repeats = find_hetero_amino_acid_repeats(fragment)
             for k, v in fragment_repeats.items():
                 final_repeats[k] += v
         final_repeats = check_boundary_repeats(fragments, final_repeats, overlap)
         new_repeats = find_new_boundary_repeats(fragments, final_repeats, overlap)
         for k, v in new_repeats.items():
             final_repeats[k] += v
     if analysis_type in ["Homo", "Both"]:
         homo_repeats = find_homorepeats(sequence)
         for k, v in homo_repeats.items():
             final_repeats[k] += v
+    # Remove homo from hetero-only results
+    if analysis_type == "Hetero":
+        for k in list(final_repeats.keys()):
+            if is_homo_repeat(k):
+                del final_repeats[k]
+    # Keep only homo repeats for homo-only results
+    if analysis_type == "Homo":
+        final_repeats = {k: v for k, v in final_repeats.items() if is_homo_repeat(k)}
     return final_repeats
 def process_excel(excel_data, analysis_type):
     output.seek(0)
     return output
+# Streamlit UI
 st.title("Protein Repeat Analysis")
 analysis_type = st.radio("Select analysis type:", ["Homo", "Hetero", "Both"], index=2)
 uploaded_files = st.file_uploader("Upload Excel files", accept_multiple_files=True, type=["xlsx"])