Spaces:

ChatterjeeLab
/

SMILES2PEPTIDE

Running

App Files Community

yinuozhang committed on Nov 17, 2024

Commit

260f3d0

1 Parent(s): a953180

class format

Browse files

Files changed (1) hide show

app.py +45 -59

app.py CHANGED Viewed

@@ -401,7 +401,7 @@ def annotate_cyclic_structure(mol, sequence):
     AllChem.Compute2DCoords(mol)
     # Create drawer with larger size for annotations
-    drawer = Draw.rdMolDraw2D.MolDraw2DCairo(2000, 2000)  # Even larger size
     # Draw molecule first
     drawer.drawOptions().addAtomIndices = False
@@ -417,7 +417,6 @@ def annotate_cyclic_structure(mol, sequence):
         try:
             small_font = ImageFont.truetype("arial.ttf", 60)
         except OSError:
-            # If no TrueType fonts are available, fall back to default
             print("Warning: TrueType fonts not available, using default font")
             small_font = ImageFont.load_default()
@@ -432,11 +431,11 @@ def annotate_cyclic_structure(mol, sequence):
              font=small_font, fill='black', anchor="mm")
     return img
 def create_enhanced_linear_viz(sequence, smiles):
-    """
-    Create an enhanced linear representation showing segment identification process
-    with improved segment handling
-    """
     # Create figure with two subplots
     fig = plt.figure(figsize=(15, 10))
     gs = fig.add_gridspec(2, 1, height_ratios=[1, 2])
@@ -449,18 +448,12 @@ def create_enhanced_linear_viz(sequence, smiles):
     else:
         residues = sequence.split('-')
-    # Get molecule and analyze bonds
-    mol = Chem.MolFromSmiles(smiles)
-    # Split SMILES into segments for analysis
-    bond_pattern = r'(NC\(=O\)|N\(C\)C\(=O\)|N\dC\(=O\)|OC\(=O\))'
-    segments = re.split(bond_pattern, smiles)
-    segments = [s for s in segments if s]  # Remove empty segments
     # Debug print
     print(f"Number of residues: {len(residues)}")
     print(f"Number of segments: {len(segments)}")
-    print("Segments:", segments)
     # Top subplot - Basic structure
     ax_struct.set_xlim(0, 10)
@@ -481,34 +474,27 @@ def create_enhanced_linear_viz(sequence, smiles):
         # Draw connecting bonds if not the last residue
         if i < num_residues - 1:
-            # Find the next bond pattern after this residue
-            bond_segment = None
-            for j in range(len(segments)):
-                if re.match(bond_pattern, segments[j]):
-                    if j > i*2 and j//2 == i:  # Found the right bond
-                        bond_segment = segments[j]
-                        break
-            if bond_segment:
-                bond_type, is_n_methylated = identify_linkage_type(bond_segment)
-            else:
-                bond_type = 'peptide'  # Default if not found
-            bond_color = 'black' if bond_type == 'peptide' else 'red'
-            linestyle = '-' if bond_type == 'peptide' else '--'
-            # Draw bond line
-            ax_struct.plot([x_pos+0.3, x_pos+spacing-0.3], [y_pos, y_pos],
-                         color=bond_color, linestyle=linestyle, linewidth=2)
-            # Add bond type label
-            mid_x = x_pos + spacing/2
-            bond_label = f"{bond_type}"
-            if is_n_methylated:
-                bond_label += "\n(N-Me)"
-            ax_struct.text(mid_x, y_pos+0.1, bond_label,
-                         ha='center', va='bottom', fontsize=10,
-                         color=bond_color)
         # Add residue label
         ax_struct.text(x_pos, y_pos-0.5, residues[i],
@@ -523,27 +509,27 @@ def create_enhanced_linear_viz(sequence, smiles):
     for i, segment in enumerate(segments):
         y = segment_y - i
-        # Check if this is a bond segment
-        if re.match(bond_pattern, segment):
-            bond_type, is_n_methylated = identify_linkage_type(segment)
-            text = f"Bond {i//2 + 1}: {bond_type}"
-            if is_n_methylated:
-                text += " (N-methylated)"
-            color = 'red'
-        else:
-            # Get next and previous segments for context
-            next_seg = segments[i+1] if i+1 < len(segments) else None
-            prev_seg = segments[i-1] if i > 0 else None
-            residue, modifications = identify_residue(segment, next_seg, prev_seg)
-            text = f"Residue {i//2 + 1}: {residue}"
-            if modifications:
-                text += f" ({', '.join(modifications)})"
             color = 'blue'
         # Add segment analysis
         ax_detail.text(0.05, y, text, fontsize=12, color=color)
-        ax_detail.text(0.5, y, f"SMILES: {segment}", fontsize=10, color='gray')
     # If cyclic, add connection indicator
     if sequence.startswith('cyclo('):
@@ -684,7 +670,7 @@ def process_input(smiles_input=None, file_obj=None, show_linear=False):
             return f"Error processing file: {str(e)}", None, None
     return "No input provided.", None, None
-# Create Gradio interface with simplified examples
 iface = gr.Interface(
     fn=process_input,
     inputs=[

     AllChem.Compute2DCoords(mol)
     # Create drawer with larger size for annotations
+    drawer = Draw.rdMolDraw2D.MolDraw2DCairo(2000, 2000)
     # Draw molecule first
     drawer.drawOptions().addAtomIndices = False
         try:
             small_font = ImageFont.truetype("arial.ttf", 60)
         except OSError:
             print("Warning: TrueType fonts not available, using default font")
             small_font = ImageFont.load_default()
              font=small_font, fill='black', anchor="mm")
     return img
 def create_enhanced_linear_viz(sequence, smiles):
+    """Create an enhanced linear representation using PeptideAnalyzer"""
+    analyzer = PeptideAnalyzer()  # Create analyzer instance
     # Create figure with two subplots
     fig = plt.figure(figsize=(15, 10))
     gs = fig.add_gridspec(2, 1, height_ratios=[1, 2])
     else:
         residues = sequence.split('-')
+    # Get segments using analyzer
+    segments = analyzer.split_on_bonds(smiles)
     # Debug print
     print(f"Number of residues: {len(residues)}")
     print(f"Number of segments: {len(segments)}")
     # Top subplot - Basic structure
     ax_struct.set_xlim(0, 10)
         # Draw connecting bonds if not the last residue
         if i < num_residues - 1:
+            segment = segments[i] if i < len(segments) else None
+            if segment:
+                # Determine bond type from segment info
+                bond_type = 'ester' if 'O-linked' in segment.get('bond_after', '') else 'peptide'
+                is_n_methylated = 'N-Me' in segment.get('bond_after', '')
+                bond_color = 'red' if bond_type == 'ester' else 'black'
+                linestyle = '--' if bond_type == 'ester' else '-'
+                # Draw bond line
+                ax_struct.plot([x_pos+0.3, x_pos+spacing-0.3], [y_pos, y_pos],
+                             color=bond_color, linestyle=linestyle, linewidth=2)
+                # Add bond type label
+                mid_x = x_pos + spacing/2
+                bond_label = f"{bond_type}"
+                if is_n_methylated:
+                    bond_label += "\n(N-Me)"
+                ax_struct.text(mid_x, y_pos+0.1, bond_label,
+                             ha='center', va='bottom', fontsize=10,
+                             color=bond_color)
         # Add residue label
         ax_struct.text(x_pos, y_pos-0.5, residues[i],
     for i, segment in enumerate(segments):
         y = segment_y - i
+        # Check if this is a bond or residue
+        residue, mods = analyzer.identify_residue(segment)
+        if residue:
+            text = f"Residue {i+1}: {residue}"
+            if mods:
+                text += f" ({', '.join(mods)})"
             color = 'blue'
+        else:
+            # Must be a bond
+            text = f"Bond {i}: "
+            if 'O-linked' in segment.get('bond_after', ''):
+                text += "ester"
+            elif 'N-Me' in segment.get('bond_after', ''):
+                text += "peptide (N-methylated)"
+            else:
+                text += "peptide"
+            color = 'red'
         # Add segment analysis
         ax_detail.text(0.05, y, text, fontsize=12, color=color)
+        ax_detail.text(0.5, y, f"SMILES: {segment.get('content', '')}", fontsize=10, color='gray')
     # If cyclic, add connection indicator
     if sequence.startswith('cyclo('):
             return f"Error processing file: {str(e)}", None, None
     return "No input provided.", None, None
 iface = gr.Interface(
     fn=process_input,
     inputs=[