File size: 1,370 Bytes
0b84c0f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
# -*- coding: utf-8 -*-
"""amino_acid_composition.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1YebtHJU3a9oNapMztiEku0M2VToI_1Lm
"""

# amino_acid_composition.py

def amino_acid_composition(sequence):
    """Return the percentage of each standard amino acid in *sequence*.

    Args:
        sequence: A sized iterable of single-character residue codes,
            typically a string such as ``"ACDEFG"``.

    Returns:
        A dict keyed by the 20 one-letter codes in ``'ACDEFGHIKLMNPQRSTVWY'``
        (in that order), mapping each code to its share of the sequence as a
        float percentage. An empty sequence yields ``0.0`` for every code.

    Notes:
        Matching is case-sensitive, so lowercase letters are not counted.
        Characters outside the 20 codes are not counted either, but they
        still contribute to the sequence length used as the denominator, so
        the percentages can sum to less than 100.
    """
    amino_acids = 'ACDEFGHIKLMNPQRSTVWY'
    total = len(sequence)

    # An empty sequence has no residues to count; report 0.0 everywhere
    # rather than dividing by zero below.
    if total == 0:
        return {aa: 0.0 for aa in amino_acids}

    counts = {aa: 0 for aa in amino_acids}
    for residue in sequence:
        if residue in counts:
            counts[residue] += 1

    return {aa: (count / total) * 100 for aa, count in counts.items()}

def process_dataset(dataset):
    """Compute the amino acid composition of every sequence in *dataset*.

    Args:
        dataset: An iterable of sequences; each one is passed unchanged to
            ``amino_acid_composition``.

    Returns:
        A list with one composition result per input sequence, in input
        order.
    """
    return [amino_acid_composition(peptide) for peptide in dataset]

def main(active_peptides, inactive_peptides):
    """Compute compositions for the active and the inactive peptide sets.

    Args:
        active_peptides: Iterable of sequences handed to ``process_dataset``.
        inactive_peptides: Iterable of sequences handed to
            ``process_dataset``.

    Returns:
        A 2-tuple ``(active_compositions, inactive_compositions)`` holding
        the ``process_dataset`` result for each argument, in that order.
    """
    # Tuple elements are evaluated left to right, so the active set is
    # processed before the inactive one.
    return (
        process_dataset(active_peptides),
        process_dataset(inactive_peptides),
    )

if __name__ == "__main__":
    # Demo run on two small hard-coded peptide sets.
    active_peptides = [
        "ACDEFGHIKLMNPQRSTVWY",
        "ACDEFGHIKLMN",
    ]
    inactive_peptides = [
        "QRSTVWYACDEFGHIKLMN",
        "HIKLMNPQRST",
    ]

    results = main(active_peptides, inactive_peptides)
    active_compositions, inactive_compositions = results

    # Print the active set first, then the inactive set, one line each.
    for label, compositions in (
        ("Active Peptide Compositions:", active_compositions),
        ("Inactive Peptide Compositions:", inactive_compositions),
    ):
        print(label, compositions)