File size: 1,328 Bytes
32fe622 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 |
import re
from rdkit import Chem
from rdkit.Chem import MolFromSmiles, SDWriter
import logging
from Bio import SeqIO
# Configure the root logger when this module is imported, with its level
# set to INFO.
logging.basicConfig(level=logging.INFO)
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
def process_smiles(smiles: str, sdf_file: str = "/tmp/output.sdf") -> str:
    """Convert a SMILES string into an SDF file and return the file's path.

    Args:
        smiles: SMILES text to parse with ``MolFromSmiles``.
        sdf_file: Destination path handed to ``SDWriter``. Defaults to the
            fixed location ``/tmp/output.sdf``; every call that relies on
            the default writes to that same path, so pass a unique path
            when several conversions may run side by side.

    Returns:
        The path of the SDF file that was written (``sdf_file``).

    Raises:
        ValueError: If ``MolFromSmiles`` returns ``None`` for ``smiles``.
    """
    mol = MolFromSmiles(smiles)
    # Same validity criterion as is_valid_smiles: the parser returned None.
    if mol is None:
        raise ValueError(f"Invalid SMILES string: {smiles}")

    writer = SDWriter(sdf_file)
    try:
        writer.write(mol)
    finally:
        # Close the writer even when write() raises so the handle is
        # never left open.
        writer.close()
    return sdf_file
def process_pdb(file_path: str) -> str:
    """Collect the sequences of a PDB file into one space-separated string.

    Args:
        file_path: Path of the PDB file; it is opened in text read mode.

    Returns:
        The ``seq`` of every record yielded by ``SeqIO.parse`` with the
        ``"pdb-seqres"`` format, converted to ``str`` and joined with
        single spaces in iteration order. An empty string when the parser
        yields no records.
    """
    with open(file_path, "r") as pdb_handle:
        chain_sequences = [
            str(entry.seq) for entry in SeqIO.parse(pdb_handle, "pdb-seqres")
        ]
    return " ".join(chain_sequences)
def process_sdf(file_path: str) -> str:
    """Return the given SDF path unchanged.

    Args:
        file_path: Path of an SDF file.

    Returns:
        ``file_path`` exactly as received; the file is neither opened
        nor checked.
    """
    return file_path
# One arbitrary non-"J" character followed by at least six characters drawn
# from the SMILES-like alphabet below. Compiled once at import time.
_SMILES_PATTERN = re.compile(
    r"([^J][0-9BCOHNSOPrIFla@+\-\[\]\(\)\\\/%=#$]{6,})"
)

# Whitespace and prose delimiters that can be captured by the leading
# ``[^J]`` of the pattern. None of them belongs to the character class
# above, so stripping them can only ever remove that first character.
_NON_SMILES_PREFIX = " \t\r\n\f\v:;,.\"'`<>{}|"


def extract_smiles(text: str) -> str:
    """Return the first SMILES-looking token found in ``text``.

    The pattern's first position accepts any character except ``J``, so a
    token embedded in prose is matched together with the delimiter in
    front of it (for example the space in ``"SMILES: CCCCCCO"``). That
    stray whitespace or punctuation character is removed before returning
    so the result can be handed straight to a SMILES parser.

    Args:
        text: Free-form text that may contain a SMILES string.

    Returns:
        The first match with a leading whitespace/punctuation delimiter
        stripped, or ``""`` when nothing in ``text`` matches.
    """
    match = _SMILES_PATTERN.search(text)
    if match is None:
        return ""
    return match.group(1).lstrip(_NON_SMILES_PREFIX)
def is_valid_smiles(smiles: str) -> bool:
    """Report whether ``MolFromSmiles`` can parse ``smiles``.

    Args:
        smiles: Candidate SMILES text.

    Returns:
        ``True`` when ``MolFromSmiles`` returns a molecule object,
        ``False`` when it returns ``None``.
    """
    return MolFromSmiles(smiles) is not None
def extract_and_convert_to_sdf(text: str) -> str:
    """Find a SMILES string in ``text`` and convert it to an SDF file.

    Args:
        text: Free-form text searched with ``extract_smiles``.

    Returns:
        The path returned by ``process_smiles`` for the extracted SMILES.

    Raises:
        ValueError: If ``extract_smiles`` finds nothing or the extracted
            candidate is rejected by ``is_valid_smiles``.
    """
    candidate = extract_smiles(text)
    # Guard clause: bail out when nothing was extracted or it fails to parse.
    if not candidate or not is_valid_smiles(candidate):
        raise ValueError("No valid SMILES string found in the text.")
    return process_smiles(candidate)
|