Spaces:
Running
Running
Delete utils
Browse files- utils/__init__.py +0 -3
- utils/login.py +0 -18
- utils/main_model.py +0 -52
- utils/rdkit_utils.py +0 -39
utils/__init__.py
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
from .main_model import ChemicalConverter
|
2 |
-
from .rdkit_utils import validate_smiles2iupac, plot_mol
|
3 |
-
from .login import login
|
|
|
|
|
|
|
|
utils/login.py
DELETED
@@ -1,18 +0,0 @@
|
|
1 |
-
import hashlib
|
2 |
-
|
3 |
-
def hash_password(access_code: str) -> str:
    """Return the SHA-256 hex digest of *access_code*.

    The text is encoded as UTF-8 before hashing.

    Args:
        access_code: Plain-text access code to hash.

    Returns:
        The lowercase hexadecimal SHA-256 digest (64 characters).

    Raises:
        AttributeError: If *access_code* has no ``encode`` method
            (for example ``None``).
    """
    # NOTE(review): this is a single unsalted SHA-256 round. If access codes
    # are low-entropy secrets, a slow KDF such as hashlib.scrypt would be
    # safer, but switching requires regenerating every stored digest.
    return hashlib.sha256(access_code.encode("utf-8")).hexdigest()
|
8 |
-
|
9 |
-
|
10 |
-
def login(access_code) -> bool:
    """Check *access_code* against the digests stored in ``hashed_codes.txt``.

    Spaces are removed from the code before it is hashed, so ``"ab cd"`` and
    ``"abcd"`` are treated as the same code. Only the space character is
    removed; tabs and newlines are hashed as-is.

    Args:
        access_code: The code supplied by the user, or ``None``.

    Returns:
        ``True`` if the hex digest of the code occurs in any line of
        ``hashed_codes.txt`` (resolved against the current working
        directory); ``False`` otherwise, including when *access_code* is
        ``None``.

    Raises:
        FileNotFoundError: If ``hashed_codes.txt`` does not exist.
    """
    if access_code is None:
        return False

    hashed_code = hash_password(access_code.replace(" ", ""))

    # The digest is matched as a substring of each line, so lines may carry
    # extra text (labels, trailing newline) around the stored digest.
    with open("hashed_codes.txt", encoding="utf-8") as file:
        return any(hashed_code in line for line in file)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
utils/main_model.py
DELETED
@@ -1,52 +0,0 @@
|
|
1 |
-
from modeling import MT5ForConditionalGeneration
|
2 |
-
from transformers import AutoTokenizer
|
3 |
-
import os
|
4 |
-
|
5 |
-
|
6 |
-
class ChemicalConverter:
    """Convert between SMILES strings and IUPAC names with a seq2seq model.

    The instance loads one ``MT5ForConditionalGeneration`` checkpoint chosen
    by ``mode`` plus both the SMILES and IUPAC tokenizers; the tokenizer used
    for encoding and the one used for decoding are picked per call from
    ``mode``.
    """

    def __init__(self, mode: str):
        """Load the checkpoint and tokenizers for the requested direction.

        Args:
            mode: ``"SMILES2IUPAC"`` selects the SMILES -> IUPAC checkpoint.
                Any other value selects the IUPAC -> SMILES checkpoint.
        """
        self.mode = mode

        if mode == "SMILES2IUPAC":
            model_path = "knowledgator/SMILES2IUPAC-canonical-base"
        else:
            model_path = "knowledgator/IUPAC2SMILES-canonical-small"

        self.model = MT5ForConditionalGeneration.from_pretrained(model_path)
        self.smiles_tokenizer = AutoTokenizer.from_pretrained("knowledgator/SMILES-FAST-TOKENIZER")
        self.iupac_tokenizer = AutoTokenizer.from_pretrained("knowledgator/IUPAC-FAST-TOKENIZER")
        # Maximum encoder input lengths (in tokens) for each notation.
        self.smiles_max_len = 128
        self.iupac_max_len = 156

    def convert(self, input):
        """Translate one string in the direction given by ``self.mode``.

        Args:
            input: The SMILES string or IUPAC name to convert. Space
                characters are removed before tokenization. (The name
                shadows the builtin but is kept for keyword callers.)

        Returns:
            The decoded text of the first generated sequence, with special
            tokens skipped.
        """
        text = input.replace(" ", "")

        if self.mode == "SMILES2IUPAC":
            tokenizer = self.smiles_tokenizer
            reverse_tokenizer = self.iupac_tokenizer
            max_length = self.smiles_max_len
        else:
            tokenizer = self.iupac_tokenizer
            reverse_tokenizer = self.smiles_tokenizer
            max_length = self.iupac_max_len

        encoding = tokenizer(
            text,
            return_tensors='pt',
            padding="max_length",
            truncation=True,
            max_length=max_length,
        )
        # Put every input tensor on whatever device the model lives on
        # (this is not necessarily a GPU).
        device = self.model.device
        encoding = {key: value.to(device) for key, value in encoding.items()}

        # Greedy decoding of a single sequence; the 156-token output cap is
        # applied in both conversion directions.
        generated = self.model.generate(
            input_ids=encoding['input_ids'],
            attention_mask=encoding['attention_mask'],
            max_new_tokens=156,
            num_beams=1,
            num_return_sequences=1,
        )

        decoded = [
            reverse_tokenizer.decode(ids, skip_special_tokens=True)
            for ids in generated
        ]
        return decoded[0]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
utils/rdkit_utils.py
DELETED
@@ -1,39 +0,0 @@
|
|
1 |
-
from rdkit import DataStructs, Chem
|
2 |
-
from rdkit.Chem import AllChem
|
3 |
-
from rdkit.Chem import Draw
|
4 |
-
from PIL import Image
|
5 |
-
import io
|
6 |
-
from .main_model import ChemicalConverter
|
7 |
-
|
8 |
-
# Lazily created IUPAC -> SMILES converter shared by all validation calls, so
# the checkpoint is loaded once instead of on every call.
_IUPAC2SMILES_CONVERTER = None


def _get_iupac2smiles_converter():
    """Return the shared IUPAC -> SMILES converter, creating it on first use."""
    global _IUPAC2SMILES_CONVERTER
    if _IUPAC2SMILES_CONVERTER is None:
        _IUPAC2SMILES_CONVERTER = ChemicalConverter(mode="IUPAC2SMILES")
    return _IUPAC2SMILES_CONVERTER


def validate_smiles2iupac(input_smiles, predicted_iupac):
    """Score a predicted IUPAC name by round-tripping it back to SMILES.

    The predicted name is converted to SMILES with the IUPAC -> SMILES model,
    and the RDKit fingerprints of the original and round-tripped molecules
    are compared.

    Args:
        input_smiles: The SMILES string the IUPAC name was predicted from.
        predicted_iupac: The predicted IUPAC name to validate.

    Returns:
        The Tanimoto similarity between the two fingerprints, or ``None`` if
        either SMILES string cannot be parsed by RDKit.
    """
    input_mol = Chem.MolFromSmiles(input_smiles)
    if input_mol is None:
        # Nothing to compare against; skip the model inference entirely.
        return None

    predicted_smiles = _get_iupac2smiles_converter().convert(predicted_iupac)

    # The first six characters of the model output are dropped before
    # parsing -- presumably a fixed prefix emitted by the model; TODO confirm.
    predicted_mol = Chem.MolFromSmiles(predicted_smiles[6:])
    if predicted_mol is None:
        return None

    fpgen = AllChem.GetRDKitFPGenerator()
    input_fp = fpgen.GetFingerprint(input_mol)
    predicted_fp = fpgen.GetFingerprint(predicted_mol)

    return DataStructs.TanimotoSimilarity(input_fp, predicted_fp)
|
21 |
-
|
22 |
-
def plot_mol(smiles, mol_size=185, canvas_width=890):
    """Draw a molecule centred horizontally on a wide white canvas.

    Args:
        smiles: SMILES string of the molecule to draw.
        mol_size: Edge length in pixels of the square molecule drawing; it
            is also the height of the returned canvas. Defaults to 185.
        canvas_width: Width in pixels of the returned canvas. Defaults to
            890.

    Returns:
        An RGB ``PIL.Image.Image`` of size ``(canvas_width, mol_size)`` with
        the molecule drawing pasted in the horizontal centre.

    Raises:
        ValueError: If RDKit cannot parse *smiles*.
    """
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        # Fail with a message that names the offending input instead of
        # handing a null molecule to the drawing code.
        raise ValueError(f"Cannot parse SMILES string: {smiles!r}")

    img = Draw.MolToImage(mol, size=(mol_size, mol_size))

    # White canvas with the same height as the drawing.
    final_img = Image.new('RGB', (canvas_width, mol_size), 'white')

    # Centre horizontally; heights match, so the drawing sits at the top.
    left = (canvas_width - mol_size) // 2
    final_img.paste(img, (left, 0))

    return final_img
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|