"""Gradio demo: predict aqueous solubility from a SMILES string.

Loads the ``khanfs/ChemSolubilityBERTa`` checkpoint once at import time and
serves a single text-in / text-out interface around it.
"""

import gradio as gr
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch

# Load the pre-trained model and tokenizer once so every request reuses them.
model_name = "khanfs/ChemSolubilityBERTa"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

# Token budget passed to the tokenizer: longer inputs are truncated and
# shorter ones are padded up to this length.
MAX_LENGTH = 128


def predict_solubility(smiles_string: str) -> str:
    """Return a formatted solubility prediction for one SMILES string.

    Args:
        smiles_string: SMILES text entered by the user. Leading and trailing
            whitespace is ignored.

    Returns:
        A message of the form ``"Predicted Solubility: <value> log mol/L"``
        with the value rounded to four decimal places.

    Raises:
        gr.Error: If the input is missing or contains only whitespace, so the
            UI shows a readable message instead of a number computed from an
            empty sequence.
    """
    smiles = (smiles_string or "").strip()
    if not smiles:
        raise gr.Error("Please enter a SMILES string.")

    inputs = tokenizer(
        smiles,
        return_tensors="pt",
        truncation=True,
        padding="max_length",
        max_length=MAX_LENGTH,
    )

    # Inference only: skip gradient tracking.
    with torch.no_grad():
        outputs = model(**inputs)

    # ``.item()`` only works on a one-element tensor, so this relies on the
    # checkpoint producing a single logit for a single input.
    solubility = outputs.logits.item()
    return f"Predicted Solubility: {solubility:.4f} log mol/L"


# Gradio interface
iface = gr.Interface(
    fn=predict_solubility,
    inputs="text",
    outputs="text",
    title="ChemSolubilityBERTa",
    description="Enter a SMILES string to predict its aqueous solubility using ChemSolubilityBERTa.",
    # Example SMILES strings for ethanol, acetone, and benzene
    examples=[["CCO"], ["CC(C)=O"], ["C1=CC=CC=C1"]],
)

# Launch the app
iface.launch()