Spaces:

Prior-Labs
/

tabpfn-client-gui

Running

App Files Files Community

tabpfn-client-gui / app.py

noahho

Add token retrieval

fffebed 2 days ago

raw

history blame contribute delete

27.7 kB

	import gradio as gr
	import pandas as pd
	import numpy as np
	from sklearn.model_selection import train_test_split, cross_val_score
	from tabpfn_client import TabPFNClassifier
	from tabpfn_client.tabpfn_common_utils import utils as common_utils
	from tabpfn_client.client import ServiceClient
	from password_strength import PasswordPolicy
	import textwrap
	import io
	import logging
	import sys
	import tempfile
	from pathlib import Path
	import time
	from tabpfn_client import init
	from sklearn.metrics import mean_squared_error, r2_score


	# API client used by the handlers below (login, registration, password
	# policy lookup, token display).
	client = ServiceClient()
	# Assigned by login() and register(); estimate_performance() and predict()
	# refuse to run while this is None.
	# NOTE(review): this is module-level state, so it is presumably shared by
	# every browser session served by this process — confirm that is intended.
	access_token = None

	# Shown in the registration form next to the Terms of Service checkbox.
	TERMS_OF_SERVICE_URL = "https://www.priorlabs.ai/terms-eu-en"

	# Only read when the tabs are constructed (initial visibility); nothing in
	# this file reassigns it afterwards.
	is_logged_in = False

	class PromptAgent:
	    """Text helpers used by the registration flow."""

	    @staticmethod
	    def indent(text: str):
	        """Return *text* with a two-space prefix added via ``textwrap.indent``."""
	        prefix = " " * 2
	        return textwrap.indent(text, prefix)

	    @staticmethod
	    def password_req_to_policy(password_req: list[str]):
	        """Turn requirement strings into a ``PasswordPolicy``.

	        Each entry is split at its single "(" into a name and a number; the
	        last character of the number part is dropped before ``int()`` is
	        applied. The lower-cased names become keyword arguments of
	        ``PasswordPolicy.from_names``.

	        Raises:
	            ValueError: if an entry does not contain exactly one "(" or the
	                number part is not an integer.
	        """
	        parsed = {}
	        for entry in password_req:
	            name, count_part = entry.split("(")
	            parsed[name.lower()] = int(count_part[:-1])
	        return PasswordPolicy.from_names(**parsed)

	def login(email, password):
	    """Log in through the shared client and switch the visible tabs.

	    The token returned by ``client.login`` is stored in the module-level
	    ``access_token`` whether or not it is usable.

	    Returns:
	        Three ``gr.update`` objects, in the order (login tab, predict tab,
	        account tab).
	    """
	    global access_token
	    access_token, message = client.login(email, password)

	    # Guard clause: a falsy token means the service rejected the credentials.
	    if not access_token:
	        gr.Warning(f"Login failed: {message}")
	        return gr.update(visible=True), gr.update(visible=False), gr.update(visible=False)

	    client.authorize(access_token)
	    gr.Info("Login successful!")
	    return gr.update(visible=False), gr.update(visible=True), gr.update(visible=True)

	def register(email, password, password_confirm, first_name, last_name, organization, role, use_case, contact_via_email, tos_agreed):
	    """Validate the registration form and create the account.

	    Checks run in this order: Terms of Service accepted, e-mail accepted by
	    ``client.validate_email``, password satisfies the server's policy,
	    both password fields match. The first failing check shows a warning and
	    leaves the login tab visible.

	    Returns:
	        Three ``gr.update`` objects, in the order (login tab, predict tab,
	        account tab).
	    """
	    global access_token

	    def _reject(reason):
	        # Show the reason and keep the user on the login/register tab.
	        gr.Warning(reason)
	        return gr.update(visible=True), gr.update(visible=False), gr.update(visible=False)

	    if not tos_agreed:
	        return _reject("Registration failed: You must agree to the Terms of Service.")

	    email_ok, email_message = client.validate_email(email)
	    if not email_ok:
	        return _reject(f"Registration failed: Invalid email - {email_message}")

	    policy = PromptAgent.password_req_to_policy(client.get_password_policy())
	    violations = policy.test(password)
	    if len(violations) > 0:
	        return _reject("Registration failed: Password requirements not satisfied. Please check the password policy.")

	    if password != password_confirm:
	        return _reject("Registration failed: Passwords do not match.")

	    profile = {
	        "first_name": first_name,
	        "last_name": last_name,
	        "company": organization,
	        "role": role,
	        "use_case": use_case,
	        "contact_via_email": contact_via_email,
	    }

	    # The returned token is stored globally even when creation fails,
	    # exactly as the service hands it back.
	    is_created, message, access_token = client.register(email, password, password_confirm, "tabpfn-2023", profile)
	    if not is_created:
	        return _reject(f"Registration failed: {message}")

	    client.authorize(access_token)
	    gr.Info("Registration successful! Please check your email for a verification link.")
	    return gr.update(visible=False), gr.update(visible=True), gr.update(visible=True)

	def get_password_policy():
	    """Return the service's password requirements, one "- item" per line."""
	    requirements = client.get_password_policy()
	    bullet_lines = (f"- {requirement}" for requirement in requirements)
	    return "\n".join(bullet_lines)

	def list_datasets():
	    """Fetch the user's stored datasets and return them as a visible table.

	    Every train set becomes one row, followed by one row per associated
	    test set.

	    Returns:
	        A ``gr.Dataframe`` holding the rows, or a single-cell placeholder
	        when there are no datasets or the request failed.
	    """
	    try:
	        # Use the module-level client that login()/register() authorized.
	        # The previous code went through a `config` name that is never
	        # defined in this file, so it always failed with a NameError.
	        data_summary = client.get_data_summary() or {}

	        rows = []
	        for dataset in data_summary.get('datasets_summary', []):
	            rows.append({
	                'Dataset Type': 'Train Set',
	                'UID': dataset['train_set_uid'],
	                'Added On': dataset['datetime_added'],
	                'X Filename': dataset['x_train_filename'],
	                'Y Filename': dataset['y_train_filename'],
	            })

	            for test_set in dataset.get('associated_test_sets', []):
	                rows.append({
	                    'Dataset Type': 'Test Set',
	                    'UID': test_set['test_set_uid'],
	                    'Added On': test_set['datetime_added'],
	                    'X Filename': test_set['x_test_filename'],
	                    'Y Filename': 'N/A',  # Test sets don't have y_test_filename
	                })

	        if not rows:
	            return gr.Dataframe(value=[["No datasets found"]], visible=True)

	        return gr.Dataframe(value=pd.DataFrame(rows), visible=True)
	    except Exception as e:
	        # gr.Error is an exception class: building it without `raise` shows
	        # nothing. gr.Warning displays the message and still lets us return
	        # the placeholder table below.
	        gr.Warning(f"Error listing datasets: {str(e)}")
	        return gr.Dataframe(value=[["Error retrieving datasets"]], visible=True)

	def delete_dataset(dataset_uid, confirm):
	    """Delete one dataset by UID and return the refreshed dataset table.

	    Args:
	        dataset_uid: UID typed by the user.
	        confirm: state of the confirmation checkbox.

	    Returns:
	        The result of ``list_datasets()`` after a successful deletion,
	        otherwise ``None``.
	    """
	    if not confirm:
	        gr.Warning("Please confirm the deletion by checking the confirmation box.")
	        return None

	    # Avoid a pointless round trip when the UID box was left empty.
	    if not dataset_uid or not str(dataset_uid).strip():
	        gr.Warning("Please enter the UID of the dataset to delete.")
	        return None

	    try:
	        # `config` is never defined in this file; use the authorized
	        # module-level client instead.
	        deleted_uids = client.delete_dataset(str(dataset_uid).strip())
	        deleted_text = ', '.join(str(uid) for uid in (deleted_uids or []))
	        gr.Info(f"Successfully deleted dataset(s): {deleted_text}")
	        return list_datasets()
	    except Exception as e:
	        # gr.Error only shows up when raised; gr.Warning displays the
	        # message while keeping the original `None` return.
	        gr.Warning(f"Error deleting dataset: {str(e)}")
	        return None

	def delete_account(confirm_password, confirm):
	    """Delete the user's account after an explicit confirmation.

	    Args:
	        confirm_password: password re-entered by the user.
	        confirm: state of the confirmation checkbox.

	    Returns:
	        Three ``gr.update`` objects, in the order (login tab, predict tab,
	        account tab). On success only the login tab is visible; otherwise
	        the logged-in layout is kept.
	    """
	    global access_token

	    if not confirm:
	        gr.Warning("Please confirm the account deletion by checking the confirmation box.")
	        return gr.update(visible=False), gr.update(visible=True), gr.update(visible=True)

	    try:
	        # `config` is never defined in this file; use the authorized
	        # module-level client instead.
	        client.delete_user_account(confirm_password)
	    except Exception as e:
	        # gr.Error only shows up when raised; gr.Warning displays the
	        # message and lets us keep the current tab visibility.
	        gr.Warning(f"Error deleting account: {str(e)}")
	        return gr.update(visible=False), gr.update(visible=True), gr.update(visible=True)

	    # The account is gone: drop the stored token so the login guard in
	    # estimate_performance()/predict() rejects further requests.
	    access_token = None
	    gr.Info("Account deleted successfully.")
	    return gr.update(visible=True), gr.update(visible=False), gr.update(visible=False)

	def download_all_data():
	    """Download all of the user's data into a fresh temporary directory.

	    Returns:
	        A visible ``gr.File`` pointing at the downloaded file, or ``None``
	        when the download failed or produced no file.
	    """
	    try:
	        target_dir = Path(tempfile.mkdtemp())
	        # `config` is never defined in this file; use the authorized
	        # module-level client instead.
	        save_path = client.download_all_data(target_dir)
	    except Exception as e:
	        # gr.Error only shows up when raised; gr.Warning displays the
	        # message while keeping the original `None` return.
	        gr.Warning(f"Error downloading data: {str(e)}")
	        return None

	    # NOTE(review): assumes the client may return None when nothing was
	    # saved — confirm against the client API. Without this guard the
	    # literal string "None" would be handed to gr.File.
	    if not save_path:
	        gr.Warning("Error downloading data: no file was returned by the server.")
	        return None

	    gr.Info("All data downloaded successfully.")
	    return gr.File(value=str(save_path), visible=True)

	def logout():
	    """Drop the client's authorization and return to the login tab.

	    Returns:
	        Three ``gr.update`` objects, in the order (login tab, predict tab,
	        account tab).
	    """
	    global access_token
	    try:
	        # `config` is never defined in this file; use the module-level
	        # client that login()/register() authorized.
	        client.reset_authorization()
	    except Exception as e:
	        # gr.Error only shows up when raised; gr.Warning displays the
	        # message and lets us keep the logged-in layout.
	        gr.Warning(f"Error during logout: {str(e)}")
	        return gr.update(visible=False), gr.update(visible=True), gr.update(visible=True)

	    # Forget the token as well, otherwise the `access_token is None` guard
	    # in estimate_performance()/predict() still passes after logout.
	    access_token = None
	    gr.Info("Logged out successfully.")
	    return gr.update(visible=True), gr.update(visible=False), gr.update(visible=False)

	from sklearn.model_selection import cross_validate

	def estimate_performance(df, feature_cols, target_col, task, progress=gr.Progress()):
	    """Estimate model quality on the labeled rows with 5-fold cross-validation.

	    Args:
	        df: DataFrame loaded from the uploaded CSV (``None`` before upload).
	        feature_cols: names of the columns used as features.
	        target_col: name of the column to predict.
	        task: ``"classification"`` or ``"regression"``.
	        progress: Gradio progress tracker.

	    Returns:
	        A plain-text report, or ``None`` when the estimation could not run
	        (a warning is shown in that case).
	    """
	    if access_token is None:
	        gr.Warning("Please log in or register first.")
	        return None

	    if df is None or not target_col:
	        gr.Warning("Please upload a CSV file and select the column to predict first.")
	        return None

	    # The target dropdown offers every column, so it can overlap with the
	    # selected features; training on the target itself would leak the answer.
	    feature_cols = [col for col in (feature_cols or []) if col != target_col]
	    if not feature_cols:
	        gr.Warning("Please select at least one feature column that is not the target column.")
	        return None

	    try:
	        progress(0, desc="Preparing data")
	        X = df[feature_cols]
	        y = df[target_col]

	        # Remove rows with missing labels
	        has_label = y.notna()
	        X = X[has_label]
	        y = y[has_label]

	        progress(0.1, desc="Initializing model")
	        if task == "classification":
	            if y.dtype == 'object':
	                y = pd.Categorical(y).codes

	            if len(np.unique(y)) < 2:
	                gr.Warning("The dataset must have at least two different categories in the target column for classification.")
	                return None

	            model = TabPFNClassifier()
	            scoring = {'accuracy': 'accuracy'}
	        elif task == "regression":
	            # The file's top-level imports only bring in TabPFNClassifier,
	            # so the regressor is imported where it is needed.
	            from tabpfn_client import TabPFNRegressor

	            model = TabPFNRegressor()
	            scoring = {'mse': 'neg_mean_squared_error', 'r2': 'r2'}
	        else:
	            # gr.Error only shows up when raised; gr.Warning displays the
	            # message while keeping the `None` return.
	            gr.Warning("Invalid task type. Please choose either 'classification' or 'regression'.")
	            return None

	        progress(0.2, desc="Performing cross-validation")
	        cv_results = cross_validate(model, X, y, cv=5, scoring=scoring, n_jobs=-1, return_train_score=False)

	        progress(0.9, desc="Generating report")
	        if task == "classification":
	            scores = cv_results['test_accuracy']
	            result = (
	                f"Average Accuracy: {np.mean(scores):.2%}\n\n"
	                "Individual Test Results:\n"
	                f"{', '.join([f'{s:.2%}' for s in scores])}\n\n"
	                "What does this mean?\n"
	                "- We tested the model's performance by dividing your data into 5 parts.\n"
	                "- For each part, we trained the model on 4 parts and tested it on the remaining part.\n"
	                "- We repeated this process 5 times, each time using a different part for testing.\n"
	                "- The average accuracy shows how often the model correctly predicted the category.\n"
	                "- Some variation in these numbers is normal and expected.\n\n"
	                "Note: This is an estimate of how well the model might perform on new, unseen data."
	            )
	        else:  # regression
	            mse_scores = -cv_results['test_mse']  # Negative because sklearn returns negative MSE
	            r2_scores = cv_results['test_r2']
	            result = (
	                f"Average Mean Squared Error: {np.mean(mse_scores):.4f}\n"
	                f"Average R-squared: {np.mean(r2_scores):.4f}\n\n"
	                "Individual Test Results:\n"
	                f"MSE: {', '.join([f'{s:.4f}' for s in mse_scores])}\n"
	                f"R-squared: {', '.join([f'{s:.4f}' for s in r2_scores])}\n\n"
	                "What does this mean?\n"
	                "- We tested the model's performance by dividing your data into 5 parts.\n"
	                "- For each part, we trained the model on 4 parts and tested it on the remaining part.\n"
	                "- We repeated this process 5 times, each time using a different part for testing.\n"
	                "- Mean Squared Error (MSE) measures the average squared difference between predictions and actual values. Lower is better.\n"
	                "- R-squared measures the proportion of variance in the target variable that is predictable from the feature variables. Higher is better.\n"
	                "- Some variation in these numbers is normal and expected.\n\n"
	                "Note: This is an estimate of how well the model might perform on new, unseen data."
	            )

	        progress(1.0, desc="Completed")
	        gr.Info("Performance estimation completed successfully.")
	        return result
	    except Exception as e:
	        error_message = f"Error during performance estimation: {str(e)}\n\nPlease ensure your data is formatted correctly."
	        # gr.Error only shows up when raised; use gr.Warning so the user
	        # actually sees why the estimation failed.
	        gr.Warning(error_message)
	        return None

	def predict(df, feature_cols, target_col, task, progress=gr.Progress()):
	    """Train on the labeled rows and predict the rows whose target is empty.

	    Args:
	        df: DataFrame loaded from the uploaded CSV (``None`` before upload).
	        feature_cols: names of the columns used as features.
	        target_col: name of the column to predict.
	        task: ``"classification"`` or ``"regression"``.
	        progress: Gradio progress tracker.

	    Returns:
	        ``(file_update, result_df)`` where ``file_update`` points the
	        download component at a CSV of the predictions, or ``(None, None)``
	        when prediction could not run (a warning is shown in that case).
	    """
	    if access_token is None:
	        gr.Warning("Please log in or register first.")
	        return None, None

	    if df is None or not target_col:
	        gr.Warning("Please upload a CSV file and select the column to predict first.")
	        return None, None

	    # The target dropdown offers every column, so it can overlap with the
	    # selected features; for unlabeled rows that feature would be all-empty.
	    feature_cols = [col for col in (feature_cols or []) if col != target_col]
	    if not feature_cols:
	        gr.Warning("Please select at least one feature column that is not the target column.")
	        return None, None

	    try:
	        progress(0, desc="Preparing data")
	        X = df[feature_cols]
	        y = df[target_col]

	        # Split data into labeled and unlabeled
	        has_label = y.notna()
	        X_labeled = X[has_label]
	        y_labeled = y[has_label]
	        X_unlabeled = X[~has_label]

	        if X_unlabeled.empty:
	            gr.Warning("No rows with an empty target value were found, so there is nothing to predict.")
	            return None, None

	        progress(0.2, desc="Initializing and training model")
	        if task == "classification":
	            # Encode string labels as integer codes for training, but keep
	            # the category names so the output can show the user's labels.
	            label_names = None
	            if y_labeled.dtype == 'object':
	                encoded = pd.Categorical(y_labeled)
	                label_names = encoded.categories.to_numpy()
	                y_labeled = encoded.codes

	            if len(np.unique(y_labeled)) < 2:
	                gr.Warning("The dataset must have at least two different categories in the target column for classification.")
	                return None, None

	            # Train the model
	            model = TabPFNClassifier()
	            model.fit(X_labeled, y_labeled)

	            progress(0.6, desc="Making predictions")
	            predictions = model.predict(X_unlabeled)
	            probabilities = model.predict_proba(X_unlabeled)

	            if label_names is not None:
	                # Map the predicted codes back to the original labels.
	                predictions = label_names[np.asarray(predictions, dtype=int)]

	            progress(0.8, desc="Preparing results")
	            # Create a DataFrame with predictions and probabilities
	            result_df = X_unlabeled.copy()
	            result_df[target_col] = predictions
	            for i, class_code in enumerate(model.classes_):
	                class_name = class_code if label_names is None else label_names[int(class_code)]
	                result_df[f"{target_col}_prob_{class_name}"] = probabilities[:, i]

	        elif task == "regression":
	            # The file's top-level imports only bring in TabPFNClassifier,
	            # so the regressor is imported where it is needed.
	            from tabpfn_client import TabPFNRegressor

	            # Train the model
	            model = TabPFNRegressor()
	            model.fit(X_labeled, y_labeled)

	            progress(0.6, desc="Making predictions")
	            predictions = model.predict(X_unlabeled)

	            progress(0.8, desc="Preparing results")
	            # Create a DataFrame with predictions
	            result_df = X_unlabeled.copy()
	            result_df[target_col] = predictions

	        else:
	            # gr.Error only shows up when raised; gr.Warning displays the
	            # message while keeping the `(None, None)` return.
	            gr.Warning("Invalid task type. Please choose either 'classification' or 'regression'.")
	            return None, None

	        # Save predictions to a new CSV file
	        output_path = Path(tempfile.mkdtemp()) / "predictions.csv"
	        result_df.to_csv(output_path, index=False)

	        progress(1.0, desc="Completed")
	        gr.Info("Predictions completed successfully.")
	        return gr.update(visible=True, value=str(output_path)), result_df
	    except Exception as e:
	        error_message = f"Error during prediction: {str(e)}\n\nPlease ensure your data is formatted correctly and try again."
	        # gr.Error only shows up when raised; use gr.Warning so the user
	        # actually sees why the prediction failed.
	        gr.Warning(error_message)
	        return None, None

	def update_column_selection(file):
	    """Load the uploaded CSV and populate the column selectors.

	    The last column is pre-selected as the target and all other columns
	    as features.

	    Args:
	        file: value delivered by the upload component.

	    Returns:
	        Six values, in the order (feature selector update, target selector
	        update, preview update, estimate button update, predict button
	        update, loaded DataFrame). On failure the first three and the
	        DataFrame are ``None`` and both buttons are hidden.
	    """
	    def _nothing_loaded():
	        return None, None, None, gr.update(visible=False), gr.update(visible=False), None

	    if not file:
	        gr.Warning("Please upload a CSV file.")
	        return _nothing_loaded()

	    try:
	        # Accept both a file-like object exposing `.name` and a plain path.
	        csv_path = getattr(file, "name", file)
	        df = pd.read_csv(csv_path)
	        columns = df.columns.tolist()

	        # One column cannot be split into features and a target.
	        if len(columns) < 2:
	            gr.Warning("The CSV file must contain at least one feature column and one target column.")
	            return _nothing_loaded()

	        # Mark the last column as the target column (in the preview only;
	        # the DataFrame kept in state retains the original names).
	        preview = df.copy()
	        preview.columns = columns[:-1] + [f"{columns[-1]} (Target)"]

	        return (
	            gr.update(visible=True, choices=columns[:-1], value=columns[:-1]),
	            gr.update(visible=True, choices=columns, value=columns[-1]),
	            gr.update(visible=True, value=preview.head()),
	            gr.update(visible=True),
	            gr.update(visible=True),
	            df,
	        )
	    except Exception as e:
	        error_message = f"Error reading CSV file: {str(e)}"
	        # gr.Error only shows up when raised; use gr.Warning so the user
	        # actually sees why the file could not be read.
	        gr.Warning(error_message)
	        return _nothing_loaded()

	# Build the "Login/Register" tab: login fields, a registration form and the
	# password policy text. Returns the tab plus the components that
	# create_interface() wires to the login() and register() handlers.
	# NOTE(review): the indentation of this copy is flattened, so the exact
	# nesting of the `with` blocks below cannot be confirmed from here.
	def create_login_tab():
	# Visible at startup only while is_logged_in is False.
	with gr.Tab("Login/Register", visible=not is_logged_in) as login_tab:
	with gr.Row():
	with gr.Column():
	login_email = gr.Textbox(label="Email", info="Enter your registered email address")
	login_password = gr.Textbox(label="Password", type="password", info="Enter your password")
	login_button = gr.Button("Login", variant="primary")

	# Registration form, collapsed by default. Labels ending in "*" are marked
	# as required in the UI text.
	with gr.Accordion("New User? Complete Registration", open=False) as register_accordion:
	register_email = gr.Textbox(label="Email*", info="Enter a valid email address")
	register_password = gr.Textbox(label="Password*", type="password", info="Enter a strong password")
	register_password_confirm = gr.Textbox(label="Confirm Password*", type="password", info="Re-enter your password")
	register_first_name = gr.Textbox(label="First Name*", info="Enter your first name")
	register_last_name = gr.Textbox(label="Last Name*", info="Enter your last name")
	register_organization = gr.Textbox(label="Where do you work? (Optional)", info="Enter your organization name")
	register_role = gr.Textbox(label="What is your role? (Optional)", info="Enter your job title or role")
	register_use_case = gr.Textbox(label="What do you want to use TabPFN for? (Optional)", info="Briefly describe your intended use case")
	register_contact_via_email = gr.Checkbox(label="Can we reach out to you via email to support you?", info="Check this box if you're open to receiving support emails")
	gr.Markdown(f"Please refer to our terms and conditions at: {TERMS_OF_SERVICE_URL}")
	register_tos_agreed = gr.Checkbox(label="I have read and agree to the Terms of Service*", info="You must agree to the Terms of Service to register")
	register_submit = gr.Button("Submit Registration", variant="primary")

	# get_password_policy() is called here, i.e. while the UI is being built,
	# so the policy is fetched through the client once at construction time.
	with gr.Accordion("Password Policy", open=False):
	policy_output = gr.Markdown(get_password_policy())

	# policy_output is not returned; it is only displayed.
	return login_tab, login_email, login_password, login_button, register_accordion, register_email, register_password, register_password_confirm, register_first_name, register_last_name, register_organization, register_role, register_use_case, register_contact_via_email, register_tos_agreed, register_submit


	# Build the "Account Management" tab: dataset listing/deletion, data
	# download, logout, API token display and account deletion. Returns the tab
	# plus the components that create_interface() wires to handlers.
	# NOTE(review): the indentation of this copy is flattened, so the exact
	# nesting of the `with` blocks below cannot be confirmed from here.
	def create_account_management_tab():
	# Visible at startup only while is_logged_in is True.
	with gr.Tab("Account Management", visible=is_logged_in) as account_tab:
	gr.Markdown("## Account Management")

	with gr.Row():
	list_datasets_button = gr.Button("List Datasets")
	download_all_data_button = gr.Button("Download All Data")
	logout_button = gr.Button("Logout")

	# Add Token Display Section
	# The textbox starts hidden; toggle_token_display() fills and reveals it.
	with gr.Accordion("API Access Token", open=False) as token_accordion:
	token_display = gr.Textbox(
	label="Your API Token",
	info="Use this token for API access",
	interactive=False,
	show_copy_button=True,
	visible=False
	)
	show_token_button = gr.Button("Show/Hide Token")

	# The dataset table and its delete controls start hidden.
	datasets_table = gr.Dataframe(label="Your Datasets", visible=False)
	delete_dataset_input = gr.Textbox(label="Dataset UID to delete", visible=False)
	delete_dataset_confirm = gr.Checkbox(label="I confirm that I want to delete this dataset", visible=False)
	delete_dataset_button = gr.Button("Delete Dataset", visible=False)

	download_all_data_file = gr.File(label="Download All Data", visible=False)

	with gr.Accordion("Delete Account and All Data", open=False) as delete_account_accordion:
	delete_account_password = gr.Textbox(label="Confirm Password", type="password")
	delete_account_confirm = gr.Checkbox(label="I confirm that I want to delete my account")
	delete_account_button = gr.Button("Delete Account", variant="stop")

	# token_accordion is not returned; only its textbox and button are.
	return account_tab, list_datasets_button, download_all_data_button, logout_button, datasets_table, delete_dataset_input, delete_dataset_confirm, delete_dataset_button, download_all_data_file, delete_account_accordion, delete_account_password, delete_account_confirm, delete_account_button, token_display, show_token_button

	def toggle_token_display(token_visible):
	    """Return the update that shows or hides the API token textbox.

	    When *token_visible* is truthy the textbox is filled with
	    ``client.access_token`` and revealed; otherwise it is hidden. A failure
	    while reading the token shows a warning and hides the textbox.
	    """
	    if not token_visible:
	        return gr.update(visible=False)

	    try:
	        revealed = gr.update(value=client.access_token, visible=True)
	    except Exception as e:
	        gr.Warning(f"Unable to retrieve token: {str(e)}")
	        return gr.update(visible=False)
	    return revealed

	# Build the "Predict" tab: usage instructions, CSV upload, column
	# selectors, task choice, action buttons and result components. Returns the
	# tab plus the components that create_interface() wires to handlers.
	# NOTE(review): the indentation of this copy is flattened, so the exact
	# nesting of the `with` blocks below cannot be confirmed from here.
	def create_predict_tab():
	# Visible at startup only while is_logged_in is True.
	with gr.Tab("Predict", visible=is_logged_in) as predict_tab:
	gr.Markdown("""
	## Preparing Your Data

	Before uploading, please ensure your CSV file is formatted correctly:

	1. The file should have a header row with column names.
	2. Each subsequent row should represent one data point.
	3. The last column will be treated as the target column (what you want to predict).
	4. All other columns will be treated as feature columns (used for making predictions).
	5. Some rows can have empty values in the target column. These are the ones the model will try to predict.

	Example CSV format:
	```
	feature1,feature2,feature3,target
	value1,value2,value3,category1
	value4,value5,value6,category2
	value7,value8,value9,
	```

	In this example, 'feature1', 'feature2', and 'feature3' are feature columns, and 'target' is the target column.
	The last row has an empty target value, indicating it's a row for prediction.

	Note: Make sure your target column contains only numeric values for regression or categorical labels for classification.

	## How to Use This Demo
	Estimate Performance: This option helps you understand how well the model might work with your data.
	- It uses your labeled data (where you already know the correct values) to estimate the model's accuracy or error.
	- This gives you an idea of how well the model might predict values for new data.

	Predict: This option helps you predict values for new data.
	- It uses your labeled data to learn patterns.
	- Then, it predicts values for your unlabeled data (where you don't know the correct values).
	- The result is a downloadable file with predictions for your unlabeled data.
	""")

	file_input = gr.File(label="Upload your CSV file", file_types=[".csv"])

	# This row is created hidden; the selectors get their choices from
	# update_column_selection() after an upload.
	with gr.Row(visible=False) as column_selection:
	with gr.Column():
	feature_cols = gr.CheckboxGroup(label="Columns to use for prediction", info="Select the columns you want to use as features for prediction")
	with gr.Column():
	target_col = gr.Dropdown(label="Column to predict", info="Select the column you want to predict")

	preview_data = gr.Dataframe(label="Data Preview", visible=False)

	# No default value is given, so the task is unset until the user picks one.
	task = gr.Radio(["classification", "regression"], label="Task", info="Select the type of prediction task")

	# Both buttons start hidden.
	with gr.Row():
	estimate_button = gr.Button("Estimate Performance", visible=False)
	predict_button = gr.Button("Predict", visible=False)

	# Result components start hidden; the button handlers in create_interface()
	# toggle them.
	performance_output = gr.Textbox(label="Performance Estimation Results", visible=False)
	prediction_download = gr.File(label="Download Predictions", visible=False)
	prediction_table_output = gr.Dataframe(label="Preview of Prediction Results", visible=False)

	return predict_tab, file_input, column_selection, feature_cols, target_col, preview_data, task, estimate_button, predict_button, performance_output, prediction_download, prediction_table_output

	def create_interface():
	    """Assemble the Gradio app: build the three tabs and wire every event.

	    Returns:
	        The ``gr.Blocks`` application, ready for ``launch()``.
	    """
	    with gr.Blocks(theme=gr.themes.Monochrome()) as demo:
	        gr.Markdown("# TabPFN-V2 Demo")

	        # Holds the DataFrame loaded from the uploaded CSV.
	        state = gr.State(None)
	        # Tracks whether the API token textbox is currently shown.
	        token_visible_state = gr.State(False)

	        (
	            login_tab, login_email, login_password, login_button, register_accordion,
	            register_email, register_password, register_password_confirm,
	            register_first_name, register_last_name, register_organization,
	            register_role, register_use_case, register_contact_via_email,
	            register_tos_agreed, register_submit,
	        ) = create_login_tab()

	        (
	            predict_tab, file_input, column_selection, feature_cols, target_col,
	            preview_data, task, estimate_button, predict_button, performance_output,
	            prediction_download, prediction_table_output,
	        ) = create_predict_tab()

	        (
	            account_tab, list_datasets_button, download_all_data_button, logout_button,
	            datasets_table, delete_dataset_input, delete_dataset_confirm,
	            delete_dataset_button, download_all_data_file, delete_account_accordion,
	            delete_account_password, delete_account_confirm, delete_account_button,
	            token_display, show_token_button,
	        ) = create_account_management_tab()

	        # Event handlers
	        # Flip the flag first, then show or hide the token accordingly.
	        show_token_button.click(
	            lambda x: not x,
	            inputs=[token_visible_state],
	            outputs=[token_visible_state]
	        ).then(
	            toggle_token_display,
	            inputs=[token_visible_state],
	            outputs=[token_display]
	        )

	        login_button.click(
	            login,
	            inputs=[login_email, login_password],
	            outputs=[login_tab, predict_tab, account_tab]
	        )

	        register_submit.click(
	            register,
	            inputs=[register_email, register_password, register_password_confirm,
	                    register_first_name, register_last_name, register_organization,
	                    register_role, register_use_case, register_contact_via_email, register_tos_agreed],
	            outputs=[login_tab, predict_tab, account_tab]
	        ).then(
	            # An Accordion is collapsed through `open`; it has no `value`.
	            lambda: gr.update(open=False),
	            outputs=[register_accordion]
	        )

	        file_input.upload(
	            update_column_selection,
	            inputs=[file_input],
	            outputs=[feature_cols, target_col, preview_data, estimate_button, predict_button, state]
	        ).then(
	            # The selectors live in a row that is created hidden; reveal it
	            # once a CSV has been loaded, otherwise they never appear.
	            lambda loaded_df: gr.update(visible=loaded_df is not None),
	            inputs=[state],
	            outputs=[column_selection]
	        )

	        # Show the text report and hide the prediction outputs, then run.
	        estimate_button.click(
	            lambda: (gr.update(visible=True), gr.update(visible=False), gr.update(visible=False)),
	            outputs=[performance_output, prediction_download, prediction_table_output]
	        ).then(
	            estimate_performance,
	            inputs=[state, feature_cols, target_col, task],
	            outputs=[performance_output],
	            show_progress=True,
	        )

	        # Hide the text report and show the prediction outputs, then run.
	        predict_button.click(
	            lambda: (gr.update(visible=False), gr.update(visible=True), gr.update(visible=True)),
	            outputs=[performance_output, prediction_download, prediction_table_output]
	        ).then(
	            predict,
	            inputs=[state, feature_cols, target_col, task],
	            outputs=[prediction_download, prediction_table_output],
	            show_progress=True,
	        )

	        # Listing datasets also reveals the delete controls.
	        list_datasets_button.click(
	            list_datasets,
	            outputs=[datasets_table]
	        ).then(
	            lambda: (gr.update(visible=True), gr.update(visible=True), gr.update(visible=True)),
	            outputs=[delete_dataset_input, delete_dataset_confirm, delete_dataset_button]
	        )

	        # Reset the delete form after each attempt.
	        delete_dataset_button.click(
	            delete_dataset,
	            inputs=[delete_dataset_input, delete_dataset_confirm],
	            outputs=[datasets_table]
	        ).then(
	            lambda: (gr.update(value=""), gr.update(value=False)),
	            outputs=[delete_dataset_input, delete_dataset_confirm]
	        )

	        download_all_data_button.click(
	            download_all_data,
	            outputs=[download_all_data_file]
	        )

	        delete_account_button.click(
	            delete_account,
	            inputs=[delete_account_password, delete_account_confirm],
	            outputs=[login_tab, predict_tab, account_tab]
	        ).then(
	            # Clear the form and collapse the accordion (`open`, not `value`).
	            lambda: (gr.update(value=""), gr.update(value=False), gr.update(open=False)),
	            outputs=[delete_account_password, delete_account_confirm, delete_account_accordion]
	        )

	        logout_button.click(
	            logout,
	            outputs=[login_tab, predict_tab, account_tab]
	        )

	    return demo

	# Script entry point: build the interface and start the Gradio server with
	# default launch settings.
	if __name__ == "__main__":
	demo = create_interface()
	demo.launch()