Spaces:
Build error
Build error
| import pandas as pd | |
| from sklearn.cluster import KMeans | |
| import plotly.express as px | |
def k_means(dataset, cols, drop_features, sample_data):
    """Build an elbow-curve figure of K-Means inertia for 1 to 10 clusters.

    A ``KMeans`` model (``init='k-means++'``) is fitted on ``sample_data``
    for each cluster count in ``range(1, 11)`` and its ``inertia_`` is
    recorded. The results are plotted as a line chart with markers.

    Fitting is best-effort: if a fit raises for a given cluster count, the
    error is printed and that count is skipped. Only the counts that were
    fitted successfully appear in the plot, so the 'Clusters' and
    'Distortions' columns always have the same length.

    Args:
        dataset: Unused by this function; kept for interface compatibility.
        cols: Unused by this function; kept for interface compatibility.
        drop_features: Unused by this function; kept for interface
            compatibility.
        sample_data: The data passed to ``KMeans.fit``.

    Returns:
        The figure produced by ``px.line`` with x='Clusters' and
        y='Distortions', drawn in 'lines+markers' mode.
    """
    cluster_counts = []
    distortions = []

    for n_clusters in range(1, 11):
        try:
            model = KMeans(n_clusters=n_clusters, init='k-means++').fit(sample_data)
        except Exception as e:
            # Deliberately broad: one failing cluster count must not abort
            # the whole curve. Report it and move on to the next count.
            print(f"KMeans failed for n_clusters={n_clusters}: {e}")
            continue
        # Record the count together with its inertia so both columns stay
        # aligned even when some fits were skipped.
        cluster_counts.append(n_clusters)
        distortions.append(model.inertia_)

    df = pd.DataFrame({'Clusters': cluster_counts, 'Distortions': distortions})
    elbow_curve = px.line(df, x='Clusters', y='Distortions').update_traces(
        mode='lines+markers'
    )

    # TODO: a silhouette-score curve (cluster sizes 2..11, euclidean metric)
    # was sketched here previously but never enabled; it is not part of the
    # return value.
    return elbow_curve