Salesforce PD1
For the optimal approach (automatic elbow detection):
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from yellowbrick.cluster import KElbowVisualizer

# Load the dataset (fix: `data` was used below but never defined).
# Assumes 'customers.csv' has 'Age' and 'Spending Score (1-100)' columns
# — TODO confirm against the actual file.
data = pd.read_csv('customers.csv')

# Use the elbow method to determine the number of clusters:
# KElbowVisualizer fits KMeans for k = 1..9 and marks the elbow.
model = KMeans()
visualizer = KElbowVisualizer(model, k=(1, 10))
visualizer.fit(data[['Age', 'Spending Score (1-100)']])
visualizer.show()

# Re-cluster at the detected elbow, then draw a scatter plot with the
# data points colored on the basis of their cluster assignment.
optimal_k = visualizer.elbow_value_
kmeans = KMeans(n_clusters=optimal_k, init='k-means++', max_iter=300,
                n_init=10, random_state=0)
clusters = kmeans.fit_predict(data[['Age', 'Spending Score (1-100)']])
data['Cluster'] = clusters
plt.scatter(data['Age'], data['Spending Score (1-100)'],
            c=data['Cluster'], cmap='viridis')
plt.xlabel('Age')
plt.ylabel('Spending Score (1-100)')
plt.show()
For the sub-optimal approach (manual elbow inspection):
import pandas as pd
import numpy as np
# Fix: matplotlib and KMeans were used below but never imported.
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans

# Load the dataset and report missing values per column.
data = pd.read_csv('customers.csv')
print(data.isnull().sum())

# Fix: the original labeled/showed a figure with no plot drawn —
# draw the raw points first.
plt.scatter(data['Age'], data['Spending Score (1-100)'])
plt.xlabel('Age')
plt.ylabel('Spending Score (1-100)')
plt.show()

# Manual elbow method: fit KMeans at each k and record its inertia
# (fix: `km` was referenced in the loop but never created or fitted).
features = data[['Age', 'Spending Score (1-100)']]
sum_of_squared_distances = []
K = range(1, 11)
for k in K:
    km = KMeans(n_clusters=k, random_state=0).fit(features)
    sum_of_squared_distances.append(km.inertia_)

# Fix: plot the elbow curve before showing the figure.
plt.plot(K, sum_of_squared_distances, 'bx-')
plt.xlabel('Number of Clusters')
plt.ylabel('Sum of squared distances (inertia)')
plt.show()

# Choose k by reading the elbow off the plot above
# (fix: `clusters` was used but never computed).
optimal_k = 4  # NOTE(review): hard-coded — confirm against the elbow plot
clusters = KMeans(n_clusters=optimal_k, random_state=0).fit_predict(features)
data['Cluster'] = clusters

# Draw a scatter plot with data points colored by cluster.
plt.scatter(data['Age'], data['Spending Score (1-100)'],
            c=data['Cluster'], cmap='viridis')
plt.xlabel('Age')
plt.show()