6
6
ipynb
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.cluster import KMeans
# from sklearn.metrics import silhouette_score
# import scipy.cluster.hierarchy as shc
# from sklearn.cluster import AgglomerativeClustering
import warnings
warnings.filterwarnings('ignore')
# Load the mall-customer table. The file carries an .xls extension but is parsed
# here as delimited text via read_csv.
df = pd.read_csv('Mall_Customers.xls')
# Preview the first five rows (head() defaults to n=5).
df.head()
Out[15]:
CustomerID Gender Age Annual Income (k$) Spending Score (1-100)
0 1 Male 19 15 39
1 2 Male 21 15 81
2 3 Female 20 16 6
3 4 Female 23 16 77
4 5 Female 31 17 40
# Drop the CustomerID column and give the two feature columns the short labels
# that the later cells select and plot by ('Income(k$/yr)', 'SpendScore(1-100)').
#
# Both steps are written to be safely re-runnable, because this cell overwrites
# `df`:
#   * errors='ignore' -> no KeyError if CustomerID has already been dropped;
#   * rename() skips keys that are not present, so a second run (or a rename
#     already done elsewhere) is a no-op.
df = (
    df.drop(columns=['CustomerID'], errors='ignore')
      .rename(columns={
          'Annual Income (k$)': 'Income(k$/yr)',
          'Spending Score (1-100)': 'SpendScore(1-100)',
      })
)
1 of 4 30-10-2024, 22:07
6 - Jupyter Notebook http://localhost:8888/notebooks/Practicals_AI/6.ipynb
# Feature matrix for clustering: income and spending score only.
# .copy() gives X its own data so it is not a view onto df.
X = df.loc[:, ['Income(k$/yr)', 'SpendScore(1-100)']].copy()
print(X.head())
Income(k$/yr) SpendScore(1-100)
0 15 39
1 15 81
2 16 6
3 16 77
4 17 40
# Standardise both features (zero mean, unit variance) before clustering.
scaler = StandardScaler()
X_kmeans = scaler.fit_transform(X)

# Within-cluster sum of squares (KMeans inertia) for k = 1..10, used by the
# elbow plot. Previously `wcss` was created empty and never filled, so the
# recorded ten-value output could not be reproduced from the visible cells.
# random_state and n_init are pinned so a Restart & Run All gives the same curve
# regardless of scikit-learn's version-dependent n_init default.
# NOTE(review): the seed/n_init of the original run are not visible, so values
# beyond k=1 may differ slightly from the recorded output.
wcss = [
    KMeans(n_clusters=k, init='k-means++', n_init=10, random_state=42)
    .fit(X_kmeans)
    .inertia_
    for k in range(1, 11)
]
wcss
Out[19]: [400.0,
269.69101219276394,
157.70400815035947,
108.92131661364357,
65.5684081557168,
55.05734827038599,
44.86475569922556,
37.228187677585886,
32.39226763033116,
29.981897788243693]
2 of 4 30-10-2024, 22:07
6 - Jupyter Notebook http://localhost:8888/notebooks/Practicals_AI/6.ipynb
In [20]: ## Plotting the Elbow Plot to determine the ideal number of clusters
plt.figure(figsize=(8,5))
# Draw WCSS against the number of clusters; the "elbow" is where the curve
# flattens. The x-range is derived from len(wcss) so it always matches the data
# (k is assumed to start at 1, as in the WCSS computation).
plt.plot(range(1, len(wcss) + 1), wcss, marker='o')
plt.title('Elbow Plot : WCSS vs Number of Clusters', fontsize=14)
plt.xlabel('Number of clusters (k)')
plt.ylabel('WCSS')
plt.show()
Out[21]: array([4, 2, 4, 2, 4, 2, 4, 2, 4, 2, 4, 2, 4, 2, 4, 2, 4, 2, 4, 2, 4, 2,
4, 2, 4, 2, 4, 2, 4, 2, 4, 2, 4, 2, 4, 2, 4, 2, 4, 2, 4, 2, 4, 0,
4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 0, 1, 3, 1, 3, 1,
0, 1, 3, 1, 3, 1, 3, 1, 3, 1, 0, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1,
3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1,
3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1,
3, 1])
3 of 4 30-10-2024, 22:07
6 - Jupyter Notebook http://localhost:8888/notebooks/Practicals_AI/6.ipynb
In [23]: ## Visualising the new dataframe with cluster numbers through scatterplot
# Scatterplot of spending score (x) against income (y) from df_Kmeans.
# NOTE(review): df_Kmeans is not defined in any visible cell — presumably it is
# the feature frame plus the cluster labels shown in Out[21]; confirm and restore
# the cell that builds it.
plt.figure(figsize=(8,5))
plt.title('KMeans Cluster Diagram : Scatterplot', fontsize=14)
# NOTE(review): the next line is cut off after `hue=` in this export (the call is
# never closed), so it is a syntax error as it stands. Restore the hue column and
# any remaining arguments from the original notebook.
sns.scatterplot(data=df_Kmeans, x='SpendScore(1-100)', y='Income(k$/yr)', hue=
plt.legend(bbox_to_anchor=(1.02, 1), loc='best', borderaxespad=0)
plt.show()
4 of 4 30-10-2024, 22:07