Goal¶
In this tutorial, we explore clustering methods in Python, focusing on K-Means and Agglomerative Hierarchical Clustering. Clustering is a powerful technique used to group similar data points based on their features, making it easier to analyze and interpret complex datasets.
K-Means Clustering¶
K-Means is a popular clustering algorithm that groups an unlabeled dataset into different clusters.
Required Modules for the Jupyter Notebook¶
Before running the notebook, make sure to load the following modules.
Modules: make_blobs, numpy, matplotlib, KMeans, dendrogram, linkage, AgglomerativeClustering
from sklearn.datasets import make_blobs
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from scipy.cluster.hierarchy import dendrogram
from scipy.cluster.hierarchy import linkage
from sklearn.cluster import AgglomerativeClustering
We will create a dataset of 500 points in a 2-dimensional space with 4 clusters.
# make_blobs returns a tuple: (point coordinates, cluster labels)
data = make_blobs(n_samples=500, n_features=2, centers=4, cluster_std=1.2, random_state=50)
# array of (x, y) coordinates for the 500 points
points = data[0]
xvals = points[:,0]
yvals = points[:,1]
# ground-truth cluster index for each point
cluster_num = data[1]
Now we plot the points in the 2D space. They are colored based on the cluster they belong to.
# create scatter plot
plt.scatter(xvals, yvals, c=cluster_num, cmap='viridis')
plt.show()
In k-means, we first place 4 random cluster centroids. The algorithm then repeats the following two steps until convergence (a minimal sketch follows the list):
- calculate the distance from each point to the K centroids and assign the point to the closest centroid
- recalculate each centroid's position as the mean of the points in its cluster
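To make these two steps concrete, here is an illustrative NumPy sketch of a single pass. The kmeans_step name is ours, and the sketch omits details such as handling empty clusters.
# Illustrative sketch only (not scikit-learn's implementation),
# using the numpy imported above
def kmeans_step(points, centroids):
    # assignment step: distance from every point to every centroid,
    # computed by broadcasting -> shape (n_points, n_centroids)
    dists = np.linalg.norm(points[:, None, :] - centroids[None, :, :], axis=2)
    closest = dists.argmin(axis=1)
    # update step: move each centroid to the mean of its assigned points
    new_centroids = np.array([points[closest == k].mean(axis=0)
                              for k in range(len(centroids))])
    return closest, new_centroids
In practice we use scikit-learn's optimized implementation, as in the next cell.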
kmeans = KMeans(n_clusters = 4, n_init=10)
kmeans.fit(points)
y_kmeans = kmeans.predict(points)
import os
# Get the number of available CPUs
num_cpus = os.cpu_count()
print(f"Number of CPUs available: {num_cpus}")
Number of CPUs available: 40
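The CPU count is worth knowing because recent scikit-learn releases parallelize KMeans across cores using OpenMP threads. As a sketch (assuming an OpenMP-based scikit-learn build), you can cap the thread count with an environment variable set before the scientific libraries are imported:
import os
# assumption: KMeans' OpenMP threading honors OMP_NUM_THREADS; set this
# before importing numpy/scikit-learn for it to take effect reliably
os.environ["OMP_NUM_THREADS"] = "8"  # example value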
Now we plot the points in the 2D space, colored by the cluster K-Means assigned them to. The red dots represent the centers of the clusters.
plt.scatter(xvals, yvals, c = y_kmeans)
centers = kmeans.cluster_centers_
plt.scatter(centers[:, 0], centers[:, 1], c = 'red', s = 90);
plt.show()
Agglomerative Hierarchical Clustering¶
We have 15 data points in 3 clusters. These data points are stored in the data_H array.
data_H = make_blobs(n_samples=15, n_features=2, centers=3, cluster_std=1.2, random_state=10)[0]
plt.figure(figsize = (12, 6))
labels = list(range(1, len(data_H) + 1))
plt.scatter(data_H[:,0], data_H[:,1])
# annotate each point with its label so it can be matched to the dendrogram
for label, xval, yval in zip(labels, data_H[:, 0], data_H[:, 1]):
    plt.annotate(label, xy = (xval, yval), xytext = (-4, 0),
                 textcoords = 'offset points', ha = 'right', va = 'bottom')
plt.show()
Now we will create a dendrogram of the data. Points that are merged lower down are more similar (such as points 1 and 9).
# single-linkage hierarchical clustering
linked = linkage(data_H, 'single')
plt.figure(figsize = (12, 7))
dendrogram(linked, orientation = 'top', labels = labels,
           distance_sort = 'descending', show_leaf_counts = True)
plt.show()
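If you want flat cluster labels directly from this linkage matrix, SciPy's fcluster can cut the tree into a chosen number of clusters; a minimal sketch using the linked matrix above:
from scipy.cluster.hierarchy import fcluster
# cut the single-linkage tree into (at most) 3 flat clusters; labels run 1..3
flat_labels = fcluster(linked, t=3, criterion = 'maxclust')
print(flat_labels)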
Next, we will cluster the data points using AgglomerativeClustering from sklearn. Note that the clustering assigns the same members to each cluster as the dendrogram above.
# note: the 'affinity' keyword was renamed to 'metric' in newer scikit-learn releases
cluster = AgglomerativeClustering(n_clusters = 3, metric = 'euclidean', linkage = 'ward')
cluster.fit_predict(data_H)
plt.figure(figsize = (12, 6))
plt.scatter(data_H[:,0], data_H[:,1], c = cluster.labels_)
for label, xval, yval in zip(labels, data_H[:, 0], data_H[:, 1]):
    plt.annotate(label, xy = (xval, yval), xytext = (-4, 0),
                 textcoords = 'offset points', ha = 'right', va = 'bottom')
plt.show()
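One way to check that claim programmatically (cluster IDs may differ between methods, so we compare the partitions up to relabeling) is the adjusted Rand index from scikit-learn; a sketch, assuming the flat_labels from the fcluster example above:
from sklearn.metrics import adjusted_rand_score
# a score of 1.0 means the two label vectors describe identical partitions
print(adjusted_rand_score(flat_labels, cluster.labels_))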
Submit Ticket¶
If you find anything that needs to be changed, edited, or if you would like to provide feedback or contribute to the notebook, please submit a ticket by contacting us at:
Email: consult@sdsc.edu
We appreciate your input and will review your suggestions promptly!