聚类算法代码

  • 一、k均值聚类
  • 1.k均值聚类1.0
  • 2.k均值聚类结果是灰度图
  • 3.k均值聚类结果是彩色图
  • 4.k均值聚类4.0
  • 5.k均值聚类5.0
  • 6.k均值聚类6.0
  • 二、密度峰值聚类算法
  • 1.
  • 2.


一、k均值聚类

1.k均值聚类1.0

import cv2
import numpy as np
def seg_kmeans_gray(img):
    # 读取图片
    imgpath = "C:/Users/dell/Desktop/defect/picture/xingang/8.jpg"
    img = cv2.imread(imgpath)

    # 展平
    img_flat = img.reshape((img.shape[0] * img.shape[1], 1))
    img_flat = np.float32(img_flat)a

    # 迭代参数
    criteria = (cv2.TERM_CRITERIA_EPS + cv2.TermCriteria_MAX_ITER, 20, 0.5)
    flags = cv2.KMEANS_RANDOM_CENTERS

    # 进行聚类
    compactness, labels, centers = cv2.kmeans(img_flat, 2, None, criteria, 10, flags)

    # 显示结果
    img_output = 1-labels.reshape((img.shape[0], img.shape[1]))
    print(img_output)
    return img_output
    
label = seg_kmeans_gray(img)

2.k均值聚类结果是灰度图

import cv2
import matplotlib.pyplot as plt
import numpy as np


def seg_kmeans_gray():
    img0 = cv2.imread("C:/Users/dell/Desktop/defect/picture/xinbang/8.jpg", cv2.IMREAD_GRAYSCALE)
     # grayscale = True
    img = cv2.cvtColor(img0,cv2.COLOR_RGB2GRAY)
    # 展平
    img_flat = img.reshape((img.shape[0] * img.shape[1], 1))
    img_flat = np.float32(img_flat)

    # 迭代参数
    criteria = (cv2.TERM_CRITERIA_EPS + cv2.TermCriteria_MAX_ITER, 20, 0.5)
    flags = cv2.KMEANS_RANDOM_CENTERS

    # 聚类
    compactness, labels, centers = cv2.kmeans(img_flat, 2, None, criteria, 10, flags)

    # 显示结果
    img_output = labels.reshape((img.shape[0], img.shape[1]))
    plt.subplot(121), plt.imshow(img, 'gray'), plt.title('input')
    plt.subplot(122), plt.imshow(img_output, 'gray'), plt.title('kmeans')
    plt.show()
# if __name__ == '__main__':
#     seg_kmeans_gray()

3.k均值聚类结果是彩色图

import cv2
import matplotlib.pyplot as plt
import numpy as np

def seg_kmeans_color():
    img = cv2.imread("C:/Users/dell/Desktop/defect/picture/xingang/8.jpg", cv2.IMREAD_COLOR)
    b, g, r = cv2.split(img)
    img = cv2.merge([r, g, b])
    img_flat = img.reshape((img.shape[0] * img.shape[1], 3))
    img_flat = np.float32(img_flat)
    criteria = (cv2.TERM_CRITERIA_EPS + cv2.TermCriteria_MAX_ITER, 20, 0.5)
    flags = cv2.KMEANS_RANDOM_CENTERS
    compactness, labels, centers = cv2.kmeans(img_flat, 2, None, criteria, 10, flags)
    img_output = labels.reshape((img.shape[0], img.shape[1]))
    plt.subplot(121), plt.imshow(img), plt.title('input')
    plt.subplot(122), plt.imshow(img_output, 'gray'), plt.title('kmeans')
    plt.show()
# if __name__ == '__main__':
#     seg_kmeans_color()

4.k均值聚类4.0

import cv2
import numpy as np
import matplotlib.pyplot as plt

img = cv2.imread('C:/Users/S5/Desktop/PIC/case1/g1.png',0)#image read be 'gray'
plt.subplot(121),plt.imshow(img,'gray'),plt.title('original')
plt.xticks([]),plt.yticks([])

#change img(2D) to 1D
img1 = img.reshape((img.shape[0]*img.shape[1],1))
img1 = np.float32(img1)

#define criteria = (type,max_iter,epsilon)
criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER,10,1.0)

#set flags: hou to choose the initial center
#---cv2.KMEANS_PP_CENTERS ; cv2.KMEANS_RANDOM_CENTERS
flags = cv2.KMEANS_RANDOM_CENTERS
# apply kmenas
compactness,labels,centers = cv2.kmeans(img1,2,None,criteria,10,flags)

img2 = labels.reshape((img.shape[0],img.shape[1]))
plt.subplot(122),plt.imshow(img2,'gray'),plt.title('kmeans')
plt.xticks([]),plt.yticks([])

5.k均值聚类5.0

import cv2
import numpy as np
img = cv2.imread('C:/Users/S5/Desktop/PIC/case3/or4.png',0)
def seg_kmeans_gray(img):
    # 读取图片
    img = img

    # 展平
    img_flat = img.reshape((img.shape[0] * img.shape[1], 1))
    img_flat = np.float32(img_flat)

    # 迭代参数
    criteria = (cv2.TERM_CRITERIA_EPS + cv2.TermCriteria_MAX_ITER, 20, 0.5)
    flags = cv2.KMEANS_RANDOM_CENTERS

    # 进行聚类
    compactness, labels, centers = cv2.kmeans(img_flat, 4, None, criteria, 10, flags)

    # 显示结果
    img_output = 1-labels.reshape((img.shape[0], img.shape[1]))
    print(img_output)
    return img_output
    
label = seg_kmeans_gray(img)

6.k均值聚类6.0

import numpy as np
import random
def loss_function(present_center, pre_center):
  '''
  损失函数,计算上一次与当前聚类中的差异(像素差的平方和)
  :param present_center: 当前聚类中心
  :param pre_center: 上一次聚类中心
  :return: 损失值
  '''
  present_center = np.array(present_center)
  pre_center = np.array(pre_center)
  return np.sum((present_center - pre_center)**2)

def classifer(intput_signal, center):
  '''
  分类器(通过当前的聚类中心,给输入图像分类)
  :param intput_signal: 输入图像
  :param center: 聚类中心
  :return: 标签矩阵
  '''
  input_row, input_col= intput_signal.shape # 输入图像的尺寸

  pixls_labels = np.zeros((input_row, input_col)) # 储存所有像素标签

  pixl_distance_t = [] # 单个元素与所有聚类中心的距离,临时用

  for i in range(input_row):
    for j in range(input_col):
      # 计算每个像素与所有聚类中心的差平方
      for k in range(len(center)):
        distance_t = np.sum(abs((intput_signal[i, j]).astype(int) - center[k].astype(int))**2)
        pixl_distance_t.append(distance_t)
      # 差异最小则为该类
      pixls_labels[i, j] = int(pixl_distance_t.index(min(pixl_distance_t)))
      # 清空该list,为下一个像素点做准备
      pixl_distance_t = []
  return pixls_labels

def k_means(input_signal, center_num, threshold):
  '''
  基于k-means算法的图像分割(适用于灰度图)
  :param input_signal: 输入图像
  :param center_num: 聚类中心数目
  :param threshold: 迭代阈值
  :return:
  '''
  input_signal_cp = np.copy(input_signal) # 输入信号的副本
  input_row, input_col = input_signal_cp.shape # 输入图像的尺寸
  pixls_labels = np.zeros((input_row, input_col)) # 储存所有像素标签

  # 随机初始聚类中心行标与列标
  initial_center_row_num = [i for i in range(input_row)]
  random.shuffle(initial_center_row_num)
  initial_center_row_num = initial_center_row_num[:center_num]

  initial_center_col_num = [i for i in range(input_col)]
  random.shuffle(initial_center_col_num)
  initial_center_col_num = initial_center_col_num[:center_num]

  # 当前的聚类中心
  present_center = []
  for i in range(center_num):
    present_center.append(input_signal_cp[initial_center_row_num[i], initial_center_row_num[i]])
  pixls_labels = classifer(input_signal_cp, present_center)

  num = 0 # 用于记录迭代次数
  while True:
    pre_centet = present_center.copy() # 储存前一次的聚类中心
    # 计算当前聚类中心
    for n in range(center_num):
      temp = np.where(pixls_labels == n)
      present_center[n] = sum(input_signal_cp[temp].astype(int)) / len(input_signal_cp[temp])
    # 根据当前聚类中心分类
    pixls_labels = classifer(input_signal_cp, present_center)
    # 计算上一次聚类中心与当前聚类中心的差异
    loss = loss_function(present_center, pre_centet)
    num = num + 1
    print("Step:"+ str(num) + "  Loss:" + str(loss))
    # 当损失小于迭代阈值时,结束迭代
    if loss <= threshold:
      break
  return pixls_labels

二、密度峰值聚类算法

1.

import numpy as np
import matplotlib.pyplot as plt
import collections
import cv2 as cv
from scipy.spatial.distance import pdist
from scipy.spatial.distance import squareform
data_set=cv.imread("C:/Users/S5/Desktop/p1.png")
def cal_distance(data_set):
    return squareform(pdist(data_set, metric='euclidean'))
    # 计算任意两点距离
def cal_distance(data_set):
    return squareform(pdist(data_set, metric='euclidean'))
 # 计算截断距离dc
def cal_dc(distance, per):
    nums = np.shape(distance)[0]
    tt = np.reshape(distance, nums*nums)
    parcent = per
    position = int(nums*(nums-1)*parcent/100)
    dc = np.sort(tt)[position+nums]
    return dc
 # 使用Gaussian kernel计算局部密度(连续值)
def cal_density_by_gaussian(distance, dc):
    density = np.zeros(shape=len(distance))
    for i in range(len(distance)):
        density[i] = np.sum(np.exp(-(distance[i, :]/dc) ** 2)) - 1
    return density
#计算密度大于自身的点中距离自己最近的那个点的距离并且保存其序号
def cal_sigma(density, distance):
    nums = np.shape(distance)[0]
    sigma = np.zeros(nums)  # 度大于自身的点中距离自己最近的距离
    nearest_neiber = np.zeros(nums)  # 度大于自身的点中距离自己最近的那个点的序号
#把密度降序排序(注意是索引)
    sort_density_index = np.argsort(-density)
    for i, index in enumerate(sort_density_index):
        # 密度最大点
        if i == 0:
            continue
        # 对于其他点,找到密度比其大的点的序号
        density_higher_index = sort_density_index[:i]
        #  获取这些点到当前点的距离,并找出最小值
        sigma[index] = np.min(distance[index, density_higher_index])
        # 保存邻近点编号
        index_nn = np.argmin(distance[index, density_higher_index])
        nearest_neiber[index] = density_higher_index[index_nn].astype(int)
    sigma[sort_density_index[0]] = np.max(sigma)
    return sigma, nearest_neiber
#确定类别点,计算gamma值并画图
def show_decision_chart(density, sigma):
    # 注意进行归一化
    normal_density = (density - np.min(density)) / (np.max(density) - np.min(density))
    normal_sigma = (sigma - np.min(sigma)) / (np.max(sigma) - np.min(sigma))
    gamma = normal_density * normal_sigma
    plt.figure(num=2, figsize=(15, 10))
    plt.scatter(x=range(len(sigma)), y=-np.sort(-gamma), c='k', marker='o', s=-np.sort(-gamma)*100)
    plt.xlabel('data_num')
    plt.ylabel('gamma')
    plt.title('gamma')
    plt.show()
    return gamma
#进行聚类
def clustering(density, centroids, nearest_neiber):
    clusters = -1 * np.ones(np.shape(density)[0]).astype(int)
    #  首先对几个聚类中心标号
    for i, center in enumerate(centroids):
        clusters[center] = i
    # 密度排序
    density_sort_index = np.argsort(-density)
    for i, index in enumerate(density_sort_index):
        #  从密度大的点标号
        if clusters[index] == -1:  # 如果未被标记,那么标记
            clusters[index] = clusters[int(nearest_neiber[index])]
    return clusters
#结果展示
def show_result(cluster, norm_data, centroids_id):
    colors = [
              '#FF0000', '#FFA500', '#FFFF00', '#00FF00', '#228B22',
              '#0000FF', '#FF1493', '#EE82EE', '#000000', '#FFA500',
              '#00FF00', '#006400', '#00FFFF', '#0000FF', '#FFFACD',
              ]
    # 画最终聚类效果图
    leader_color = {}  # 聚类中心颜色
    main_leaders = dict(collections.Counter(cluster)).keys()
    for index, i in enumerate(main_leaders):
        leader_color[i] = index
    plt.figure(num=3, figsize=(15, 10))
    for node, class_ in enumerate(cluster):
        if node in centroids_id:  # 聚类中心
            plt.scatter(x=norm_data[node, 0], y=norm_data[node, 1], marker='+', s=100, c='black', alpha=0.8)
        else:  # 非聚类中心
            plt.scatter(x=norm_data[node, 0], y=norm_data[node, 1], c=colors[leader_color[class_]], s=5, marker='o',
                        alpha=0.66)
    plt.title('The Result Of Cluster')
    plt.show()
#显示原始数据和决策图图
def show_optionmal(density, sigma, data_set):
    plt.figure(num=1, figsize=(15, 9))
    ax1 = plt.subplot(122)
    for i in range(len(data_set)):
        plt.scatter(x=density[i], y=sigma[i], c='k', marker='o', s=15)
    plt.xlabel('density')
    plt.ylabel('detal')
    plt.title('decision chart')
    plt.sca(ax1)
    ax2 = plt.subplot(121)
    for j in range(len(data_set)):
        plt.scatter(x=data_set[j, 0], y=data_set[j, 1], marker='o', c='k', s=8)
    plt.xlabel('axis_x')
    plt.ylabel('axis_y')
    plt.title('raw data')
    plt.sca(ax2)
    plt.show()
 #主函数
def main(data_set, per):
    distance = cal_distance(data_set)  # 计算任意两点之间的距离
    dc = cal_dc(distance, per)  # 计算截断距离dc
    density = cal_density_by_gaussian(distance, dc)  # 计局部算密度
    sigma, nearest_neiber = cal_sigma(density, distance)  # 计算sigma,并统计每一点的直接上级
    gamma = show_decision_chart(density, sigma)  # 展示gamma图
    show_optionmal(density, sigma, data_set)  # 展示原始数据和决策图
    centroids_num = int(input('please input num: '))  # 根据以上信息输入簇的数目
    centroids = np.argsort(-gamma)[:centroids_num]  # 聚类中心
    cluster = clustering(density, centroids, nearest_neiber)  # 进行聚类
    show_result(cluster, data_set, centroids)  # 结果展示
  main(data_set, per)

2.

import numpy as np
import matplotlib.pyplot as plt
import collections
import cv2 as cv
from scipy.spatial.distance import pdist
from scipy.spatial.distance import squareform
datas=plt.imread("C:/Users/S5/Desktop/p1.png")
datas=np.squeeze(datas)
    return distance_matrix
get_distance_matrix(datas)
def get_distance_matrix(datas):
    n = np.shape(datas)[0]
    distance_matrix = np.zeros((n, n))
    for i in range(n):
        for j in range(n):
            v_i = datas[i, :]
            v_j = datas[j, :]
            distance_matrix[i, j] = np.sqrt(np.dot((v_i - v_j), (v_i - v_j)))
    return distance_matrix


def select_dc(distance_matrix):
    n = np.shape(distance_matrix)[0]
    distance_array = np.reshape(distance_matrix, n * n)
    percent = 2.0 / 100
    position = int(n * (n - 1) * percent)
    dc = np.sort(distance_array)[position + n]
    return dc


def get_local_density(distance_matrix, dc, method=None):
    n = np.shape(distance_matrix)[0]
    rhos = np.zeros(n)
    for i in range(n):
        if method is None:
            rhos[i] = np.where(distance_matrix[i, :] < dc)[0].shape[0] - 1
        else:
            pass
    return rhos


def get_deltas(distance_matrix, rhos):
    n = np.shape(distance_matrix)[0]
    deltas = np.zeros(n)
    nearest_neighbor = np.zeros(n)
    rhos_index = np.argsort(-rhos)
    for i, index in enumerate(rhos_index):
        if i == 0:
            continue
        higher_rhos_index = rhos_index[:i]
        deltas[index] = np.min(distance_matrix[index, higher_rhos_index])
        nearest_neighbors_index = np.argmin(distance_matrix[index, higher_rhos_index])
        nearest_neighbor[index] = higher_rhos_index[nearest_neighbors_index].astype(int)
    deltas[rhos_index[0]] = np.max(deltas)
    return deltas, nearest_neighbor


def find_k_centers(rhos, deltas, k):
    rho_and_delta = rhos * deltas
    centers = np.argsort(-rho_and_delta)
    return centers[:k]


def density_peal_cluster(rhos, centers, nearest_neighbor):
    k = np.shape(centers)[0]
    if k == 0:
        print("Can't find any center")
        return
    n = np.shape(rhos)[0]
    labels = -1 * np.ones(n).astype(int)

    for i, center in enumerate(centers):
        labels[center] = i

    rhos_index = np.argsort(-rhos)
    for i, index in enumerate(rhos_index):
        if labels[index] == -1:
            labels[index] = labels[int(nearest_neighbor[index])]
    return labels


def generate_gauss_datas():
    first_group = np.random.normal(20, 1.2, (100, 2))
    second_group = np.random.normal(10, 1.2, (100, 2))
    third_group = np.random.normal(15, 1.2, (100, 2))

    datas = []
    for i in range(100):
        datas.append(first_group[i])
        datas.append(second_group[i])
        datas.append(third_group[i])
    datas = np.array(datas)
    return datas


def draw_decision(datas, rhos, deltas):
    n = np.shape(datas)[0]
    for i in range(n):
        plt.scatter(rhos[i], deltas[i], s=16, color=(0, 0, 0))
        plt.annotate(str(i), xy=(rhos[i], deltas[i]), xytext=(rhos[i], deltas[i]))
        plt.xlabel('local density-ρ')
        plt.ylabel('minimum distance to higher density points-δ')
    plt.show()


def main():
    datas = generate_gauss_datas()
    distance_matrix = get_distance_matrix(datas)
    dc = select_dc(distance_matrix)
    rhos = get_local_density(distance_matrix, dc)
    deltas, nearest_neighbor = get_deltas(distance_matrix, rhos)
    centers = find_k_centers(rhos, deltas, 3)
    labels = density_peal_cluster(rhos, centers, nearest_neighbor)
    draw_decision(datas, rhos, deltas)
    plt.cla()
    fig, ax = plt.subplots()
    for i in range(300):
        if labels[i] == 0:
            ax.scatter(datas[i, 0], datas[i, 1], facecolor='C0', edgecolors='k')
        elif labels[i] == 1:
            ax.scatter(datas[i, 0], datas[i, 1], facecolor='yellow', edgecolors='k')
        elif labels[i] == 2:
            ax.scatter(datas[i, 0], datas[i, 1], facecolor='red', edgecolors='k')
    plt.show()
if __name__ == '__main__':
    main()