聚类分析的Python实现

K-Means算法

import numpy as np

import pandas as pd

from sklearn.cluster import KMeans

# Load the raw table from disk.
data = pd.read_csv('multi_vol.csv')

# Transpose the table so the samples to be clustered are laid out the way
# the estimator expects (the original columns become the rows).
data1 = data.transpose()

# Build a 4-cluster K-Means estimator and fit it in one step
# (fit() returns the estimator itself).
estimator = KMeans(n_clusters=4).fit(data1)

# Results of the fit: final cluster label per sample, final cluster
# centres, and the inertia value reported by the fitted estimator.
label_pred, centroids, inertia = (
    estimator.labels_,
    estimator.cluster_centers_,
    estimator.inertia_,
)


学习向量量化LVQ

# LVQ training: run `loops` iterations, each using one randomly drawn sample.
# Relies on names defined before this point: `loops` (iteration count),
# `x` (samples), `y` (sample labels), `q` (prototype vectors, updated in
# place), `q_label` (prototype labels) and `eta` (learning rate).
# NOTE(review): assumes `x` and `q` are 2-D NumPy arrays, as the indexing
# `x[i,0]` / `q[i,0]` used elsewhere in this snippet suggests.
for i in range(loops):
    # Pick a random sample. The upper bound follows the data size instead
    # of the previously hard-coded 30, so every sample can be drawn and no
    # out-of-range index is produced for smaller data sets.
    index = np.random.randint(0, len(x))
    sample = x[index]

    # Find the prototype vector closest to the sample (squared Euclidean
    # distance). argmin returns the first minimum, matching the original
    # strict "<" comparison on ties.
    sq_dists = ((q - sample) ** 2).sum(axis=1)
    q_index = int(np.argmin(sq_dists))

    # Step along the direction from the prototype to the sample, scaled by
    # the learning rate eta.
    step = eta * (sample - q[q_index])
    if q_label[q_index] == y[index]:
        # Sample label and prototype label agree: move the prototype
        # towards the sample.
        q[q_index] += step
    else:
        # Sample label and prototype label differ: move the prototype
        # away from the sample.
        q[q_index] -= step

# Plot the samples and the prototype vectors.
# pyplot is imported here because no earlier line of the file brings `plt`
# into scope before it is used.
import matplotlib.pyplot as plt

# Samples: label 0 is drawn as red circles, every other label as black
# circles.
for point, label in zip(x, y):
    if label == 0:
        plt.plot(point[0], point[1], 'or')
    else:
        plt.plot(point[0], point[1], 'o', color='black')

# Prototype vectors: blue stars.
for proto in q:
    plt.plot(proto[0], proto[1], marker='*', color='blue')


高斯混合聚类 GMM

from sklearn import mixture

def test_GMM(dataMat, components=3, iter=100, cov_type="full"):
    """Fit a Gaussian mixture model to ``dataMat`` and label every sample.

    Args:
        dataMat: Data passed unchanged to ``fit`` and ``predict``
            (presumably shaped (n_samples, n_features) -- confirm with
            the caller).
        components: Number of mixture components.
        iter: Maximum number of iterations, forwarded as ``max_iter``.
            The name shadows the builtin but is kept so existing keyword
            callers keep working.
        cov_type: Forwarded as ``covariance_type``.

    Returns:
        A tuple ``(means, predicted_labels)``: the fitted component means
        (``clst.means_``) and the predicted label for each sample.
    """
    # The original passed the undefined name `n_components` here, which
    # raised NameError on every call; the parameter is `components`.
    clst = mixture.GaussianMixture(
        n_components=components,
        max_iter=iter,
        covariance_type=cov_type,
    )
    clst.fit(dataMat)
    predicted_labels = clst.predict(dataMat)
    return clst.means_, predicted_labels


层次聚类

import numpy

import pandas

from sklearn import datasets

import scipy.cluster.hierarchy as hcluster

# Load the iris data set: feature matrix and the known class labels.
iris = datasets.load_iris()
data, target = iris.data, iris.target

# Build the hierarchical clustering with the 'centroid' linkage method.
linkage = hcluster.linkage(data, method='centroid')

# Draw the full dendrogram, then a truncated one limited to the last
# 12 merged clusters.
hcluster.dendrogram(linkage, leaf_font_size=10.)
hcluster.dendrogram(
    linkage,
    truncate_mode='lastp',
    p=12,
    leaf_font_size=12.,
)

# Cut the tree into at most 3 flat clusters.
p = hcluster.fcluster(linkage, 3, criterion='maxclust')

# Cross-tabulate the cluster assignment (p) against the true label (t).
ct = pandas.pivot_table(
    pandas.DataFrame({'p': p, 't': target}),
    index=['t'],
    columns=['p'],
    aggfunc=[numpy.size],
)


密度聚类 DBSCAN

import pandas

import matplotlib.pyplot as plt

from sklearn.cluster import DBSCAN 

# Load the data.
# NOTE(review): "%%%%.csv" looks like a placeholder file name -- replace it
# with the real path. The scatter call below reads columns 'x' and 'y'.
data = pandas.read_csv("%%%%.csv")

# DBSCAN parameters: neighbourhood radius and the minimum number of
# samples, passed below as `eps` and `min_samples`.
eps = 0.2
MinPts = 5

# Pass the parameters by keyword: `min_samples` is keyword-only in current
# scikit-learn, so the positional form DBSCAN(eps, MinPts) raises TypeError.
model = DBSCAN(eps=eps, min_samples=MinPts)

# fit_predict() fits the model and returns the cluster label of every row,
# so a separate fit() call beforehand is redundant. The labels are computed
# before the 'type' column is added to the frame.
data['type'] = model.fit_predict(data)

# Scatter plot of the samples coloured by their cluster label.
plt.scatter(data['x'], data['y'], c=data['type'])


参考引用:

LVQ:https://blog.csdn.net/weixin_35732969/article/details/81141005

GMM:https://blog.csdn.net/FAICULTY/article/details/79343640

层次聚类:https://www.jianshu.com/p/b5e97f8d420b

密度聚类:https://www.jianshu.com/p/c2415196cc34

你可能感兴趣的