import math import matplotlib.pyplot as plt import numpy as np
def createDataSet(): """ 创建测试的数据集,里面的数值中具有连续值 :return: """ dataSet = [ [0.697, 0.460], [0.774, 0.376], [0.634, 0.264], [0.608, 0.318], [0.556, 0.215], [0.403, 0.237], [0.481, 0.149], [0.437, 0.211], [0.666, 0.091], [0.243, 0.267], [0.245, 0.057], [0.343, 0.099], [0.639, 0.161], [0.657, 0.198], [0.360, 0.370], [0.593, 0.042], [0.719, 0.103], [0.359, 0.188], [0.339, 0.241], [0.282, 0.257], [0.748, 0.232], [0.714, 0.346], [0.483, 0.312], [0.478, 0.437], [0.525, 0.369], [0.751, 0.489], [0.532, 0.472], [0.473, 0.376], [0.725, 0.445], [0.446, 0.459], ]
labels = ['密度', '含糖率']
labels_full = {}
for i in range(len(labels)): labelList = [example[i] for example in dataSet] uniqueLabel = set(labelList) labels_full[labels[i]] = uniqueLabel
return dataSet, labels, labels_full
def one_one_dict(a: list, b: list)->float: return pow(a[0]-b[0], 2)+pow(a[1]-b[1], 2)
def one_n_dict(sample, centers: list)->np.ndarray: sample = np.array(sample) centers = np.array(centers) samples = np.tile(sample, (centers.shape[0], 1)) distances = np.power(samples - centers, 2).sum(axis=1) return distances
def n_one_dist(X, c): c = np.array(c, ndmin=2) c_s = np.tile(c, (X.shape[0], 1)) distances = np.power(X-c_s, 2).sum(axis=1) return distances
class Maxmin(object): def __init__(self, x, theta): self.x = x self.centers = [] self.theta = theta
def init_center(self, cent=None): """ cent=None 时 随机生成初始聚类中心 否则指定点 """ if cent == None: cent_index = np.random.choice(np.shape(self.x)[0]) else: cent_index = cent self.centers.append(self.x[cent_index])
def second_center(self): dists = n_one_dist(self.x, self.centers[0]) cent_index = np.argmax(dists) self.centers.append(self.x[cent_index])
def calc_dist(self)->np.ndarray: dists = np.zeros((self.x.shape[0], len(self.centers))) for i in range(self.x.shape[0]): dist = one_n_dict(self.x[i], self.centers) dists[i] = dist return dists
def new_center(self, dists: np.ndarray)->bool: dists = np.min(dists, axis=1) if pow(max(dists), 0.5) >\ self.theta*pow(one_one_dict(self.centers[0], self.centers[1]), 0.5): self.centers.append(self.x[np.argmax(dists)]) return True else: return False
def make_ylabel(self, dists: np.ndarray)->list: index = np.argmin(dists, axis=1) return index
def fit(self, cent=None): """ cent=None 时 随机生成初始聚类中心 否则指定点 """ self.init_center(cent) self.second_center() distances = self.calc_dist() while self.new_center(distances) == True: distances = self.calc_dist() return self.make_ylabel(distances)
if __name__ == "__main__": data = [[0, 0], [3, 8], [2, 2], [1, 1], [5, 3], [4, 8], [6, 3], [5, 4], [6, 4], [7, 5]] data = np.array(data) mm = Maxmin(data, 0.5) y = mm.fit() plt.scatter(x=data[:, 0], y=data[:, 1], c=y) plt.show()
data, _, _ = createDataSet() data = np.array(data) mm1 = Maxmin(data, 0.5) y = mm1.fit() plt.scatter(x=data[:, 0], y=data[:, 1], c=y) plt.show()
|