C# · 12月 20, 2021

K_Means算法的简单实现C++

1.K_Means算法核心思想:(计算,递归)

(1) 从 n个数据对象任意选择 k 个对象作为初始聚类中心;

(2) 根据每个聚类对象的均值(中心对象),计算每个对象与这些中心对象的距离;并根据最小距离重新对相应对象进行划分;

(3) 重新计算每个(有变化)聚类的均值(中心对象)

(4) 循环(2)到(3)直到每个聚类不再发生变化为止

2.简单实列:

问题:假设数据挖掘的任务是将8个点聚类成3个类别,A1(2,10),A2(2,5),A3(8,4),B1(5,8),B2(7,B3(6,C1(1,2),C2(3,7),C3(4,9),距离函数是Euclidean距离。 假设初始选择A1,B1,C1分别作为每个聚类的中心用 K-means来给出,

代码(C++实现):

第一次循环执行后的三个聚类中心是多少?最后的三个中心是多少?

(题目来自某位我喜欢的AI领域老师的课后习题。)

#include

#include

#include

#include

using namespace std;

enum center_flag {

set_1,

set_2,

set_3

};//集合标志

struct Point {

float x{0};

float y{0};

center_flag flag{ set_1 };

}Point_A1,Point_A2,Point_A3,

Point_B1,Point_B2,Point_B3,

Point_C1,Point_C2,Point_C3;

Point cur_cluster_1{ 2,10,set_1 },

cur_cluster_2{ 5,8,set_2 },

cur_cluster_3{ 1,2,set_3 },//三个中心点

new_cluster_1{0,set_1},

new_cluster_2{ 0,

new_cluster_3{ 0,set_3 };

vector point_set_1{};

vector point_set_2{};

vector point_set_3{};

void Init_Point(){

Point_A1.x = 2; Point_A1.y = 10;

Point_A2.x = 2; Point_A2.y = 5;

Point_A3.x = 8; Point_A3.y = 4;

Point_B1.x = 5; Point_B1.y = 8;

Point_B2.x = 7; Point_B2.y = 5;

Point_B3.x = 6; Point_B3.y = 4;

Point_C1.x = 1; Point_C1.y = 2;

Point_C2.x = 3; Point_C2.y = 7;

Point_C3.x = 4; Point_C3.y = 9;

}

float Get_Euclidean_Distance(Point A,Point B){

float d = sqrt(pow((A.x – B.x),2) + pow((A.y – B.y),2));

return d;

}

//当前点获得归属集

center_flag Get_flag(Point curPoint,Point cluster1,Point cluster2,Point cluster3) {

float a=0,b=0,c=0;

a = Get_Euclidean_Distance(curPoint,cluster1);

b = Get_Euclidean_Distance(curPoint,cluster2);

c = Get_Euclidean_Distance(curPoint,cluster3);

if (a <= b && a <= c)

return set_1;

else if (b <= c && b <= a)

return set_2;

else

return set_3;

}

//求新的中心点

Point Get_new_cluaster(vectorpoint_set) {

float new_cluaster_x = 0;

float new_cluaster_y = 0;

int n = 0;

center_flag new_cluaster_flag = set_1;

Point point_temp;

Point cur_cluaster;

for (auto i = point_set.begin(); i != point_set.end(); i++)

{

point_temp =*i;

new_cluaster_x += point_temp.x;

new_cluaster_y += point_temp.y;

}

n = point_set.size();

new_cluaster_x = new_cluaster_x/n ;

new_cluaster_y = new_cluaster_y/ n;

new_cluaster_flag = point_temp.flag;

cur_cluaster.x = new_cluaster_x;

cur_cluaster.y = new_cluaster_y;

cur_cluaster.flag = new_cluaster_flag;

return cur_cluaster;

}

bool center_NotChange() {

if(cur_cluster_1.x==new_cluster_1.x&&

cur_cluster_1.y == new_cluster_1.y&&

cur_cluster_2.x == new_cluster_2.x&&

cur_cluster_2.y == new_cluster_2.y&&

cur_cluster_3.x == new_cluster_3.x&&

cur_cluster_3.y == new_cluster_3.y)

return true;

else

{

cur_cluster_1.x = new_cluster_1.x;

cur_cluster_1.y = new_cluster_1.y;

cur_cluster_2.x = new_cluster_2.x;

cur_cluster_2.y = new_cluster_2.y;

cur_cluster_3.x = new_cluster_3.x;

cur_cluster_3.y = new_cluster_3.y;

return false;

}

}

int main() {

int m = 0;

Init_Point();

const vector Allpoint{

Point_A1,Point_C3 };

while (1)

{

for (auto point : Allpoint)

{

point.flag = Get_flag(point,cur_cluster_1,cur_cluster_2,cur_cluster_3);

switch (point.flag)

{

case set_1: {point_set_1.push_back(point); break; }

case set_2: {point_set_2.push_back(point); break; }

case set_3: {point_set_3.push_back(point); break; }

default:

string errStr = “there are something wrong with the point”;

//cout << errStr << endl;

break;

}

}

point_set_1.push_back(cur_cluster_1);

point_set_2.push_back(cur_cluster_2);

point_set_3.push_back(cur_cluster_3);

//重新计算中心/

m++;

new_cluster_1 = Get_new_cluaster(point_set_1);

new_cluster_2 = Get_new_cluaster(point_set_2);

new_cluster_3 = Get_new_cluaster(point_set_3);

//清空集合容器

point_set_1.clear();

point_set_2.clear();

point_set_3.clear();

if (center_NotChange()==true)

{

cout << "最终中心点"<<endl;

cout << "中心点1: " << new_cluster_1.x << "," << new_cluster_1.y << endl;

cout << "中心点2: " << new_cluster_2.x << "," << new_cluster_2.y << endl;

cout << "中心点3: " << new_cluster_3.x << "," << new_cluster_3.y << endl;

break;

}

else

{

cout << "第" << m << "次计算后的中心点" << endl;

cout << "中心点1: " << new_cluster_1.x << "," << new_cluster_3.y << endl;

//数组清零

}

//输出中心点

cin >> m;

return 0;

}

输出效果: