public class KMeansCluster { 


        private int k;//簇的个数 


        private int num = 100000;//迭代次数 


        private List<double> datas;//原始样本集 


        private String address;//样本集路径 


        private List<point> data = new ArrayList<point>(); 


        private AbstractDistance distance = new AbstractDistance() { 


            @Override 


            public double getDis(Point p1, Point p2) { 


                //欧几里德距离 


                return Math.sqrt(Math.pow(p1.getX() - p2.getX(), 2) + Math.pow(p1.getY() - p2.getY(), 2)); 


            } 


        }; 


       


        public KMeansCluster(int k, int num, String address) { 


            this.k = k; 


            this.num = num; 


            this.address = address; 


        } 


       


        public KMeansCluster(int k, String address) { 


            this.k = k; 


            this.address = address; 


        } 


       


        public KMeansCluster(int k, List<double> datas) { 


            this.k = k; 


            this.datas = datas; 


        } 


       


        public KMeansCluster(int k, int num, List<double> datas) { 


            this.k = k; 


            this.num = num; 


            this.datas = datas; 


        } 


       


        private void check() { 


            if (k == 0) 


                throw new IllegalArgumentException("k must be the number > 0"); 


       


            if (address == null && datas == null) 


                throw new IllegalArgumentException("program can't get real data"); 


        } 


       


        /**


         * 初始化数据


         *


         * @throws java.io.FileNotFoundException


         */ 


        public void init() throws FileNotFoundException { 


            check(); 


            //读取文件,init data 


            //处理原始数据 


            for (int i = 0, j = datas.size(); i < j; i++) 


                data.add(new Point(i, datas.get(i), 0)); 


        } 


       


        /**


         * 第一次随机选取中心点


         *


         * @return


         */ 


        public Set<point> chooseCenter() { 


            Set<point> center = new HashSet<point>(); 


            Random ran = new Random(); 


            int roll = 0; 


            while (center.size() < k) { 


                roll = ran.nextInt(data.size()); 


                center.add(data.get(roll)); 


            } 


            return center; 


        } 


       


        /**


         * @param center


         * @return


         */ 


        public List<cluster> prepare(Set<point> center) { 


            List<cluster> cluster = new ArrayList<cluster>(); 


            Iterator<point> it = center.iterator(); 


            int id = 0; 


            while (it.hasNext()) { 


                Point p = it.next(); 


                if (p.isBeyond()) { 


                    Cluster c = new Cluster(id++, p); 


                    c.addPoint(p); 


                    cluster.add(c); 


                } else 


                    cluster.add(new Cluster(id++, p)); 


            } 


            return cluster; 


        } 


       


        /**


         * 第一次运算,中心点为样本值


         *


         * @param center


         * @param cluster


         * @return


         */ 


        public List<cluster> clustering(Set<point> center, List<cluster> cluster) { 


            Point[] p = center.toArray(new Point[0]); 


            TreeSet<distence> distence = new TreeSet<distence>();//存放距离信息 


            Point source; 


            Point dest; 


            boolean flag = false; 


            for (int i = 0, n = data.size(); i < n; i++) { 


                distence.clear(); 


                for (int j = 0; j < center.size(); j++) { 


                    if (center.contains(data.get(i))) 


                        break; 


       


                    flag = true; 


                    // 计算距离 


                    source = data.get(i); 


                    dest = p[j]; 


                    distence.add(new Distence(source, dest, distance)); 


                } 


                if (flag == true) { 


                    Distence min = distence.first(); 


                    for (int m = 0, k = cluster.size(); m < k; m++) { 


                        if (cluster.get(m).getCenter().equals(min.getDest())) 


                            cluster.get(m).addPoint(min.getSource()); 


       


                    } 


                } 


                flag = false; 


            } 


       


            return cluster; 


        } 


       


        /**


         * 迭代运算,中心点为簇内样本均值


         *


         * @param cluster


         * @return


         */ 


        public List<cluster> cluster(List<cluster> cluster) { 


    //        double error; 


            Set<point> lastCenter = new HashSet<point>(); 


            for (int m = 0; m < num; m++) { 


    //            error = 0; 


                Set<point> center = new HashSet<point>(); 


                // 重新计算聚类中心 


                for (int j = 0; j < k; j++) { 


                    List<point> ps = cluster.get(j).getMembers(); 


                    int size = ps.size(); 


                    if (size < 3) { 


                        center.add(cluster.get(j).getCenter()); 


                        continue; 


                    } 


                    // 计算距离 


                    double x = 0.0, y = 0.0; 


                    for (int k1 = 0; k1 < size; k1++) { 


                        x += ps.get(k1).getX(); 


                        y += ps.get(k1).getY(); 


                    } 


                    //得到新的中心点 


                    Point nc = new Point(-1, x / size, y / size, false); 


                    center.add(nc); 


                } 


                if (lastCenter.containsAll(center))//中心点不在变化,退出迭代 


                    break; 


                lastCenter = center; 


                // 迭代运算 


                cluster = clustering(center, prepare(center)); 


    //            for (int nz = 0; nz < k; nz++) { 


    //                error += cluster.get(nz).getError();//计算误差 


    //            } 


            } 


            return cluster; 


        } 


       


        /**


         * 输出聚类信息到控制台


         *


         * @param cs


         */ 


        public void out2console(List<cluster> cs) { 


            for (int i = 0; i < cs.size(); i++) { 


                System.out.println("No." + (i + 1) + " cluster:"); 


                Cluster c = cs.get(i); 


                List<point> p = c.getMembers(); 


                for (int j = 0; j < p.size(); j++) { 


                    System.out.println("\t" + p.get(j).getX() + " "); 


                } 


                System.out.println(); 


            } 


        } 


    }