1、K-Means 算法的 Java 实现:


/**
 * Minimal K-Means clustering for one-dimensional data.
 *
 * <p>All members are static. {@link #cluster(double[], int)} is the entry
 * point; the remaining methods are small helpers it is built from.
 */
public class BasicKMeans {

  /**
   * Demo: clusters a fixed sample into five groups and prints one group per
   * line, members separated by tabs.
   *
   * @param args unused
   */
  public static void main(String[] args) {
    double[] p = { 1, 2, 3, 5, 6, 7, 9, 10, 11, 100, 150, 200, 1000 };
    int k = 5;
    double[][] g = cluster(p, k);
    for (int i = 0; i < g.length; i++) {
      for (int j = 0; j < g[i].length; j++) {
        System.out.print(g[i][j]);
        System.out.print("\t");
      }
      System.out.println();
    }
  }

  /**
   * Partitions {@code p} into {@code k} groups with the K-Means iteration.
   *
   * <p>The first {@code k} elements of {@code p} are used as the initial
   * centres (the classic variant picks them at random). Note that the choice
   * of initial centres DOES influence which partition the iteration settles
   * on. The loop stops when two consecutive rounds yield exactly the same
   * centres.
   *
   * <p>If a group ends up empty during a round, its previous centre is kept.
   * Taking the mean of an empty group would produce {@code NaN}; since
   * {@code NaN != NaN}, the convergence check could then never succeed and
   * the loop would spin forever.
   *
   * @param p the values to cluster
   * @param k the number of groups; must satisfy {@code 1 <= k <= p.length}
   *          (or {@code k == 0} when {@code p} is empty)
   * @return {@code k} arrays, one per group, each holding that group's
   *         members in their original relative order; a group may be empty
   * @throws IllegalArgumentException if {@code k} is outside the range above
   */
  public static double[][] cluster(double[] p, int k) {
    if (k < 0 || k > p.length || (k == 0 && p.length > 0)) {
      throw new IllegalArgumentException(
          "k must be between 1 and p.length (" + p.length + "), got " + k);
    }
    // Current centres, seeded with the first k input values.
    double[] c = new double[k];
    System.arraycopy(p, 0, c, 0, k);
    double[][] g;
    while (true) {
      // Assign every value to its nearest centre.
      g = group(p, c);
      // Recompute the centre of every group.
      double[] nc = new double[k];
      for (int i = 0; i < g.length; i++) {
        // An empty group keeps its old centre instead of becoming NaN.
        nc[i] = g[i].length == 0 ? c[i] : center(g[i]);
      }
      if (equal(nc, c)) {
        break; // centres are stable: done
      }
      c = nc;
    }
    return g;
  }

  /**
   * Returns the arithmetic mean of {@code p}.
   *
   * @param p the values to average
   * @return {@code sum(p) / p.length}; {@code NaN} when {@code p} is empty
   */
  public static double center(double[] p) {
    return sum(p) / p.length;
  }

  /**
   * Assigns every value of {@code p} to the nearest centre in {@code c}.
   * When two centres are equally near, the one with the lower index wins.
   *
   * @param p the values to distribute
   * @param c the cluster centres
   * @return {@code c.length} arrays; array {@code i} holds the values whose
   *         nearest centre is {@code c[i]}, in their original order
   */
  public static double[][] group(double[] p, double[] c) {
    // label[i] is the index of the centre nearest to p[i]; size[j] counts
    // how many values were assigned to centre j.
    int[] label = new int[p.length];
    int[] size = new int[c.length];
    for (int i = 0; i < p.length; i++) {
      double[] d = new double[c.length];
      for (int j = 0; j < c.length; j++) {
        d[j] = distance(p[i], c[j]);
      }
      label[i] = min(d);
      size[label[i]]++;
    }
    // Allocate each group at its final size, then fill in input order.
    double[][] g = new double[c.length][];
    for (int j = 0; j < c.length; j++) {
      g[j] = new double[size[j]];
    }
    int[] next = new int[c.length];
    for (int i = 0; i < p.length; i++) {
      int j = label[i];
      g[j][next[j]++] = p[i];
    }
    return g;
  }

  /**
   * Returns the one-dimensional distance between two values.
   *
   * @param x first value
   * @param y second value
   * @return {@code |x - y|}
   */
  public static double distance(double x, double y) {
    return Math.abs(x - y);
  }

  /**
   * Returns the sum of all elements of {@code p}.
   *
   * @param p the values to add up
   * @return the sum; {@code 0.0} when {@code p} is empty
   */
  public static double sum(double[] p) {
    double sum = 0.0;
    for (int i = 0; i < p.length; i++) {
      sum += p[i];
    }
    return sum;
  }

  /**
   * Returns the index of the smallest element of {@code p}. On ties the
   * lowest index is returned because the comparison is strict.
   *
   * @param p a non-empty array
   * @return index of the first minimal element
   */
  public static int min(double[] p) {
    int i = 0;
    double m = p[0];
    for (int j = 1; j < p.length; j++) {
      if (p[j] < m) {
        i = j;
        m = p[j];
      }
    }
    return i;
  }

  /**
   * Compares two arrays element by element using {@code ==} semantics
   * (so a {@code NaN} element never compares equal).
   *
   * @param a first array
   * @param b second array
   * @return {@code true} when both arrays have the same length and every
   *         pair of elements at the same index is equal
   */
  public static boolean equal(double[] a, double[] b) {
    if (a.length != b.length) {
      return false;
    }
    for (int i = 0; i < a.length; i++) {
      if (a[i] != b[i]) {
        return false;
      }
    }
    return true;
  }
}



2、层次聚类算法的 Java 实现:

(1)DataPoint.java

public class DataPoint {
     String dataPointName; // 样本点名
     Cluster cluster; // 样本点所属类簇
      
  
   public DataPoint(double[] dimensioin,String dataPointName){
          this.dataPointName=dataPointName;
          this.dimensioin=dimensioin;
       public double[] getDimensioin() {
         return dimensioin;
       public void setDimensioin(double[] dimensioin) {
         this.dimensioin = dimensioin;
       public Cluster getCluster() {
         return cluster;
       public void setCluster(Cluster cluster) {
         this.cluster = cluster;
       public String getDataPointName() {
         return dataPointName;
       public void setDataPointName(String dataPointName) {
         this.dataPointName = dataPointName;
     }
 }


(2)Cluster.java

import java.util.ArrayList;
 import java.util.List; public class Cluster {
     private List<DataPoint> dataPoints = new ArrayList<DataPoint>(); // 类簇中的样本点
       public List<DataPoint> getDataPoints() {
         return dataPoints;
       public void setDataPoints(List<DataPoint> dataPoints) {
         this.dataPoints = dataPoints;
       public String getClusterName() {
         return clusterName;
       public void setClusterName(String clusterName) {
         this.clusterName = clusterName;
    }

(3)ClusterAnalysis.java

//层次聚类分析,程序入口;
 import java.util.ArrayList;
 import java.util.List; public class ClusterAnalysis {
    public List<Cluster> startAnalysis(List<DataPoint> dataPoints,int ClusterNum){
       List<Cluster> finalClusters=new ArrayList<Cluster>();
     
       List<Cluster> originalClusters=initialCluster(dataPoints);
       finalClusters=originalClusters;
       while(finalClusters.size()>ClusterNum){
           double min=Double.MAX_VALUE;
           int mergeIndexA=0;
           int mergeIndexB=0;
           for(int i=0;i<finalClusters.size();i++){
               for(int j=0;j<finalClusters.size();j++){
                   if(i!=j){
                       Cluster clusterA=finalClusters.get(i);
                                           List<DataPoint> dataPointsA=clusterA.getDataPoints();
                                           for(int m=0;m<dataPointsA.size();m++){
                           for(int n=0;n<dataPointsB.size();n++){
                               double tempDis=getDistance(dataPointsA.get(m),dataPointsB.get(n));
                               if(tempDis<min){
                                   min=tempDis;
                                   mergeIndexA=i;
                                   mergeIndexB=j;
                               }
                           }
                       }
                   }
               } //end for j
           }// end for i
           //合并cluster[mergeIndexA]和cluster[mergeIndexB]
           finalClusters=mergeCluster(finalClusters,mergeIndexA,mergeIndexB);
           return finalClusters;
    }
    private List<Cluster> mergeCluster(List<Cluster> clusters,int mergeIndexA,int mergeIndexB){
        if (mergeIndexA != mergeIndexB) {
            // 将cluster[mergeIndexB]中的DataPoint加入到 cluster[mergeIndexA]
            Cluster clusterA = clusters.get(mergeIndexA);
                     List<DataPoint> dpA = clusterA.getDataPoints();
                     for (DataPoint dp : dpB) {
                DataPoint tempDp = new DataPoint();
                tempDp.setDataPointName(dp.getDataPointName());
                tempDp.setDimensioin(dp.getDimensioin());
                tempDp.setCluster(clusterA);
                dpA.add(tempDp);
                    
          // List<Cluster> clusters中移除cluster[mergeIndexB]
            clusters.remove(mergeIndexB);
             return clusters;
   // 初始化类簇
   private List<Cluster> initialCluster(List<DataPoint> dataPoints){
       List<Cluster> originalClusters=new ArrayList<Cluster>();
       for(int i=0;i<dataPoints.size();i++){
           DataPoint tempDataPoint=dataPoints.get(i);
           List<DataPoint> tempDataPoints=new ArrayList<DataPoint>();
                   Cluster tempCluster=new Cluster();
           tempCluster.setClusterName("Cluster "+String.valueOf(i));
                   tempDataPoint.setCluster(tempCluster);
           originalClusters.add(tempCluster);
           return originalClusters;
   //计算两个样本点之间的欧几里得距离
   private double getDistance(DataPoint dpA,DataPoint dpB){
        double distance=0;
        double[] dimA = dpA.getDimensioin();
             if (dimA.length == dimB.length) {
            for (int i = 0; i < dimA.length; i++) {
                 double temp=Math.pow((dimA[i]-dimB[i]),2);
                 distance=distance+temp;
            }
            distance=Math.pow(distance, 0.5);
            return distance;
   public static void main(String[] args){
       ArrayList<DataPoint> dpoints = new ArrayList<DataPoint>();
     
       double[] a={2,3};
       double[] b={2,4};
       double[] c={1,4};
       double[] d={1,3};
       double[] e={2,2};
           double[] g={8,7};
       double[] h={8,6};
       double[] i={7,7};
       double[] j={7,6};
      //     
       double[] m={8,20};
       double[] n={8,19};
       double[] o={7,18};
       double[] p={7,17};
           dpoints.add(new DataPoint(a,"a"));
       dpoints.add(new DataPoint(b,"b"));
       dpoints.add(new DataPoint(c,"c"));
       dpoints.add(new DataPoint(d,"d"));
       dpoints.add(new DataPoint(e,"e"));
           dpoints.add(new DataPoint(g,"g"));
       dpoints.add(new DataPoint(h,"h"));
       dpoints.add(new DataPoint(i,"i"));
       dpoints.add(new DataPoint(j,"j"));
      //     
     dpoints.add(new DataPoint(m,"m"));
       dpoints.add(new DataPoint(n,"n"));
       dpoints.add(new DataPoint(o,"o"));
       dpoints.add(new DataPoint(p,"p"));
          
     ClusterAnalysis ca=new ClusterAnalysis();
           for(Cluster cl:clusters){
           System.out.println("------"+cl.getClusterName()+"------");
           List<DataPoint> tempDps=cl.getDataPoints();
           for(DataPoint tempdp:tempDps){
               System.out.println(tempdp.getDataPointName());
           }
       }
 }