Reading the RPEnsemble Source Code

  • 1. Other.classifier
  • 2. R
  • 3. RPchoose
  • 3.1. Function signature
  • 3.2. Variable assignments
  • 3.3. Calling the base classifiers
  • 3.3.1. knn
  • 3.3.2. LDA
  • 3.3.2.1. Training-error mode
  • 3.3.2.2. LOO mode
  • 3.3.3. QDA
  • 4. RPChooseSS
  • 4.1.1. knn (with validation set)
  • 4.1.2. LDA (with validation set)
  • 4.1.3. QDA (with validation set)
  • 5. RPParallel
  • 6. RPalpha
  • 6.1. Function signature
  • 6.2. Variable assignments
  • 7. RPEnsembleClass
  • 7.1. Function signature
  • 7.2. Variable assignments
  • 8. RPGenerate
  • 8.1. Gaussian random projections
  • 8.2. Haar random projections
  • 8.3. Axis random projections
  • 9. RPModel
  • 9.1. Function signature
  • 9.2. Model 1
  • 9.2.1. Model initialization
  • 9.2.2. Model construction
  • 9.3. Model 2
  • 9.3.1. Model initialization
  • 9.4. Model 3
  • 9.5. Model 4
  • 9.6. Summary
  • 10. Code-reading summary and reflections


Notes:

These notes come from reading the downloaded source code. The question to answer: does this package tune its parameters on the test set?

Function directory layout:



A heads-up about the .Rd files (under the man folder): they are documentation, not source. The source files live in the R folder.

1. Other.classifier

Skipped — nothing much to see.

2. R

Skipped — this one is documented under man as well, and is likewise not much use.

3. RPchoose

Straight to the source code. The function returns c(Train.Class, Test.Class).

RPChoose <-
    function(XTrain #n by p training data matrix
           , YTrain #n vector of the classes of the training samples
           , XTest  #n.test by p test data matrix
           , d      #dimension to project into
           , B2 = 10 #block size
           , base = "LDA" # base classifier, eg "knn","LDA","QDA" or other
           , k = c(3,5) # possible k if base = "knn"
           , projmethod = "Haar" # projection distribution eg "Haar", "axis"
           , estmethod = "training"
         ,   ... )
        {
      #psnice(value = 19)
      n <- length(YTrain)
      ntest <- nrow(XTest)
      p <- ncol(XTrain)
      k1 <- 1
      RP <-  RPGenerate(p, d, method = projmethod, B2)
      XRP <- as.matrix(crossprod(t(XTrain), RP),n,d*B2)
      if (base == "knn")
      {
        if(estmethod == "training") stop("training error estimate unsuitable for knn classifier") 
        if (estmethod == "loo"){
        weight.test <- sapply(1:B2, function(j){min(sapply(k,function(x){mean(knn.cv(as.matrix(XRP[,d*(j-1) + 1:d ],n,d), YTrain, x) != YTrain, na.rm = TRUE)}))})
        }
        cols1 <- d*(which.min(weight.test) - 1) + 1:d
        kcv.voteRP <- sapply(k,function(x){mean(knn.cv(as.matrix(XRP[, cols1],n,d), YTrain, x) != YTrain, na.rm = TRUE)})
        k1 <- k[which.min(kcv.voteRP)]
        Train.Class <- as.numeric(knn.cv(as.matrix(XRP[, cols1],n,d), YTrain, k1))
        XRPTest <- as.matrix(crossprod(t(XTest),RP[,cols1]),ntest,d)
        Test.Class <- as.numeric(knn(as.matrix(XRP[, cols1],n,1), XRPTest, YTrain, k1))
      }
      
      if (base == "LDA") 
      {
        if(estmethod == "training") {
        weight.test <- sapply(1:B2, function(j){mean(predict(lda(x = as.matrix(XRP[,d*(j-1) + 1:d],n,d), grouping = YTrain),  as.matrix(XRP[1:n, d*(j-1) + 1:d], n,d))$class != YTrain, na.rm = TRUE)})
        cols1 <- d*(which.min(weight.test) - 1) + 1:d
        Train.Class <- as.numeric(predict(lda(x = as.matrix(XRP[, cols1],n,d), grouping = YTrain), as.matrix(XRP[, cols1],n,d))$class)
        XRPTest <- as.matrix(crossprod(t(XTest),RP[,cols1]),ntest,d)
        Test.Class <- as.numeric(predict(lda(x = as.matrix(XRP[, cols1],n,d), grouping = YTrain), XRPTest)$class)
        }
        if (estmethod == "loo") {
        weight.test <- sapply(1:B2, function(j){mean(lda(x = as.matrix(XRP[1:n, d*(j-1) + 1:d],n,d), grouping = YTrain, CV = TRUE)$class != YTrain, na.rm = TRUE)})
        cols1 <-  d*(which.min(weight.test) - 1) + 1:d
        Train.Class <- as.numeric(lda(x = as.matrix(XRP[, cols1],n,d), grouping = YTrain, CV = TRUE)$class)
        XRPTest <- as.matrix(crossprod(t(XTest),RP[,cols1]),ntest,d)
        Test.Class <- as.numeric(predict(lda(x = as.matrix(XRP[1:n, cols1],n,d), grouping = YTrain), XRPTest)$class)
        }
      }
      if (base == "QDA") 
      {      
        if(estmethod == "training") {
        weight.test <- sapply(1:B2, function(j){mean(predict(qda(x =  as.matrix(XRP[, d*(j-1) + 1:d],n,d), grouping = YTrain),  as.matrix(XRP[,d*(j-1) + 1:d],n,d))$class != YTrain, na.rm = TRUE)})
        cols1 <-  d*(which.min(weight.test) - 1) + 1:d
        Train.Class <- as.numeric(predict(qda(x = as.matrix(XRP[, cols1],n,d), grouping = YTrain), as.matrix(XRP[, cols1],n,d))$class)
        XRPTest <- as.matrix(crossprod(t(XTest),RP[,cols1]),ntest,d)
        Test.Class <- as.numeric(predict(qda(x = as.matrix(XRP[, cols1],n,d), grouping = YTrain), XRPTest)$class)
        }
        if (estmethod == "loo"){      
        weight.test <- sapply(1:B2, function(j){mean(qda(x = as.matrix(XRP[, d*(j-1) + 1:d],n,d), grouping = YTrain, CV = TRUE)$class != YTrain, na.rm = TRUE)})
        cols1 <-  d*(which.min(weight.test) - 1) + 1:d
        Train.Class <- as.numeric(qda(x = as.matrix(XRP[, cols1],n,d), grouping = YTrain, CV = TRUE)$class)
        XRPTest <- as.matrix(crossprod(t(XTest),RP[,cols1]), ntest, d)
        Test.Class <- as.numeric(predict(qda(x = as.matrix(XRP[, cols1],n,d), grouping = YTrain), XRPTest)$class)
        }
      }
      if (base == "Other") 
      {
        weight.test <- sapply(1:B2, function(j){mean(Other.classifier(x = as.matrix(XRP[, d*(j-1) + 1:d],n,d), grouping = YTrain, CV = TRUE)$class != YTrain, na.rm = TRUE)})
        cols1 <- d*(which.min(weight.test) - 1) + 1:d
        Train.Class <- as.numeric(Other.classifier(x = as.matrix(XRP[, cols1],n,d), grouping = YTrain, CV = TRUE)$class)
        XRPTest <- as.matrix(crossprod(t(XTest),RP[,cols1]),ntest,d)
        Test.Class <- as.numeric(Other.classifier(x = as.matrix(XRP[, cols1],n,d), grouping = YTrain,  XRPTest)$class)
      }
      return(c(Train.Class, Test.Class)) 
    }

Let's take it apart piece by piece.

3.1. Function signature

RPChoose <-
    function(XTrain #n by p training data matrix
           , YTrain #n vector of the classes of the training samples
           , XTest  #n.test by p test data matrix
           , d      #dimension to project into
           , B2 = 10 #block size
           , base = "LDA" # base classifier, eg "knn","LDA","QDA" or other
           , k = c(3,5) # possible k if base = "knn"
           , projmethod = "Haar" # projection distribution eg "Haar", "axis"
           , estmethod = "training"
         ,   ... )
  • XTrain is a matrix of n samples by p dimensions, so it is n by p.
  • XTest is a matrix of n.test samples by p dimensions.
  • B2 = 10 by default: each block holds 10 random projections.
  • k = c(3,5) is simply a vector holding 3 and 5, the candidate values of k for knn.
  • Parameters written with an equals sign have default values.

3.2. Variable assignments

n <- length(YTrain)
      ntest <- nrow(XTest)
      p <- ncol(XTrain)
      k1 <- 1
      RP <-  RPGenerate(p, d, method = projmethod, B2)
      XRP <- as.matrix(crossprod(t(XTrain), RP),n,d*B2)
  • k1 is initialized to 1.
  • RP is generated by RPGenerate: B2 projections, each of size p by d, stored side by side as one p x (d*B2) matrix.
  • XRP is the cross product of t(XTrain) with RP, which is just XRP = XTrain %*% RP; the result has n rows and d*B2 columns. (A quick sanity check of this identity follows.)
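A minimal sketch (toy dimensions, made up here) confirming that crossprod(t(X), RP) is the same as X %*% RP:

set.seed(1)
n <- 5; p <- 20; d <- 2; B2 <- 3
X  <- matrix(rnorm(n * p), n, p)
RP <- matrix(rnorm(p * d * B2), p, d * B2)
# crossprod(a, b) computes t(a) %*% b, so crossprod(t(X), RP) = X %*% RP
all.equal(crossprod(t(X), RP), X %*% RP)  # TRUE; both are n x (d*B2)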

3.3. Calling the base classifiers

3.3.1. knn

if (base == "knn")
      {
        if(estmethod == "training") stop("training error estimate unsuitable for knn classifier") 
        if (estmethod == "loo"){
        weight.test <- sapply(1:B2, function(j){min(sapply(k,function(x){mean(knn.cv(as.matrix(XRP[,d*(j-1) + 1:d ],n,d), YTrain, x) != YTrain, na.rm = TRUE)}))})
        }
        cols1 <- d*(which.min(weight.test) - 1) + 1:d
        kcv.voteRP <- sapply(k,function(x){mean(knn.cv(as.matrix(XRP[, cols1],n,d), YTrain, x) != YTrain, na.rm = TRUE)})
        k1 <- k[which.min(kcv.voteRP)]
        Train.Class <- as.numeric(knn.cv(as.matrix(XRP[, cols1],n,d), YTrain, k1))
        XRPTest <- as.matrix(crossprod(t(XTest),RP[,cols1]),ntest,d)
        Test.Class <- as.numeric(knn(as.matrix(XRP[, cols1],n,1), XRPTest, YTrain, k1))
      }
weight.test <- sapply(1:B2, function(j){min(sapply(k,function(x){mean(knn.cv(as.matrix(XRP[,d*(j-1) + 1:d ],n,d), YTrain, x) != YTrain, na.rm = TRUE)}))})

This line is long, so let's unpack it:
a. Base R has a family of apply functions: apply, lapply, sapply, vapply, mapply, replicate, etc. They all take a list, matrix, or data.frame together with a function, and apply that function to every row, column, or element — "apply the function to each unit".
b. Specifically: if every element of the result has length 1, sapply() simplifies the result to a vector; if all elements have equal length greater than 1, it simplifies to a matrix; if it cannot decide how to simplify, it returns a list, the same as lapply(). Here, the integers 1 through B2 are fed to the function's argument j.
c. function(j) works the same way internally, except it takes a minimum: the vector k is passed to function(x), and for each x a mean error rate is computed from leave-one-out knn (knn.cv). Compare the signature with the call:

knn.cv(train, cl, k = 1, l = 0, prob = FALSE, use.all = TRUE)
knn.cv(as.matrix(XRP[, d*(j-1) + 1:d], n, d), YTrain, x)

Matching the two up:
d. The training set is as.matrix(XRP[, d*(j-1) + 1:d], n, d); the indexing slices out block j, and each block actually processed is n by d.
e. cl (the labels) is YTrain.
f. x is the value passed in, i.e. an element of the vector c(3,5).
In short: each block of the projected training set goes through leave-one-out knn, giving one error rate per candidate k (two values here, since k = c(3,5)); the smaller of the two is stored for that block. weight.test is therefore a vector of length B2, one entry per block.

  • Once the weights are in, locate the winning block (the one with the fewest training errors):
cols1 <- d*(which.min(weight.test) - 1) + 1:d
  • Find the best k for that block:
    1. On the winning block of the projected training set XRP, compute the LOO error for each candidate k (two values, since k = c(3,5)):
kcv.voteRP <- sapply(k,function(x){mean(knn.cv(as.matrix(XRP[, cols1],n,d), YTrain, x) != YTrain, na.rm = TRUE)})

2. Record the k that wins:

k1 <- k[which.min(kcv.voteRP)]
  • Here the factor returned by knn.cv is converted to numeric:
Train.Class <- as.numeric(knn.cv(as.matrix(XRP[, cols1],n,d), YTrain, k1))
  • Project the test set, i.e. XTest %*% bestRP:
XRPTest <- as.matrix(crossprod(t(XTest),RP[,cols1]),ntest,d)
  • Classify the projected test set (note the stray n,1 in the call below: as.matrix ignores those extra arguments for a matrix input, so it is harmless, though n,d was presumably intended):
Test.Class <- as.numeric(knn(as.matrix(XRP[, cols1],n,1), XRPTest, YTrain, k1))

Note: the projection applied to the test set is the one from this block that performed best on the training set — the test set plays no part in the selection.
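To make the block indexing concrete, here is a standalone toy re-creation of the selection step (made-up data and sizes; it mirrors the logic of the snippet above, not the package code itself):

library(class)  # provides knn.cv
set.seed(1)
n <- 60; p <- 20; d <- 2; B2 <- 5; k <- c(3, 5)
XTrain <- matrix(rnorm(n * p), n, p)
YTrain <- factor(rep(1:2, each = n / 2))
RP  <- matrix(rnorm(p * d * B2) / sqrt(p), p, d * B2)
XRP <- XTrain %*% RP                      # n x (d*B2): B2 blocks of d columns
# leave-one-out error of the best candidate k, for each block j
weight.test <- sapply(1:B2, function(j) {
  block <- XRP[, d * (j - 1) + 1:d]       # the d columns of block j
  min(sapply(k, function(x) mean(knn.cv(block, YTrain, x) != YTrain)))
})
cols1 <- d * (which.min(weight.test) - 1) + 1:d  # columns of the winning block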


3.3.2. LDA

3.3.2.1. Training-error mode

The code:

if(estmethod == "training") {
        weight.test <- sapply(1:B2, function(j){mean(predict(lda(x = as.matrix(XRP[,d*(j-1) + 1:d],n,d), grouping = YTrain),  as.matrix(XRP[1:n, d*(j-1) + 1:d], n,d))$class != YTrain, na.rm = TRUE)})
        cols1 <- d*(which.min(weight.test) - 1) + 1:d
        Train.Class <- as.numeric(predict(lda(x = as.matrix(XRP[, cols1],n,d), grouping = YTrain), as.matrix(XRP[, cols1],n,d))$class)
        XRPTest <- as.matrix(crossprod(t(XTest),RP[,cols1]),ntest,d)
        Test.Class <- as.numeric(predict(lda(x = as.matrix(XRP[, cols1],n,d), grouping = YTrain), XRPTest)$class)
}
  • weight.test records the training error after projecting and classifying: for each of the B2 random projections in the block, lda is fitted with (x = as.matrix(XRP[,d*(j-1) + 1:d],n,d), grouping = YTrain); predict is then called on the same projected training data — predict(lda(...), as.matrix(XRP[1:n, d*(j-1) + 1:d], n,d)) — so the training set is re-classified, and the misclassification rate is averaged.
  • cols1 records the columns of the projection with the smallest training error.
  • Train.Class records how the training set, under the best projection, is classified after refitting LDA.
  • XRPTest applies that same projection to the test set.
  • The LDA model fitted on the projected training set then classifies the projected test set.
3.3.2.2. LOO mode

The code:

if (estmethod == "loo") {
        weight.test <- sapply(1:B2, function(j){mean(lda(x = as.matrix(XRP[1:n, d*(j-1) + 1:d],n,d), grouping = YTrain, CV = TRUE)$class != YTrain, na.rm = TRUE)})
        cols1 <-  d*(which.min(weight.test) - 1) + 1:d
        Train.Class <- as.numeric(lda(x = as.matrix(XRP[, cols1],n,d), grouping = YTrain, CV = TRUE)$class)
        XRPTest <- as.matrix(crossprod(t(XTest),RP[,cols1]),ntest,d)
        Test.Class <- as.numeric(predict(lda(x = as.matrix(XRP[1:n, cols1],n,d), grouping = YTrain), XRPTest)$class)
        }
  • Here weight.test is computed with CV = TRUE (leave-one-out inside lda).
  • Train.Class is also computed with CV = TRUE.
  • Test.Class, naturally enough, is not: it uses predict() on a model fitted to the full training set. (A minimal illustration of the CV = TRUE switch follows.)
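For reference, a tiny sketch (toy data, made up here) of what CV = TRUE changes in MASS::lda — with it, lda returns leave-one-out predictions directly instead of a fitted model:

library(MASS)
set.seed(1)
X <- matrix(rnorm(40), 20, 2)
Y <- factor(rep(1:2, each = 10))
fit <- lda(x = X, grouping = Y)             # a model; needs predict() for labels
loo <- lda(x = X, grouping = Y, CV = TRUE)  # no model: $class holds LOO labels
mean(loo$class != Y)                        # leave-one-out error estimate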

3.3.3. QDA

if (base == "QDA") 
      {      
        if(estmethod == "training") {
        weight.test <- sapply(1:B2, function(j){mean(predict(qda(x =  as.matrix(XRP[, d*(j-1) + 1:d],n,d), grouping = YTrain),  as.matrix(XRP[,d*(j-1) + 1:d],n,d))$class != YTrain, na.rm = TRUE)})
        cols1 <-  d*(which.min(weight.test) - 1) + 1:d
        Train.Class <- as.numeric(predict(qda(x = as.matrix(XRP[, cols1],n,d), grouping = YTrain), as.matrix(XRP[, cols1],n,d))$class)
        XRPTest <- as.matrix(crossprod(t(XTest),RP[,cols1]),ntest,d)
        Test.Class <- as.numeric(predict(qda(x = as.matrix(XRP[, cols1],n,d), grouping = YTrain), XRPTest)$class)
        }
        if (estmethod == "loo"){      
        weight.test <- sapply(1:B2, function(j){mean(qda(x = as.matrix(XRP[, d*(j-1) + 1:d],n,d), grouping = YTrain, CV = TRUE)$class != YTrain, na.rm = TRUE)})
        cols1 <-  d*(which.min(weight.test) - 1) + 1:d
        Train.Class <- as.numeric(qda(x = as.matrix(XRP[, cols1],n,d), grouping = YTrain, CV = TRUE)$class)
        XRPTest <- as.matrix(crossprod(t(XTest),RP[,cols1]), ntest, d)
        Test.Class <- as.numeric(predict(qda(x = as.matrix(XRP[, cols1],n,d), grouping = YTrain), XRPTest)$class)
        }
      }

Much the same story as LDA.

4. RPChooseSS

This is RPChoose for the sample-splitting case: the same idea, but projections are scored on a held-out validation set. It returns c(Val.Class, Test.Class).

RPChooseSS <-
    function(XTrain #n by p training data matrix
           , YTrain #n vector of the classes of the training samples
           , XVal #n.val by p validation data matrix
           , YVal #n.val vector of the classes of the validation samples
           , XTest  #n.test by p test data matrix
           , d      #dimension to project into
           , B2 = 100 #block size
           , base = "LDA" # base classifier, eg "knn","LDA","QDA" or other
           , k = c(3,5) # possible k if base = "knn"
           , projmethod = "Haar" # projection distribution eg "Haar", "axis"
         ,   ... )
        {
    n <- length(YTrain)
    p <- ncol(XTrain)
    n.val <- length(YVal)
    n.test <- nrow(XTest)
    w <- n.val
    RP <-  RPGenerate(p, d, method = projmethod, B2)
    XRP <- as.matrix(crossprod(t(XTrain), RP),n,B2*d)
    XRPVal <- as.matrix(crossprod(t(XVal), RP),n.val,B2*d)
    if (base == "knn")
    {
      weight.test <- sapply(1:B2, function(j){min(sapply(k,function(x){mean(knn(as.matrix(XRP[, d*(j-1) + 1:d],n,d), as.matrix(XRPVal[, d*(j-1) + 1:d],n.val,d), YTrain, x) != YVal, na.rm = TRUE)}))})
      cols1 <- d*(which.min(weight.test) - 1) + 1:d
      kcv.voteRP <- sapply(k,function(x){mean(knn(as.matrix(XRP[,cols1],n,d), as.matrix(XRPVal[,cols1],n.val,d), YTrain, x) != YVal, na.rm = TRUE)})
      k1 <- k[which.min(kcv.voteRP)]
      Val.Class <- as.numeric(knn(as.matrix(XRP[,cols1],n,d), as.matrix(XRPVal[,cols1],n.val,d), YTrain, k1))
      XRPTest <- as.matrix(crossprod(t(XTest),RP[,cols1]),n.test,d)
      Test.Class <- as.numeric(knn(as.matrix(XRP[,cols1],n,d), XRPTest, YTrain, k1))
    }
    if (base == "LDA") 
    {
      weight.test <- sapply(1:B2, function(j){mean(predict(lda(x =  as.matrix(XRP[, d*(j-1) + 1:d],n,d), grouping = YTrain),  as.matrix(XRPVal[, d*(j-1) + 1:d],n.val,d))$class != YVal, na.rm = TRUE)})
      cols1 <- d*(which.min(weight.test) - 1) + 1:d
      Val.Class <- as.numeric(predict(lda(x = as.matrix(XRP[,cols1],n,d), grouping = YTrain), as.matrix(XRPVal[,cols1],n.val,d))$class)
      XRPTest <- as.matrix(crossprod(t(XTest),RP[,cols1]),n.test,d)
      Test.Class <- as.numeric(predict(lda(x = as.matrix(XRP[,cols1],n,d), grouping = YTrain), XRPTest)$class)
    }
    if (base == "QDA") 
    {      
        weight.test <- sapply(1:B2, function(j){mean(predict(qda(x =  as.matrix(XRP[, d*(j-1) + 1:d],n,d), grouping = YTrain),  as.matrix(XRPVal[, d*(j-1) + 1:d],n.val,d))$class != YVal, na.rm = TRUE)})
        cols1 <-  d*(which.min(weight.test) - 1) + 1:d
        Val.Class <- as.numeric(predict(qda(x = as.matrix(XRP[,cols1],n,d), grouping = YTrain), as.matrix(XRPVal[,cols1],n.val,d))$class)
        XRPTest <- as.matrix(crossprod(t(XTest),RP[,cols1]),n.test,d)
        Test.Class <- as.numeric(predict(qda(x = as.matrix(XRP[,cols1],n,d), grouping = YTrain), XRPTest)$class)
    }
    if (base == "Other") {
      weight.test <- sapply(1:B2, function(j){mean(Other.classifier(x = as.matrix(XRP[, d*(j-1) + 1:d],n,d), grouping = YTrain, CV = TRUE)$class != YTrain, na.rm = TRUE)})
      cols1 <- d*(which.min(weight.test) - 1) + 1:d
      Val.Class <- as.numeric(Other.classifier(x = as.matrix(XRP[,cols1],n,d), grouping = YTrain, as.matrix(XRPVal[,cols1],n.val,d))$class)
      XRPTest <- as.matrix(crossprod(t(XTest),RP[,cols1]),n.test,d)
      Test.Class <- as.numeric(Other.classifier(x = as.matrix(XRP[,cols1],n,d), grouping = YTrain, XRPTest)$class)
    }
    return(c(Val.Class, Test.Class)) 
}

Up to the point where the base classifier is called, everything is much the same — except there is now a validation set:

n.val <- length(YVal)
 w <- n.val
 XRPVal <- as.matrix(crossprod(t(XVal), RP),n.val,B2*d)

4.1.1. knn (with validation set)

if (base == "knn")
    {
      weight.test <- sapply(1:B2, function(j){min(sapply(k,function(x){mean(knn(as.matrix(XRP[, d*(j-1) + 1:d],n,d), as.matrix(XRPVal[, d*(j-1) + 1:d],n.val,d), YTrain, x) != YVal, na.rm = TRUE)}))})
      cols1 <- d*(which.min(weight.test) - 1) + 1:d
      kcv.voteRP <- sapply(k,function(x){mean(knn(as.matrix(XRP[,cols1],n,d), as.matrix(XRPVal[,cols1],n.val,d), YTrain, x) != YVal, na.rm = TRUE)})
      k1 <- k[which.min(kcv.voteRP)]
      Val.Class <- as.numeric(knn(as.matrix(XRP[,cols1],n,d), as.matrix(XRPVal[,cols1],n.val,d), YTrain, k1))
      XRPTest <- as.matrix(crossprod(t(XTest),RP[,cols1]),n.test,d)
      Test.Class <- as.numeric(knn(as.matrix(XRP[,cols1],n,d), XRPTest, YTrain, k1))
    }
  • weight.test now pulls out the validation-set errors, picking the projection with the smallest validation error.
  • kcv.voteRP finds the smallest validation error in order to pin down which k to use.
  • Val.Class records the predicted labels for the validation set.
  • XRPTest applies the chosen projection to the test set.
  • Test.Class fits on the projected training set and classifies the projected test set.

4.1.2. LDA (with validation set)

if (base == "LDA") 
    {
      weight.test <- sapply(1:B2, function(j){mean(predict(lda(x =  as.matrix(XRP[, d*(j-1) + 1:d],n,d), grouping = YTrain),  as.matrix(XRPVal[, d*(j-1) + 1:d],n.val,d))$class != YVal, na.rm = TRUE)})
      cols1 <- d*(which.min(weight.test) - 1) + 1:d
      Val.Class <- as.numeric(predict(lda(x = as.matrix(XRP[,cols1],n,d), grouping = YTrain), as.matrix(XRPVal[,cols1],n.val,d))$class)
      XRPTest <- as.matrix(crossprod(t(XTest),RP[,cols1]),n.test,d)
      Test.Class <- as.numeric(predict(lda(x = as.matrix(XRP[,cols1],n,d), grouping = YTrain), XRPTest)$class)
    }
  • Fit on the projected training set; measure the error on the validation set.
  • Pick the projection with the fewest validation errors.
  • Record the validation-set classifications.
  • Project the test set.
  • Fit on the projected training set and predict on the projected test set.

4.1.3. QDA (with validation set)

if (base == "QDA") 
    {      
        weight.test <- sapply(1:B2, function(j){mean(predict(qda(x =  as.matrix(XRP[, d*(j-1) + 1:d],n,d), grouping = YTrain),  as.matrix(XRPVal[, d*(j-1) + 1:d],n.val,d))$class != YVal, na.rm = TRUE)})
        cols1 <-  d*(which.min(weight.test) - 1) + 1:d
        Val.Class <- as.numeric(predict(qda(x = as.matrix(XRP[,cols1],n,d), grouping = YTrain), as.matrix(XRPVal[,cols1],n.val,d))$class)
        XRPTest <- as.matrix(crossprod(t(XTest),RP[,cols1]),n.test,d)
        Test.Class <- as.numeric(predict(qda(x = as.matrix(XRP[,cols1],n,d), grouping = YTrain), XRPTest)$class)
    }
  • Analogous to LDA.

5. RPParallel

The script below collects the results of B1 runs and returns RP.out.

RPParallel <-
function(XTrain #n by p training data matrix
       , YTrain #n vector of the classes of the training samples
       , XVal = NULL#n.val by p validation data matrix
       , YVal = NULL #n.val vector of the classes of the validation samples
       , XTest  #n.test by p test data matrix
       , d      #dimension to project into
       , B1 = 500 #number of blocks
       , B2 = 50 #block size
       , base = "LDA" # base classifier, eg "knn","LDA","QDA", or "Other"
       , projmethod = "Gaussian"
       , estmethod = "training"
       , k = c(3,5,9) # possible k
       , clustertype = "Default" #computing cluster type
       , cores = 1
       , machines = NULL  #names of computers on network to form cluster
       , seed = 1 #reproducability seed
       , ... )
{
  if(clustertype == "Default"){cluster = makePSOCKcluster(1)}
  if(clustertype == "Fork"){cluster = makeForkCluster(cores)}
  if(clustertype == "Socket"){cluster = makePSOCKcluster(names = machines)}
  
  clusterExport(cluster, c(ls(),"knn.cv", "knn", "lda", "qda"), envir = environment())

  if(is.null(seed) == FALSE) {clusterSetRNGStream(cluster, seed)}

  if (estmethod == "samplesplit"){
      n.val <- length(YVal)
      RP.out <- simplify2array(parLapply(cl = cluster, 1:B1, function(x){return(RPChooseSS(XTrain, YTrain, XVal, YVal, XTest, d, B2, base, k, projmethod))}))
  }
  else{
    RP.out <- simplify2array(parLapply(cl = cluster, 1:B1, function(x){return(RPChoose(XTrain, YTrain, XTest, d, B2, base, k, projmethod, estmethod))}))
  }
  
  stopCluster(cluster) 
  
 return (RP.out)
}
  • This simply runs RPChoose (or RPChooseSS when estmethod = "samplesplit") B1 times on a parallel cluster and gathers the results. A minimal end-to-end sketch follows.
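A minimal usage sketch, assuming the RPEnsemble package (and its dependencies MASS and class) is installed; all parameter values here are arbitrary, and the data comes from RPModel (section 9):

library(RPEnsemble)
set.seed(1)
Train <- RPModel(1, n = 50,  p = 100, Pi = 1/2)
Test  <- RPModel(1, n = 100, p = 100, Pi = 1/2)
Out <- RPParallel(XTrain = Train$x, YTrain = Train$y, XTest = Test$x,
                  d = 2, B1 = 10, B2 = 5, base = "LDA",
                  projmethod = "Haar", estmethod = "training", cores = 1)
dim(Out)  # (n + n.test) x B1: Train.Class stacked on Test.Class, per run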

6. RPalpha

This one is neat: it determines the voting cutoff alpha.

RPalpha <-
function(RP.out # the result of a call to RPParallel
        ,Y # Training set classes if samplesplit = FALSE, Validation set classes if samplesplit = TRUE
        ,p1 # prior probability estimate
         )
   {
    n <- length(Y)
    Train.Class <- RP.out[1:n, ]
    vote1 <- rowMeans(Train.Class[Y == 1, ], na.rm = TRUE)
    vote2 <- rowMeans(Train.Class[Y == 2, ], na.rm = TRUE)
    errecdfm <- function(x) {
        p1 * ecdf(vote1)(x) + (1 - p1) * (1 - ecdf(vote2)(x))
    }
    errecdfM <- function(x) {
        p1 * ecdf(vote1)(-x) + (1 - p1) * (1 - ecdf(vote2)(-x))
    }
    alpham <- optimise(errecdfm, c(1, 2), maximum = TRUE)$maximum
    alphaM <- optimise(errecdfM, c(-2, -1), maximum = TRUE)$maximum
    alpha <- (alpham - alphaM)/2
    return(alpha)
}

6.1. 函数调用

RPalpha <-
function(RP.out # the result of a call to RPParallel
        ,Y # Training set classes if samplesplit = FALSE, Validation set classes if samplesplit = TRUE
        ,p1 # prior probability estimate
         )
  • RP.out is the ensemble output aggregated over the B1 runs.
  • Without sample splitting, Y is the training labels; with it, the validation labels.
  • p1 is the estimated prior probability of class 1.

6.2. Variable assignments

n <- length(Y)
    Train.Class <- RP.out[1:n, ]
  • These are the labels the classifiers predicted for the training (or validation) set — an n by B1 matrix.
vote1 <- rowMeans(Train.Class[Y == 1, ], na.rm = TRUE)
    vote2 <- rowMeans(Train.Class[Y == 2, ], na.rm = TRUE)
  • Here the predicted labels (1 or 2 — the classes are coded 1 and 2, not 0 and 1, and only binary classification is handled) are split by the true class, and row means over the B1 columns are taken: vote1 is the average vote for each true class-1 observation, vote2 for each true class-2 observation.
errecdfm <- function(x) {
        p1 * ecdf(vote1)(x) + (1 - p1) * (1 - ecdf(vote2)(x))
    }
    errecdfM <- function(x) {
        p1 * ecdf(vote1)(-x) + (1 - p1) * (1 - ecdf(vote2)(-x))
    }
  • The lowercase m and uppercase M mark the two ends of the optimal range (more below). ecdf in R is the empirical cumulative distribution function. For any distribution the CDF is the integral of the density; since the true density is unknown, the true CDF cannot be derived exactly from a sample. The e in ecdf() stands for Empirical: the curve is built by accumulating the observed data points — in plain terms, a step function climbing to the upper right.
  • ecdf appears with two sets of parentheses: ecdf(vote1) returns a function, and (x) is the argument then passed to that function. (See the tiny sketch below.)
  • errecdfM is the mirror image of errecdfm (evaluated at -x), which lets optimise locate the extreme from the other side.
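A tiny illustration of the double parentheses (made-up votes):

v <- c(1, 1, 1.2, 1.6, 2)
F <- ecdf(v)   # ecdf() returns a function: the empirical CDF of v
F(1.5)         # proportion of votes <= 1.5; here 0.6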
alpham <- optimise(errecdfm, c(1, 2), maximum = TRUE)$maximum
    alphaM <- optimise(errecdfM, c(-2, -1), maximum = TRUE)$maximum
  • This is the optimisation. The interval c(1, 2) is used because the labels — and hence the votes — lie between 1 and 2. It corresponds to equation (17) of the paper; reading it off the code, the quantity being maximised is the empirical accuracy

    p1 * F1(alpha) + (1 - p1) * (1 - F2(alpha)),

where Fj denotes the empirical CDF of the votes of the true class-j observations. Maximising it is equivalent to minimising the empirical error of the rule "assign class 2 when the vote exceeds alpha".

alpham and alphaM locate the two ends of the range of cutoffs achieving that minimal error: optimise on errecdfm finds one maximiser, while optimise on the mirrored errecdfM over c(-2, -1) approaches the maximising set from the other side.

alpha <- (alpham - alphaM)/2
  • This is the target alpha: the midpoint of that optimal range (alphaM lives on the negated axis, so (alpham - alphaM)/2 = (alpham + (-alphaM))/2).

7. RPEnsembleClass

The code first:

RPEnsembleClass <-
function(RP.out # the result of a call to RPParallel
        ,n  # training sample size
        ,n.val #validation set size if samplesplit = TRUE
        ,n.test #test sample size
        ,p1 #(estimate of) prior probability
        ,samplesplit = FALSE #split sample TRUE/FALSE
        ,alpha  #voting cutoff alpha
        ,... )
    {
  if (samplesplit == TRUE){
    Test.Class <- RP.out[n.val + 1:n.test, ]
  }
  else{Test.Class <- RP.out[n + 1:n.test, ]}
  if(n.test == 1){vote <- mean(Test.Class, na.rm = TRUE)}
  if(n.test > 1){vote <- rowMeans(Test.Class, na.rm = TRUE)}
  Class  <- 1 + as.numeric(vote > alpha)
  return(Class)
}

7.1. Function signature

function(RP.out # the result of a call to RPParallel
        ,n  # training sample size
        ,n.val #validation set size if samplesplit = TRUE
        ,n.test #test sample size
        ,p1 #(estimate of) prior probability
        ,samplesplit = FALSE #split sample TRUE/FALSE
        ,alpha  #voting cutoff alpha
        ,... )
  • This takes the sizes of the training set, the validation set (when sample splitting is on) and the test set, plus the prior estimate and the cutoff alpha; sample splitting is off by default.

7.2. Variable assignments

if (samplesplit == TRUE){
    Test.Class <- RP.out[n.val + 1:n.test, ]
  }
  else{Test.Class <- RP.out[n + 1:n.test, ]}
  • Toggling samplesplit only changes where in RP.out the test-set rows are taken from.
if(n.test == 1){vote <- mean(Test.Class, na.rm = TRUE)}
  if(n.test > 1){vote <- rowMeans(Test.Class, na.rm = TRUE)}
  • This is the voting step. Mind the test-set size: the single-observation case needs mean() rather than rowMeans() (easy to forget).
Class  <- 1 + as.numeric(vote > alpha)
  • The +1 is there because the class labels are 1 and 2. (A sketch wiring sections 5-7 together follows.)
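Putting sections 5-7 together — this continues the sketch from section 5, carrying over Train, Test and Out (n.val = 0 is a harmless placeholder since samplesplit = FALSE):

p1    <- mean(Train$y == 1)                 # prior probability estimate
alpha <- RPalpha(Out, Y = Train$y, p1 = p1)
Yhat  <- RPEnsembleClass(Out, n = length(Train$y), n.val = 0,
                         n.test = nrow(Test$x), p1 = p1,
                         samplesplit = FALSE, alpha = alpha)
mean(Yhat != Test$y)                        # test error of the ensemble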

8. RPGenerate

RPGenerate <-
function(p = 100 # higher dimension
       , d = 10  # lower dimension
       , method = "Haar" # random projection method
       , B2 = 10)
{
    if (p < d) stop("d must be less than p")
    if (method == "Gaussian")
    {
        Q <- matrix(1/sqrt(p)*rnorm(p*d*B2, 0, 1), p, d*B2)
    }
    if (method == "Haar")
    {
        R0 <- matrix(1/sqrt(p)*rnorm(p*d*B2, 0, 1), p, d*B2)
        Q <- matrix(sapply(1:B2 - 1, FUN = function(s){qr.Q(qr(R0[, s*d + 1:d]))[, 1:d]}), p, B2*d)
    }
    if (method == "axis")
    {
        Q <- NULL
        for (i in 1:B2)
        {
            S <- sample(1:p, d)
            R <- matrix(0, p, d)
            for (D in 1:d)
            {
                R[S[D], D] <- 1
            }
            Q <- cbind(Q, R)
        }
    }
    if (method == "other")
    {
        Q <- matrix(0, p, d)
    }
    return(Q)
}

Three methods (plus an all-zero "other" placeholder) generate the random projection matrices; each returns Q.

8.1. Gaussian random projections

if (method == "Gaussian")
{
    Q <- matrix(1/sqrt(p)*rnorm(p*d*B2, 0, 1), p, d*B2)
}
  • In rnorm(n, mean = 0, sd = 1), n is the number of random values to draw, mean the mean, and sd the standard deviation; rnorm() samples from the normal distribution.
  • The 1/sqrt(p) factor normalizes the scale, so each column has roughly unit norm. (A one-line check follows.)
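A one-line check of the scaling (p made up):

set.seed(1)
p <- 1000
q <- rnorm(p, 0, 1) / sqrt(p)  # one column of a Gaussian projection
sum(q^2)                       # close to 1: roughly unit squared norm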

8.2. Haar random projections


if (method == "Haar")
{
    R0 <- matrix(1/sqrt(p)*rnorm(p*d*B2, 0, 1), p, d*B2)
    Q <- matrix(sapply(1:B2 - 1, FUN = function(s){qr.Q(qr(R0[, s*d + 1:d]))[, 1:d]}), p, B2*d)
}
  • R0 is generated exactly as in the Gaussian case.
  • Careful with 1:B2-1: by R's operator precedence this is (1:B2) - 1, i.e. s runs from 0 to B2-1, so s*d + 1:d indexes every one of the B2 blocks — nothing is skipped.
  • Inside the sapply we have (1:B2-1, FUN = function(s){qr.Q(qr(R0[,s*d+1:d]))[,1:d]}); the body of function(s) is {qr.Q(qr(R0[,s*d+1:d]))[,1:d]}.
  • The QR decomposition is base R's default. It factors a matrix into an orthogonal matrix times an upper-triangular matrix; qr.Q, qr.R, qr.X recover the pieces (or the original matrix).

    A reference example (rankMatrix comes from the Matrix package):
library(Matrix)  # for rankMatrix
inputdata <- read.csv("temp.csv", sep = ",", header = TRUE)
inputdata <- as.matrix(inputdata)
qrresult <- NULL
# QR decomposition needs full column rank (or full row rank, transposing first)
if (rankMatrix(inputdata) == min(dim(inputdata)))
{
  qrresult <- qr(inputdata)  # perform the QR decomposition
} else {
  stop("Don't meet the needs of QR")
}
Qresult <- qr.Q(qrresult)  # extract Q from the decomposition
Rresult <- qr.R(qrresult)  # extract R from the decomposition
Xresult <- qr.X(qrresult)  # reconstruct the original matrix
  • qr() applied to X returns a compact object whose matrix component has the same dimensions as X: the upper triangle holds R, and the lower triangle holds Q in compact (Householder) form.
  • qr.Q(), qr.R(), qr.X() pull out whichever piece you need. Here qr.Q receives qr(R0[,s*d+1:d]).
  • Unpacking qr(R0[,s*d+1:d]): qr is applied to R0[, s*d + 1:d], i.e. block s+1 of R0.
  • In other words:
  1. For the p-row, d*B2-column R0, each d-column block (s = 0, ..., B2-1) is QR-decomposed;
  2. From each block's compactly stored decomposition, the Q factor is extracted and only its first d columns kept;
  3. Re-concatenating the blocks gives the required Q, whose blocks have orthonormal columns. (A check follows.)
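A sanity check on one block (toy sizes): the extracted Q factor has orthonormal columns.

set.seed(1)
p <- 10; d <- 3
R0 <- matrix(rnorm(p * d) / sqrt(p), p, d)  # one Gaussian block
Q  <- qr.Q(qr(R0))[, 1:d]
all.equal(crossprod(Q), diag(d))            # t(Q) %*% Q = identity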

8.3. Axis random projections

if (method == "axis")
{
    Q <- NULL
    for (i in 1:B2)
    {
        S <- sample(1:p, d)
        R <- matrix(0, p, d)
        for (D in 1:d)
        {
            R[S[D], D] <- 1
        }
        Q <- cbind(Q, R)
    }
}
  • S draws d indices from 1:p without replacement.
  • R starts as an all-zero p by d matrix.
  • The element of R in row S[D], column D is set to 1, so multiplying by R simply extracts (and permutes) d of the original coordinates.
  • This is recorded and repeated until there are B2 such matrices. (A toy look at one of them follows.)
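A quick look at one axis projection (toy sizes, made up; each column picks out a single coordinate):

set.seed(1)
p <- 6; d <- 2
S <- sample(1:p, d)        # d distinct coordinates
R <- matrix(0, p, d)
R[cbind(S, 1:d)] <- 1      # same effect as the double loop in the source
t(R) %*% (1:6)             # projecting a vector just extracts entries S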

9. RPModel

The code first:

RPModel <-
function(Model.No #Model number
                , n # sample size
                , p # dimension
                , Pi = 1/2 # class 1 prior
                    )
{
   if (Model.No == 1)
    {
      Y1 <- rmultinom(1,n,c(Pi,1-Pi))
      Y <- c(rep(1,Y1[1,1]),rep(2,Y1[2,1]))
      Y11 <- rmultinom(1,Y1[1,1],c(1/2,1/2))
      Y22 <- rmultinom(1,Y1[2,1],c(1/2,1/2))
      mu1 <- c(2, 2, rep(0,p-2))
      mu2 <- c(2,-2, rep(0,p-2))
      if(p == 100) {X1 <- rbind(t(matrix(mu1,p,Y11[1,1])),t(matrix(-mu1,p,Y11[2,1]))) + matrix(rnorm(Y1[1,1]*p),Y1[1,1],p)}
      if(p == 100) {X2 <- rbind(t(matrix(mu2,p,Y22[1,1])),t(matrix(-mu2,p,Y22[2,1]))) + matrix(rnorm(Y1[2,1]*p),Y1[2,1],p)}
      if(p == 1000) {X1 <- rbind(t(matrix(mu1,p,Y11[1,1])),t(matrix(-mu1,p,Y11[2,1]))) + cbind(matrix(rnorm(Y1[1,1]*2),Y1[1,1],2), matrix(rnorm(Y1[1,1]*(p-2))/4,Y1[1,1],p-2))}
      if(p == 1000) {X2 <- rbind(t(matrix(mu2,p,Y22[1,1])),t(matrix(-mu2,p,Y22[2,1]))) + cbind(matrix(rnorm(Y1[2,1]*2),Y1[2,1],2), matrix(rnorm(Y1[2,1]*(p-2))/4,Y1[2,1],p-2))}
      X <- rbind(X1,X2)
    }
    if (Model.No == 2)
    {
      R <- NULL
      if(p == 100) data(R, envir = environment())
      #if(p == 1000) load(R1000.RData)
      Y1 <- rmultinom(1,n,c(Pi,1-Pi))
      Y <- c(rep(1,Y1[1,1]),rep(2,Y1[2,1]))
      mu <- c(rep(3,3), rep(0,p-3))
      Sigma1 <- 0.5*diag(c(rep(1,3),rep(0,p-3)))+0.5*c(rep(1,3),rep(0,p-3))%*%t(c(rep(1,3),rep(0,p-3))) + 0.5*diag(c(rep(0,3),rep(1,p-3)))+0.5*c(rep(0,3),rep(1,p-3))%*%t(c(rep(0,3),rep(1,p-3)))
      Sigma2 <- 1.5*diag(c(rep(1,3),rep(0,p-3)))+0.5*c(rep(1,3),rep(0,p-3))%*%t(c(rep(1,3),rep(0,p-3))) + 0.5*diag(c(rep(0,3),rep(1,p-3)))+0.5*c(rep(0,3),rep(1,p-3))%*%t(c(rep(0,3),rep(1,p-3)))
      X1 <- mvrnorm(Y1[1,1],R%*%rep(0,p),R%*%Sigma1%*%t(R))
      X2 <- mvrnorm(Y1[2,1],R%*%mu,R%*%Sigma2%*%t(R))
      X <- rbind(X1,X2)
      rm(R, envir = environment())
    }
    if (Model.No == 3)
    {
      Y1 <- rmultinom(1,n,c(Pi,1-Pi))
      Y <- c(rep(1,Y1[1,1]),rep(2,Y1[2,1]))
      mu <- c(rep(1/sqrt(p),p/2),rep(0,p/2))
      X1 <- cbind(matrix(sample(c(-1,1),Y1[1,1]*p, replace = TRUE)*rexp(Y1[1,1]*p,1),Y1[1,1],p))
      X2 <- mvrnorm(Y1[2,1],mu,diag(p))
      X  <- rbind(X1,X2)
    }
    if (Model.No == 4)
    {
      Y1 <- rmultinom(1,n,c(Pi,1-Pi))
      Y <- c(rep(1,Y1[1,1]),rep(2,Y1[2,1]))
      mu <- c(rep(1,10),rep(0,p-10))
      U1 <- rchisq(Y1[1,1],1)
      U2 <- rchisq(Y1[2,1],2)
      Sigma1 <- diag(p)
      Sigma2 <- 0.5*diag(p)+0.5*c(rep(1,10),rep(0,p-10))%*%t(c(rep(1,10),rep(0,p-10))) + 0.5*diag(c(rep(0,10),rep(1,p-10)))
      X1 <- mvrnorm(Y1[1,1],rep(0,p),Sigma1)/sqrt(U1/1)
      X2 <- t(mu + t(mvrnorm(Y1[2,1],rep(0,p),Sigma2)/sqrt(U2/2)))
      X  <- rbind(X1,X2)
    }
  
    return(list(x=X,y=Y))
  }

9.1. Function signature

function(Model.No #Model number
                , n # sample size
                , p # dimension
                , Pi = 1/2 # class 1 prior
                    )

9.2. Model 1


if (Model.No == 1)
    {
      Y1 <- rmultinom(1,n,c(Pi,1-Pi))
      Y <- c(rep(1,Y1[1,1]),rep(2,Y1[2,1]))
      Y11 <- rmultinom(1,Y1[1,1],c(1/2,1/2))
      Y22 <- rmultinom(1,Y1[2,1],c(1/2,1/2))
      mu1 <- c(2, 2, rep(0,p-2))
      mu2 <- c(2,-2, rep(0,p-2))
      if(p == 100) {X1 <- rbind(t(matrix(mu1,p,Y11[1,1])),t(matrix(-mu1,p,Y11[2,1]))) + matrix(rnorm(Y1[1,1]*p),Y1[1,1],p)}
      if(p == 100) {X2 <- rbind(t(matrix(mu2,p,Y22[1,1])),t(matrix(-mu2,p,Y22[2,1]))) + matrix(rnorm(Y1[2,1]*p),Y1[2,1],p)}
      if(p == 1000) {X1 <- rbind(t(matrix(mu1,p,Y11[1,1])),t(matrix(-mu1,p,Y11[2,1]))) + cbind(matrix(rnorm(Y1[1,1]*2),Y1[1,1],2), matrix(rnorm(Y1[1,1]*(p-2))/4,Y1[1,1],p-2))}
      if(p == 1000) {X2 <- rbind(t(matrix(mu2,p,Y22[1,1])),t(matrix(-mu2,p,Y22[2,1]))) + cbind(matrix(rnorm(Y1[2,1]*2),Y1[2,1],2), matrix(rnorm(Y1[2,1]*(p-2))/4,Y1[2,1],p-2))}
      X <- rbind(X1,X2)
    }

9.2.1. Model initialization

  • Y1 <- rmultinom(1,n,c(Pi,1-Pi)) generates a 2 x 1 count vector summing to n, distributed according to c(Pi, 1-Pi). For example, n = 500 and Pi = 0.5 might give (254, 246). (A one-liner follows this list.)
  • Y <- c(rep(1,Y1[1,1]),rep(2,Y1[2,1])) builds the 500 x 1 label vector: the first 254 entries are 1, the remaining 246 are 2 (these are the class labels).
  • Y11 <- rmultinom(1,Y1[1,1],c(1/2,1/2)) splits the class-1 count again, e.g. (128, 126), summing to 254.
  • Y22 <- rmultinom(1,Y1[2,1],c(1/2,1/2)) gives e.g. (124, 122), summing to 246.
  • mu1 <- c(2, 2, rep(0,p-2)) generates a p-vector: only the first two entries are 2, the rest are 0.
  • mu2 <- c(2,-2, rep(0,p-2)): the same, except the second entry is -2.
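rmultinom in one line (the counts vary run to run):

set.seed(1)
rmultinom(1, 500, c(0.5, 0.5))  # a 2 x 1 matrix of class counts summing to 500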

9.2.2. Model construction

  1. For p = 100, generating class 1:
if(p == 100) {X1 <- rbind(t(matrix(mu1,p,Y11[1,1])),t(matrix(-mu1,p,Y11[2,1]))) + matrix(rnorm(Y1[1,1]*p),Y1[1,1],p)}
  • rbind merges by rows: stacking an m-row matrix on an n-row matrix yields m+n rows, provided the column counts match. (cbind merges by columns analogously: m columns plus n columns gives m+n columns, provided the row counts match.)
  • rbind(t(matrix(mu1,p,Y11[1,1])),t(matrix(-mu1,p,Y11[2,1]))) comes out as 254 x 100 in the running example (254 = Y1[1,1] rows, p = 100 columns); X1 is 254 x 100 as well.
  • In more detail: matrix(mu1,p,Y11[1,1]) is 100 x 128 with only its first two rows filled (all 2); matrix(-mu1,p,Y11[2,1]) is 100 x 126 with only its first two rows filled (all -2).
  • Noise is then added: matrix(rnorm(Y1[1,1]*p),Y1[1,1],p) draws standard normal noise reshaped to match.
  2. For p = 100, generating class 2:
if(p == 100) {X2 <- rbind(t(matrix(mu2,p,Y22[1,1])),t(matrix(-mu2,p,Y22[2,1]))) + matrix(rnorm(Y1[2,1]*p),Y1[2,1],p)}
  • Here it is mu2 that varies, because class 2 is being generated.
  • Simply put: build two blocks, one repeating mu2 and one repeating -mu2, each repeated as many times as the prior-driven multinomial draw dictates;
  • then stack them and add noise.
  3. p = 1000 works the same way, except the noise on coordinates 3 through p has standard deviation 1/4.
  4. Finally the data for the two classes are stacked; each class's size again comes from the multinomial draw:
X <- rbind(X1,X2)

9.3. Model 2



The code:

if (Model.No == 2)
    {
      R <- NULL
      if(p == 100) data(R, envir = environment())
      #if(p == 1000) load(R1000.RData)
      Y1 <- rmultinom(1,n,c(Pi,1-Pi))
      Y <- c(rep(1,Y1[1,1]),rep(2,Y1[2,1]))
      mu <- c(rep(3,3), rep(0,p-3))
      Sigma1 <- 0.5*diag(c(rep(1,3),rep(0,p-3)))+0.5*c(rep(1,3),rep(0,p-3))%*%t(c(rep(1,3),rep(0,p-3))) + 0.5*diag(c(rep(0,3),rep(1,p-3)))+0.5*c(rep(0,3),rep(1,p-3))%*%t(c(rep(0,3),rep(1,p-3)))
      Sigma2 <- 1.5*diag(c(rep(1,3),rep(0,p-3)))+0.5*c(rep(1,3),rep(0,p-3))%*%t(c(rep(1,3),rep(0,p-3))) + 0.5*diag(c(rep(0,3),rep(1,p-3)))+0.5*c(rep(0,3),rep(1,p-3))%*%t(c(rep(0,3),rep(1,p-3)))
      X1 <- mvrnorm(Y1[1,1],R%*%rep(0,p),R%*%Sigma1%*%t(R))
      X2 <- mvrnorm(Y1[2,1],R%*%mu,R%*%Sigma2%*%t(R))
      X <- rbind(X1,X2)
      rm(R, envir = environment())
    }
  • This one is rather interesting: a fixed rotation applied to a sparsely structured Gaussian.
  • Y1 splits the sample into two parts: the class-1 count Y1[1,1] and the class-2 count Y1[2,1].
  • Y builds the corresponding label vector.
  • mu corresponds to the paper's class-2 mean: the first three coordinates are 3, the rest 0.
  • Sigma1, term by term: the first term diag(c(rep(1,3),rep(0,p-3))) has 1 in only the first three diagonal entries; the second, c(rep(1,3),rep(0,p-3))%*%t(c(rep(1,3),rep(0,p-3))), has 1 throughout the leading 3 x 3 block; the third, diag(c(rep(0,3),rep(1,p-3))), has 1 on the diagonal except the first three entries; the fourth, c(rep(0,3),rep(1,p-3))%*%t(c(rep(0,3),rep(1,p-3))), has 1 throughout the trailing (p-3) x (p-3) block.
  • Adding them with their 0.5 weights: for p = 100, Sigma1 has 1 everywhere on the diagonal, 0.5 on the off-diagonals inside the leading 3 x 3 block and inside the trailing 97 x 97 block, and 0 between the two blocks.
  • Sigma2 is identical except the first term carries weight 1.5 instead of 0.5, so its leading three diagonal entries are 2 instead of 1. (A check appears after this list.)
  • On to data generation: X1 <- mvrnorm(Y1[1,1],R%*%rep(0,p),R%*%Sigma1%*%t(R)). mvrnorm, from the MASS package, draws random samples from a multivariate Gaussian; its signature is mvrnorm(n = 1, mu, Sigma, tol = 1e-06, empirical = FALSE). Translated: draw Y1[1,1] samples (the class-1 count) with mean R %*% rep(0,p) — which is just 0 — and covariance R %*% Sigma1 %*% t(R). R here is the rotation matrix shipped with the package (loaded by data(R)); best to reuse it as-is.
  • X2 is analogous, with mean R %*% mu and covariance R %*% Sigma2 %*% t(R).
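The block structure of Sigma1 can be checked directly (p = 10 here for readability):

p <- 10
a <- c(rep(1, 3), rep(0, p - 3)); b <- 1 - a
Sigma1 <- 0.5*diag(a) + 0.5*a %*% t(a) + 0.5*diag(b) + 0.5*b %*% t(b)
Sigma1[1:4, 1:4]  # diagonal 1; 0.5 inside the leading 3 x 3 block; 0 across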

9.3.1. Model initialization

R <- NULL
      if(p == 100) data(R, envir = environment())
      #if(p == 1000) load(R1000.RData)
      Y1 <- rmultinom(1,n,c(Pi,1-Pi))
      Y <- c(rep(1,Y1[1,1]),rep(2,Y1[2,1]))
      mu <- c(rep(3,3), rep(0,p-3))
  • This part loads the shipped rotation matrix; otherwise it mirrors Model 1.
  • The long Sigma1 <- ... line builds sigma1. %*% denotes matrix multiplication (a bare * would be elementwise); note that the left matrix's column count must equal the right matrix's row count.
  • The rest is much the same.

9.4. Model 3



The code:

{
      Y1 <- rmultinom(1,n,c(Pi,1-Pi))
      Y <- c(rep(1,Y1[1,1]),rep(2,Y1[2,1]))
      mu <- c(rep(1/sqrt(p),p/2),rep(0,p/2))
      X1 <- cbind(matrix(sample(c(-1,1),Y1[1,1]*p, replace = TRUE)*rexp(Y1[1,1]*p,1),Y1[1,1],p))
      X2 <- mvrnorm(Y1[2,1],mu,diag(p))
      X  <- rbind(X1,X2)
    }

This is where the models go a bit off-script:

  • Y1 is as in Model 1.
  • mu matches the model's specification: the first p/2 coordinates are 1/sqrt(p), the rest 0.
  • The cbind wrapped around X1 is redundant here — there is only one matrix to bind.
  • sample() is R's sampling function:



sample(c(-1,1), Y1[1,1]*p, replace = TRUE) draws Y1[1,1]*p values from {-1, 1} with replacement — a vector of random signs.

  • rexp(n, rate = 1) draws n exponential variates; multiplied elementwise by the random signs, this makes the class-1 coordinates i.i.d. standard Laplace (double-exponential). (A quick check follows this list.)
  • mvrnorm (MASS, as before) draws the multivariate Gaussian sample for class 2, with mean mu and identity covariance.
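A quick check that sign * Exp(1) is standard Laplace (mean 0, variance 2):

set.seed(1)
z <- sample(c(-1, 1), 1e4, replace = TRUE) * rexp(1e4, 1)
c(mean(z), var(z))  # approximately 0 and 2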

9.5. Model 4



The code:

if (Model.No == 4)
    {
      Y1 <- rmultinom(1,n,c(Pi,1-Pi))
      Y <- c(rep(1,Y1[1,1]),rep(2,Y1[2,1]))
      mu <- c(rep(1,10),rep(0,p-10))
      U1 <- rchisq(Y1[1,1],1)
      U2 <- rchisq(Y1[2,1],2)
      Sigma1 <- diag(p)
      Sigma2 <- 0.5*diag(p)+0.5*c(rep(1,10),rep(0,p-10))%*%t(c(rep(1,10),rep(0,p-10))) + 0.5*diag(c(rep(0,10),rep(1,p-10)))
      X1 <- mvrnorm(Y1[1,1],rep(0,p),Sigma1)/sqrt(U1/1)
      X2 <- t(mu + t(mvrnorm(Y1[2,1],rep(0,p),Sigma2)/sqrt(U2/2)))
      X  <- rbind(X1,X2)
    }
  • Similar to Model 2, except each Gaussian row is divided by sqrt(U/df) with U chi-squared: the divisor vector has one entry per row, so the matrix division rescales row by row, making the classes multivariate t distributed (class 1 with 1 degree of freedom; class 2 with 2, shifted by mu). A small check of the row-wise recycling follows.
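Checking that dividing an n x p matrix by a length-n vector rescales row-wise (toy sizes):

library(MASS)
set.seed(1)
n <- 4; p <- 3
U <- rchisq(n, 1)
Z <- mvrnorm(n, rep(0, p), diag(p))
all.equal(Z / sqrt(U / 1), sweep(Z, 1, sqrt(U / 1), "/"))  # TRUE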

9.6. Summary

All four models return list(x = X, y = Y).

10. Code-reading summary and reflections

1. When pushing the validation or test set through a model built on the training set, make sure the data is processed the same way — here, projected with the same matrix. The "model" is more than just the classifier.
2. As for the opening question: the projections and k are always selected using training error, leave-one-out error, or validation error; the test set only ever receives the already-chosen projection and fitted model, so this is not tuning on the test set.