朴素贝叶斯算法
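公式
$$P(c \mid x_1,\ldots,x_n) = \frac{P(c)\prod_{i=1}^{n} P(x_i \mid c)}{\sum_{c'} P(c')\prod_{i=1}^{n} P(x_i \mid c')}$$
其中 c 为类别(好/否),x_1,…,x_n 为样本的各项特征。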
解释(例子)
给出如下训练集:
编号,色泽,根蒂,敲声,纹理,脐部,触感,好瓜
1,青绿,蜷缩,浊响,清晰,凹陷,硬滑,是
2,乌黑,蜷缩,沉闷,清晰,凹陷,硬滑,是
3,乌黑,蜷缩,浊响,清晰,凹陷,硬滑,是
4,青绿,蜷缩,沉闷,清晰,凹陷,硬滑,是
5,浅白,蜷缩,浊响,清晰,凹陷,硬滑,是
6,青绿,稍蜷,浊响,清晰,稍凹,软粘,是
7,乌黑,稍蜷,浊响,稍糊,稍凹,软粘,是
8,乌黑,稍蜷,浊响,清晰,稍凹,硬滑,是
9,乌黑,稍蜷,沉闷,稍糊,稍凹,硬滑,否
10,青绿,硬挺,清脆,清晰,平坦,软粘,否
11,浅白,硬挺,清脆,模糊,平坦,硬滑,否
12,浅白,蜷缩,浊响,模糊,平坦,软粘,否
13,青绿,稍蜷,浊响,稍糊,凹陷,硬滑,否
14,浅白,稍蜷,沉闷,稍糊,凹陷,硬滑,否
15,乌黑,稍蜷,浊响,清晰,稍凹,软粘,否
16,浅白,蜷缩,浊响,模糊,平坦,硬滑,否
17,青绿,蜷缩,沉闷,稍糊,稍凹,硬滑,否
判断有如下特征的瓜是否好瓜:
青绿,稍蜷,浊响,清晰,凹陷,硬滑
分析
将特征和规律代入贝叶斯公式中:
P(好|青绿,稍蜷,浊响,清晰,凹陷,硬滑)
= P(好)*P(青绿,稍蜷,浊响,清晰,凹陷,硬滑|好) / (P(好)*P(青绿,稍蜷,浊响,清晰,凹陷,硬滑|好) + P(否)*P(青绿,稍蜷,浊响,清晰,凹陷,硬滑|否))
= P(好)*P(青绿|好)*P(稍蜷|好)*P(浊响|好)*P(清晰|好)*P(凹陷|好)*P(硬滑|好) / (P(好)*P(青绿|好)*P(稍蜷|好)*P(浊响|好)*P(清晰|好)*P(凹陷|好)*P(硬滑|好) + P(否)*P(青绿|否)*P(稍蜷|否)*P(浊响|否)*P(清晰|否)*P(凹陷|否)*P(硬滑|否))
其中,朴素贝叶斯假设在给定类别(好/否)的条件下各项特征相互独立,故:
P(青绿,稍蜷,浊响,清晰,凹陷,硬滑|好) = P(青绿|好)*P(稍蜷|好)*P(浊响|好)*P(清晰|好)*P(凹陷|好)*P(硬滑|好)
P(青绿,稍蜷,浊响,清晰,凹陷,硬滑|否) = P(青绿|否)*P(稍蜷|否)*P(浊响|否)*P(清晰|否)*P(凹陷|否)*P(硬滑|否)
代码实现:
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.io.*;
import java.util.Scanner;
/**
 * Naive Bayes classifier for a six-feature, two-class ("是" / "否") training table.
 *
 * <p>The training rows are loaded from a comma-separated file by {@link #readTable(String)}
 * and kept in the static {@link #data} list. Each stored row holds the six feature values
 * followed by the class label; the leading id column of the file is dropped on load.
 *
 * <p>For a sample {@code x = (x1..x6)} the posterior of class {@code c} is
 * {@code P(c) * prod P(xi|c) / sum over c' of (P(c') * prod P(xi|c'))}.
 * {@link #moleculeIsCold(String, String[])} computes the numerator for one class,
 * {@link #denominator(String[])} the shared denominator, and {@link #compared(String[])}
 * combines them into a verdict. No smoothing is applied, so a feature value never seen
 * with a class drives that class's numerator to zero.
 */
public class Test {
    /** Number of feature columns in every sample. */
    private static final int FEATURE_COUNT = 6;
    /** Position of the class label inside a stored row (right after the features). */
    private static final int LABEL_INDEX = FEATURE_COUNT;
    /** Label value marking a positive ("good") training row. */
    private static final String LABEL_GOOD = "是";
    /** Label value marking a negative ("bad") training row. */
    private static final String LABEL_BAD = "否";

    /** Default location of the training file, built with the platform's path separator. */
    static String filePath = System.getProperty("user.dir")
            + File.separator + "src" + File.separator + "sources" + File.separator + "data.txt";

    /** Training rows shared by all instances: six feature values followed by the label. */
    static ArrayList<ArrayList<String>> data = new ArrayList<>();

    /**
     * Reads the training file and stores its rows in {@link #data}.
     *
     * <p>Every line is split on commas and the first column (the row id) is discarded.
     * Blank lines and lines with fewer than eight columns are skipped because the
     * probability methods index six features plus the label on every row.
     * {@link #data} is replaced only when the whole file was read successfully.
     *
     * @param filePath path of the comma-separated training file
     * @return the rows that were read; empty if the file is missing or unreadable
     */
    public ArrayList<ArrayList<String>> readTable(String filePath) {
        ArrayList<ArrayList<String>> table = new ArrayList<>();
        if (filePath == null) {
            System.out.println("文件不存在!");
            return table;
        }
        // The platform default charset is kept on purpose so the file is decoded the
        // same way as the console input it is compared against.
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(new File(filePath))))) {
            String line;
            while ((line = reader.readLine()) != null) {
                if (line.trim().isEmpty()) {
                    continue;
                }
                String[] cells = line.split(",");
                // id + features + label are required; shorter rows cannot be classified on.
                if (cells.length < FEATURE_COUNT + 2) {
                    continue;
                }
                ArrayList<String> row = new ArrayList<>();
                for (int i = 1; i < cells.length; i++) {
                    row.add(cells[i].trim());
                }
                table.add(row);
            }
            data = table;
        } catch (FileNotFoundException e) {
            System.out.println("文件不存在!");
        } catch (IOException e) {
            // The file exists but could not be read; report the real cause.
            e.printStackTrace();
        }
        return table;
    }

    /**
     * Computes the denominator of Bayes' formula for the given sample: the sum of the
     * class numerators of both classes.
     *
     * @param str the six feature values of the sample, in column order
     * @return {@code P(good) * prod P(xi|good) + P(bad) * prod P(xi|bad)};
     *         {@code 0} when there is no training data
     */
    public double denominator(String[] str) {
        return moleculeIsCold(LABEL_GOOD, str) + moleculeIsCold(LABEL_BAD, str);
    }

    /**
     * Computes the numerator of Bayes' formula for one class:
     * {@code P(class) * prod P(xi|class)}, estimated by relative frequencies in {@link #data}.
     *
     * @param hao  the class label to evaluate ("是" or "否")
     * @param strs the six feature values of the sample, in column order
     * @return the class numerator; {@code 0} when no training row carries the label
     */
    public double moleculeIsCold(String hao, String[] strs) {
        int classCount = 0;
        int[] matches = new int[FEATURE_COUNT];
        for (ArrayList<String> row : data) {
            if (!row.get(LABEL_INDEX).equals(hao)) {
                continue;
            }
            classCount++;
            for (int f = 0; f < FEATURE_COUNT; f++) {
                if (row.get(f).equals(strs[f])) {
                    matches[f]++;
                }
            }
        }
        if (classCount == 0) {
            // Avoids 0/0 = NaN when the class is absent or the table is empty.
            return 0;
        }
        double result = classCount / (data.size() * 1.0);
        for (int f = 0; f < FEATURE_COUNT; f++) {
            result *= matches[f] / (classCount * 1.0);
        }
        return result;
    }

    /**
     * Classifies the sample by comparing the posterior probabilities of both classes,
     * prints both probabilities and the verdict, and returns the verdict.
     *
     * <p>When the denominator is zero (the sample is impossible under both classes, or
     * there is no training data) both probabilities are reported as {@code 0} and the
     * verdict is "差瓜".
     *
     * @param strs the six feature values of the sample, in column order
     * @return "好瓜" when the good-class posterior is strictly larger, otherwise "差瓜"
     */
    public String compared(String[] strs) {
        double good = moleculeIsCold(LABEL_GOOD, strs);
        double bad = moleculeIsCold(LABEL_BAD, strs);
        double evidence = good + bad;
        double d1 = evidence > 0 ? good / evidence : 0.0;
        double d2 = evidence > 0 ? bad / evidence : 0.0;
        String str = d1 > d2 ? "好瓜" : "差瓜";
        System.out.println("好瓜的概率:" + d1);
        System.out.println("差瓜的概率:" + d2);
        System.out.println(str);
        return str;
    }

    /**
     * Reads six whitespace-separated feature values from standard input, loads the
     * training table from {@link #filePath} and prints the classification.
     *
     * <p>Sample input: 青绿 稍蜷 浊响 清晰 凹陷 硬滑
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String[] strs = new String[FEATURE_COUNT];
        try (Scanner input = new Scanner(System.in)) {
            for (int i = 0; i < FEATURE_COUNT; i++) {
                strs[i] = input.next();
            }
        }
        Test ba = new Test();
        ba.readTable(filePath);
        ba.compared(strs);
    }
}
运行结果: