测试数据:
//############################### data table ##########################
// Outlook Temp Humidity Windy Play Golf
// Rainy Hot High False No
// Rainy Hot High True No
// Overcast Hot High False Yes
// Sunny Mild High False Yes
// Sunny Cool Normal False Yes
// Sunny Cool Normal True No
// Overcast Cool Normal True Yes
// Rainy Mild High False No
// Rainy Cool Normal False Yes
// Sunny Mild Normal False Yes
// Rainy Mild Normal True Yes
// Overcast Mild High True Yes
// Overcast Hot Normal False Yes
// Sunny Mild High True No
//######################################################################
测试代码:
// Prints the information gain of each attribute: the entropy of the whole
// data set minus the weighted entropy that remains after splitting on it.
void Main()
{
    // Divisor passed to E2 for every attribute; each attribute's counts add up to it.
    const double rowCount = 14.0;

    // Entropy of the whole set before any split (class counts 5 and 9).
    double baseEntropy = E(5, 9);

    // In every table below, each inner array holds the two class counts
    // for one distinct value of the attribute.
    int[][] outlookCounts =
    {
        new[] { 3, 2 },
        new[] { 4, 0 },
        new[] { 2, 3 },
    };
    double outlookEntropy = E2(outlookCounts, rowCount);
    Console.WriteLine($"gain of outlook:{baseEntropy - outlookEntropy}");

    int[][] temperatureCounts =
    {
        new[] { 2, 2 },
        new[] { 4, 2 },
        new[] { 3, 1 },
    };
    double temperatureEntropy = E2(temperatureCounts, rowCount);
    Console.WriteLine($"gain of temperature:{baseEntropy - temperatureEntropy}");

    int[][] humidityCounts =
    {
        new[] { 3, 4 },
        new[] { 6, 1 },
    };
    double humidityEntropy = E2(humidityCounts, rowCount);
    Console.WriteLine($"gain of humidity:{baseEntropy - humidityEntropy}");

    int[][] windyCounts =
    {
        new[] { 3, 3 },
        new[] { 6, 2 },
    };
    double windyEntropy = E2(windyCounts, rowCount);
    Console.WriteLine($"gain of windy:{baseEntropy - windyEntropy}");

    // The attribute with the largest gain wins.
}
/// <summary>
/// Sums, over every group in <paramref name="pairs"/>, the group's entropy
/// (as computed by <c>E</c>) weighted by (group[0] + group[1]) / total.
/// </summary>
/// <param name="pairs">
/// Indexable collection exposing <c>Length</c>; each element provides two
/// counts at indices 0 and 1.
/// </param>
/// <param name="total">Divisor that turns each group's size into its weight.</param>
/// <returns>The accumulated weighted entropy.</returns>
static double E2(dynamic pairs, double total){
    double weightedEntropy = 0.0;
    int index = 0;
    while (index < pairs.Length) {
        var group = pairs[index];
        // Share of the total that falls into this group.
        var weight = (group[0] + group[1]) / total;
        weightedEntropy += weight * E(group[0], group[1]);
        index++;
    }
    return weightedEntropy;
}
/// <summary>
/// Entropy, in bits (log base 2), of a two-class distribution given the
/// number of samples in each class.
/// </summary>
/// <param name="x1">Sample count of the first class; must not be negative.</param>
/// <param name="x2">Sample count of the second class; must not be negative.</param>
/// <returns>
/// 0 when either count is zero; otherwise -p1*log2(p1) - p2*log2(p2)
/// with p1 = x1/(x1+x2) and p2 = x2/(x1+x2).
/// </returns>
/// <exception cref="ArgumentOutOfRangeException">
/// Thrown when <paramref name="x1"/> or <paramref name="x2"/> is negative.
/// </exception>
static double E(int x1, int x2){
    // Negative counts have no meaning here and would otherwise yield NaN or a
    // plausible-looking but wrong value (for example -1 and -1 would give 1).
    if (x1 < 0) {
        throw new ArgumentOutOfRangeException(nameof(x1), x1, "Class counts must not be negative.");
    }
    if (x2 < 0) {
        throw new ArgumentOutOfRangeException(nameof(x2), x2, "Class counts must not be negative.");
    }

    // A split with an empty class has zero entropy; returning early also
    // avoids evaluating 0 * log(0), which is NaN.
    if (x1 == 0 || x2 == 0) {
        return 0;
    }

    // Add in double so that very large counts cannot overflow int.
    double total = (double)x1 + x2;
    double p1 = x1 / total;
    double p2 = x2 / total;
    return -p1 * Math.Log(p1, 2) - p2 * Math.Log(p2, 2);
}
参考链接:
http://www.saedsayad.com/decision_tree.htm