如何把 C++ 实现的决策树 ID3 算法改为 Java？

qq_34923301 2016-05-21 05:18:49
#include<iostream>
#include<string>
#include<vector>
#include<map>
#include<algorithm>
#include<cmath>
using namespace std;
#define MAXLEN 6

// ---- Program-wide state shared by the functions below ----

// Every row read by Input(); row 0 is later copied into attribute_row.
vector<vector<string>> state;
// Scratch buffer holding the MAXLEN fields of the row currently being read.
vector<string> item(MAXLEN);
// Copy of state[0], filled by Input().
vector<string> attribute_row;
// Token compared against the first field of a row in Input().
string end = "end";
// Class-label strings compared against the last column of each row.
string yes = "yes";
string no = "no";
// Empty string passed where no attribute value applies.
string blank;
// Attribute name -> distinct values seen in its column (see ComputeMapFrom2DVector).
map<string, vector<string>> map_attribute_values;
// Incremented once per node deleted by FreeTree().
int tree_size = 0;
/**
 * One node of the decision tree.
 *
 * Child nodes are held as raw pointers; FreeTree() walks `childs` and deletes
 * every node, so a Node does not free its children itself.
 */
struct Node{
    // Attribute name tested at this node, or the class label when the
    // builder turns the node into a leaf.
    std::string attribute;
    // Value of the parent's attribute on the edge leading to this node;
    // left empty for a node that was not reached through a value.
    std::string arrived_value;
    // Sub-trees, one per value of `attribute`, in insertion order.
    std::vector<Node*> childs;

    // Both strings start empty (the original assigned the empty global
    // `blank` to them, but misspelled the second member's name).
    Node() : attribute(), arrived_value() {}
};

// Root of the tree; null until main() builds the tree.
Node* root = nullptr;

// Fills map_attribute_values: for every column 1 .. MAXLEN-2 of `state`, the
// header cell state[0][col] is mapped to the distinct cell values found in
// rows 1 .. end of that column, kept in order of first appearance.
void ComputeMapFrom2DVector(){
    for (unsigned int col = 1; col < MAXLEN - 1; ++col) {
        std::vector<std::string> distinct;
        for (unsigned int row = 1; row < state.size(); ++row) {
            const std::string& cell = state[row][col];
            const bool already_seen =
                std::find(distinct.begin(), distinct.end(), cell) != distinct.end();
            if (!already_seen) {
                distinct.push_back(cell);
            }
        }
        map_attribute_values[state[0][col]] = distinct;
    }
}

double ComputeEntropy(vector<vector<string>>remain_state,string attribute,string vlaue,bool ifparent){
vector<int>count(2,0);
unsigned int i,j;
bool done_flag=false;
for(j+1;j<MAXlEN;Jj++){
if(done_flag) break;
if(!attribute_row[j].compare(attribute)){
for(i=1;i<remain_state.size();i++){
if((!ifparent&&!remain_state[i][j].compare(value))||ifparent){
if(!remain_state[i][MAXLEN-1].compare(yes)){
count[0]++;
}
else count[1]++;
}
}
done_flag=true;
}
}
if(count[0]==0||count[1]==0) return 0;
double sum=count[0]+count[1];
double entropy=-count[0]/sum*log(count[0]/sum)/log(2.0)-count[1]/sum*log(count[1]/sum)/log(2.0);
return entropy;
}

double ComputeGain(vactor<vector<string>>remian_state,string attribute){
unsigned int j,k,m;
double parent_entropy=ComputeEntropy(remian_state,attribute,blank,true);
double children_entropy=0;
vector<string>values=map_attribute_values[attribute];
vector<double>ratio;
vetor<it>count_values;
int tempint;
for(m=0;m<values.size();m++){
tempint=0;
for(k=1;k<MAXLEN-1;k++){
if(!attribute_row[k].compare(attribute)){
for(j=1;j<remain_state.size();j++){
if(!remain_state[j][k].compare.values[m])){
tempint++;
}
}
}
}
count_values.push_back(tempint);
}
for(j=0;j<values.size();j+=){
ratio.push_back((double)count_values[j]/(double(remian_state.size()-1));
)
double temp_entropy;
for(j=0;j<values.size();j++){
temp_entropy=ComputeEntropy(remain_state,attribute,values[j],false);
children_entropy+=ratio[j]*temp_entropy;
}
return (parent_entropy-children_entropy);
}

int FindAttriNumByName(string attri){
for(int i=0;i<MAXLEN;i++){
if(!state[0][i].compare(attri)) return i;
}
cerr<<"can't find the numth of attribute"<<endl;
return 0;
}

string MostCommonLabel(vactor<vector<string>>remain_state){
int p=0,n=0;
for(unsigned i=0;i<remain_state.size();i++){
if(!remian_state[i][MAXLEN-1].compare(yes))p++;
else n++;
}
if(p>=n) return yes;
else return no;
}

bool AllTheSameLabel(vactor<vector<string>>remain_state,string label){
int count=0;
for(unsigned int i=0;i<remain_state.size();i++){
if(!remain_state[i][MAXLEN-1].compare(label))count++;
}
if(count==remain_state.size()-1) return true;
else return false;
}

/**
 * Recursively builds a decision (sub)tree for the given rows, depth first.
 *
 * @param p                node to fill in; a new Node is allocated when null.
 * @param remain_state     rows still to be classified; row 0 is a header row
 *                         and the data rows start at index 1.
 * @param remain_attribute names of the attributes not yet used on this path.
 * @return `p` (or the newly allocated node) with `attribute` set to a class
 *         label for a leaf, or to the chosen attribute name with one child per
 *         known value of that attribute.
 *
 * Nodes are allocated with `new`; the caller releases the tree with FreeTree().
 * Arguments are taken by const reference to avoid copying the table at every
 * recursion level; existing call sites are unaffected.
 */
Node* BulidDecisionTreeDFS(Node* p, const std::vector<std::vector<std::string>>& remain_state, const std::vector<std::string>& remain_attribute){
    if (p == NULL) {
        p = new Node();
    }

    // Leaf: every remaining row has the same label.
    if (AllTheSameLabel(remain_state, yes)) {
        p->attribute = yes;
        return p;
    }
    if (AllTheSameLabel(remain_state, no)) {
        p->attribute = no;
        return p;
    }
    // Leaf: nothing left to split on, fall back to the majority label.
    if (remain_attribute.size() == 0) {
        p->attribute = MostCommonLabel(remain_state);
        return p;
    }

    // Pick the attribute with the strictly greatest gain; when no gain is
    // positive the first remaining attribute is used.
    double max_gain = 0;
    std::size_t best = 0;
    for (std::size_t a = 0; a < remain_attribute.size(); ++a) {
        const double gain = ComputeGain(remain_state, remain_attribute[a]);
        if (gain > max_gain) {
            max_gain = gain;
            best = a;
        }
    }
    const std::string best_attribute = remain_attribute[best];

    // Attributes still available below this node.
    std::vector<std::string> new_attribute;
    for (std::size_t a = 0; a < remain_attribute.size(); ++a) {
        if (remain_attribute[a] != best_attribute) {
            new_attribute.push_back(remain_attribute[a]);
        }
    }

    p->attribute = best_attribute;
    // Copied on purpose: the map may gain entries while recursing.
    const std::vector<std::string> values = map_attribute_values[best_attribute];
    const int attribute_num = FindAttriNumByName(best_attribute);

    for (std::size_t v = 0; v < values.size(); ++v) {
        // Partition for this value: header row first, then the matching rows.
        std::vector<std::vector<std::string>> new_state;
        new_state.push_back(attribute_row);
        for (std::size_t i = 1; i < remain_state.size(); ++i) {
            if (remain_state[i][attribute_num] == values[v]) {
                new_state.push_back(remain_state[i]);
            }
        }

        Node* new_node = new Node();
        new_node->arrived_value = values[v];
        if (new_state.size() <= 1) {
            // No row has this value. The original tested size()==0, which can
            // never hold because the header row is always present, so this
            // majority-label fallback was unreachable.
            new_node->attribute = MostCommonLabel(remain_state);
        } else {
            BulidDecisionTreeDFS(new_node, new_state, new_attribute);
        }
        p->childs.push_back(new_node);
    }
    return p;
}
void Input(){
string s;
while(cin>>s,s.compare()end!=0){
item[0]=s;
for(int i=1;i<MAXLEN;i++){
cin>>item[i];
}
state.push_back(item);
}
for(int j=0;j<MAXLEN;j++){
attribute_row.push_back(state[0][j]);
}
}

/**
 * Prints a (sub)tree to standard output, one tab of indentation per level.
 *
 * For a node with a non-empty `arrived_value`, that value is printed at
 * `depth` tabs and the node's `attribute` on the next line at `depth + 1`
 * tabs; otherwise only `attribute` is printed at `depth` tabs. Children are
 * printed recursively at `depth + 1`.
 *
 * @param p     node to print; a null pointer prints nothing.
 * @param depth indentation level of this node.
 */
void PrintTree(Node*p,int depth){
    if (p == NULL) {
        return;
    }
    for (int i = 0; i < depth; i++) {
        std::cout << '\t';
    }
    if (!p->arrived_value.empty()) {
        std::cout << p->arrived_value << '\n';
        for (int i = 0; i < depth + 1; i++) {
            std::cout << '\t';
        }
    }
    std::cout << p->attribute << '\n';
    for (std::vector<Node*>::iterator it = p->childs.begin(); it != p->childs.end(); ++it) {
        PrintTree(*it, depth + 1);
    }
}

// Deletes the subtree rooted at `p`, children before the node itself, and
// adds one to tree_size for every node deleted. A null `p` is a no-op.
void FreeTree(Node*p){
    if (p != NULL) {
        for (std::size_t idx = 0; idx < p->childs.size(); ++idx) {
            FreeTree(p->childs[idx]);
        }
        delete p;
        tree_size++;
    }
}

int main(){
Input();
vector<string>remain_attribute;

string outlook("Outlook");
string Temperature("Temperature");
string Humidity("Humidity");
stirng Wind("Wind");
remain_attribute.push_back(outlook);
remain_attribute.push_back(Temperature);
remain_attribute.push_back(Humidity);
remain_attribute.push_back(Wind);
vector<vector<string>>remain_state;
for(unsigned int i=0;i<state.size();i++){
remain_state.push_back(state[i]);
}
ComputeMapFrom2DVector();
root=BuildDectionTressDFS(root,remain_state,remain_attribute);
cout<<"the decison tree is:"<<endl;
PrintTree(root,0);
FreeTree(root);
cout<<endl;
cout<<"tree_size:"<<tree_size<<endl;
return 0;
}
...全文
130 回复 打赏 收藏 转发到动态 举报
写回复
用AI写文章
回复
切换为时间正序
请发表友善的回复…
发表回复

23,407

社区成员

发帖
与我相关
我的任务
社区描述
Java 非技术区
社区管理员
  • 非技术区社区
加入社区
  • 近7日
  • 近30日
  • 至今
社区公告
暂无公告

试试用AI创作助手写篇文章吧