做决策树时keyerror=0.0
为啥会报错呀
代码如下
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
iris = load_iris()
Xtrain, Xtest, Ytrain, Ytest = train_test_split(iris['data'], iris['target'], test_size=0.3)
def createDataSet():
dataset = pd.DataFrame(Xtrain,Ytrain)
return dataset
dataset = createDataSet()
def calEnt(dataset):
n = dataset.shape[0]
iset = dataset.iloc[:,-1].value_counts()
p = iset/n
ent = (-p*np.log2(p)).sum()
return ent
def bestSplit(dataset):
bestEnt = calEnt(dataset)
bestGain = 0
axis = -1
for i in range(dataset.shape[1]-1):
levels = dataset.iloc[:,i].value_counts().index
ents = 0
for j in levels:
childSet = dataset[dataset.iloc[:,i] == j]
ent = calEnt(childSet)
ents += (childSet.shape[0]/dataset.shape[0])*ent
infoGain = bestEnt - ents
if (infoGain > bestGain):
bestGain = infoGain
axis = i
return axis
print(bestSplit(dataset))
def mySplit(dataset,axis,value):
col = dataset.columns[axis]
redataset = dataset.loc[dataset[col] == value,:].drop(col,axis = 1)
return redataset
print(mySplit(dataset,axis=1,value=1))
def createTree(dataset):
classlist = dataset.iloc[:,-1].value_counts()
featlist = list(dataset.columns)
if classlist[0] == dataset.shape[0] or dataset.shape[1] == 1:
return classlist.index[0]
axis = bestSplit(dataset)
bestfeat = featlist[axis]
mytree = {bestfeat:{}}
del featlist[axis]
valuelist = set(dataset.iloc[:,axis])
for value in valuelist:
mytree[bestfeat][value] = createTree(mySplit(dataset,axis,value))
return mytree
createTree(dataset)
报错如下
Traceback (most recent call last):
File "C:\Users\olafur\AppData\Local\Programs\Python\Python37\lib\site-packages\pandas\core\indexes\base.py", line 2897, in get_loc
return self._engine.get_loc(key)
File "pandas\_libs\index.pyx", line 107, in pandas._libs.index.IndexEngine.get_loc
File "pandas\_libs\index.pyx", line 131, in pandas._libs.index.IndexEngine.get_loc
File "pandas\_libs\hashtable_class_helper.pxi", line 384, in pandas._libs.hashtable.Float64HashTable.get_item
File "pandas\_libs\hashtable_class_helper.pxi", line 390, in pandas._libs.hashtable.Float64HashTable.get_item
KeyError: 0.0
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "<input>", line 1, in <module>
File "C:\Program Files\JetBrains\PyCharm 2019.2.3\helpers\pydev\_pydev_bundle\pydev_umd.py", line 197, in runfile
pydev_imports.execfile(filename, global_vars, local_vars) # execute the script
File "C:\Program Files\JetBrains\PyCharm 2019.2.3\helpers\pydev\_pydev_imps\_pydev_execfile.py", line 18, in execfile
exec(compile(contents+"\n", file, 'exec'), glob, loc)
File "C:/Users/olafur/AppData/Local/Programs/Python/Python37/Lib/site-packages/scratches/scratch_1.py", line 71, in <module>
createTree(dataset)
File "C:/Users/olafur/AppData/Local/Programs/Python/Python37/Lib/site-packages/scratches/scratch_1.py", line 59, in createTree
if classlist[0] == dataset.shape[0] or dataset.shape[1] == 1:
File "C:\Users\olafur\AppData\Local\Programs\Python\Python37\lib\site-packages\pandas\core\series.py", line 1068, in __getitem__
result = self.index.get_value(self, key)
File "C:\Users\olafur\AppData\Local\Programs\Python\Python37\lib\site-packages\pandas\core\indexes\numeric.py", line 420, in get_value
loc = self.get_loc(k)
File "C:\Users\olafur\AppData\Local\Programs\Python\Python37\lib\site-packages\pandas\core\indexes\numeric.py", line 479, in get_loc
return super().get_loc(key, method=method, tolerance=tolerance)
File "C:\Users\olafur\AppData\Local\Programs\Python\Python37\lib\site-packages\pandas\core\indexes\base.py", line 2899, in get_loc
return self._engine.get_loc(self._maybe_cast_indexer(key))
File "pandas\_libs\index.pyx", line 107, in pandas._libs.index.IndexEngine.get_loc
File "pandas\_libs\index.pyx", line 131, in pandas._libs.index.IndexEngine.get_loc
File "pandas\_libs\hashtable_class_helper.pxi", line 384, in pandas._libs.hashtable.Float64HashTable.get_item
File "pandas\_libs\hashtable_class_helper.pxi", line 390, in pandas._libs.hashtable.Float64HashTable.get_item
KeyError: 0.0