如何解决ValueError: Found input variables with inconsistent numbers of samples
# -*- coding: UTF-8 -*-
import csv
from sklearn.model_selection import train_test_split
# Read in the csv file and put features into list of dict and list of class label
DataSet = open(r'/home/ly/Desktop/CHY/SCIENCE_DATA/Data_Set_01labelDel0Col.csv', 'rb')
reader = csv.reader(DataSet) # 这个函数可以按行读取内容
headers = reader.next() # 文件的第一行,注释掉会在featureList中把表头也打印出来
# print(headers)
# 创建空列表
featureList = []
labelList = []
for row in reader:
labelList.append(row[len(row) - 1]) # 给labelList增加一列标签值元素,将最后一列元素添加到labelList
rowDict = {}
for i in range(0, len(row) - 1): # 小循环在大循环里面,所以先循环完小循环,在继续下一个大循环
rowDict[i] = row[i] # row[i]表示某一行(row)的第i个数
featureList.append(rowDict)
FeatureList = []
for s in featureList:
ChangeStrToFloat1 = {}
for t in s:
ChangeStrToFloat1[t] = float(s[t])
FeatureList.append(ChangeStrToFloat1)
print FeatureList # <type'list'>
dummyY = [{0:0},{0:0},{0:0},{0:0},{0:0},{0:0},{0:0},{0:0},{0:0},{0:0},{0:0},{0:0},{0:0},{0:0},{0:0},{0:0},{0:0},{0:0},{0:0},{0:0},{0:0},{0:0},{0:0},{0:0},{0:0},{0:1},{0:1},{0:1},{0:1},{0:1},{0:1},{0:1},{0:1},{0:1},{0:1},{0:1},{0:1},{0:1},{0:1},{0:1},{0:1},{0:1},{0:1},{0:1},{0:1},{0:1},{0:1}]
X_train, X_test, y_train, y_test = train_test_split(FeatureList, dummyY, test_size=0.25, random_state=None)
错误提示:
X_train, X_test, y_train, y_test = train_test_split(FeatureList, dummyY, test_size=0.25, random_state=None)
ValueError: Found input variables with inconsistent numbers of samples: [3384, 47]
我想应该是FeatureList的问题吧,这个列表里有47个字典,每个字典里有72个元素,47*72=3384.
然而我还是不知道怎么修正.......