4,647
社区成员
发帖
与我相关
我的任务
分享
import numpy as np
import pandas as pd
from sklearn import datasets
from sklearn.linear_model import LinearRegression
#houseprice= pd.read_csv('boston_housing.csv')
# 指定要读取的CSV文件路径
csv_file = 'boston_housing.csv'
# 使用np.genfromtxt函数读取CSV文件并将其转换为NumPy数组
housedata = np.genfromtxt(csv_file, delimiter=',')
X = housedata
y = housedata[:,13]
feature_names = np.array(['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV'])
feature_names
#y.shape
index = np.arange(506)
np.random.shuffle(index)
index
train_index = index [:405]
#train_index.shape
test_index = index [405:]
#test_index.shape
X_train = X[train_index]
y_train = y[train_index]
X_train.shape
X_test = X[test_index]
y_test = y[test_index]
display(X_test.shape,y_test.shape)
#np.set_printoptions(suppress = True)
model = LinearRegression(fit_intercept= True)
model.fit(X_train,y_train)
display(model.coef_,model.intercept_)
y_train
index
#X_train[100]
#θ = np.linalg.inv(X.T.dot(X)).dot(X.T).dot(y).round(4)
#print('二元斜率截距分别是',w,b)
#print('二通过正规方程球的结果',θ.reshape(-1))
#display(houseprice)
运行后
array([-0., -0., -0., 0., 0., 0., -0., -0., -0., -0., -0., -0., -0.,
1.])
1.4921397450962104e-13
14个回归系数和截距都为0,咋回事,来个大神救我。