# pandas is needed by the DataFrame previews below; the file's other pandas
# import only appears further down, so without this line the previews raise
# NameError when the script is run top to bottom.
import pandas as pd
from sklearn import datasets

# Load scikit-learn's bundled diabetes data set and take a first look at it.
diabetes = datasets.load_diabetes()
# The three bare expressions below only display something in an interactive
# session (notebook / REPL); in a plain script their results are discarded.
pd.DataFrame(diabetes.target).head()
pd.DataFrame(diabetes.data).head()
diabetes.feature_names
# Print the description text shipped with the data set.
print(diabetes.DESCR)
# 数据调整 (data preparation)
import numpy as np
import pandas as pd
from sklearn.linear_model import Lasso, Ridge, ElasticNet
# Load the data.
df = pd.read_csv('Regression/Regression8/diabetes.csv')

# The target is the 'y' column; every other column is treated as a feature.
labels = ['y']
features = df.columns.tolist()
features.remove('y')

# Quick inspection: a random 5-row sample (only displayed in an interactive
# session) followed by the frame summary that df.info() prints.
df.sample(n=5)
df.info()
# Lasso regression on a ten-column slice of the feature list.
lamb = 0.5  # parameter: passed to Lasso as alpha
# Regress on the 10 original independent variables.
# NOTE(review): the slice starts at index 1, so features[0] is left out --
# confirm that the ten original variables really sit at positions 1-10.
lasso_reg = Lasso(alpha=lamb).fit(df.loc[:, features[1:11]], df.loc[:, labels])
# Report the fitted intercept, then the coefficients.
print('截距\n', lasso_reg.intercept_)
print('自变量系数\n', lasso_reg.coef_)
# Ridge regression on the same ten-column slice of the feature list.
lamb = 0.1  # parameter: passed to Ridge as alpha
# NOTE(review): as with the slice [1:11] used here, features[0] is excluded --
# confirm this is the intended set of ten columns.
Ridge_reg = Ridge(alpha=lamb).fit(df.loc[:, features[1:11]], df.loc[:, labels])
# Report the fitted intercept, then the coefficients.
print('截距\n', Ridge_reg.intercept_)
print('自变量系数\n', Ridge_reg.coef_)
# 非0项 (non-zero terms)
# Lasso regression again, this time on every feature column.
lamb = 0.1  # passed to Lasso as alpha
lasso_reg2 = Lasso(alpha=lamb).fit(df.loc[:, features], df.loc[:, labels])
# Report the fitted intercept, then the coefficients.
print('截距\n', lasso_reg2.intercept_)
print('自变量系数\n', lasso_reg2.coef_)
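# 非0元个数: 21 (number of non-zero coefficients)
# 0元个数: 43 (number of zero coefficients)
# 非0元较多 (more non-zero coefficients)
# 非0元较少 (fewer non-zero coefficients)
lamb=0.1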
# Elastic-net regression on every feature column, reusing the current value
# of lamb as alpha and weighting the L1 part of the penalty at 0.95.
ElasticNet_reg = ElasticNet(l1_ratio=0.95, alpha=lamb).fit(
    df.loc[:, features], df.loc[:, labels]
)
# Report the fitted intercept, then the (transposed) coefficient array.
print('截距\n', ElasticNet_reg.intercept_)
print('自变量系数\n', ElasticNet_reg.coef_.T)
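# 非0元个数: 38 (number of non-zero coefficients)
# 0元个数: 26 (number of zero coefficients)
# 非0元素个数较为折中 (the number of non-zero elements is a compromise)
from sklearn.linear_model import LassoCV,RidgeCV,ElasticNetCV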
# Choose the Lasso penalty strength by 20-fold cross-validation on every
# feature column.  This rebinds lasso_reg, replacing the fixed-alpha model
# fitted earlier in the script.
# The target is flattened to 1-D: LassoCV fits a single output and emits a
# DataConversionWarning when it is handed an (n, 1) column instead.
lasso_reg = LassoCV(cv=20).fit(df[features], df[labels].values.ravel())
# Print the selected hyperparameter (the cross-validated alpha).
print('超参数\n', lasso_reg.alpha_)
# 因篇幅问题不能全部显示,请点此查看更多更全内容