# The Normal Equation
import numpy as np
import matplotlib.pyplot as plt

X = 2 * np.random.rand(100, 1)
y = 4 + 3 * X + np.random.randn(100, 1)
# Note: np.random.rand draws samples from the uniform distribution on [0, 1), while np.random.randn draws samples from the standard normal distribution. For both functions:
1) With no arguments, the call returns a single float;
2) With one argument, it returns a rank-1 (1-D) array, which cannot represent a row or column vector;
3) With two or more arguments, it returns an array of the corresponding shape, which can represent a vector or a matrix;
4) np.random.standard_normal() is similar to np.random.randn(), except that it takes its shape as a tuple;
5) np.random.randn() expects integer arguments; float arguments are not reliably accepted and should be avoided.
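A quick illustration of the return shapes described above (just a sketch of the API behavior):
print(np.random.randn())                         # a single float
print(np.random.randn(3).shape)                  # (3,)   rank-1 array
print(np.random.randn(3, 1).shape)               # (3, 1) column vector
print(np.random.standard_normal((3, 1)).shape)   # (3, 1) shape passed as a tuple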
# Compute theta_hat using the Normal Equation
X_b = np.c_[np.ones((100, 1)), X]  # add x0 = 1 (bias term) to every instance
theta_best = np.linalg.inv(X_b.T.dot(X_b)).dot(X_b.T).dot(y)
print('theta_best',theta_best)
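The closed-form expression evaluated above is the Normal Equation, where $\hat{\theta}$ is the parameter vector, $X_b$ the design matrix with the bias column, and $y$ the vector of targets:

$$\hat{\theta} = (X_b^T X_b)^{-1} X_b^T y$$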
X_new = np.array([[0],[2]])
X_new_b = np.c_[np.ones((2,1)),X_new]
y_predict = X_new_b.dot(theta_best)
print('y_predict',y_predict)
# Plot the model predictions against the training data
plt.plot(X_new, y_predict, 'r-')
plt.plot(X, y, 'b.')
plt.show()
# Using Scikit-Learn
from sklearn.linear_model import LinearRegression
lin_reg = LinearRegression()
lin_reg.fit(X, y)
print(lin_reg.intercept_, lin_reg.coef_)
print(lin_reg.predict(X_new))
# Least-squares solution based on SVD
theta_best_svd, residuals, rank, s = np.linalg.lstsq(X_b, y, rcond=1e-6)
In np.linalg.lstsq, rcond is the cutoff for small singular values: singular values smaller than rcond times the largest singular value are treated as zero when solving. More generally, the reciprocal condition number of a matrix A is close to 1 for a well-conditioned A and close to 0 for an ill-conditioned one; it is cheaper to estimate than the exact condition number, but less reliable.
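To see the quantities involved (a small check using the values returned by lstsq above):
print('rank', rank)                    # effective rank of X_b
print('singular values', s)
print('cutoff', 1e-6 * s.max())        # singular values below this are treated as zero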
# theta_hat via the Moore-Penrose pseudoinverse
print('Pseudoinverse', np.linalg.pinv(X_b).dot(y))
The more common definition of the pseudoinverse is based on the Singular Value Decomposition (SVD):
SVD: $X = U \Sigma V^T$
Pseudoinverse: $X^+ = V \Sigma^+ U^T$
The point to note in this formula is how $\Sigma^+$ in the middle is computed. $\Sigma$ is a diagonal matrix, but not necessarily square, so computing its pseudoinverse is special yet simple:
take the reciprocal of each nonzero element on the diagonal,
then transpose the whole matrix.
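This can be checked against NumPy directly (a sketch; the intermediate names below are mine, and the economy-size SVD is used so the diagonal factor is square):
U, sigma, Vt = np.linalg.svd(X_b, full_matrices=False)
Sigma_pinv = np.diag(1 / sigma)                       # reciprocals of the (nonzero) singular values
X_b_pinv = Vt.T.dot(Sigma_pinv).dot(U.T)              # V Sigma^+ U^T
print(np.allclose(X_b_pinv, np.linalg.pinv(X_b)))     # expected to print True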
# Gradient Descent
# Batch Gradient Descent
eta = 0.1            # learning rate
n_iterations = 1000
m = 100              # number of training instances
theta = np.random.randn(2, 1)  # random initialization
for iteration in range(n_iterations):
    gradients = 2 / m * X_b.T.dot(X_b.dot(theta) - y)
    theta = theta - eta * gradients
print('theta (Batch GD)', theta)
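The gradient computed inside the loop is the gradient of the MSE cost function with respect to $\theta$, and each step moves $\theta$ by $-\eta$ times it:

$$\nabla_\theta \, \mathrm{MSE}(\theta) = \frac{2}{m} X_b^T (X_b \theta - y)$$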
# Stochastic Gradient Descent
# Using SGDRegressor from Scikit-Learn
from sklearn.linear_model import SGDRegressor
sgd_reg = SGDRegressor(max_iter=1000, tol=1e-3, penalty=None, eta0=0.1)
sgd_reg.fit(X, y.ravel())  # SGDRegressor expects a 1-D target array
print(sgd_reg.intercept_, sgd_reg.coef_)
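For comparison, a minimal hand-rolled SGD loop with a simple learning schedule might look like this (a sketch; n_epochs, t0 and t1 are illustrative values, not from the original notes):
n_epochs = 50
t0, t1 = 5, 50  # learning-schedule hyperparameters

def learning_schedule(t):
    return t0 / (t + t1)

theta = np.random.randn(2, 1)  # random initialization
for epoch in range(n_epochs):
    for i in range(m):
        random_index = np.random.randint(m)
        xi = X_b[random_index:random_index + 1]   # one random instance (kept 2-D)
        yi = y[random_index:random_index + 1]
        gradients = 2 * xi.T.dot(xi.dot(theta) - yi)
        theta = theta - learning_schedule(epoch * m + i) * gradients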
# Polynomial Regression
m = 100
X = 6 * np.random.rand(m, 1) - 3
y = 0.5 * X**2 + X + 2 + np.random.randn(m, 1)
from sklearn.preprocessing import PolynomialFeatures
poly_features = PolynomialFeatures(degree=2, include_bias=False)
X_poly = poly_features.fit_transform(X)
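To see what the transformation added, one can compare a training instance with its transformed version (X_poly should now contain the original feature plus its square):
print(X[0])
print(X_poly[0])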
lin_reg = LinearRegression()
lin_reg.fit(X_poly, y)
print(lin_reg.intercept_, lin_reg.coef_)
# Learning curves
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

def plot_learning_curves(model, X, y):
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2)
    train_errors, val_errors = [], []
    for m in range(1, len(X_train)):
        model.fit(X_train[:m], y_train[:m])
        y_train_predict = model.predict(X_train[:m])
        y_val_predict = model.predict(X_val)
        train_errors.append(mean_squared_error(y_train[:m], y_train_predict))
        val_errors.append(mean_squared_error(y_val, y_val_predict))
    plt.plot(np.sqrt(train_errors), 'r-+', linewidth=2, label='train')
    plt.plot(np.sqrt(val_errors), 'b-', linewidth=2, label='val')
    plt.legend()
lin_reg = LinearRegression()
plot_learning_curves(lin_reg, X, y)
from sklearn.pipeline import Pipeline
polynomial_regression = Pipeline([
    ('poly_features', PolynomialFeatures(degree=10, include_bias=False)),
    ('lin_reg', LinearRegression()),
])
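Presumably this 10th-degree pipeline is then passed to the same helper to compare its learning curves with the plain linear model's, e.g.:
plot_learning_curves(polynomial_regression, X, y)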