# 线性回归—寻找练习时长与成绩之间的关系

## 线性回归

$J(\theta)$ 越小，说明我们的预测将会越准确。所以问题求解转换成了求使得 $J(\theta)$ 最小的 $\theta$ 值，即 $\min_\theta J(\theta)$。

## 线性回归Python实现

### 代价函数

def compute_cost(X, y, theta):
    """Return the linear-regression cost J(theta).

    J(theta) = (1 / (2m)) * sum((X @ theta - y)^2)

    Args:
        X: (m, n) design matrix (first column is usually all ones).
        y: (m,) target vector.
        theta: (n,) parameter vector.

    Returns:
        Scalar cost value.
    """
    sample_count = y.size
    residuals = X.dot(theta) - y
    # Squared-error sum, scaled by 1/(2m) so the gradient has no stray factor.
    return (residuals ** 2).sum() / (2 * sample_count)

def plot_J_history(X, y):
    """Plot the cost surface J(theta0, theta1) over a fixed grid of parameters.

    Evaluates compute_cost on a 100x100 grid with theta0 in [-10, 10] and
    theta1 in [-1, 4], then renders the surface in 3D.

    Args:
        X: (m, 2) design matrix (bias column plus one feature).
        y: (m,) target vector.
    """
    theta0_vals = np.linspace(-10, 10, 100)
    theta1_vals = np.linspace(-1, 4, 100)
    J_vals = np.zeros((theta0_vals.size, theta1_vals.size))
    for i in range(theta0_vals.size):
        for j in range(theta1_vals.size):
            theta = np.array([theta0_vals[i], theta1_vals[j]])
            J_vals[i, j] = compute_cost(X, y, theta)
    theta_x, theta_y = np.meshgrid(theta0_vals, theta1_vals)
    fig = plt.figure()
    # fig.gca(projection='3d') was removed in Matplotlib 3.6;
    # add_subplot is the supported way to get a 3D axes.
    ax = fig.add_subplot(projection='3d')
    # meshgrid output is indexed [j, i] (theta1 varies along axis 0), while
    # J_vals was filled [i, j] — transpose so the surface matches the grid.
    ax.plot_surface(theta_x, theta_y, J_vals.T)
    ax.set_xlabel(r'$\theta$0')
    ax.set_ylabel(r'$\theta$1')
    plt.show()

# The function is defined right above, so call it directly
# (the original routed through an unimported plotData module).
plot_J_history(X, y)

### 梯度下降

def gradient_descent(X, y, theta, alpha, num_iters):
    """Fit linear-regression parameters with batch gradient descent.

    Args:
        X: (m, n) design matrix.
        y: (m,) target vector.
        theta: (n,) initial parameter vector.
        alpha: learning rate.
        num_iters: number of update steps to run.

    Returns:
        Tuple (theta, J_history): the fitted parameters and the cost
        recorded after every iteration.
    """
    m = y.size
    J_history = np.zeros((num_iters))
    for step in range(num_iters):
        # Gradient of J: (1/m) * X^T (X theta - y); all parameters
        # are updated simultaneously from the same residuals.
        residuals = X.dot(theta) - y
        theta = theta - alpha * (1 / m) * residuals.dot(X)
        J_history[step] = compute_cost(X, y, theta)
    return theta, J_history

# Start from theta = (0, 0) and run gradient descent on the training
# data (X, y) — a small learning rate with many iterations.
theta = np.zeros((2,))
iterations = 1500
alpha = 0.01
theta, J_history = gradient_descent(X, y, theta, alpha, iterations)

### 最后

# The same model fit with scikit-learn's built-in linear regression.
from sklearn.linear_model import LinearRegression

# ... load the training data into X_train, Y_train here ...
# (the original left this as a bare "..." placeholder, which is a
# syntax error if the snippet is pasted as-is — keep it as a comment)
regressor = LinearRegression()
regressor = regressor.fit(X_train, Y_train)