|
#
# It is made available under the MIT License
|
| 8 | +import numpy as np |
8 | 9 | from sklearn.cross_validation import KFold |
9 | 10 | from sklearn.linear_model import LinearRegression, ElasticNet |
10 | | -import numpy as np |
11 | 11 | from sklearn.datasets import load_boston |
# Load the Boston house-price dataset: x is the (n_samples, n_features)
# feature matrix, y the per-sample target prices.
boston = load_boston()
x, y = boston.data, boston.target


# Switch this flag to fit an Elastic Net instead of ordinary least squares
FIT_EN = False
16 | 19 |
|
# Choose the estimator: an L1/L2-regularized Elastic Net, or plain
# ordinary-least-squares regression. Both fit an intercept term.
if FIT_EN:
    model = ElasticNet(fit_intercept=True, alpha=0.5)
else:
    model = LinearRegression(fit_intercept=True)

model.fit(x, y)

# Training RMSE computed directly from the residuals.
# NOTE: the previous form, np.sqrt(model.residues_ / len(x)), relied on the
# `residues_` attribute, which exists only on LinearRegression (and was
# deprecated/removed in later scikit-learn releases) — it raises
# AttributeError when FIT_EN selects the ElasticNet. This expression is
# numerically equivalent and works for both estimators.
rmse_train = np.sqrt(np.mean((model.predict(x) - y) ** 2))
26 | 31 |
|
# Estimate generalization error with 10-fold cross-validation: refit the
# model on each training split and accumulate the squared prediction error
# on the corresponding held-out fold.
kf = KFold(len(x), n_folds=10)
total_sq_error = 0
for train_idx, test_idx in kf:
    model.fit(x[train_idx], y[train_idx])
    residuals = model.predict(x[test_idx]) - y[test_idx]
    # np.dot(residuals, residuals) == np.sum(residuals ** 2)
    total_sq_error += np.dot(residuals, residuals)

# Every sample appears in exactly one test fold, so dividing the pooled
# squared error by len(x) gives the cross-validated mean squared error.
rmse_10cv = np.sqrt(total_sq_error / len(x))
print('RMSE on training: {}'.format(rmse_train))
|
0 commit comments