Skip to content

Commit 702bd01

Browse files
committed
RFCT Use predict on 2d arrays instead of loop
Simplify the code in general.
1 parent 4e34782 commit 702bd01

File tree

2 files changed

+17
-13
lines changed

2 files changed

+17
-13
lines changed

ch07/boston_cv10_penalized.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,12 @@
88
# This script fits several forms of penalized regression
99

1010
from __future__ import print_function
11+
import numpy as np
1112
from sklearn.cross_validation import KFold
1213
from sklearn.linear_model import ElasticNet, Lasso, Ridge
13-
from sklearn.linear_model import ElasticNetCV, LassoCV, RidgeCV
14-
import numpy as np
1514
from sklearn.datasets import load_boston
1615
boston = load_boston()
17-
x = np.array([np.concatenate((v, [1])) for v in boston.data])
16+
x = boston.data
1817
y = boston.target
1918

2019
for name, met in [
@@ -26,7 +25,7 @@
2625
met.fit(x, y)
2726

2827
# Predict on the whole data:
29-
p = np.array([met.predict(xi) for xi in x])
28+
p = met.predict(x)
3029

3130
e = p - y
3231
# np.dot(e, e) == sum(ei**2 for ei in e) but faster
@@ -38,7 +37,7 @@
3837
err = 0
3938
for train, test in kf:
4039
met.fit(x[train], y[train])
41-
p = np.array([met.predict(xi) for xi in x[test]])
40+
p = met.predict(x[test])
4241
e = p - y[test]
4342
err += np.dot(e, e)
4443

ch07/cv10_lr.py

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5,32 +5,37 @@
55
#
66
# It is made available under the MIT License
77

8+
import numpy as np
89
from sklearn.cross_validation import KFold
910
from sklearn.linear_model import LinearRegression, ElasticNet
10-
import numpy as np
1111
from sklearn.datasets import load_boston
1212
boston = load_boston()
13-
x = np.array([np.concatenate((v, [1])) for v in boston.data])
13+
x = boston.data
1414
y = boston.target
15+
16+
17+
# Switch this variable to use an Elastic Net instead of OLS
1518
FIT_EN = False
1619

1720
if FIT_EN:
1821
model = ElasticNet(fit_intercept=True, alpha=0.5)
1922
else:
2023
model = LinearRegression(fit_intercept=True)
24+
2125
model.fit(x, y)
22-
p = np.array([model.predict(xi) for xi in x])
23-
e = p - y
24-
total_error = np.dot(e, e)
25-
rmse_train = np.sqrt(total_error / len(p))
26+
rmse_train = np.sqrt(model.residues_/len(x))
27+
28+
# Alternatively, we could have computed rmse_train using this expression:
29+
# rmse_train = np.sqrt(np.mean( (model.predict(x) - y) ** 2))
30+
# The results are equivalent
2631

2732
kf = KFold(len(x), n_folds=10)
2833
err = 0
2934
for train, test in kf:
3035
model.fit(x[train], y[train])
31-
p = np.array([model.predict(xi) for xi in x[test]])
36+
p = model.predict(x[test])
3237
e = p - y[test]
33-
err += np.dot(e, e)
38+
err += np.dot(e, e) # This is the same as np.sum(e * e)
3439

3540
rmse_10cv = np.sqrt(err / len(x))
3641
print('RMSE on training: {}'.format(rmse_train))

0 commit comments

Comments (0)