Skip to content

Commit 2d3679c

Browse files
committed
ENH Use sklearn for KNN & cross-validation
These are the same processes that were written "by hand" before, but now using sklearn (this is the end of the chapter).
1 parent 5d0773b commit 2d3679c

File tree

1 file changed

+56
-0
lines changed

1 file changed

+56
-0
lines changed

ch02/seeds_knn_sklearn.py

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
# This code is supporting material for the book
2+
# Building Machine Learning Systems with Python
3+
# by Willi Richert and Luis Pedro Coelho
4+
# published by PACKT Publishing
5+
#
6+
# It is made available under the MIT License
7+
8+
# Basic imports
9+
import numpy as np
10+
from load import load_dataset
11+
12+
13+
# Import sklearn implementation of KNN
14+
from sklearn.neighbors import KNeighborsClassifier
15+
16+
# Load the seeds data and build a k-nearest-neighbours classifier (k=4).
features, labels = load_dataset('seeds')
classifier = KNeighborsClassifier(n_neighbors=4)

# Leave-one-out evaluation: for every example, fit on all the other examples
# and check the prediction for the single example that was held out.
n = len(features)
correct = 0.0
for ei in range(n):
    # Boolean masks: everything except example `ei` is used for training
    training = np.ones(n, bool)
    training[ei] = False
    testing = ~training
    classifier.fit(features[training], labels[training])
    # Select the held-out example through the mask (not features[ei]) so that
    # predict receives a one-row 2-D selection instead of a bare 1-D sample,
    # which current scikit-learn versions reject.
    pred = classifier.predict(features[testing])
    # np.sum keeps `correct` a scalar count rather than turning it into an array
    correct += np.sum(pred == labels[testing])
# Fraction of held-out examples that were classified correctly
print(correct/n)
30+
31+
# Import KFold object. It lives in sklearn.model_selection since scikit-learn
# 0.18; the older sklearn.cross_validation module was removed in 0.20, so we
# only fall back to it when the new location is unavailable.
try:
    from sklearn.model_selection import KFold
except ImportError:
    # Legacy API: KFold takes the number of samples and is itself iterable
    from sklearn.cross_validation import KFold
    kf = KFold(len(features), n_folds=3, shuffle=True)
else:
    # Current API: KFold is configured first and `split` yields the folds
    kf = KFold(n_splits=3, shuffle=True).split(features)

# means will hold the mean for each fold
means = []

# kf yields pairs (training, testing) so that each iteration
# implements a separate fold.
for training, testing in kf:
    # We learn a model for this fold with `fit` and then apply it to the
    # testing data with `predict`:
    classifier.fit(features[training], labels[training])
    prediction = classifier.predict(features[testing])

    # np.mean on an array of booleans returns the fraction of correct decisions
    # for this fold:
    curmean = np.mean(prediction == labels[testing])
    means.append(curmean)
print(means)
51+
52+
# The function cross_val_score runs a cross-validation like the loop above
# with a single function call. No `cv` argument is passed here, so the
# library's default fold scheme is used.
#
# cross_val_score lives in sklearn.model_selection since scikit-learn 0.18;
# sklearn.cross_validation (removed in 0.20) is kept only as a fallback.
try:
    from sklearn.model_selection import cross_val_score
except ImportError:
    from sklearn.cross_validation import cross_val_score
print(cross_val_score(classifier, features, labels))

0 commit comments

Comments
 (0)