|
| 1 | +# This code is supporting material for the book |
| 2 | +# Building Machine Learning Systems with Python |
| 3 | +# by Willi Richert and Luis Pedro Coelho |
| 4 | +# published by PACKT Publishing |
| 5 | +# |
| 6 | +# It is made available under the MIT License |
| 7 | + |
| 8 | +# Basic imports |
| 9 | +import numpy as np |
| 10 | +from load import load_dataset |
| 11 | + |
| 12 | + |
| 13 | +# Import sklearn implementation of KNN |
| 14 | +from sklearn.neighbors import KNeighborsClassifier |
| 15 | + |
# Load the dataset and build a k-nearest-neighbours classifier (k=4).
features, labels = load_dataset('seeds')
classifier = KNeighborsClassifier(n_neighbors=4)


# Leave-one-out cross-validation: every example is held out in turn, the
# classifier is fit on all the remaining examples, and the held-out example
# is then predicted.
n = len(features)
correct = 0.0
for ei in range(n):
    # Boolean masks over the examples: `training` selects everything except
    # example `ei`, `testing` selects only example `ei`.
    training = np.ones(n, bool)
    training[ei] = False
    testing = ~training
    classifier.fit(features[training], labels[training])
    # Index with the mask (not the bare integer) so that predict() receives
    # a 2-D array holding the single held-out row; recent scikit-learn
    # versions reject a 1-D sample.
    pred = classifier.predict(features[testing])
    # np.sum collapses the one-element comparison to a scalar, so `correct`
    # stays a plain number instead of silently becoming an array.
    correct += np.sum(pred == labels[testing])
# Fraction of held-out examples that were classified correctly.
print(correct / n)
| 30 | + |
# Import KFold object. scikit-learn 0.18 moved it to `model_selection` and
# 0.20 removed the old `cross_validation` module, so try the current location
# first and fall back to the legacy one on old installations.
try:
    from sklearn.model_selection import KFold
except ImportError:
    from sklearn.cross_validation import KFold
    # Legacy API: the number of samples is given to the constructor and the
    # KFold object itself is iterated to obtain the folds.
    kf = KFold(len(features), n_folds=3, shuffle=True)
    folds = kf
else:
    # Current API: the constructor only takes the fold settings; the data is
    # passed to split(), which yields the folds.
    kf = KFold(n_splits=3, shuffle=True)
    folds = kf.split(features)

# means will hold the mean accuracy for each fold
means = []

# Each item of `folds` is a (training, testing) pair that can be used to
# index `features` and `labels`; each iteration implements a separate fold.
for training, testing in folds:
    # We learn a model for this fold with `fit` and then apply it to the
    # testing data with `predict`:
    classifier.fit(features[training], labels[training])
    prediction = classifier.predict(features[testing])

    # np.mean on an array of booleans returns the fraction of correct
    # decisions for this fold:
    curmean = np.mean(prediction == labels[testing])
    means.append(curmean)
print(means)
| 51 | + |
# The function cross_val_score runs a cross-validation like the loop above
# with a single function call. No `cv` argument is passed, so scikit-learn's
# default fold settings are used rather than the shuffled 3-fold split.

# scikit-learn 0.18 moved cross_val_score to `model_selection` and 0.20
# removed `cross_validation`; fall back to the old module on old installs.
try:
    from sklearn.model_selection import cross_val_score
except ImportError:
    from sklearn.cross_validation import cross_val_score
print(cross_val_score(classifier, features, labels))
0 commit comments