hiro106
diff --git a/ch02/extra/create_tsv.py b/ch02/extra/create_tsv.py
Lines changed: 3 additions & 2 deletions
diff --git a/ch02/figure1.py b/ch02/figure1.py
Lines changed: 6 additions & 6 deletions
diff --git a/ch02/figure2.py b/ch02/figure2.py
Lines changed: 17 additions & 15 deletions
diff --git a/ch02/figure4_5.py b/ch02/figure4_5.py
Lines changed: 20 additions & 18 deletions
diff --git a/ch02/heldout.py b/ch02/heldout.py
Lines changed: 0 additions & 1 deletion
diff --git a/ch02/knn.py b/ch02/knn.py
Lines changed: 10 additions & 5 deletions
diff --git a/ch02/load.py b/ch02/load.py
Lines changed: 2 additions & 1 deletion
diff --git a/ch02/seeds_knn.py b/ch02/seeds_knn.py
Lines changed: 5 additions & 4 deletions
diff --git a/ch02/seeds_threshold.py b/ch02/seeds_threshold.py
Lines changed: 1 addition & 2 deletions
diff --git a/ch02/simple_threshold.py b/ch02/simple_threshold.py
Lines changed: 1 addition & 1 deletion
@@ -1,12 +1,13 @@
 import milksets.iris
 import milksets.seeds
 
+
 def save_as_tsv(fname, module):
     features, labels = module.load()
     nlabels = [module.label_names[ell] for ell in labels]
     with open(fname, 'w') as ofile:
-        for f,n in zip(features, nlabels):
-            print >>ofile, "\t".join(map(str,f)+[n])
+        for f, n in zip(features, nlabels):
+            print >>ofile, "\t".join(map(str, f) + [n])
 
 save_as_tsv('iris.tsv', milksets.iris)
 save_as_tsv('seeds.tsv', milksets.seeds)
@@ -8,14 +8,14 @@
 target = data['target']
 
 
-pairs = [(0,1),(0,2),(0,3),(1,2),(1,3),(2,3)]
-for i,(p0,p1) in enumerate(pairs):
-    plt.subplot(2,3,i+1)
-    for t,marker,c in zip(range(3),">ox","rgb"):
-        plt.scatter(features[target == t,p0], features[target == t,p1], marker=marker, c=c)
+pairs = [(0, 1), (0, 2), (0, 3), (1, 2), (1, 3), (2, 3)]
+for i, (p0, p1) in enumerate(pairs):
+    plt.subplot(2, 3, i + 1)
+    for t, marker, c in zip(range(3), ">ox", "rgb"):
+        plt.scatter(features[target == t, p0], features[
+                    target == t, p1], marker=marker, c=c)
     plt.xlabel(feature_names[p0])
     plt.ylabel(feature_names[p1])
     plt.xticks([])
     plt.yticks([])
 plt.savefig('../1400_02_01.png')
-
@@ -13,26 +13,28 @@
 virginica = species == 'virginica'
 
 t = 1.75
-p0,p1 = 3,2
+p0, p1 = 3, 2
 
 if COLOUR_FIGURE:
-    area1c = (1.,.8,.8)
-    area2c = (.8,.8,1.)
+    area1c = (1., .8, .8)
+    area2c = (.8, .8, 1.)
 else:
-    area1c = (1.,1,1)
-    area2c = (.7,.7,.7)
+    area1c = (1., 1, 1)
+    area2c = (.7, .7, .7)
 
-x0,x1 =[features[:,p0].min()*.9,features[:,p0].max()*1.1]
-y0,y1 =[features[:,p1].min()*.9,features[:,p1].max()*1.1]
+x0, x1 = [features[:, p0].min() * .9, features[:, p0].max() * 1.1]
+y0, y1 = [features[:, p1].min() * .9, features[:, p1].max() * 1.1]
 
-plt.fill_between([t,x1],[y0,y0],[y1,y1],color=area2c)
-plt.fill_between([x0,t],[y0,y0],[y1,y1],color=area1c)
-plt.plot([t,t],[y0,y1],'k--',lw=2)
-plt.plot([t-.1,t-.1],[y0,y1],'k:',lw=2)
-plt.scatter(features[virginica,p0], features[virginica,p1], c='b', marker='o')
-plt.scatter(features[~virginica,p0], features[~virginica,p1], c='r', marker='x')
-plt.ylim(y0,y1)
-plt.xlim(x0,x1)
+plt.fill_between([t, x1], [y0, y0], [y1, y1], color=area2c)
+plt.fill_between([x0, t], [y0, y0], [y1, y1], color=area1c)
+plt.plot([t, t], [y0, y1], 'k--', lw=2)
+plt.plot([t - .1, t - .1], [y0, y1], 'k:', lw=2)
+plt.scatter(features[virginica, p0],
+            features[virginica, p1], c='b', marker='o')
+plt.scatter(features[~virginica, p0],
+            features[~virginica, p1], c='r', marker='x')
+plt.ylim(y0, y1)
+plt.xlim(x0, x1)
 plt.xlabel(feature_names[p0])
 plt.ylabel(feature_names[p1])
 plt.savefig('../1400_02_02.png')
@@ -18,32 +18,34 @@
 
 
 def train_plot(features, labels):
-    y0,y1 = features[:,2].min()*.9, features[:,2].max()*1.1
-    x0,x1 = features[:,0].min()*.9, features[:,0].max()*1.1
-    X = np.linspace(x0,x1,100)
-    Y = np.linspace(y0,y1,100)
-    X,Y = np.meshgrid(X,Y)
-
-    model = learn_model(1, features[:,(0,2)], np.array(labels))
-    C = apply_model(np.vstack([X.ravel(),Y.ravel()]).T, model).reshape(X.shape)
+    y0, y1 = features[:, 2].min() * .9, features[:, 2].max() * 1.1
+    x0, x1 = features[:, 0].min() * .9, features[:, 0].max() * 1.1
+    X = np.linspace(x0, x1, 100)
+    Y = np.linspace(y0, y1, 100)
+    X, Y = np.meshgrid(X, Y)
+
+    model = learn_model(1, features[:, (0, 2)], np.array(labels))
+    C = apply_model(
+        np.vstack([X.ravel(), Y.ravel()]).T, model).reshape(X.shape)
     if COLOUR_FIGURE:
-        cmap = ListedColormap([(1.,.6,.6),(.6,1.,.6),(.6,.6,1.)])
+        cmap = ListedColormap([(1., .6, .6), (.6, 1., .6), (.6, .6, 1.)])
     else:
-        cmap = ListedColormap([(1.,1.,1.),(.2,.2,.2),(.6,.6,.6)])
-    plt.xlim(x0,x1)
-    plt.ylim(y0,y1)
+        cmap = ListedColormap([(1., 1., 1.), (.2, .2, .2), (.6, .6, .6)])
+    plt.xlim(x0, x1)
+    plt.ylim(y0, y1)
     plt.xlabel(feature_names[0])
     plt.ylabel(feature_names[2])
-    plt.pcolormesh(X,Y,C, cmap=cmap)
+    plt.pcolormesh(X, Y, C, cmap=cmap)
     if COLOUR_FIGURE:
-        cmap = ListedColormap([(1.,.0,.0),(.0,1.,.0),(.0,.0,1.)])
-        plt.scatter(features[:,0], features[:,2], c=labels, cmap=cmap)
+        cmap = ListedColormap([(1., .0, .0), (.0, 1., .0), (.0, .0, 1.)])
+        plt.scatter(features[:, 0], features[:, 2], c=labels, cmap=cmap)
     else:
-        for lab,ma in zip(range(3), "Do^"):
-            plt.plot(features[labels == lab,0], features[labels == lab,2], ma, c=(1.,1.,1.))
+        for lab, ma in zip(range(3), "Do^"):
+            plt.plot(features[labels == lab, 0], features[
+                     labels == lab, 2], ma, c=(1., 1., 1.))
 
 
-features,labels = load_dataset('seeds')
+features, labels = load_dataset('seeds')
 names = sorted(set(labels))
 labels = np.array([names.index(ell) for ell in labels])
 
 
@@ -24,4 +24,3 @@
 Training error was {0:.1%}.
 Testing error was {1:.1%} (N = {2}).
 '''.format(train_error, test_error, testing.sum()))
-
@@ -1,29 +1,34 @@
 import numpy as np
+
+
 def learn_model(k, features, labels):
-    return k, features.copy(),labels.copy()
+    return k, features.copy(), labels.copy()
+
 
 def plurality(xs):
     from collections import defaultdict
     counts = defaultdict(int)
     for x in xs:
         counts[x] += 1
     maxv = max(counts.values())
-    for k,v in counts.items():
+    for k, v in counts.items():
         if v == maxv:
             return k
 
+
 def apply_model(features, model):
     k, train_feats, labels = model
     results = []
     for f in features:
         label_dist = []
-        for t,ell in zip(train_feats, labels):
-            label_dist.append( (np.linalg.norm(f-t), ell) )
+        for t, ell in zip(train_feats, labels):
+            label_dist.append((np.linalg.norm(f - t), ell))
         label_dist.sort(key=lambda d_ell: d_ell[0])
         label_dist = label_dist[:k]
-        results.append(plurality([ell for _,ell in label_dist]))
+        results.append(plurality([ell for _, ell in label_dist]))
     return np.array(results)
 
+
 def accuracy(features, labels, model):
     preds = apply_model(features, model)
     return np.mean(preds == labels)
@@ -1,4 +1,6 @@
 import numpy as np
+
+
 def load_dataset(dataset_name):
     '''
     data,labels = load_dataset(dataset_name)
@@ -20,4 +22,3 @@ def load_dataset(dataset_name):
     data = np.array(data)
     labels = np.array(labels)
     return data, labels
-
 
@@ -2,7 +2,8 @@
 import numpy as np
 from knn import learn_model, apply_model, accuracy
 
-features,labels = load_dataset('seeds')
+features, labels = load_dataset('seeds')
+
 
 def cross_validate(features, labels):
     error = 0.0
@@ -14,13 +15,13 @@ def cross_validate(features, labels):
         test_error = accuracy(features[testing], labels[testing], model)
         error += test_error
 
-    return error/ 10.0
+    return error / 10.0
 
 error = cross_validate(features, labels)
 print('Ten fold cross-validated error was {0:.1%}.'.format(error))
 
 features -= features.mean(0)
 features /= features.std(0)
 error = cross_validate(features, labels)
-print('Ten fold cross-validated error after z-scoring was {0:.1%}.'.format(error))
-
+print(
+    'Ten fold cross-validated error after z-scoring was {0:.1%}.'.format(error))
@@ -2,7 +2,7 @@
 import numpy as np
 from threshold import learn_model, apply_model, accuracy
 
-features,labels = load_dataset('seeds')
+features, labels = load_dataset('seeds')
 labels = labels == 'Canadian'
 
 error = 0.0
@@ -17,4 +17,3 @@
 error /= 10.0
 
 print('Ten fold cross-validated error was {0:.1%}.'.format(error))
-
@@ -7,7 +7,7 @@
 target_names = data['target_names']
 labels = target_names[target]
 
-plength = features[:,2]
+plength = features[:, 2]
 is_setosa = (labels == 'setosa')
 print('Maximum of setosa: {0}.'.format(plength[is_setosa].max()))
 print('Minimum of others: {0}.'.format(plength[~is_setosa].min()))