From d8181339aaa4f268d05108a348d7dd18c2fc29c2 Mon Sep 17 00:00:00 2001
From: re4lfl0w <re4lfl0w@gmail.com>
Date: Tue, 27 Jan 2015 10:05:21 +0900
Subject: [PATCH 1/2] Fix UnicodeDecodeError by decoding text as UTF-8

---
 ch05/classify.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/ch05/classify.py b/ch05/classify.py
index ef5af419..26f3731a 100644
--- a/ch05/classify.py
+++ b/ch05/classify.py
@@ -54,6 +54,7 @@ def prepare_sent_features():
         if not text:
             meta[pid]['AvgSentLen'] = meta[pid]['AvgWordLen'] = 0
         else:
+            text = text.decode('utf-8')
             sent_lens = [len(nltk.word_tokenize(
                 sent)) for sent in nltk.sent_tokenize(text)]
             meta[pid]['AvgSentLen'] = np.mean(sent_lens)

From 4de7593567e92c28f4e07c8fcf54ccf4135bab8e Mon Sep 17 00:00:00 2001
From: re4lfl0w <re4lfl0w@gmail.com>
Date: Wed, 28 Jan 2015 06:21:37 +0900
Subject: [PATCH 2/2] Decode text as UTF-8 only on Python 2.x to avoid UnicodeDecodeError

---
 ch05/classify.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/ch05/classify.py b/ch05/classify.py
index 26f3731a..76699819 100644
--- a/ch05/classify.py
+++ b/ch05/classify.py
@@ -54,7 +54,9 @@ def prepare_sent_features():
         if not text:
             meta[pid]['AvgSentLen'] = meta[pid]['AvgWordLen'] = 0
         else:
-            text = text.decode('utf-8')
+            from platform import python_version
+            if python_version().startswith('2'):
+                text = text.decode('utf-8')
             sent_lens = [len(nltk.word_tokenize(
                 sent)) for sent in nltk.sent_tokenize(text)]
             meta[pid]['AvgSentLen'] = np.mean(sent_lens)