# Notebook "mnist" from Chapter 3, "A First Taste of Applied Machine Learning"

In [None]:
import pandas as pd
# Load the small set of digit images used as prediction inputs further down.
# Later cells rely on this file having a 'digit' label column and treat each
# row's remaining values as a flattened 28x28 image.
addr640 = pd.read_csv("data/640.csv")

In [None]:
# Show the labels stored in the 'digit' column, then strip that column so
# only the pixel columns remain for display and prediction.
print(addr640['digit'].values)
addr640 = addr640.drop(columns='digit')

In [None]:
# Take row 0 (the digit '6'), fold its flat pixel vector back into a
# 28x28 grid, and render it.
# NOTE(review): `plt` is not imported in the visible cells -- presumably
# matplotlib.pyplot; confirm it is imported before this cell runs.
six_img_as_row = addr640.iloc[0, :].values
img28x28 = six_img_as_row.reshape((28, 28))
plt.imshow(img28x28, cmap='binary')
plt.show()

In [None]:
# Binarize the '6' image and print it as a 28x28 grid of 0/1 characters.
#
# Work on a private copy: the array obtained from `.values` may be a view of
# addr640's underlying data (or a read-only array under pandas copy-on-write),
# so writing into it directly could either alter the pixels that are later
# passed to the classifier or raise an error.
six_img_as_row = six_img_as_row.copy()
six_img_as_row[six_img_as_row > 0] = 1  # any positive intensity becomes 1
six_img_as_row = six_img_as_row.astype(int)
img28x28 = six_img_as_row.reshape(28, 28)
s = str(img28x28).replace(' ', '')  # remove spaces so each row prints compactly
print(s)

In [None]:
# Load the MNIST sample and separate it into labels and pixel features.
digits = pd.read_csv("data/mnist-10k-sample.csv")
targets = digits['digit']              # label column only
images = digits.drop(columns='digit')  # every column except the label

In [None]:
# Draw the first 50 training images in a 10x5 grid, each annotated with its
# known label.
fig, axes = plt.subplots(10, 5, figsize=(4, 6.5))  # make 10x5 grid of plots

for i, ax in enumerate(axes.flat):
    img_as_row = images.iloc[i].values
    img28x28 = img_as_row.reshape(28, 28)
    ax.axis('off')  # don't show x, y axes
    ax.imshow(img28x28, cmap='binary')
    # Look the label up by position, exactly as the image is, so the pair
    # stays aligned even if the index is not the default 0..n-1 range.
    ax.text(0, 8, targets.iloc[i], color='#313695', fontsize=18)
plt.show()

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Train a 900-tree random forest on the MNIST sample (n_jobs=-1 uses all
# available cores), then classify the digit images held in addr640.
cl = RandomForestClassifier(n_jobs=-1, n_estimators=900).fit(images, targets)
pred = cl.predict(addr640)

In [None]:
import numpy as np

np.set_printoptions(precision=3)  # show floats with 3 digits of precision

# Ask the forest for per-class probabilities instead of a single prediction;
# each row of `prob` corresponds to one row of addr640.
prob = cl.predict_proba(addr640)
# x-axis values for the bar charts below.
# NOTE(review): assumes the classifier's classes are exactly the digits 0..9,
# in that order -- confirm against cl.classes_.
digit_values = range(0, 10)
prob_for_2nd_digit = prob[1]
print(prob_for_2nd_digit)

In [None]:
# The index of the largest probability is taken as the predicted digit.
pred_digit = prob_for_2nd_digit.argmax()
print("predicted digit is", pred_digit)

In [None]:
# Bar chart of the class probabilities for the first image, with the most
# likely digit highlighted in a contrasting color.
prob_for_1st_digit = prob[0]
pred_digit = np.argmax(prob_for_1st_digit)
bars = plt.bar(digit_values, prob_for_1st_digit, color='#4575b4')
bars[pred_digit].set_color('#fdae61')  # highlight the winning bar
plt.xlabel("predicted digit")
plt.xticks(digit_values)
# Two-line axis label: the line break must be the \n escape, because a raw
# newline inside a single-quoted string literal is a syntax error.
plt.ylabel("likelihood 1st image\nis a specific digit")
plt.show()

In [None]:
# Bar chart of the class probabilities for the second image, with the most
# likely digit highlighted in a contrasting color.
pred_digit = np.argmax(prob_for_2nd_digit)
bars = plt.bar(digit_values, prob_for_2nd_digit, color='#4575b4')
bars[pred_digit].set_color('#fdae61')  # highlight the winning bar
plt.xlabel("predicted digit")
plt.xticks(digit_values)
# Two-line axis label: the line break must be the \n escape, because a raw
# newline inside a single-quoted string literal is a syntax error.
plt.ylabel("likelihood 2nd image\nis a specific digit")
plt.show()

In [None]:
# Bar chart of the class probabilities for the third image, with the most
# likely digit highlighted in a contrasting color.
prob_for_3rd_digit = prob[2]
pred_digit = np.argmax(prob_for_3rd_digit)
bars = plt.bar(digit_values, prob_for_3rd_digit, color='#4575b4')
bars[pred_digit].set_color('#fdae61')  # highlight the winning bar
plt.xlabel("predicted digit")
plt.xticks(digit_values)
# Two-line axis label: the line break must be the \n escape, because a raw
# newline inside a single-quoted string literal is a syntax error.
plt.ylabel("likelihood 3rd image\nis a specific digit")
plt.show()

In [None]:
# Select the rows of `images` whose label equals 4 and draw them in a grid.
fours = images[targets==4] # find all "4" images

# 15 rows x 8 columns = 120 axes; the loop reads one row of `fours` per axis.
# NOTE(review): assumes `fours` holds at least 120 rows, otherwise
# fours.iloc[i, :] raises IndexError -- confirm against the dataset.
fig, axes = plt.subplots(15, 8, figsize=(4,6.5))
for i, ax in enumerate(axes.flat):
    # i-th "4" image, unflattened from a row of pixels to a 28x28 array
    img = fours.iloc[i,:].values.reshape(28,28)
    ax.axis('off')  # hide the x, y axes
    ax.imshow(img, cmap='binary')