Guide to Keras Deep Learning

Xiao Song

https://xsong.ltd/

As a beginner, I find it helpful to write some simple guides using simple data.

import warnings
warnings.filterwarnings('ignore')
import numpy as np 
import pandas as pd

X_train = pd.read_csv('E:/some_code/py_basic/house_price/data/train1.csv') # load preprocessed training data
X_test = pd.read_csv('E:/some_code/py_basic/house_price/data/test1.csv')   # load preprocessed test data

Y_train = np.array(X_train['SalePrice'])
Y_train = np.log1p(Y_train) # log-transform the skewed target: log(1 + SalePrice)

X_train.drop(['SalePrice'], axis=1, inplace=True) # keep only the features
Y_train
#> array([12.24769912, 12.10901644, 12.31717117, ..., 12.25486757,
#>        12.49313327, 11.86446927])
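Sale prices are strongly right-skewed, so I model the log of the price instead. np.log1p computes log(1 + x), and np.expm1 inverts it exactly, which is how the predictions are mapped back to dollars at the end. A tiny illustrative check (the prices here are made up):

prices = np.array([100000.0, 250000.0]) # hypothetical sale prices
assert np.allclose(np.expm1(np.log1p(prices)), prices) # expm1 undoes log1p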
X_train.shape
#> (1436, 109)
X_test.shape
#> (1459, 109)
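Note that train1.csv and test1.csv are already fully numeric: column names such as MSZoningRM and SaleTypeWD look like one-hot encoded categorical variables. The preprocessing itself isn't shown here, but a sketch like the following (raw_df is hypothetical, standing in for the raw Kaggle data) would produce columns named this way:

encoded = pd.get_dummies(raw_df, prefix_sep='') # 'MSZoning' + 'RM' -> 'MSZoningRM'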

Feature Standardization

Standardize features by removing the mean and scaling to unit variance: \[z = \frac{x - u}{s}\]

where \(z\) is the standardized value of \(x\), \(u\) is the mean of \(x\), and \(s\) is the standard deviation of \(x\).
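As a quick sanity check (not part of the original pipeline), computing the formula by hand matches scikit-learn's StandardScaler; note that it uses the population standard deviation, which is also NumPy's default:

import numpy as np
from sklearn.preprocessing import StandardScaler

x = np.array([[1.0], [2.0], [3.0], [4.0]])
z_manual = (x - x.mean(axis=0)) / x.std(axis=0) # z = (x - u) / s with ddof=0
z_scaler = StandardScaler().fit_transform(x)
assert np.allclose(z_manual, z_scaler)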

#from sklearn.preprocessing import RobustScaler
from sklearn.preprocessing import StandardScaler

def standardize(df):
    '''Standardize features with a StandardScaler fit on the given frame.'''
    transformer = StandardScaler().fit(df)  # learn each column's mean and std
    newX = transformer.transform(df)        # apply z = (x - u) / s
    X = pd.DataFrame(newX, columns=df.columns)
    return X

X_train = standardize(X_train) # standardize the training features
X_test = standardize(X_test)   # note: this fits a second, separate scaler on the test set
X_train
#>       MSSubClass  MSZoningRM  ...  SaleTypeWD  SaleConditionOth
#> 0       0.074674   -0.517138  ...    0.389312         -0.465778
#> 1      -0.874282   -0.517138  ...    0.389312         -0.465778
#> 2       0.074674   -0.517138  ...    0.389312         -0.465778
#> 3       0.311913   -0.517138  ...    0.389312          2.146946
#> 4       0.074674   -0.517138  ...    0.389312         -0.465778
#> ...          ...         ...  ...         ...               ...
#> 1431   -0.874282    1.933720  ...    0.389312         -0.465778
#> 1432    0.074674   -0.517138  ...    0.389312         -0.465778
#> 1433   -0.874282   -0.517138  ...    0.389312         -0.465778
#> 1434    0.311913   -0.517138  ...    0.389312         -0.465778
#> 1435   -0.874282   -0.517138  ...    0.389312         -0.465778
#> 
#> [1436 rows x 109 columns]
X_train.describe()
#>          MSSubClass    MSZoningRM  ...    SaleTypeWD  SaleConditionOth
#> count  1.436000e+03  1.436000e+03  ...  1.436000e+03      1.436000e+03
#> mean  -5.721205e-17  3.101821e-16  ... -3.223203e-16     -1.622039e-16
#> std    1.000348e+00  1.000348e+00  ...  1.000348e+00      1.000348e+00
#> min   -8.742817e-01 -5.171379e-01  ... -2.568635e+00     -4.657780e-01
#> 25%   -8.742817e-01 -5.171379e-01  ...  3.893119e-01     -4.657780e-01
#> 50%   -1.625649e-01 -5.171379e-01  ...  3.893119e-01     -4.657780e-01
#> 75%    3.119131e-01 -5.171379e-01  ...  3.893119e-01     -4.657780e-01
#> max    3.158781e+00  1.933720e+00  ...  3.893119e-01      2.146946e+00
#> 
#> [8 rows x 109 columns]
X_train.shape[1]
#> 109
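One caveat worth flagging: standardize() fits a separate scaler on the test set, so train and test columns end up scaled by slightly different means and standard deviations. A stricter alternative (a sketch, not what was run above) fits the scaler on the training data only and reuses it for the test data:

scaler = StandardScaler().fit(X_train)       # learn statistics from training data only
X_train_scaled = pd.DataFrame(scaler.transform(X_train), columns=X_train.columns)
X_test_scaled = pd.DataFrame(scaler.transform(X_test), columns=X_test.columns)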

Neural Network Building

from keras import models
#> Using TensorFlow backend.
from keras import layers

model = models.Sequential()
model.add(layers.Dense(64, activation='relu', input_shape=(X_train.shape[1],))) # 109 input features
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(1)) # single linear output unit for regression

model.compile(optimizer='rmsprop', loss='mse', metrics=['mae']) # minimize mean squared error, monitor mean absolute error
model.summary()
#> Model: "sequential_1"
#> _________________________________________________________________
#> Layer (type)                 Output Shape              Param #   
#> =================================================================
#> dense_1 (Dense)              (None, 64)                7040      
#> _________________________________________________________________
#> dense_2 (Dense)              (None, 64)                4160      
#> _________________________________________________________________
#> dense_3 (Dense)              (None, 1)                 65        
#> =================================================================
#> Total params: 11,265
#> Trainable params: 11,265
#> Non-trainable params: 0
#> _________________________________________________________________
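The parameter counts in the summary can be verified by hand: a Dense layer holds inputs × units weights plus units biases.

p1 = 109 * 64 + 64 # first layer: 7040 parameters
p2 = 64 * 64 + 64  # second layer: 4160
p3 = 64 * 1 + 1    # output layer: 65
p1 + p2 + p3
#> 11265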
#?models.Sequential.fit
model.fit(X_train,Y_train,
    epochs = 20, # Number of epochs to train the model
    batch_size = 512) # Number of samples per gradient update.
#> Epoch 1/20
#> 1436/1436 [==============================] - 0s 192us/step - loss: 156.0890 - mae: 12.4727
#> Epoch 2/20
#> 1436/1436 [==============================] - 0s 20us/step - loss: 131.2387 - mae: 11.4348
#> Epoch 3/20
#> 1436/1436 [==============================] - 0s 31us/step - loss: 114.3489 - mae: 10.6617
#> Epoch 4/20
#> 1436/1436 [==============================] - 0s 22us/step - loss: 99.2716 - mae: 9.9125
#> Epoch 5/20
#> 1436/1436 [==============================] - 0s 12us/step - loss: 84.9596 - mae: 9.1413
#> Epoch 6/20
#> 1436/1436 [==============================] - 0s 6us/step - loss: 71.2614 - mae: 8.3335
#> Epoch 7/20
#> 1436/1436 [==============================] - 0s 5us/step - loss: 58.3051 - mae: 7.4878
#> Epoch 8/20
#> 1436/1436 [==============================] - 0s 5us/step - loss: 46.3172 - mae: 6.6052
#> Epoch 9/20
#> 1436/1436 [==============================] - 0s 5us/step - loss: 35.6208 - mae: 5.7022
#> Epoch 10/20
#> 1436/1436 [==============================] - 0s 6us/step - loss: 26.5030 - mae: 4.8105
#> Epoch 11/20
#> 1436/1436 [==============================] - 0s 6us/step - loss: 19.1521 - mae: 3.9743
#> Epoch 12/20
#> 1436/1436 [==============================] - 0s 6us/step - loss: 13.5940 - mae: 3.2470
#> Epoch 13/20
#> 1436/1436 [==============================] - 0s 6us/step - loss: 9.6409 - mae: 2.6591
#> Epoch 14/20
#> 1436/1436 [==============================] - 0s 5us/step - loss: 6.9790 - mae: 2.2061
#> Epoch 15/20
#> 1436/1436 [==============================] - 0s 5us/step - loss: 5.2444 - mae: 1.8725
#> Epoch 16/20
#> 1436/1436 [==============================] - 0s 8us/step - loss: 4.0995 - mae: 1.6362
#> Epoch 17/20
#> 1436/1436 [==============================] - 0s 5us/step - loss: 3.3192 - mae: 1.4555
#> Epoch 18/20
#> 1436/1436 [==============================] - 0s 6us/step - loss: 2.7599 - mae: 1.3189
#> Epoch 19/20
#> 1436/1436 [==============================] - 0s 4us/step - loss: 2.3355 - mae: 1.2080
#> Epoch 20/20
#> 1436/1436 [==============================] - 0s 6us/step - loss: 2.0144 - mae: 1.1152
#> <keras.callbacks.callbacks.History object at 0x0000000055D1E7F0>

Validation data

First, I create a validation set from the first 1,000 training rows. Note that model.fit() below continues training the model from the previous section rather than starting fresh, which is why the losses begin far lower than before:

x_val = X_train[:1000]           # first 1000 rows held out for validation
partial_x_train = X_train[1000:] # remaining 436 rows used for training
y_val = Y_train[:1000]
partial_y_train = Y_train[1000:]
history = model.fit(partial_x_train,partial_y_train,
    epochs=20,
    batch_size=512,
    validation_data=(x_val, y_val))
#> Train on 436 samples, validate on 1000 samples
#> Epoch 1/20
#> 436/436 [==============================] - 0s 91us/step - loss: 1.7173 - mae: 1.0405 - val_loss: 1.8037 - val_mae: 1.0472
#> Epoch 2/20
#> 436/436 [==============================] - 0s 16us/step - loss: 1.5934 - mae: 0.9974 - val_loss: 1.7621 - val_mae: 1.0344
#> Epoch 3/20
#> 436/436 [==============================] - 0s 32us/step - loss: 1.4883 - mae: 0.9604 - val_loss: 1.7243 - val_mae: 1.0229
#> Epoch 4/20
#> 436/436 [==============================] - 0s 23us/step - loss: 1.3960 - mae: 0.9270 - val_loss: 1.6895 - val_mae: 1.0122
#> Epoch 5/20
#> 436/436 [==============================] - 0s 14us/step - loss: 1.3135 - mae: 0.8962 - val_loss: 1.6574 - val_mae: 1.0019
#> Epoch 6/20
#> 436/436 [==============================] - 0s 21us/step - loss: 1.2386 - mae: 0.8681 - val_loss: 1.6276 - val_mae: 0.9919
#> Epoch 7/20
#> 436/436 [==============================] - 0s 14us/step - loss: 1.1705 - mae: 0.8418 - val_loss: 1.6002 - val_mae: 0.9827
#> Epoch 8/20
#> 436/436 [==============================] - 0s 21us/step - loss: 1.1080 - mae: 0.8182 - val_loss: 1.5747 - val_mae: 0.9741
#> Epoch 9/20
#> 436/436 [==============================] - 0s 16us/step - loss: 1.0504 - mae: 0.7967 - val_loss: 1.5509 - val_mae: 0.9658
#> Epoch 10/20
#> 436/436 [==============================] - 0s 18us/step - loss: 0.9969 - mae: 0.7762 - val_loss: 1.5284 - val_mae: 0.9580
#> Epoch 11/20
#> 436/436 [==============================] - 0s 14us/step - loss: 0.9469 - mae: 0.7570 - val_loss: 1.5073 - val_mae: 0.9510
#> Epoch 12/20
#> 436/436 [==============================] - 0s 21us/step - loss: 0.9002 - mae: 0.7386 - val_loss: 1.4875 - val_mae: 0.9441
#> Epoch 13/20
#> 436/436 [==============================] - 0s 16us/step - loss: 0.8564 - mae: 0.7209 - val_loss: 1.4690 - val_mae: 0.9375
#> Epoch 14/20
#> 436/436 [==============================] - 0s 16us/step - loss: 0.8152 - mae: 0.7037 - val_loss: 1.4513 - val_mae: 0.9312
#> Epoch 15/20
#> 436/436 [==============================] - 0s 14us/step - loss: 0.7761 - mae: 0.6872 - val_loss: 1.4346 - val_mae: 0.9251
#> Epoch 16/20
#> 436/436 [==============================] - 0s 21us/step - loss: 0.7389 - mae: 0.6714 - val_loss: 1.4185 - val_mae: 0.9192
#> Epoch 17/20
#> 436/436 [==============================] - 0s 14us/step - loss: 0.7035 - mae: 0.6557 - val_loss: 1.4032 - val_mae: 0.9137
#> Epoch 18/20
#> 436/436 [==============================] - 0s 16us/step - loss: 0.6697 - mae: 0.6406 - val_loss: 1.3886 - val_mae: 0.9085
#> Epoch 19/20
#> 436/436 [==============================] - 0s 16us/step - loss: 0.6376 - mae: 0.6257 - val_loss: 1.3746 - val_mae: 0.9037
#> Epoch 20/20
#> 436/436 [==============================] - 0s 11us/step - loss: 0.6069 - mae: 0.6108 - val_loss: 1.3612 - val_mae: 0.8993

Extract the training and validation loss from the fit history:

history_dict = history.history
loss_values = history_dict['loss']
val_loss_values = history_dict['val_loss']
epochs = range(1, len(loss_values) + 1)
cv_info = pd.DataFrame({'epochs':epochs,'loss_values':loss_values,'val_loss_values':val_loss_values})  
cv_info
#>     epochs  loss_values  val_loss_values
#> 0        1     1.717334         1.803684
#> 1        2     1.593355         1.762118
#> 2        3     1.488282         1.724254
#> 3        4     1.396037         1.689537
#> 4        5     1.313484         1.657389
#> 5        6     1.238639         1.627649
#> 6        7     1.170498         1.600241
#> 7        8     1.108029         1.574710
#> 8        9     1.050379         1.550859
#> 9       10     0.996855         1.528398
#> 10      11     0.946863         1.507346
#> 11      12     0.900190         1.487519
#> 12      13     0.856390         1.469003
#> 13      14     0.815178         1.451284
#> 14      15     0.776070         1.434589
#> 15      16     0.738918         1.418455
#> 16      17     0.703458         1.403198
#> 17      18     0.669743         1.388627
#> 18      19     0.637636         1.374586
#> 19      20     0.606895         1.361161
cv_info = pd.melt(cv_info, id_vars=['epochs'], value_vars=['loss_values', 'val_loss_values']) # reshape to long format for plotting
from plotnine import *

(
ggplot(cv_info,aes('epochs','value',color = 'variable')) +
geom_line() +
geom_point()
)
#> <ggplot: (-9223372036742099754)>
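Both curves are still falling after 20 epochs, but once the validation loss flattens out, further training only overfits. A minimal sketch (not used in this run) that stops automatically via Keras's EarlyStopping callback:

from keras.callbacks import EarlyStopping

early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
history = model.fit(partial_x_train, partial_y_train,
    epochs=200, batch_size=512,
    validation_data=(x_val, y_val),
    callbacks=[early_stop]) # halts when val_loss stops improving for 5 epochs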

Prediction

Y_pred = model.predict(X_test)
Y_pred
#> array([[12.1244545],
#>        [11.108633 ],
#>        [11.637138 ],
#>        ...,
#>        [13.850517 ],
#>        [11.266023 ],
#>        [10.169427 ]], dtype=float32)
Y_pred.shape
#> (1459, 1)
Y_pred = Y_pred.ravel() # flatten the (1459, 1) array to one dimension
Y_pred = np.expm1(Y_pred) # invert the earlier log1p transform of the target
Y_pred
#> array([ 184323.77 ,   66743.89 ,  113224.69 , ..., 1035625.7  ,
#>          78120.664,   26092.12 ], dtype=float32)
test = pd.read_csv('E:/some_code/py_basic/house_price/data/test.csv') # original test file, for its Id column
submission = pd.DataFrame({'id': test['Id'], 'SalePrice': Y_pred}) # note: Kaggle's sample submission names this column 'Id'
submission.head(10)
#>      id      SalePrice
#> 0  1461  184323.765625
#> 1  1462   66743.890625
#> 2  1463  113224.687500
#> 3  1464   35646.246094
#> 4  1465   73891.406250
#> 5  1466  113308.296875
#> 6  1467    5187.318359
#> 7  1468  121809.898438
#> 8  1469   19249.541016
#> 9  1470  121370.992188
#submission.to_csv('./output/keras.csv',index = False) # save result