Guide to Keras Deep Learning

Xiao Song

https://xsong.ltd/

As a beginner, I find it helpful to write simple guides using simple data. This one fits a small Keras regression model to prepared house-price data.

import numpy as np 
import pandas as pd

X_train = pd.read_csv('E:/some_code/py_basic/house_price/data/train1.csv') # prepared training data
X_test = pd.read_csv('E:/some_code/py_basic/house_price/data/test1.csv')   # prepared test data

Y_train = np.array(X_train['SalePrice'])
Y_train = np.log1p(Y_train) # log-transform the skewed target: log(1 + y)

X_train.drop(['SalePrice'], axis=1, inplace=True) # keep only the features
Y_train
#> array([12.24769912, 12.10901644, 12.31717117, ..., 12.25486757,
#>        12.49313327, 11.86446927])
X_train.shape
#> (1436, 109)
X_test.shape
#> (1459, 109)
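
Both frames should share exactly the same 109 feature columns, since they come out of the same preprocessing step. A quick optional check (my addition, just an assertion that fails loudly if the columns ever diverge):

# optional sanity check: train and test must have identical feature columns
assert list(X_train.columns) == list(X_test.columns)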

Feature Standardization

Standardize each feature by removing the mean and scaling to unit variance: \[z = \frac{x - u}{s}\]

\(z\) is the standardized feature, \(u\) is the mean of \(x\), and \(s\) is the standard deviation of \(x\).
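
As a quick illustration of the formula with made-up numbers (not the house-price data):

x = np.array([1.0, 2.0, 3.0, 4.0])
(x - x.mean()) / x.std() # result has mean 0 and standard deviation 1
#> array([-1.34164079, -0.4472136 ,  0.4472136 ,  1.34164079])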

#from sklearn.preprocessing import RobustScaler
from sklearn.preprocessing import StandardScaler

def standardize(df):
    '''Standardize all features of a DataFrame.'''
    transformer = StandardScaler().fit(df)  # learn mean and std of each column
    newX = transformer.transform(df)        # apply (x - u) / s
    X = pd.DataFrame(newX, columns=df.columns)
    return X

X_train = standardize(X_train) # scale each feature to mean 0, variance 1
X_test = standardize(X_test)
X_train
#>       MSSubClass  MSZoningRM  ...  SaleTypeWD  SaleConditionOth
#> 0       0.074674   -0.517138  ...    0.389312         -0.465778
#> 1      -0.874282   -0.517138  ...    0.389312         -0.465778
#> 2       0.074674   -0.517138  ...    0.389312         -0.465778
#> 3       0.311913   -0.517138  ...    0.389312          2.146946
#> 4       0.074674   -0.517138  ...    0.389312         -0.465778
#> ...          ...         ...  ...         ...               ...
#> 1431   -0.874282    1.933720  ...    0.389312         -0.465778
#> 1432    0.074674   -0.517138  ...    0.389312         -0.465778
#> 1433   -0.874282   -0.517138  ...    0.389312         -0.465778
#> 1434    0.311913   -0.517138  ...    0.389312         -0.465778
#> 1435   -0.874282   -0.517138  ...    0.389312         -0.465778
#> 
#> [1436 rows x 109 columns]
X_train.describe()
#>          MSSubClass    MSZoningRM  ...    SaleTypeWD  SaleConditionOth
#> count  1.436000e+03  1.436000e+03  ...  1.436000e+03      1.436000e+03
#> mean  -5.721205e-17  3.101821e-16  ... -3.223203e-16     -1.622039e-16
#> std    1.000348e+00  1.000348e+00  ...  1.000348e+00      1.000348e+00
#> min   -8.742817e-01 -5.171379e-01  ... -2.568635e+00     -4.657780e-01
#> 25%   -8.742817e-01 -5.171379e-01  ...  3.893119e-01     -4.657780e-01
#> 50%   -1.625649e-01 -5.171379e-01  ...  3.893119e-01     -4.657780e-01
#> 75%    3.119131e-01 -5.171379e-01  ...  3.893119e-01     -4.657780e-01
#> max    3.158781e+00  1.933720e+00  ...  3.893119e-01      2.146946e+00
#> 
#> [8 rows x 109 columns]
X_train.shape[1]
#> 109
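
One thing to be aware of: standardize() fits a separate StandardScaler on each DataFrame, so the test features are scaled by their own mean and standard deviation. A common alternative (a sketch using the same StandardScaler API; X_test_scaled is a name I made up) is to fit the scaler on the raw training features only and reuse it:

scaler = StandardScaler().fit(X_train) # learn u and s from the training data only
X_test_scaled = pd.DataFrame(scaler.transform(X_test), columns=X_test.columns)

Fitting on the training data only avoids leaking information from the test set into the scaling.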

Neural Network Building

from keras import models
#> Using TensorFlow backend.
from keras import layers

model = models.Sequential()
model.add(layers.Dense(64, activation='relu', input_shape=(X_train.shape[1],)))
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(1)) # single linear output for regression

model.compile(optimizer='rmsprop', loss='mse', metrics=['mae'])
model.summary()
#> Model: "sequential_1"
#> _________________________________________________________________
#> Layer (type)                 Output Shape              Param #   
#> =================================================================
#> dense_1 (Dense)              (None, 64)                7040      
#> _________________________________________________________________
#> dense_2 (Dense)              (None, 64)                4160      
#> _________________________________________________________________
#> dense_3 (Dense)              (None, 1)                 65        
#> =================================================================
#> Total params: 11,265
#> Trainable params: 11,265
#> Non-trainable params: 0
#> _________________________________________________________________
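
The parameter counts follow from the layer sizes: a Dense layer has (inputs + 1) × units parameters, the +1 being the bias. A quick check:

(109 + 1) * 64 # dense_1: 109 inputs plus a bias, 64 units
#> 7040
(64 + 1) * 64  # dense_2
#> 4160
64 + 1         # dense_3: single output
#> 65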
model.fit(X_train,Y_train,
    epochs = 20, # Number of epochs to train the model
    batch_size = 512) # Number of samples per gradient update.
#> Epoch 1/20
#> 1436/1436 [==============================] - 0s 199us/step - loss: 135.3658 - mae: 11.5974
#> Epoch 2/20
#> 1436/1436 [==============================] - 0s 8us/step - loss: 101.2545 - mae: 10.0123
#> Epoch 3/20
#> 1436/1436 [==============================] - 0s 10us/step - loss: 78.4810 - mae: 8.7784
#> Epoch 4/20
#> 1436/1436 [==============================] - 0s 6us/step - loss: 59.7942 - mae: 7.6039
#> Epoch 5/20
#> 1436/1436 [==============================] - 0s 6us/step - loss: 44.1790 - mae: 6.4470
#> Epoch 6/20
#> 1436/1436 [==============================] - 0s 5us/step - loss: 31.4665 - mae: 5.3223
#> Epoch 7/20
#> 1436/1436 [==============================] - 0s 6us/step - loss: 21.6761 - mae: 4.2684
#> Epoch 8/20
#> 1436/1436 [==============================] - 0s 4us/step - loss: 14.6640 - mae: 3.3825
#> Epoch 9/20
#> 1436/1436 [==============================] - 0s 6us/step - loss: 9.9929 - mae: 2.6920
#> Epoch 10/20
#> 1436/1436 [==============================] - 0s 6us/step - loss: 7.0364 - mae: 2.1884
#> Epoch 11/20
#> 1436/1436 [==============================] - 0s 6us/step - loss: 5.2163 - mae: 1.8504
#> Epoch 12/20
#> 1436/1436 [==============================] - 0s 6us/step - loss: 4.0612 - mae: 1.6228
#> Epoch 13/20
#> 1436/1436 [==============================] - 0s 6us/step - loss: 3.2758 - mae: 1.4584
#> Epoch 14/20
#> 1436/1436 [==============================] - 0s 5us/step - loss: 2.7333 - mae: 1.3287
#> Epoch 15/20
#> 1436/1436 [==============================] - 0s 5us/step - loss: 2.3305 - mae: 1.2254
#> Epoch 16/20
#> 1436/1436 [==============================] - 0s 5us/step - loss: 2.0266 - mae: 1.1405
#> Epoch 17/20
#> 1436/1436 [==============================] - 0s 5us/step - loss: 1.7764 - mae: 1.0659
#> Epoch 18/20
#> 1436/1436 [==============================] - 0s 6us/step - loss: 1.5834 - mae: 1.0052
#> Epoch 19/20
#> 1436/1436 [==============================] - 0s 6us/step - loss: 1.4131 - mae: 0.9512
#> Epoch 20/20
#> 1436/1436 [==============================] - 0s 6us/step - loss: 1.2894 - mae: 0.9093
#> <keras.callbacks.callbacks.History object at 0x0000000059D70780>

Validation Data

First, I hold part of the training data out as a validation set. Note that model.fit below continues training the same model, picking up from the weights learned above:

x_val = X_train[:1000]
partial_x_train = X_train[1000:]
y_val = Y_train[:1000]
partial_y_train = Y_train[1000:]
history = model.fit(partial_x_train,partial_y_train,
    epochs=20,
    batch_size=512,
    validation_data=(x_val, y_val))
#> Train on 436 samples, validate on 1000 samples
#> Epoch 1/20
#> 436/436 [==============================] - 0s 82us/step - loss: 1.0230 - mae: 0.8044 - val_loss: 1.2331 - val_mae: 0.8914
#> Epoch 2/20
#> 436/436 [==============================] - 0s 16us/step - loss: 0.9380 - mae: 0.7748 - val_loss: 1.2222 - val_mae: 0.8869
#> Epoch 3/20
#> 436/436 [==============================] - 0s 14us/step - loss: 0.8750 - mae: 0.7495 - val_loss: 1.2142 - val_mae: 0.8839
#> Epoch 4/20
#> 436/436 [==============================] - 0s 14us/step - loss: 0.8200 - mae: 0.7264 - val_loss: 1.2062 - val_mae: 0.8807
#> Epoch 5/20
#> 436/436 [==============================] - 0s 15us/step - loss: 0.7705 - mae: 0.7045 - val_loss: 1.1986 - val_mae: 0.8777
#> Epoch 6/20
#> 436/436 [==============================] - 0s 18us/step - loss: 0.7253 - mae: 0.6843 - val_loss: 1.1911 - val_mae: 0.8747
#> Epoch 7/20
#> 436/436 [==============================] - 0s 14us/step - loss: 0.6837 - mae: 0.6651 - val_loss: 1.1835 - val_mae: 0.8712
#> Epoch 8/20
#> 436/436 [==============================] - 0s 15us/step - loss: 0.6453 - mae: 0.6467 - val_loss: 1.1764 - val_mae: 0.8679
#> Epoch 9/20
#> 436/436 [==============================] - 0s 14us/step - loss: 0.6095 - mae: 0.6287 - val_loss: 1.1691 - val_mae: 0.8643
#> Epoch 10/20
#> 436/436 [==============================] - 0s 17us/step - loss: 0.5758 - mae: 0.6111 - val_loss: 1.1620 - val_mae: 0.8608
#> Epoch 11/20
#> 436/436 [==============================] - 0s 13us/step - loss: 0.5441 - mae: 0.5941 - val_loss: 1.1553 - val_mae: 0.8575
#> Epoch 12/20
#> 436/436 [==============================] - 0s 14us/step - loss: 0.5141 - mae: 0.5772 - val_loss: 1.1481 - val_mae: 0.8540
#> Epoch 13/20
#> 436/436 [==============================] - 0s 14us/step - loss: 0.4857 - mae: 0.5606 - val_loss: 1.1417 - val_mae: 0.8508
#> Epoch 14/20
#> 436/436 [==============================] - 0s 11us/step - loss: 0.4589 - mae: 0.5445 - val_loss: 1.1346 - val_mae: 0.8475
#> Epoch 15/20
#> 436/436 [==============================] - 0s 14us/step - loss: 0.4334 - mae: 0.5287 - val_loss: 1.1286 - val_mae: 0.8446
#> Epoch 16/20
#> 436/436 [==============================] - 0s 14us/step - loss: 0.4094 - mae: 0.5136 - val_loss: 1.1220 - val_mae: 0.8416
#> Epoch 17/20
#> 436/436 [==============================] - 0s 14us/step - loss: 0.3867 - mae: 0.4985 - val_loss: 1.1162 - val_mae: 0.8389
#> Epoch 18/20
#> 436/436 [==============================] - 0s 14us/step - loss: 0.3651 - mae: 0.4839 - val_loss: 1.1094 - val_mae: 0.8357
#> Epoch 19/20
#> 436/436 [==============================] - 0s 14us/step - loss: 0.3446 - mae: 0.4697 - val_loss: 1.1043 - val_mae: 0.8332
#> Epoch 20/20
#> 436/436 [==============================] - 0s 16us/step - loss: 0.3251 - mae: 0.4561 - val_loss: 1.0965 - val_mae: 0.8298
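
The validation loss is what tells you when to stop: once it stops improving, further epochs only overfit. A sketch of how this could be automated with Keras's EarlyStopping callback (standard keras.callbacks API; the patience value of 5 is an arbitrary choice, not tuned):

from keras.callbacks import EarlyStopping

early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
history = model.fit(partial_x_train, partial_y_train,
    epochs=100, # upper bound; training stops early
    batch_size=512,
    validation_data=(x_val, y_val),
    callbacks=[early_stop]) # stop once val_loss fails to improve for 5 epochs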

Extract the loss history for the training and validation sets:

history_dict = history.history
loss_values = history_dict['loss']
val_loss_values = history_dict['val_loss']
epochs = range(1, len(loss_values) + 1)
cv_info = pd.DataFrame({'epochs':epochs,'loss_values':loss_values,'val_loss_values':val_loss_values})  
cv_info
#>     epochs  loss_values  val_loss_values
#> 0        1     1.023004         1.233107
#> 1        2     0.937960         1.222195
#> 2        3     0.875026         1.214215
#> 3        4     0.819979         1.206171
#> 4        5     0.770477         1.198560
#> 5        6     0.725280         1.191091
#> 6        7     0.683680         1.183486
#> 7        8     0.645255         1.176383
#> 8        9     0.609498         1.169093
#> 9       10     0.575821         1.162020
#> 10      11     0.544090         1.155334
#> 11      12     0.514143         1.148096
#> 12      13     0.485749         1.141719
#> 13      14     0.458888         1.134641
#> 14      15     0.433444         1.128641
#> 15      16     0.409398         1.121972
#> 16      17     0.386678         1.116163
#> 17      18     0.365066         1.109400
#> 18      19     0.344581         1.104280
#> 19      20     0.325092         1.096466
cv_info = pd.melt(cv_info, id_vars=['epochs'], value_vars=['loss_values', 'val_loss_values']) 
from plotnine import *

(
ggplot(cv_info,aes('epochs','value',color = 'variable')) +
geom_line() +
geom_point()
)
#> <ggplot: (112683391)>
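
Both curves are still falling, so training longer may help. The best epoch so far can be read straight from the val_loss_values list extracted above (a small sketch):

int(np.argmin(val_loss_values)) + 1 # epochs are 1-indexed; lowest val_loss so far
#> 20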

Prediction

Y_pred = model.predict(X_test)
Y_pred
#> array([[11.546236],
#>        [12.2388  ],
#>        [12.317739],
#>        ...,
#>        [11.350479],
#>        [10.587808],
#>        [12.461804]], dtype=float32)
Y_pred.shape
#> (1459, 1)
Y_pred = Y_pred.ravel()   # flatten the (1459, 1) array to a 1-D vector
Y_pred = np.expm1(Y_pred) # invert the earlier log1p transform
Y_pred
#> array([103386.16, 206652.77, 223626.84, ...,  85005.17,  39647.47,
#>        258280.25], dtype=float32)
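
np.expm1 is the exact inverse of the np.log1p applied to SalePrice at the start, so the predictions land back on the original price scale. An illustrative round trip with made-up prices:

y = np.array([100000.0, 250000.0])
np.allclose(np.expm1(np.log1p(y)), y) # the transform round-trips
#> True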
test = pd.read_csv('E:/some_code/py_basic/house_price/data/test.csv') # original test file, for the Id column
submission = pd.DataFrame({'id': test['Id'], 'SalePrice': Y_pred})
submission.head(10)
#>      id      SalePrice
#> 0  1461  103386.156250
#> 1  1462  206652.765625
#> 2  1463  223626.843750
#> 3  1464  182581.406250
#> 4  1465  269515.000000
#> 5  1466  143906.703125
#> 6  1467   25592.142578
#> 7  1468  377555.125000
#> 8  1469   57377.015625
#> 9  1470  185135.921875
#submission.to_csv('./output/keras.csv',index = False) # save result