In [5]:
#libraries
import pandas as pd
import numpy as np
from datetime import datetime
import csv
import os
from os import listdir
import json
import sys
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import RNN
#from keras.utils.np_utils import to_categorical
import keras.backend as K
from keras import regularizers, optimizers
from keras.models import load_model
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import RepeatedKFold
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import r2_score
from sklearn import tree
#from sklearn.externals.six import StringIO
#import six
from six import StringIO
from IPython.display import Image
from sklearn.tree import export_graphviz
import pydotplus

np.random.seed(2018)
2023-08-23 13:08:02.770155: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-08-23 13:08:03.655052: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-08-23 13:08:03.661031: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-08-23 13:08:06.240519: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
In [6]:
import pymysql

#establish the connection to the mysql database
host = "192.168.88.187"
port = "3306"
user = "backblaze"
password = "Testing.2023"
database = "backblaze_ml_full"

conn = pymysql.connect(
    host=host,
    port=int(port),
    user=user,
    passwd=password,
    db=database,
    charset='utf8mb4')
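
Hard-coding the database credentials is fine for a one-off local experiment; a small sketch of reading them from environment variables instead (the BACKBLAZE_DB_* names are placeholders, not an existing convention) would look like this:

In [ ]:
# Sketch only: pull the connection settings from the environment instead of the notebook,
# falling back to the values defined above where a default makes sense.
host = os.environ.get("BACKBLAZE_DB_HOST", host)
port = os.environ.get("BACKBLAZE_DB_PORT", port)
user = os.environ.get("BACKBLAZE_DB_USER", user)
password = os.environ["BACKBLAZE_DB_PASSWORD"]      # no default: fail loudly if unset
database = os.environ.get("BACKBLAZE_DB_NAME", database)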
In [7]:
sqldf = pd.read_sql_query("select date, serial_number, model, capacity_bytes, days_to_failure, failure, smart_1_normalized, smart_3_normalized, smart_5_normalized, smart_7_normalized, smart_9_normalized, smart_187_normalized, smart_189_normalized, smart_194_normalized, smart_197_normalized from drive_stats where date >= '2014-03-01' and serial_number in (select distinct(serial_number) from drive_stats where failure=1 and date >= '2014-03-01')", conn)
sqldf
/tmp/ipykernel_2225929/1261091465.py:1: UserWarning: pandas only supports SQLAlchemy connectable (engine/connection) or database string URI or sqlite3 DBAPI2 connection. Other DBAPI2 objects are not tested. Please consider using SQLAlchemy.
  sqldf = pd.read_sql_query("select date, serial_number, model, capacity_bytes, days_to_failure, failure, smart_1_normalized, smart_3_normalized, smart_5_normalized, smart_7_normalized, smart_9_normalized, smart_187_normalized, smart_189_normalized, smart_194_normalized, smart_197_normalized from drive_stats where date >= '2014-03-01' and serial_number in (select distinct(serial_number) from drive_stats where failure=1 and date >= '2014-03-01')", conn)
Out[7]:
date serial_number model capacity_bytes days_to_failure failure smart_1_normalized smart_3_normalized smart_5_normalized smart_7_normalized smart_9_normalized smart_187_normalized smart_189_normalized smart_194_normalized smart_197_normalized
0 2014-03-01 MJ1311YNG36USA Hitachi HDS5C3030ALA630 3000592982016 991 0 100.0 138.0 100.0 100.0 98.0 NaN NaN 253.0 100.0
1 2014-03-01 MJ1311YNG733NA Hitachi HDS5C3030ALA630 3000592982016 840 0 100.0 100.0 100.0 100.0 98.0 NaN NaN 250.0 100.0
2 2014-03-01 W3009AX6 ST4000DM000 4000787030016 54 0 119.0 91.0 100.0 87.0 93.0 100.0 99.0 26.0 100.0
3 2014-03-01 WD-WCAV5M690585 WDC WD10EADS 1000204886016 409 0 200.0 191.0 200.0 100.0 68.0 NaN NaN 127.0 200.0
4 2014-03-01 S1F0CSW2 ST3000DM001 3000592982016 229 0 114.0 92.0 100.0 89.0 84.0 100.0 100.0 23.0 100.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
14769522 2023-03-31 7LZ01G30 Seagate BarraCuda SSD ZA250CM10002 250059350016 0 0 100.0 NaN NaN NaN 100.0 NaN NaN 83.0 NaN
14769523 2023-03-31 9JG4657T WDC WUH721414ALE6L4 14000519643136 0 0 100.0 85.0 100.0 100.0 98.0 NaN NaN 55.0 100.0
14769524 2023-03-31 6090A00RFVKG TOSHIBA MG08ACA16TA 16000900661248 0 0 100.0 100.0 100.0 100.0 87.0 NaN NaN 100.0 100.0
14769525 2023-03-31 51R0A2Q8FVGG TOSHIBA MG08ACA16TE 16000900661248 0 0 100.0 100.0 100.0 100.0 70.0 NaN NaN 100.0 100.0
14769526 2023-03-31 7QT032NR Seagate BarraCuda 120 SSD ZA250CM10003 250059350016 0 0 100.0 NaN NaN NaN 100.0 NaN NaN 96.0 NaN

14769527 rows × 15 columns
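
The pandas UserWarning above appears because read_sql_query is handed a raw pymysql connection object. One way to avoid it, as a sketch that reuses the same connection variables, is to give pandas an SQLAlchemy engine instead:

In [ ]:
# Sketch only: same query through an SQLAlchemy engine, which pandas supports directly.
from sqlalchemy import create_engine

engine = create_engine(
    f"mysql+pymysql://{user}:{password}@{host}:{port}/{database}?charset=utf8mb4"
)
#sqldf = pd.read_sql_query("select ... from drive_stats ...", engine)   # same query string as above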

Functions¶

In [8]:
def computeDay(group):
  group = group.sort_values('date')    # sort one drive's rows by date, oldest first
  group['DayToFailure'] = list(range(group.shape[0]-1, -1, -1))   # count down to 0 on the last observed day
  return group

#variant of series_to_supervised that keeps a single regression target (DayToFailure) instead of class labels
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
  n_vars = data.shape[1]
  cols, names = list(), list()
  dataclass = data[data.columns[-1:]]                       # last column is the target (DayToFailure)
  data = data.drop(columns=['serial_number', 'DayToFailure'], axis=1)
  columns = data.columns
  # input sequence (t-n+1, ..., t-1); the current observation t is added in the next loop
  for i in range(n_in-1, 0, -1):
    cols.append(data.shift(i))
    names += [(element + '(t-%d)' % (i)) for element in columns]

  # forecast sequence (t, t+1, ..., t+n_out-1)
  for i in range(0, n_out):
    cols.append(data.shift(-i))
    if i == 0:
      names += [(element + '(t)') for element in columns]
    else:
      names += [(element + '(t+%d)' % (i)) for element in columns]

  cols.append(dataclass)   # append the target column
  names += ['DayToFailure']

  agg = pd.concat(cols, axis=1)
  agg.columns = names
  if dropnan:
    agg.dropna(inplace=True)

  return agg
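
A quick way to see what series_to_supervised produces is to run it on a tiny synthetic group (a sketch; the toy columns a and b are placeholders standing in for the SMART attributes, mimicking one drive's rows after computeDay):

In [ ]:
# Sketch only: 2 features, 4 rows, window of 3 -> lagged columns plus the DayToFailure target.
toy = pd.DataFrame({
    'serial_number': ['X'] * 4,
    'a': [1, 2, 3, 4],
    'b': [10, 20, 30, 40],
    'DayToFailure': [3, 2, 1, 0],
})
print(series_to_supervised(toy, n_in=3, n_out=1))
# columns: a(t-2), b(t-2), a(t-1), b(t-1), a(t), b(t), DayToFailure;
# the first two rows are dropped because their lagged values are NaN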
In [9]:
#Preprocessing: drop unused columns, parse the date, and scale the SMART attributes to [-1, 1]

df = sqldf.copy()
df = df.drop(columns=['model', 'capacity_bytes'])

df.date = pd.to_datetime(df.date, format='%Y-%m-%d').dt.date

smart_cols = ['smart_1_normalized', 'smart_3_normalized', 'smart_5_normalized',
              'smart_7_normalized', 'smart_9_normalized', 'smart_187_normalized',
              'smart_189_normalized', 'smart_194_normalized', 'smart_197_normalized']

scaler = MinMaxScaler(feature_range=(-1, 1))
df[smart_cols] = scaler.fit_transform(df[smart_cols])

df
Out[9]:
date serial_number days_to_failure failure smart_1_normalized smart_3_normalized smart_5_normalized smart_7_normalized smart_9_normalized smart_187_normalized smart_189_normalized smart_194_normalized smart_197_normalized
0 2014-03-01 MJ1311YNG36USA 991 0 -0.005025 -0.256831 -0.211155 -0.211155 -0.227092 NaN NaN 1.000000 -0.211155
1 2014-03-01 MJ1311YNG733NA 840 0 -0.005025 -0.672131 -0.211155 -0.211155 -0.227092 NaN NaN 0.975104 -0.211155
2 2014-03-01 W3009AX6 54 0 0.185930 -0.770492 -0.211155 -0.314741 -0.266932 1.0 0.979798 -0.883817 -0.211155
3 2014-03-01 WD-WCAV5M690585 409 0 1.000000 0.322404 0.585657 -0.211155 -0.466135 NaN NaN -0.045643 0.585657
4 2014-03-01 S1F0CSW2 229 0 0.135678 -0.759563 -0.211155 -0.298805 -0.338645 1.0 1.000000 -0.908714 -0.211155
... ... ... ... ... ... ... ... ... ... ... ... ... ...
14769522 2023-03-31 7LZ01G30 0 0 -0.005025 NaN NaN NaN -0.211155 NaN NaN -0.410788 NaN
14769523 2023-03-31 9JG4657T 0 0 -0.005025 -0.836066 -0.211155 -0.211155 -0.227092 NaN NaN -0.643154 -0.211155
14769524 2023-03-31 6090A00RFVKG 0 0 -0.005025 -0.672131 -0.211155 -0.211155 -0.314741 NaN NaN -0.269710 -0.211155
14769525 2023-03-31 51R0A2Q8FVGG 0 0 -0.005025 -0.672131 -0.211155 -0.211155 -0.450199 NaN NaN -0.269710 -0.211155
14769526 2023-03-31 7QT032NR 0 0 -0.005025 NaN NaN NaN -0.211155 NaN NaN -0.302905 NaN

14769527 rows × 13 columns

In [10]:
#clean up inconsistent entries: some drives report a failure on one date and then
#keep reporting measurements after that date; those later rows need to be dropped

test = df.copy()
#test
test2 = pd.DataFrame({'serial_number': test.loc[test['failure'] == 1]['serial_number'],
                      'failure_date': test.loc[test['failure'] == 1]['date']})

#test2

test3 = test.join(test2.set_index('serial_number'), on='serial_number')
#test3
clean = test3.drop(test3[test3['date'] > test3['failure_date']].index)
clean = clean.drop('failure_date', axis=1)
clean
Out[10]:
date serial_number days_to_failure failure smart_1_normalized smart_3_normalized smart_5_normalized smart_7_normalized smart_9_normalized smart_187_normalized smart_189_normalized smart_194_normalized smart_197_normalized
0 2014-03-01 MJ1311YNG36USA 991 0 -0.005025 -0.256831 -0.211155 -0.211155 -0.227092 NaN NaN 1.000000 -0.211155
1 2014-03-01 MJ1311YNG733NA 840 0 -0.005025 -0.672131 -0.211155 -0.211155 -0.227092 NaN NaN 0.975104 -0.211155
2 2014-03-01 W3009AX6 54 0 0.185930 -0.770492 -0.211155 -0.314741 -0.266932 1.000000 0.979798 -0.883817 -0.211155
3 2014-03-01 WD-WCAV5M690585 409 0 1.000000 0.322404 0.585657 -0.211155 -0.466135 NaN NaN -0.045643 0.585657
4 2014-03-01 S1F0CSW2 229 0 0.135678 -0.759563 -0.211155 -0.298805 -0.338645 1.000000 1.000000 -0.908714 -0.211155
... ... ... ... ... ... ... ... ... ... ... ... ... ...
14769333 2023-03-30 2AGMNB7Y 0 1 -0.005025 -0.672131 -0.211155 -0.211155 -0.243028 NaN NaN 0.078838 -0.211155
14769335 2023-03-30 8HH0KRGH 0 1 -0.246231 -0.672131 -0.211155 -0.211155 -0.235060 NaN NaN 0.278008 -0.211155
14769341 2023-03-30 ZLW16KEQ 0 1 -0.226131 -0.781421 -0.211155 -0.362550 -0.402390 0.979798 NaN -0.809129 -0.211155
14769343 2023-03-30 X0GE5KSC 0 1 -0.005025 -0.737705 -0.211155 -0.211155 -0.235060 NaN NaN -0.551867 -0.211155
14769346 2023-03-30 61B0A03NF97G 0 1 -0.005025 -0.672131 -0.211155 -0.211155 -0.498008 NaN NaN -0.269710 -0.211155

14442321 rows × 13 columns

In [11]:
df=clean.copy()
df = df.drop(columns= ['days_to_failure'], axis = 1)
df
Out[11]:
date serial_number failure smart_1_normalized smart_3_normalized smart_5_normalized smart_7_normalized smart_9_normalized smart_187_normalized smart_189_normalized smart_194_normalized smart_197_normalized
0 2014-03-01 MJ1311YNG36USA 0 -0.005025 -0.256831 -0.211155 -0.211155 -0.227092 NaN NaN 1.000000 -0.211155
1 2014-03-01 MJ1311YNG733NA 0 -0.005025 -0.672131 -0.211155 -0.211155 -0.227092 NaN NaN 0.975104 -0.211155
2 2014-03-01 W3009AX6 0 0.185930 -0.770492 -0.211155 -0.314741 -0.266932 1.000000 0.979798 -0.883817 -0.211155
3 2014-03-01 WD-WCAV5M690585 0 1.000000 0.322404 0.585657 -0.211155 -0.466135 NaN NaN -0.045643 0.585657
4 2014-03-01 S1F0CSW2 0 0.135678 -0.759563 -0.211155 -0.298805 -0.338645 1.000000 1.000000 -0.908714 -0.211155
... ... ... ... ... ... ... ... ... ... ... ... ...
14769333 2023-03-30 2AGMNB7Y 1 -0.005025 -0.672131 -0.211155 -0.211155 -0.243028 NaN NaN 0.078838 -0.211155
14769335 2023-03-30 8HH0KRGH 1 -0.246231 -0.672131 -0.211155 -0.211155 -0.235060 NaN NaN 0.278008 -0.211155
14769341 2023-03-30 ZLW16KEQ 1 -0.226131 -0.781421 -0.211155 -0.362550 -0.402390 0.979798 NaN -0.809129 -0.211155
14769343 2023-03-30 X0GE5KSC 1 -0.005025 -0.737705 -0.211155 -0.211155 -0.235060 NaN NaN -0.551867 -0.211155
14769346 2023-03-30 61B0A03NF97G 1 -0.005025 -0.672131 -0.211155 -0.211155 -0.498008 NaN NaN -0.269710 -0.211155

14442321 rows × 12 columns

In [12]:
dfHour = df.groupby(['serial_number']).apply(computeDay)
dfHour = dfHour[dfHour.DayToFailure <= 120]
dfHour = dfHour.drop(columns = ['date'])
dfHour= dfHour.drop(columns= ['failure','serial_number'], axis=1)
dfHour=dfHour.reset_index()
dfHour= dfHour.drop(columns= ['level_1'], axis=1)

window=90

print('Creating the sequence')

dfHourSequence =  dfHour.groupby(['serial_number']).apply(series_to_supervised, n_in=window, n_out=1, dropnan=True)
dfHourSequence
Creating the sequence
Out[12]:
smart_1_normalized(t-89) smart_3_normalized(t-89) smart_5_normalized(t-89) smart_7_normalized(t-89) smart_9_normalized(t-89) smart_187_normalized(t-89) smart_189_normalized(t-89) smart_194_normalized(t-89) smart_197_normalized(t-89) smart_1_normalized(t-88) ... smart_1_normalized(t) smart_3_normalized(t) smart_5_normalized(t) smart_7_normalized(t) smart_9_normalized(t) smart_187_normalized(t) smart_189_normalized(t) smart_194_normalized(t) smart_197_normalized(t) DayToFailure
serial_number
5VML01P0 107293 0.175879 -0.704918 -0.211155 -0.314741 -0.577689 1.0 1.0 -0.883817 -0.211155 0.135678 ... 0.165829 -0.704918 -0.211155 -0.314741 -0.601594 1.000000 1.0 -0.875519 -0.211155 31
107294 0.135678 -0.704918 -0.211155 -0.314741 -0.577689 1.0 1.0 -0.883817 -0.211155 0.185930 ... 0.105528 -0.704918 -0.211155 -0.314741 -0.601594 1.000000 1.0 -0.875519 -0.211155 30
107295 0.185930 -0.704918 -0.211155 -0.314741 -0.577689 1.0 1.0 -0.883817 -0.211155 0.155779 ... 0.175879 -0.704918 -0.211155 -0.314741 -0.601594 1.000000 1.0 -0.875519 -0.211155 29
107296 0.155779 -0.704918 -0.211155 -0.314741 -0.577689 1.0 1.0 -0.883817 -0.211155 0.195980 ... 0.135678 -0.704918 -0.211155 -0.314741 -0.601594 1.000000 1.0 -0.875519 -0.211155 28
107297 0.195980 -0.704918 -0.211155 -0.314741 -0.577689 1.0 1.0 -0.875519 -0.211155 0.165829 ... 0.175879 -0.704918 -0.211155 -0.314741 -0.601594 1.000000 1.0 -0.875519 -0.211155 27
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
ZTT3STWF 1987895 -0.206030 -0.693989 -0.211155 -0.338645 -0.258964 1.0 1.0 -0.941909 -0.211155 -0.206030 ... -0.507538 -0.693989 -0.211155 -0.330677 -0.274900 0.414141 1.0 -0.950207 -0.211155 4
1987896 -0.206030 -0.693989 -0.211155 -0.338645 -0.258964 1.0 1.0 -0.941909 -0.211155 -0.236181 ... -0.507538 -0.693989 -0.211155 -0.330677 -0.274900 0.414141 1.0 -0.950207 -0.211155 3
1987897 -0.236181 -0.693989 -0.211155 -0.338645 -0.258964 1.0 1.0 -0.933610 -0.211155 -0.216080 ... -0.507538 -0.693989 -0.211155 -0.330677 -0.274900 0.414141 1.0 -0.950207 -0.211155 2
1987898 -0.216080 -0.693989 -0.211155 -0.338645 -0.258964 1.0 1.0 -0.941909 -0.211155 -0.206030 ... -0.507538 -0.693989 -0.211155 -0.330677 -0.274900 0.414141 1.0 -0.950207 -0.211155 1
1987899 -0.206030 -0.693989 -0.211155 -0.338645 -0.258964 1.0 1.0 -0.941909 -0.211155 -0.185930 ... -0.507538 -0.693989 -0.211155 -0.330677 -0.274900 0.414141 1.0 -0.950207 -0.211155 0

277551 rows × 811 columns

In [13]:
print('Dividing into train test')

X_train, X_rim, y_train, y_rim = train_test_split(dfHourSequence[dfHourSequence.columns[:-1]], 
                                                  dfHourSequence[dfHourSequence.columns[-1:]] ,
                                                  stratify=dfHourSequence[dfHourSequence.columns[-1:]], 
                                                  test_size=0.30)
Dividing into train test
In [14]:
print(y_train)
print(y_train.columns)
                       DayToFailure
serial_number                      
W300T3EK      687360              6
Z305DHV4      1231919            11
ZA17ZFEW      1446046            11
Z302SWXT      1021026             5
S301KQQX      554027             26
...                             ...
Z304TK95      1166742             6
Z305FNVM      1240279             0
Z303N1QB      1052406            10
W300R8BD      683889              3
Z302B11W      999139             29

[194285 rows x 1 columns]
Index(['DayToFailure'], dtype='object')
In [15]:
X_val, X_test, y_val, y_test = train_test_split(X_rim, y_rim ,stratify=y_rim, test_size=0.50)
In [16]:
# appending an empty frame with a DayToFailure column creates the column and,
# because sort=True, also reorders all feature columns alphabetically
X_train = pd.concat([X_train, pd.DataFrame(columns=['DayToFailure'])], sort=True)
X_val = pd.concat([X_val, pd.DataFrame(columns=['DayToFailure'])], sort=True)
X_test = pd.concat([X_test, pd.DataFrame(columns=['DayToFailure'])], sort=True)

X_train[['DayToFailure']] = y_train.values
X_val[['DayToFailure']] = y_val.values
X_test[['DayToFailure']] = y_test.values
In [17]:
X_train
#X_val
#X_test
Out[17]:
DayToFailure smart_187_normalized(t) smart_187_normalized(t-1) smart_187_normalized(t-10) smart_187_normalized(t-11) smart_187_normalized(t-12) smart_187_normalized(t-13) smart_187_normalized(t-14) smart_187_normalized(t-15) smart_187_normalized(t-16) ... smart_9_normalized(t-81) smart_9_normalized(t-82) smart_9_normalized(t-83) smart_9_normalized(t-84) smart_9_normalized(t-85) smart_9_normalized(t-86) smart_9_normalized(t-87) smart_9_normalized(t-88) smart_9_normalized(t-89) smart_9_normalized(t-9)
W300T3EK 687360 6 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 ... -0.219124 -0.219124 -0.219124 -0.219124 -0.219124 -0.219124 -0.219124 -0.219124 -0.219124 -0.235060
Z305DHV4 1231919 11 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 ... -0.458167 -0.458167 -0.458167 -0.458167 -0.458167 -0.458167 -0.458167 -0.458167 -0.458167 -0.474104
ZA17ZFEW 1446046 11 -0.878788 -0.878788 -0.878788 -0.878788 -0.878788 -0.878788 -0.878788 -0.878788 -0.878788 ... -0.617530 -0.617530 -0.617530 -0.617530 -0.617530 -0.617530 -0.617530 -0.617530 -0.617530 -0.633466
Z302SWXT 1021026 5 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 ... -0.314741 -0.314741 -0.314741 -0.314741 -0.314741 -0.314741 -0.314741 -0.314741 -0.314741 -0.330677
S301KQQX 554027 26 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 ... -0.314741 -0.314741 -0.314741 -0.314741 -0.314741 -0.314741 -0.314741 -0.314741 -0.314741 -0.330677
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
Z304TK95 1166742 6 0.252525 0.252525 0.252525 0.252525 0.252525 0.252525 0.252525 0.252525 0.272727 ... -0.442231 -0.442231 -0.442231 -0.442231 -0.442231 -0.442231 -0.442231 -0.442231 -0.442231 -0.458167
Z305FNVM 1240279 0 -0.131313 0.272727 0.454545 0.575758 0.575758 0.575758 0.575758 0.575758 0.575758 ... -0.593625 -0.593625 -0.593625 -0.593625 -0.593625 -0.593625 -0.585657 -0.585657 -0.585657 -0.609562
Z303N1QB 1052406 10 0.797980 0.797980 0.797980 0.797980 0.797980 0.797980 0.797980 0.797980 0.797980 ... -0.466135 -0.466135 -0.466135 -0.466135 -0.466135 -0.466135 -0.466135 -0.466135 -0.466135 -0.482072
W300R8BD 683889 3 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 ... -0.426295 -0.426295 -0.426295 -0.426295 -0.426295 -0.426295 -0.426295 -0.418327 -0.418327 -0.442231
Z302B11W 999139 29 0.878788 0.878788 0.878788 0.878788 0.878788 0.878788 0.878788 0.878788 0.878788 ... -0.458167 -0.458167 -0.458167 -0.458167 -0.458167 -0.458167 -0.458167 -0.458167 -0.458167 -0.474104

194285 rows × 811 columns

In [18]:
X_train.shape
Out[18]:
(194285, 811)
In [19]:
X_val.shape
Out[19]:
(41633, 811)
In [20]:
Xtrain = X_train.copy()
Xtrain = Xtrain.drop(columns=['DayToFailure'], axis=1 )
Xtrain.shape
#Xtrain
Out[20]:
(194285, 810)
In [21]:
Xval = X_val.copy()
Xval = Xval.drop(columns=['DayToFailure'], axis=1 )
Xval.shape
Out[21]:
(41633, 810)
In [22]:
yTest = X_test[['DayToFailure']].values
#yTest
In [23]:
Xtest = X_test.drop(columns=['DayToFailure'], axis=1 )
#Xtest
In [24]:
#reshape to (samples, timesteps, features) for the LSTM; the feature order follows
#the alphabetically sorted columns produced by the concat above
Xtrain = Xtrain.values.reshape(Xtrain.shape[0], window, int(Xtrain.shape[1]/window))
Xval = Xval.values.reshape(Xval.shape[0], window, int(Xval.shape[1]/window))
Xtest = Xtest.values.reshape(Xtest.shape[0], window, int(Xtest.shape[1]/window))
In [25]:
ytrain = X_train[['DayToFailure']].values
yVal = X_val[['DayToFailure']].values
In [26]:
print(Xtrain.shape)
print(Xval.shape)
print(Xtest.shape)

print(ytrain.shape)
print(yVal.shape)
print(yTest.shape)
(194285, 90, 9)
(41633, 90, 9)
(41633, 90, 9)
(194285, 1)
(41633, 1)
(41633, 1)

Model¶

In [27]:
from tensorflow.keras.layers import Bidirectional
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
import keras

#same as experiment6 - BiLSTM
def build_model():
  dp_lvl = 0.1   # dropout level (Dropout is imported above but not used in this architecture)
  model = Sequential()
  model.add(Bidirectional(LSTM(128, input_shape=(window, 9), return_sequences=True, activation="tanh")))
  model.add(Bidirectional(LSTM(64, return_sequences=True, activation="tanh")))
  model.add(Bidirectional(LSTM(32, activation="tanh")))
  model.add(Dense(96, activation='relu'))
  model.add(Dense(128, activation='relu'))
  model.add(Dense(1))
  return model

Training¶

In [ ]:
#same as experiment6 - LSTM
epochs = 150
historyvet = []
model = build_model()
best_acc = 0.0

#adam = optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0, amsgrad=False)
adam = tf.keras.optimizers.legacy.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0, amsgrad=False)
#adam = tf.keras.optimizers.Adam(learning_rate=0.001)
model.compile(loss='mse', optimizer=adam)

for epoch in range(epochs):
  print('Epoch {%d}' %(epoch))
  #model.fit(Xtrain, ytrain, epochs=1, batch_size=500, validation_data= (Xval,yVal), shuffle=True)
  history = model.fit(Xtrain, ytrain, epochs=1, batch_size=500, validation_data= (Xval,yVal))
  historyvet.append(history.history)
    
model.save('bilstm_predict_rul_experiment8_extended_full_take3.h5')
model.summary()
/usr/local/lib/python3.10/dist-packages/keras/src/optimizers/legacy/adam.py:118: UserWarning: The `lr` argument is deprecated, use `learning_rate` instead.
  super().__init__(name, **kwargs)
Epoch {0}
389/389 [==============================] - 340s 841ms/step - loss: 92.4284 - val_loss: 85.2481
Epoch {1}
389/389 [==============================] - 321s 825ms/step - loss: 85.1641 - val_loss: 85.7237
Epoch {2}
389/389 [==============================] - 323s 829ms/step - loss: 84.6595 - val_loss: 83.9631
Epoch {3}
194/389 [=============>................] - ETA: 2:26 - loss: 84.2638
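
As a side note, the manual loop above calls model.fit once per epoch only to collect the history list. A hedged alternative sketch using Keras callbacks (EarlyStopping and ModelCheckpoint; the checkpoint filename is a placeholder) would do the same in a single fit call and stop automatically when the validation loss stalls:

In [ ]:
# Sketch only: single fit call with callbacks instead of the per-epoch loop,
# assuming the same Xtrain/ytrain/Xval/yVal arrays and build_model() defined above.
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

callbacks = [
    EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True),
    ModelCheckpoint('bilstm_rul_checkpoint.h5', monitor='val_loss', save_best_only=True),
]

alt_model = build_model()
alt_model.compile(loss='mse', optimizer=tf.keras.optimizers.Adam(learning_rate=0.001))
history = alt_model.fit(Xtrain, ytrain, epochs=150, batch_size=500,
                        validation_data=(Xval, yVal), callbacks=callbacks)
# history.history['loss'] and history.history['val_loss'] replace the historyvet list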
In [26]:
lossTrain=[]
lossval=[]
accTrain = []
accVal =[]

for element in historyvet:
   lossTrain.append(element['loss'][0])
   lossval.append(element['val_loss'][0])
   #accTrain.append(element['accuracy'][0])
   #accVal.append(element['val_accuracy'][0])
  

plt.plot(lossTrain, color='g')
plt.plot(lossval, color='r')
plt.title('model loss')
plt.xlabel('epoch')
plt.ylabel('Loss')
plt.legend(['train', 'validation'], loc='upper right')
plt.show()
In [27]:
#skip this cell if the previous one finished and you are continuing in the same session;
#if the notebook was disconnected, run it to reload the model that was saved overnight
model = load_model( 'bilstm_predict_rul_experiment8_extended_full_take3.h5')
print(model.evaluate(Xtest,yTest))
1302/1302 [==============================] - 143s 108ms/step - loss: 67.4307
67.43070220947266
In [28]:
pred = model.predict(Xtest)
1302/1302 [==============================] - 138s 104ms/step
In [29]:
y = yTest.copy()
yhat = pred.copy()

# calculate manually
d = y - yhat
mse_f = np.mean(d**2)
mae_f = np.mean(abs(d))
rmse_f = np.sqrt(mse_f)
r2_f = 1-(sum(d**2)/sum((y-np.mean(y))**2))

print("Results by manual calculation:")
print("MAE:",mae_f) #mean absolute error - difference between the original and predicted extracted by avg the abs diff over dataset
print("MSE:", mse_f) #mean squared error - diff btw orig and pred extracted by squared the avg diff over the dataset
print("RMSE:", rmse_f) #root mean squared error - is the error rate by the square root of MSE
print("R-Squared:", r2_f) #coefficient of determination - the higher the better 
Results by manual calculation:
MAE: 6.714504805846707
MSE: 67.43068458432414
RMSE: 8.211618877196148
R-Squared: [0.20853765]
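
The same numbers can be obtained with the scikit-learn helpers already imported in the first cell; a minimal sketch (this also sidesteps the one-element array printed for R-Squared above, since r2_score returns a scalar):

In [ ]:
# Sketch only: same metrics via scikit-learn.
mae_sk = mean_absolute_error(yTest, pred)
mse_sk = mean_squared_error(yTest, pred)
rmse_sk = np.sqrt(mse_sk)
r2_sk = r2_score(yTest, pred)
print("MAE:", mae_sk, "MSE:", mse_sk, "RMSE:", rmse_sk, "R2:", r2_sk)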

More training, since the loss is still dropping and the model looks like it is getting somewhere interesting

In [ ]:
#another 150 epochs of training
historyvet = []
model = load_model('bilstm_predict_rul_experiment8_extended_full_take3.h5')
epochs = 150

for epoch in range(epochs):
  print('Epoch {%d}' %(epoch))
  history = model.fit(Xtrain, ytrain, epochs=1, batch_size=500, validation_data= (Xval,yVal), shuffle=True)
  historyvet.append(history.history)

model.save('bilstm_predict_rul_experiment8_extended_full_take3_part2.h5')
model.summary()
Epoch {0}
389/389 [==============================] - 343s 862ms/step - loss: 64.2593 - val_loss: 68.3754
Epoch {1}
389/389 [==============================] - 332s 852ms/step - loss: 63.3964 - val_loss: 67.4172
Epoch {2}
389/389 [==============================] - 329s 847ms/step - loss: 63.3546 - val_loss: 67.3026
Epoch {3}
389/389 [==============================] - 332s 854ms/step - loss: 62.9733 - val_loss: 67.0207
Epoch {4}
389/389 [==============================] - 333s 856ms/step - loss: 62.8247 - val_loss: 67.2608
Epoch {5}
389/389 [==============================] - 329s 846ms/step - loss: 62.2602 - val_loss: 67.5695
Epoch {6}
389/389 [==============================] - 336s 862ms/step - loss: 62.5081 - val_loss: 66.4262
Epoch {7}
389/389 [==============================] - 332s 855ms/step - loss: 62.3149 - val_loss: 66.7445
Epoch {8}
389/389 [==============================] - 337s 866ms/step - loss: 61.9950 - val_loss: 66.2608
Epoch {9}
389/389 [==============================] - 334s 859ms/step - loss: 61.4956 - val_loss: 65.5214
Epoch {10}
389/389 [==============================] - 334s 859ms/step - loss: 61.2620 - val_loss: 65.0429
Epoch {11}
389/389 [==============================] - 334s 859ms/step - loss: 61.1446 - val_loss: 65.6227
Epoch {12}
389/389 [==============================] - 327s 841ms/step - loss: 60.8006 - val_loss: 65.3122
Epoch {13}
389/389 [==============================] - 331s 851ms/step - loss: 61.2606 - val_loss: 64.9975
Epoch {14}
389/389 [==============================] - 328s 842ms/step - loss: 60.2370 - val_loss: 64.4013
Epoch {15}
389/389 [==============================] - 329s 847ms/step - loss: 60.1839 - val_loss: 64.5551
Epoch {16}
389/389 [==============================] - 330s 849ms/step - loss: 59.7568 - val_loss: 64.6951
Epoch {17}
389/389 [==============================] - 332s 854ms/step - loss: 59.8941 - val_loss: 63.5398
Epoch {18}
389/389 [==============================] - 335s 862ms/step - loss: 59.6660 - val_loss: 64.0032
Epoch {19}
389/389 [==============================] - 332s 853ms/step - loss: 59.0889 - val_loss: 64.5364
Epoch {20}
389/389 [==============================] - 333s 857ms/step - loss: 58.7660 - val_loss: 63.8183
Epoch {21}
389/389 [==============================] - 333s 855ms/step - loss: 59.8411 - val_loss: 64.7533
Epoch {22}
389/389 [==============================] - 331s 852ms/step - loss: 58.6592 - val_loss: 63.8076
Epoch {23}
389/389 [==============================] - 332s 855ms/step - loss: 58.1071 - val_loss: 63.0844
Epoch {24}
389/389 [==============================] - 331s 852ms/step - loss: 57.9922 - val_loss: 63.0138
Epoch {25}
389/389 [==============================] - 334s 860ms/step - loss: 58.1064 - val_loss: 63.5760
Epoch {26}
389/389 [==============================] - 333s 856ms/step - loss: 57.3484 - val_loss: 62.2100
Epoch {27}
389/389 [==============================] - 332s 853ms/step - loss: 57.0489 - val_loss: 61.7560
Epoch {28}
389/389 [==============================] - 333s 856ms/step - loss: 56.7409 - val_loss: 61.9766
Epoch {29}
389/389 [==============================] - 329s 846ms/step - loss: 56.1090 - val_loss: 60.9149
Epoch {30}
389/389 [==============================] - 336s 864ms/step - loss: 56.4711 - val_loss: 60.8571
Epoch {31}
389/389 [==============================] - 331s 851ms/step - loss: 56.0427 - val_loss: 61.6544
Epoch {32}
389/389 [==============================] - 330s 849ms/step - loss: 57.5427 - val_loss: 66.4305
Epoch {33}
389/389 [==============================] - 330s 848ms/step - loss: 56.3025 - val_loss: 59.7977
Epoch {34}
389/389 [==============================] - 328s 842ms/step - loss: 55.1212 - val_loss: 61.3966
Epoch {35}
389/389 [==============================] - 328s 843ms/step - loss: 54.9564 - val_loss: 59.9022
Epoch {36}
389/389 [==============================] - 327s 841ms/step - loss: 54.4106 - val_loss: 60.0068
Epoch {37}
389/389 [==============================] - 329s 845ms/step - loss: 54.4488 - val_loss: 59.5027
Epoch {38}
389/389 [==============================] - 330s 847ms/step - loss: 53.9621 - val_loss: 58.7557
Epoch {39}
389/389 [==============================] - 326s 839ms/step - loss: 53.7698 - val_loss: 58.6915
Epoch {40}
389/389 [==============================] - 327s 841ms/step - loss: 53.8168 - val_loss: 61.1053
Epoch {41}
389/389 [==============================] - 326s 838ms/step - loss: 54.2586 - val_loss: 61.5899
Epoch {42}
389/389 [==============================] - 329s 847ms/step - loss: 53.1145 - val_loss: 58.5002
Epoch {43}
389/389 [==============================] - 327s 841ms/step - loss: 52.4294 - val_loss: 58.4195
Epoch {44}
389/389 [==============================] - 327s 841ms/step - loss: 52.4108 - val_loss: 57.0662
Epoch {45}
389/389 [==============================] - 327s 840ms/step - loss: 52.5267 - val_loss: 57.3894
Epoch {46}
389/389 [==============================] - 327s 840ms/step - loss: 51.9211 - val_loss: 57.6161
Epoch {47}
389/389 [==============================] - 328s 844ms/step - loss: 52.1283 - val_loss: 56.9461
Epoch {48}
389/389 [==============================] - 327s 840ms/step - loss: 51.1085 - val_loss: 56.3701
Epoch {49}
389/389 [==============================] - 330s 848ms/step - loss: 50.8462 - val_loss: 57.8374
Epoch {50}
389/389 [==============================] - 326s 839ms/step - loss: 51.0079 - val_loss: 56.1585
Epoch {51}
389/389 [==============================] - 328s 842ms/step - loss: 50.0825 - val_loss: 56.3187
Epoch {52}
389/389 [==============================] - 325s 837ms/step - loss: 50.1658 - val_loss: 56.2422
Epoch {53}
389/389 [==============================] - 327s 841ms/step - loss: 49.6024 - val_loss: 55.4322
Epoch {54}
389/389 [==============================] - 329s 846ms/step - loss: 49.7728 - val_loss: 54.7449
Epoch {55}
389/389 [==============================] - 326s 838ms/step - loss: 49.6413 - val_loss: 54.7127
Epoch {56}
389/389 [==============================] - 329s 845ms/step - loss: 48.9819 - val_loss: 55.4102
Epoch {57}
389/389 [==============================] - 325s 836ms/step - loss: 48.8032 - val_loss: 55.7070
Epoch {58}
389/389 [==============================] - 326s 839ms/step - loss: 48.5199 - val_loss: 56.3114
Epoch {59}
389/389 [==============================] - 328s 844ms/step - loss: 47.7044 - val_loss: 54.0407
Epoch {60}
389/389 [==============================] - 327s 841ms/step - loss: 47.6278 - val_loss: 52.7045
Epoch {61}
389/389 [==============================] - 333s 855ms/step - loss: 47.4723 - val_loss: 53.4739
Epoch {62}
389/389 [==============================] - 326s 837ms/step - loss: 47.1001 - val_loss: 56.5136
Epoch {63}
389/389 [==============================] - 328s 842ms/step - loss: 48.0536 - val_loss: 55.6724
Epoch {64}
389/389 [==============================] - 329s 845ms/step - loss: 46.8253 - val_loss: 53.2679
Epoch {65}
389/389 [==============================] - 327s 841ms/step - loss: 46.7869 - val_loss: 52.4258
Epoch {66}
389/389 [==============================] - 328s 844ms/step - loss: 46.4162 - val_loss: 54.8050
Epoch {67}
389/389 [==============================] - 325s 836ms/step - loss: 45.9856 - val_loss: 52.2177
Epoch {68}
389/389 [==============================] - 326s 838ms/step - loss: 44.9784 - val_loss: 52.2360
Epoch {69}
389/389 [==============================] - ETA: 0s - loss: 44.7546
In [29]:
lossTrain=[]
lossval=[]
accTrain = []
accVal =[]

for element in historyvet:
   lossTrain.append(element['loss'][0])
   lossval.append(element['val_loss'][0])
   #accTrain.append(element['accuracy'][0])
   #accVal.append(element['val_accuracy'][0])
  

plt.plot(lossTrain, color='g')
plt.plot(lossval, color='r')
plt.title('model loss')
plt.xlabel('epoch')
plt.ylabel('Loss')
plt.legend(['train', 'validation'], loc='upper right')
plt.show()
In [30]:
pred = model.predict(Xtest)
1302/1302 [==============================] - 125s 94ms/step
In [31]:
print(model.evaluate(Xtest,yTest))
1302/1302 [==============================] - 126s 96ms/step - loss: 33.3588
33.35878372192383
In [32]:
y = yTest.copy()
yhat = pred.copy()

# calculate manually
d = y - yhat
mse_f = np.mean(d**2)
mae_f = np.mean(abs(d))
rmse_f = np.sqrt(mse_f)
r2_f = 1-(sum(d**2)/sum((y-np.mean(y))**2))

print("Results by manual calculation:")
print("MAE:",mae_f) #mean absolute error - difference between the original and predicted extracted by avg the abs diff over dataset
print("MSE:", mse_f) #mean squared error - diff btw orig and pred extracted by squared the avg diff over the dataset
print("RMSE:", rmse_f) #root mean squared error - is the error rate by the square root of MSE
print("R-Squared:", r2_f) #coefficient of determination - the higher the better 
Results by manual calculation:
MAE: 4.252981828318384
MSE: 33.35878218176548
RMSE: 5.775706206323646
R-Squared: [0.60845392]

Performance evaluation¶

In [33]:
# Plot true and predicted RUL values
plt.plot(yTest, label = "True RUL", color = "red")
plt.plot(pred, label = "Pred RUL", color = "blue")
plt.legend()
plt.show()
In [34]:
x = list(range(len(yTest)))
plt.scatter(x, yTest, color="blue", label="original")
plt.plot(x, pred, color="red", label="predicted")
plt.legend()
plt.show() 
In [35]:
y = yTest.copy()
yhat = pred.copy()
In [36]:
# calculate manually
d = y - yhat
mse_f = np.mean(d**2)
mae_f = np.mean(abs(d))
rmse_f = np.sqrt(mse_f)
r2_f = 1-(sum(d**2)/sum((y-np.mean(y))**2))

print("Results by manual calculation:")
print("MAE:",mae_f) #mean absolute error - difference between the original and predicted extracted by avg the abs diff over dataset
print("MSE:", mse_f) #mean squared error - diff btw orig and pred extracted by squared the avg diff over the dataset
print("RMSE:", rmse_f) #root mean squared error - is the error rate by the square root of MSE
print("R-Squared:", r2_f) #coefficient of determination - the higher the better 
Results by manual calculation:
MAE: 4.252981828318384
MSE: 33.35878218176548
RMSE: 5.775706206323646
R-Squared: [0.60845392]
In [ ]:
#another 150 epochs of training
historyvet = []
model = load_model('bilstm_predict_rul_experiment8_extended_full_take3_part2.h5')
epochs = 150

for epoch in range(epochs):
  print('Epoch {%d}' %(epoch))
  history = model.fit(Xtrain, ytrain, epochs=1, batch_size=500, validation_data= (Xval,yVal), shuffle=True)
  historyvet.append(history.history)

model.save('bilstm_predict_rul_experiment8_extended_full_take3_part3.h5')
model.summary()
Epoch {0}
389/389 [==============================] - 338s 853ms/step - loss: 25.8730 - val_loss: 36.7638
Epoch {1}
389/389 [==============================] - 330s 847ms/step - loss: 28.3172 - val_loss: 33.4057
Epoch {2}
389/389 [==============================] - 328s 842ms/step - loss: 27.1991 - val_loss: 35.4056
Epoch {3}
389/389 [==============================] - 328s 844ms/step - loss: 27.9181 - val_loss: 34.8644
Epoch {4}
389/389 [==============================] - 328s 843ms/step - loss: 26.4577 - val_loss: 37.4612
Epoch {5}
389/389 [==============================] - 328s 843ms/step - loss: 26.3098 - val_loss: 35.6996
Epoch {6}
389/389 [==============================] - 331s 851ms/step - loss: 24.5580 - val_loss: 30.9044
Epoch {7}
389/389 [==============================] - 328s 843ms/step - loss: 23.9100 - val_loss: 36.0782
Epoch {8}
389/389 [==============================] - 331s 851ms/step - loss: 40.0271 - val_loss: 38.7793
Epoch {9}
389/389 [==============================] - 326s 838ms/step - loss: 27.4546 - val_loss: 36.4517
Epoch {10}
389/389 [==============================] - 328s 843ms/step - loss: 27.2428 - val_loss: 33.3448
Epoch {11}
389/389 [==============================] - 328s 843ms/step - loss: 37.9628 - val_loss: 64.6191
Epoch {12}
389/389 [==============================] - 327s 840ms/step - loss: 45.6755 - val_loss: 42.1760
Epoch {13}
389/389 [==============================] - 330s 849ms/step - loss: 29.6218 - val_loss: 33.5977
Epoch {14}
389/389 [==============================] - 326s 838ms/step - loss: 25.4958 - val_loss: 33.2421
Epoch {15}
389/389 [==============================] - 329s 845ms/step - loss: 30.3199 - val_loss: 45.8646
Epoch {16}
389/389 [==============================] - 327s 840ms/step - loss: 34.3555 - val_loss: 38.6681
Epoch {17}
389/389 [==============================] - 328s 842ms/step - loss: 29.1423 - val_loss: 53.5323
Epoch {18}
389/389 [==============================] - 328s 843ms/step - loss: 27.6896 - val_loss: 32.0927
Epoch {19}
389/389 [==============================] - 328s 844ms/step - loss: 23.5051 - val_loss: 33.8288
Epoch {20}
389/389 [==============================] - 329s 845ms/step - loss: 24.4657 - val_loss: 33.2159
Epoch {21}
389/389 [==============================] - 328s 842ms/step - loss: 25.3128 - val_loss: 33.6188
Epoch {22}
389/389 [==============================] - 329s 845ms/step - loss: 24.3259 - val_loss: 32.3556
Epoch {23}
389/389 [==============================] - 326s 839ms/step - loss: 22.8256 - val_loss: 49.4982
Epoch {24}
389/389 [==============================] - 329s 846ms/step - loss: 24.0169 - val_loss: 28.7486
Epoch {25}
389/389 [==============================] - 328s 844ms/step - loss: 48.0929 - val_loss: 59.0893
Epoch {26}
389/389 [==============================] - 325s 836ms/step - loss: 29.6776 - val_loss: 30.8032
Epoch {27}
389/389 [==============================] - 328s 843ms/step - loss: 23.2279 - val_loss: 30.0722
Epoch {28}
389/389 [==============================] - 327s 842ms/step - loss: 21.6707 - val_loss: 31.5854
Epoch {29}
389/389 [==============================] - 328s 842ms/step - loss: 21.3462 - val_loss: 31.1337
Epoch {30}
389/389 [==============================] - 328s 842ms/step - loss: 22.2737 - val_loss: 30.3173
Epoch {31}
389/389 [==============================] - 327s 841ms/step - loss: 23.2598 - val_loss: 30.5546
Epoch {32}
389/389 [==============================] - 330s 849ms/step - loss: 21.1547 - val_loss: 30.9380
Epoch {33}
389/389 [==============================] - 328s 843ms/step - loss: 22.6806 - val_loss: 29.1014
Epoch {34}
389/389 [==============================] - 327s 840ms/step - loss: 20.5276 - val_loss: 28.6941
Epoch {35}
389/389 [==============================] - 328s 842ms/step - loss: 20.4221 - val_loss: 27.7424
Epoch {36}
389/389 [==============================] - 325s 836ms/step - loss: 25.7414 - val_loss: 32.6389
Epoch {37}
389/389 [==============================] - 328s 842ms/step - loss: 20.8044 - val_loss: 39.1481
Epoch {38}
389/389 [==============================] - 329s 845ms/step - loss: 21.4919 - val_loss: 28.2595
Epoch {39}
389/389 [==============================] - 326s 838ms/step - loss: 22.0989 - val_loss: 29.1410
Epoch {40}
389/389 [==============================] - 330s 847ms/step - loss: 20.5530 - val_loss: 27.6230
Epoch {41}
389/389 [==============================] - 326s 838ms/step - loss: 21.6128 - val_loss: 30.0240
Epoch {42}
389/389 [==============================] - 326s 838ms/step - loss: 21.1913 - val_loss: 28.2491
Epoch {43}
124/389 [========>.....................] - ETA: 3:22 - loss: 18.5470
[Output truncated: the notebook hit the Jupyter IOPub message rate limit, so the training logs for the intermediate epochs are not shown here.]

389/389 [==============================] - 331s 850ms/step - loss: 15.3658 - val_loss: 22.2062
Epoch {90}
389/389 [==============================] - 329s 846ms/step - loss: 14.1163 - val_loss: 23.0011
Epoch {91}
389/389 [==============================] - 330s 848ms/step - loss: 14.3330 - val_loss: 22.2124
Epoch {92}
100/389 [======>.......................] - ETA: 3:49 - loss: 13.7512
In [38]:
lossTrain=[]
lossval=[]
accTrain = []
accVal =[]

for element in historyvet:
   lossTrain.append(element['loss'][0])
   lossval.append(element['val_loss'][0])
   #accTrain.append(element['accuracy'][0])
   #accVal.append(element['val_accuracy'][0])
  

plt.plot(lossTrain, color='g')
plt.plot(lossval, color='r')
plt.title('model loss')
plt.xlabel('epoch')
plt.ylabel('Loss')
plt.legend(['train', 'validation'], loc='upper right')
plt.show()
In [39]:
pred = model.predict(Xtest)
1302/1302 [==============================] - 135s 101ms/step
In [40]:
print(model.evaluate(Xtest,yTest))
1302/1302 [==============================] - 133s 102ms/step - loss: 17.9300
17.929988861083984
In [41]:
y = yTest.copy()
yhat = pred.copy()

# calculate manually
d = y - yhat
mse_f = np.mean(d**2)
mae_f = np.mean(abs(d))
rmse_f = np.sqrt(mse_f)
r2_f = 1-(sum(d**2)/sum((y-np.mean(y))**2))

print("Results by manual calculation:")
print("MAE:",mae_f) #mean absolute error - difference between the original and predicted extracted by avg the abs diff over dataset
print("MSE:", mse_f) #mean squared error - diff btw orig and pred extracted by squared the avg diff over the dataset
print("RMSE:", rmse_f) #root mean squared error - is the error rate by the square root of MSE
print("R-Squared:", r2_f) #coefficient of determination - the higher the better 
Results by manual calculation:
MAE: 3.026280620487356
MSE: 17.929977768579597
RMSE: 4.234380446839844
R-Squared: [0.7895483]
In [42]:
# Plot true and predicted RUL values
plt.plot(yTest, label = "True RUL", color = "red")
plt.plot(pred, label = "Pred RUL", color = "blue")
plt.legend()
plt.show()
In [43]:
x = list(range(len(yTest)))
plt.scatter(x, yTest, color="blue", label="original")
plt.plot(x, pred, color="red", label="predicted")
plt.legend()
plt.show() 
In [45]:
#another 50 epochs of training
historyvet = []
model = load_model('bilstm_predict_rul_experiment8_extended_full_take3_part3.h5')
epochs = 50

for epoch in range(epochs):
  print('Epoch {%d}' %(epoch))
  history = model.fit(Xtrain, ytrain, epochs=1, batch_size=500, validation_data= (Xval,yVal), shuffle=True)
  historyvet.append(history.history)

model.save('bilstm_predict_rul_experiment8_extended_full_take3_part4.h5')
model.summary()
Epoch {0}
389/389 [==============================] - 341s 860ms/step - loss: 9.9645 - val_loss: 18.2545
Epoch {1}
389/389 [==============================] - 330s 849ms/step - loss: 10.0845 - val_loss: 19.1041
Epoch {2}
389/389 [==============================] - 332s 852ms/step - loss: 10.3525 - val_loss: 19.0241
Epoch {3}
389/389 [==============================] - 330s 847ms/step - loss: 11.2895 - val_loss: 16.3651
Epoch {4}
389/389 [==============================] - 332s 853ms/step - loss: 12.9427 - val_loss: 19.6775
Epoch {5}
389/389 [==============================] - 334s 858ms/step - loss: 10.2216 - val_loss: 18.1022
Epoch {6}
389/389 [==============================] - 330s 849ms/step - loss: 9.6090 - val_loss: 18.8817
Epoch {7}
389/389 [==============================] - 333s 855ms/step - loss: 9.6592 - val_loss: 19.6096
Epoch {8}
389/389 [==============================] - 330s 847ms/step - loss: 15.4943 - val_loss: 18.0857
Epoch {9}
389/389 [==============================] - 344s 886ms/step - loss: 10.0890 - val_loss: 18.4702
Epoch {10}
389/389 [==============================] - 333s 855ms/step - loss: 10.9185 - val_loss: 18.3299
Epoch {11}
389/389 [==============================] - 332s 855ms/step - loss: 9.8375 - val_loss: 17.2769
Epoch {12}
389/389 [==============================] - 334s 858ms/step - loss: 9.0572 - val_loss: 18.4719
Epoch {13}
389/389 [==============================] - 330s 848ms/step - loss: 9.1006 - val_loss: 16.8966
Epoch {14}
389/389 [==============================] - 333s 855ms/step - loss: 9.3132 - val_loss: 19.2194
Epoch {15}
389/389 [==============================] - 330s 849ms/step - loss: 10.1103 - val_loss: 17.0467
Epoch {16}
389/389 [==============================] - 332s 854ms/step - loss: 10.6557 - val_loss: 18.4849
Epoch {17}
389/389 [==============================] - 332s 853ms/step - loss: 9.1632 - val_loss: 17.1429
Epoch {18}
389/389 [==============================] - 331s 850ms/step - loss: 8.9634 - val_loss: 16.3619
Epoch {19}
389/389 [==============================] - 331s 852ms/step - loss: 15.5038 - val_loss: 19.0220
Epoch {20}
389/389 [==============================] - 330s 850ms/step - loss: 9.1171 - val_loss: 16.2541
Epoch {21}
389/389 [==============================] - 334s 859ms/step - loss: 8.4189 - val_loss: 16.1693
Epoch {22}
389/389 [==============================] - 333s 857ms/step - loss: 8.5636 - val_loss: 20.3592
Epoch {23}
389/389 [==============================] - 334s 859ms/step - loss: 9.1997 - val_loss: 17.8628
Epoch {24}
389/389 [==============================] - 333s 856ms/step - loss: 9.2838 - val_loss: 17.8284
Epoch {25}
389/389 [==============================] - 334s 859ms/step - loss: 8.8097 - val_loss: 18.5538
Epoch {26}
389/389 [==============================] - 333s 856ms/step - loss: 15.5939 - val_loss: 18.6431
Epoch {27}
389/389 [==============================] - 336s 863ms/step - loss: 12.9848 - val_loss: 26.0683
Epoch {28}
389/389 [==============================] - 341s 876ms/step - loss: 18.6866 - val_loss: 17.9619
Epoch {29}
389/389 [==============================] - 336s 865ms/step - loss: 8.7301 - val_loss: 15.2111
Epoch {30}
389/389 [==============================] - 332s 854ms/step - loss: 8.1547 - val_loss: 16.6626
Epoch {31}
389/389 [==============================] - 332s 852ms/step - loss: 8.0667 - val_loss: 16.3300
Epoch {32}
389/389 [==============================] - 335s 860ms/step - loss: 9.6312 - val_loss: 18.5143
Epoch {33}
389/389 [==============================] - 338s 868ms/step - loss: 8.8509 - val_loss: 16.0280
Epoch {34}
389/389 [==============================] - 333s 856ms/step - loss: 9.1759 - val_loss: 19.9139
Epoch {35}
389/389 [==============================] - 335s 860ms/step - loss: 9.0047 - val_loss: 16.0639
Epoch {36}
389/389 [==============================] - 333s 855ms/step - loss: 7.9623 - val_loss: 17.3039
Epoch {37}
389/389 [==============================] - 335s 861ms/step - loss: 8.1814 - val_loss: 17.1028
Epoch {38}
389/389 [==============================] - 335s 861ms/step - loss: 8.6306 - val_loss: 18.1676
Epoch {39}
389/389 [==============================] - 335s 861ms/step - loss: 8.6266 - val_loss: 16.3168
Epoch {40}
389/389 [==============================] - 339s 869ms/step - loss: 8.3986 - val_loss: 16.0175
Epoch {41}
389/389 [==============================] - 335s 860ms/step - loss: 8.4103 - val_loss: 17.0997
Epoch {42}
389/389 [==============================] - 336s 864ms/step - loss: 9.0861 - val_loss: 16.6194
Epoch {43}
389/389 [==============================] - 334s 860ms/step - loss: 8.8549 - val_loss: 16.9267
Epoch {44}
389/389 [==============================] - 333s 857ms/step - loss: 8.1499 - val_loss: 15.8692
Epoch {45}
389/389 [==============================] - 337s 866ms/step - loss: 8.9060 - val_loss: 17.0316
Epoch {46}
389/389 [==============================] - 334s 857ms/step - loss: 8.2583 - val_loss: 17.9719
Epoch {47}
389/389 [==============================] - 335s 861ms/step - loss: 8.8649 - val_loss: 15.9925
Epoch {48}
389/389 [==============================] - 335s 861ms/step - loss: 8.3989 - val_loss: 16.3942
Epoch {49}
389/389 [==============================] - 336s 863ms/step - loss: 8.0296 - val_loss: 15.1184
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 bidirectional (Bidirection  (None, 90, 256)           141312    
 al)                                                             
                                                                 
 bidirectional_1 (Bidirecti  (None, 90, 128)           164352    
 onal)                                                           
                                                                 
 bidirectional_2 (Bidirecti  (None, 64)                41216     
 onal)                                                           
                                                                 
 dense (Dense)               (None, 96)                6240      
                                                                 
 dense_1 (Dense)             (None, 128)               12416     
                                                                 
 dense_2 (Dense)             (None, 1)                 129       
                                                                 
=================================================================
Total params: 365665 (1.39 MB)
Trainable params: 365665 (1.39 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
In [46]:
lossTrain=[]
lossval=[]
accTrain = []
accVal =[]

for element in historyvet:
   lossTrain.append(element['loss'][0])
   lossval.append(element['val_loss'][0])
   #accTrain.append(element['accuracy'][0])
   #accVal.append(element['val_accuracy'][0])
  

plt.plot(lossTrain, color='g')
plt.plot(lossval, color='r')
plt.title('model loss')
plt.xlabel('epoch')
plt.ylabel('Loss')
plt.legend(['train', 'validation'], loc='upper right')
plt.show()
In [47]:
pred = model.predict(Xtest)
1302/1302 [==============================] - 138s 104ms/step
In [48]:
print(model.evaluate(Xtest,yTest))
1302/1302 [==============================] - 136s 104ms/step - loss: 14.4658
14.465767860412598
In [49]:
y = yTest.copy()
yhat = pred.copy()

# calculate manually
d = y - yhat
mse_f = np.mean(d**2)
mae_f = np.mean(abs(d))
rmse_f = np.sqrt(mse_f)
r2_f = 1-(sum(d**2)/sum((y-np.mean(y))**2))

print("Results by manual calculation:")
print("MAE:",mae_f) #mean absolute error - difference between the original and predicted extracted by avg the abs diff over dataset
print("MSE:", mse_f) #mean squared error - diff btw orig and pred extracted by squared the avg diff over the dataset
print("RMSE:", rmse_f) #root mean squared error - is the error rate by the square root of MSE
print("R-Squared:", r2_f) #coefficient of determination - the higher the better 
Results by manual calculation:
MAE: 2.66739492077735
MSE: 14.465755477077103
RMSE: 3.8033873687907604
R-Squared: [0.83020934]
In [50]:
# Plot true and predicted RUL values
plt.plot(yTest, label = "True RUL", color = "red")
plt.plot(pred, label = "Pred RUL", color = "blue")
plt.legend()
plt.show()
In [51]:
x = list(range(len(yTest)))
plt.scatter(x, yTest, color="blue", label="original")
plt.plot(x, pred, color="red", label="predicted")
plt.legend()
plt.show() 
In [ ]:
 

Fine-tuning

In [52]:
#fine-tune the saved model on the validation split with a fresh Adam optimizer
newModel = load_model('bilstm_predict_rul_experiment8_extended_full_take3_part4.h5')
#adam = optimizers.Adam(lr=0.00001, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=False)
adam = tf.keras.optimizers.Adam(learning_rate=0.001)
newModel.compile(loss='mse', optimizer=adam)
newModel.summary()
epochs = 25

newModel.fit(Xval, yVal, epochs=epochs, batch_size=500)
newModel.save('bilstm_predict_rul_experiment8_extended_full_take3_part4_best.h5')
newModel.summary()
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 bidirectional (Bidirection  (None, 90, 256)           141312    
 al)                                                             
                                                                 
 bidirectional_1 (Bidirecti  (None, 90, 128)           164352    
 onal)                                                           
                                                                 
 bidirectional_2 (Bidirecti  (None, 64)                41216     
 onal)                                                           
                                                                 
 dense (Dense)               (None, 96)                6240      
                                                                 
 dense_1 (Dense)             (None, 128)               12416     
                                                                 
 dense_2 (Dense)             (None, 1)                 129       
                                                                 
=================================================================
Total params: 365665 (1.39 MB)
Trainable params: 365665 (1.39 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
Epoch 1/25
84/84 [==============================] - 78s 773ms/step - loss: 18.8138
Epoch 2/25
84/84 [==============================] - 65s 775ms/step - loss: 15.1091
Epoch 3/25
84/84 [==============================] - 66s 780ms/step - loss: 23.6433
Epoch 4/25
84/84 [==============================] - 65s 779ms/step - loss: 15.5591
Epoch 5/25
84/84 [==============================] - 65s 772ms/step - loss: 12.9244
Epoch 6/25
84/84 [==============================] - 66s 783ms/step - loss: 11.1757
Epoch 7/25
84/84 [==============================] - 65s 775ms/step - loss: 10.3805
Epoch 8/25
84/84 [==============================] - 65s 778ms/step - loss: 9.4869
Epoch 9/25
84/84 [==============================] - 64s 768ms/step - loss: 8.9791
Epoch 10/25
84/84 [==============================] - 65s 776ms/step - loss: 8.6154
Epoch 11/25
84/84 [==============================] - 65s 777ms/step - loss: 8.4584
Epoch 12/25
84/84 [==============================] - 65s 775ms/step - loss: 10.6611
Epoch 13/25
84/84 [==============================] - 65s 767ms/step - loss: 8.3051
Epoch 14/25
84/84 [==============================] - 65s 769ms/step - loss: 9.2540
Epoch 15/25
84/84 [==============================] - 65s 770ms/step - loss: 8.8023
Epoch 16/25
84/84 [==============================] - 65s 775ms/step - loss: 7.9576
Epoch 17/25
84/84 [==============================] - 64s 767ms/step - loss: 6.7822
Epoch 18/25
84/84 [==============================] - 65s 771ms/step - loss: 6.8865
Epoch 19/25
84/84 [==============================] - 64s 767ms/step - loss: 7.3095
Epoch 20/25
84/84 [==============================] - 65s 772ms/step - loss: 7.3847
Epoch 21/25
84/84 [==============================] - 65s 779ms/step - loss: 7.3081
Epoch 22/25
84/84 [==============================] - 65s 775ms/step - loss: 16.4596
Epoch 23/25
84/84 [==============================] - 65s 771ms/step - loss: 14.7284
Epoch 24/25
84/84 [==============================] - 66s 780ms/step - loss: 9.3644
Epoch 25/25
84/84 [==============================] - 65s 778ms/step - loss: 6.8536
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 bidirectional (Bidirection  (None, 90, 256)           141312    
 al)                                                             
                                                                 
 bidirectional_1 (Bidirecti  (None, 90, 128)           164352    
 onal)                                                           
                                                                 
 bidirectional_2 (Bidirecti  (None, 64)                41216     
 onal)                                                           
                                                                 
 dense (Dense)               (None, 96)                6240      
                                                                 
 dense_1 (Dense)             (None, 128)               12416     
                                                                 
 dense_2 (Dense)             (None, 1)                 129       
                                                                 
=================================================================
Total params: 365665 (1.39 MB)
Trainable params: 365665 (1.39 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________

Performance evaluation¶

In [53]:
print(newModel.evaluate(Xtest,yTest))
1302/1302 [==============================] - 135s 102ms/step - loss: 16.8626
16.862579345703125
In [54]:
pred = newModel.predict(Xtest)
1302/1302 [==============================] - 132s 99ms/step
In [55]:
# Plot true and predicted RUL values
plt.plot(yTest, label = "True RUL", color = "red")
plt.plot(pred, label = "Pred RUL", color = "blue")
plt.legend()
plt.show()
In [56]:
x = list(range(len(yTest)))
plt.scatter(x, yTest, color="blue", label="original")
plt.plot(x, pred, color="red", label="predicted")
plt.legend()
plt.show() 
In [57]:
y = yTest.copy()
yhat = pred.copy()
In [58]:
# calculate manually
d = y - yhat
mse_f = np.mean(d**2)
mae_f = np.mean(abs(d))
rmse_f = np.sqrt(mse_f)
r2_f = 1-(sum(d**2)/sum((y-np.mean(y))**2))

print("Results by manual calculation:")
print("MAE:",mae_f) #mean absolute error - difference between the original and predicted extracted by avg the abs diff over dataset
print("MSE:", mse_f) #mean squared error - diff btw orig and pred extracted by squared the avg diff over the dataset
print("RMSE:", rmse_f) #root mean squared error - is the error rate by the square root of MSE
print("R-Squared:", r2_f) #coefficient of determination - the higher the better - in my case >85% after training on the val dataset
Results by manual calculation:
MAE: 2.947035403738186
MSE: 16.86256980900072
RMSE: 4.106405947906358
R-Squared: [0.80207692]
In [59]:
results = pd.DataFrame({'Predicted':pred.flatten()})
results['Actual'] = yTest.flatten()
results
Out[59]:
Predicted Actual
0 14.199033 15
1 13.143090 13
2 4.791924 1
3 14.558222 19
4 13.416628 15
... ... ...
41628 26.579113 23
41629 19.725632 12
41630 21.202875 27
41631 4.445514 3
41632 12.522565 15

41633 rows × 2 columns
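
One more view the results frame makes easy is how the error varies with the true remaining useful life; a short sketch (the abs_err helper column is added here only for this plot):

In [ ]:
# Sketch only: mean absolute error broken down by true RUL value.
results['abs_err'] = (results['Predicted'] - results['Actual']).abs()
mae_by_rul = results.groupby('Actual')['abs_err'].mean()
mae_by_rul.plot(kind='bar')
plt.xlabel('True RUL (days)')
plt.ylabel('MAE (days)')
plt.title('MAE per true RUL value')
plt.show()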