#libraries
import os
from os import listdir
import sys
import json
import csv
from datetime import datetime
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, RepeatedKFold
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn import tree
from sklearn.tree import export_graphviz
import tensorflow as tf
from keras.models import Sequential, load_model
from keras.layers import Dense, LSTM, RNN
import keras.backend as K
from keras import regularizers, optimizers
from six import StringIO
from IPython.display import Image
import pydotplus
np.random.seed(2018)
2023-08-23 13:08:02.770155: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-08-23 13:08:03.655052: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-08-23 13:08:03.661031: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-08-23 13:08:06.240519: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
import pymysql
#establish the connection to the mysql database
host = "192.168.88.187"
port = "3306"
user = "backblaze"
password = "Testing.2023"
database = "backblaze_ml_full"
conn = pymysql.connect(
    host=host,
    port=int(port),
    user=user,
    passwd=password,
    db=database,
    charset='utf8mb4')
sqldf = pd.read_sql_query("select date, serial_number, model, capacity_bytes, days_to_failure, failure, smart_1_normalized, smart_3_normalized, smart_5_normalized, smart_7_normalized, smart_9_normalized, smart_187_normalized, smart_189_normalized, smart_194_normalized, smart_197_normalized from drive_stats where date >= '2014-03-01' and serial_number in (select distinct(serial_number) from drive_stats where failure=1 and date >= '2014-03-01')", conn)
sqldf
/tmp/ipykernel_2225929/1261091465.py:1: UserWarning: pandas only supports SQLAlchemy connectable (engine/connection) or database string URI or sqlite3 DBAPI2 connection. Other DBAPI2 objects are not tested. Please consider using SQLAlchemy.
  sqldf = pd.read_sql_query("select date, serial_number, model, capacity_bytes, days_to_failure, failure, smart_1_normalized, smart_3_normalized, smart_5_normalized, smart_7_normalized, smart_9_normalized, smart_187_normalized, smart_189_normalized, smart_194_normalized, smart_197_normalized from drive_stats where date >= '2014-03-01' and serial_number in (select distinct(serial_number) from drive_stats where failure=1 and date >= '2014-03-01')", conn)
| | date | serial_number | model | capacity_bytes | days_to_failure | failure | smart_1_normalized | smart_3_normalized | smart_5_normalized | smart_7_normalized | smart_9_normalized | smart_187_normalized | smart_189_normalized | smart_194_normalized | smart_197_normalized |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2014-03-01 | MJ1311YNG36USA | Hitachi HDS5C3030ALA630 | 3000592982016 | 991 | 0 | 100.0 | 138.0 | 100.0 | 100.0 | 98.0 | NaN | NaN | 253.0 | 100.0 | 
| 1 | 2014-03-01 | MJ1311YNG733NA | Hitachi HDS5C3030ALA630 | 3000592982016 | 840 | 0 | 100.0 | 100.0 | 100.0 | 100.0 | 98.0 | NaN | NaN | 250.0 | 100.0 | 
| 2 | 2014-03-01 | W3009AX6 | ST4000DM000 | 4000787030016 | 54 | 0 | 119.0 | 91.0 | 100.0 | 87.0 | 93.0 | 100.0 | 99.0 | 26.0 | 100.0 | 
| 3 | 2014-03-01 | WD-WCAV5M690585 | WDC WD10EADS | 1000204886016 | 409 | 0 | 200.0 | 191.0 | 200.0 | 100.0 | 68.0 | NaN | NaN | 127.0 | 200.0 | 
| 4 | 2014-03-01 | S1F0CSW2 | ST3000DM001 | 3000592982016 | 229 | 0 | 114.0 | 92.0 | 100.0 | 89.0 | 84.0 | 100.0 | 100.0 | 23.0 | 100.0 | 
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | 
| 14769522 | 2023-03-31 | 7LZ01G30 | Seagate BarraCuda SSD ZA250CM10002 | 250059350016 | 0 | 0 | 100.0 | NaN | NaN | NaN | 100.0 | NaN | NaN | 83.0 | NaN | 
| 14769523 | 2023-03-31 | 9JG4657T | WDC WUH721414ALE6L4 | 14000519643136 | 0 | 0 | 100.0 | 85.0 | 100.0 | 100.0 | 98.0 | NaN | NaN | 55.0 | 100.0 | 
| 14769524 | 2023-03-31 | 6090A00RFVKG | TOSHIBA MG08ACA16TA | 16000900661248 | 0 | 0 | 100.0 | 100.0 | 100.0 | 100.0 | 87.0 | NaN | NaN | 100.0 | 100.0 | 
| 14769525 | 2023-03-31 | 51R0A2Q8FVGG | TOSHIBA MG08ACA16TE | 16000900661248 | 0 | 0 | 100.0 | 100.0 | 100.0 | 100.0 | 70.0 | NaN | NaN | 100.0 | 100.0 | 
| 14769526 | 2023-03-31 | 7QT032NR | Seagate BarraCuda 120 SSD ZA250CM10003 | 250059350016 | 0 | 0 | 100.0 | NaN | NaN | NaN | 100.0 | NaN | NaN | 96.0 | NaN | 
14769527 rows × 15 columns
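The UserWarning above comes from handing pandas a raw pymysql connection; an optional alternative (a sketch only, not what was run here, assuming sqlalchemy is installed) is to wrap the same credentials in a SQLAlchemy engine:
#optional sketch: give pandas a SQLAlchemy engine instead of the raw pymysql connection (avoids the UserWarning above)
from sqlalchemy import create_engine
engine = create_engine(f"mysql+pymysql://{user}:{password}@{host}:{port}/{database}?charset=utf8mb4")
#sqldf = pd.read_sql_query("select ... from drive_stats ...", engine)   # same query string as above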
def computeDay(group):
  group = group.sort_values('date')    # sort by date, from oldest to most recent
  group['DayToFailure'] = list(range(group.shape[0]-1, -1,-1 ))
  return group
#override the series_to_supervised method to work without classes
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
  n_vars = data.shape[1]
  cols, names = list(), list()
  dataclass = data[data.columns[-1:]]
  data = data.drop(columns= ['serial_number', 'DayToFailure'], axis = 1)
  columns = data.columns
  # input sequence (t-n, ... t-1)  # stop before reaching the current observation t
  for i in range(n_in-1, 0, -1):
    cols.append(data.shift(i))
    names += [(element + '(t-%d)' % (i)) for element in columns]
    
  for i in range(0, n_out):
    cols.append(data.shift(-i))
    if i == 0:
      names += [(element+'(t)') for element in columns]
    else:
      names += [(element +'(t+%d)' % (i)) for element in columns]
  
  cols.append(dataclass)   # append the target column (DayToFailure)
  names += ['DayToFailure']
    
  agg = pd.concat(cols, axis=1)
  agg.columns = names
  if dropnan:
    agg.dropna(inplace=True)
  
  return agg
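A quick way to sanity-check the column naming produced by series_to_supervised is to run it on a tiny made-up group (the toy frame below is purely illustrative):
#illustrative sketch: one fake drive with a single sensor column
toy = pd.DataFrame({'serial_number': ['X'] * 4,
                    's1': [0.1, 0.2, 0.3, 0.4],
                    'DayToFailure': [3, 2, 1, 0]})
print(series_to_supervised(toy, n_in=3, n_out=1))
#expected columns: s1(t-2), s1(t-1), s1(t), DayToFailure (rows with incomplete history are dropped)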
#Preprocessing
df = sqldf.copy()
df = df.drop('model', axis=1)
df = df.drop('capacity_bytes', axis=1)
df.date = pd.to_datetime(df.date, format='%Y-%m-%d').dt.date
scaler = MinMaxScaler(feature_range = (-1,1))
df[['smart_1_normalized', 'smart_3_normalized', 'smart_5_normalized', 'smart_7_normalized',
    'smart_9_normalized', 'smart_187_normalized', 'smart_189_normalized', 'smart_194_normalized',
    'smart_197_normalized']] = scaler.fit_transform(df[['smart_1_normalized', 'smart_3_normalized', 
    'smart_5_normalized', 'smart_7_normalized', 'smart_9_normalized', 'smart_187_normalized', 
    'smart_189_normalized', 'smart_194_normalized', 'smart_197_normalized']])
df
| | date | serial_number | days_to_failure | failure | smart_1_normalized | smart_3_normalized | smart_5_normalized | smart_7_normalized | smart_9_normalized | smart_187_normalized | smart_189_normalized | smart_194_normalized | smart_197_normalized |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2014-03-01 | MJ1311YNG36USA | 991 | 0 | -0.005025 | -0.256831 | -0.211155 | -0.211155 | -0.227092 | NaN | NaN | 1.000000 | -0.211155 | 
| 1 | 2014-03-01 | MJ1311YNG733NA | 840 | 0 | -0.005025 | -0.672131 | -0.211155 | -0.211155 | -0.227092 | NaN | NaN | 0.975104 | -0.211155 | 
| 2 | 2014-03-01 | W3009AX6 | 54 | 0 | 0.185930 | -0.770492 | -0.211155 | -0.314741 | -0.266932 | 1.0 | 0.979798 | -0.883817 | -0.211155 | 
| 3 | 2014-03-01 | WD-WCAV5M690585 | 409 | 0 | 1.000000 | 0.322404 | 0.585657 | -0.211155 | -0.466135 | NaN | NaN | -0.045643 | 0.585657 | 
| 4 | 2014-03-01 | S1F0CSW2 | 229 | 0 | 0.135678 | -0.759563 | -0.211155 | -0.298805 | -0.338645 | 1.0 | 1.000000 | -0.908714 | -0.211155 | 
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | 
| 14769522 | 2023-03-31 | 7LZ01G30 | 0 | 0 | -0.005025 | NaN | NaN | NaN | -0.211155 | NaN | NaN | -0.410788 | NaN | 
| 14769523 | 2023-03-31 | 9JG4657T | 0 | 0 | -0.005025 | -0.836066 | -0.211155 | -0.211155 | -0.227092 | NaN | NaN | -0.643154 | -0.211155 | 
| 14769524 | 2023-03-31 | 6090A00RFVKG | 0 | 0 | -0.005025 | -0.672131 | -0.211155 | -0.211155 | -0.314741 | NaN | NaN | -0.269710 | -0.211155 | 
| 14769525 | 2023-03-31 | 51R0A2Q8FVGG | 0 | 0 | -0.005025 | -0.672131 | -0.211155 | -0.211155 | -0.450199 | NaN | NaN | -0.269710 | -0.211155 | 
| 14769526 | 2023-03-31 | 7QT032NR | 0 | 0 | -0.005025 | NaN | NaN | NaN | -0.211155 | NaN | NaN | -0.302905 | NaN | 
14769527 rows × 13 columns
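If the trained model is ever applied to new telemetry, the exact same scaling has to be reused; one option (a sketch using joblib, not part of the recorded run, filename is made up) is to persist the fitted scaler alongside the model:
#optional sketch: persist the fitted MinMaxScaler so new data can be scaled identically at inference time
import joblib
joblib.dump(scaler, 'minmax_scaler_smart_normalized.pkl')   #hypothetical filename
#scaler = joblib.load('minmax_scaler_smart_normalized.pkl')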
#clean up garbage entries: some drives report a failure on one date and then keep
#reporting measurements after that date -> those later rows need to be removed
test = df.copy()
#test
test2 = pd.DataFrame({'serial_number': test.loc[test['failure'] == 1]['serial_number'],
                      'failure_date': test.loc[test['failure'] == 1]['date']})
#test2
test3 = test.join(test2.set_index('serial_number'), on='serial_number')
#test3
clean = test3.drop(test3[test3['date'] > test3['failure_date']].index)
clean = clean.drop('failure_date', axis=1)
clean
| | date | serial_number | days_to_failure | failure | smart_1_normalized | smart_3_normalized | smart_5_normalized | smart_7_normalized | smart_9_normalized | smart_187_normalized | smart_189_normalized | smart_194_normalized | smart_197_normalized |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2014-03-01 | MJ1311YNG36USA | 991 | 0 | -0.005025 | -0.256831 | -0.211155 | -0.211155 | -0.227092 | NaN | NaN | 1.000000 | -0.211155 | 
| 1 | 2014-03-01 | MJ1311YNG733NA | 840 | 0 | -0.005025 | -0.672131 | -0.211155 | -0.211155 | -0.227092 | NaN | NaN | 0.975104 | -0.211155 | 
| 2 | 2014-03-01 | W3009AX6 | 54 | 0 | 0.185930 | -0.770492 | -0.211155 | -0.314741 | -0.266932 | 1.000000 | 0.979798 | -0.883817 | -0.211155 | 
| 3 | 2014-03-01 | WD-WCAV5M690585 | 409 | 0 | 1.000000 | 0.322404 | 0.585657 | -0.211155 | -0.466135 | NaN | NaN | -0.045643 | 0.585657 | 
| 4 | 2014-03-01 | S1F0CSW2 | 229 | 0 | 0.135678 | -0.759563 | -0.211155 | -0.298805 | -0.338645 | 1.000000 | 1.000000 | -0.908714 | -0.211155 | 
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | 
| 14769333 | 2023-03-30 | 2AGMNB7Y | 0 | 1 | -0.005025 | -0.672131 | -0.211155 | -0.211155 | -0.243028 | NaN | NaN | 0.078838 | -0.211155 | 
| 14769335 | 2023-03-30 | 8HH0KRGH | 0 | 1 | -0.246231 | -0.672131 | -0.211155 | -0.211155 | -0.235060 | NaN | NaN | 0.278008 | -0.211155 | 
| 14769341 | 2023-03-30 | ZLW16KEQ | 0 | 1 | -0.226131 | -0.781421 | -0.211155 | -0.362550 | -0.402390 | 0.979798 | NaN | -0.809129 | -0.211155 | 
| 14769343 | 2023-03-30 | X0GE5KSC | 0 | 1 | -0.005025 | -0.737705 | -0.211155 | -0.211155 | -0.235060 | NaN | NaN | -0.551867 | -0.211155 | 
| 14769346 | 2023-03-30 | 61B0A03NF97G | 0 | 1 | -0.005025 | -0.672131 | -0.211155 | -0.211155 | -0.498008 | NaN | NaN | -0.269710 | -0.211155 | 
14442321 rows × 13 columns
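A small assertion (a sketch, reusing test2 from above) can confirm that the cleanup really removed every measurement reported after a drive's failure date:
#optional sanity check: no remaining row should be dated after the drive's recorded failure date
check = clean.join(test2.set_index('serial_number'), on='serial_number')
assert not (check['date'] > check['failure_date']).any()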
df=clean.copy()
df = df.drop(columns= ['days_to_failure'], axis = 1)
df
| | date | serial_number | failure | smart_1_normalized | smart_3_normalized | smart_5_normalized | smart_7_normalized | smart_9_normalized | smart_187_normalized | smart_189_normalized | smart_194_normalized | smart_197_normalized |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2014-03-01 | MJ1311YNG36USA | 0 | -0.005025 | -0.256831 | -0.211155 | -0.211155 | -0.227092 | NaN | NaN | 1.000000 | -0.211155 | 
| 1 | 2014-03-01 | MJ1311YNG733NA | 0 | -0.005025 | -0.672131 | -0.211155 | -0.211155 | -0.227092 | NaN | NaN | 0.975104 | -0.211155 | 
| 2 | 2014-03-01 | W3009AX6 | 0 | 0.185930 | -0.770492 | -0.211155 | -0.314741 | -0.266932 | 1.000000 | 0.979798 | -0.883817 | -0.211155 | 
| 3 | 2014-03-01 | WD-WCAV5M690585 | 0 | 1.000000 | 0.322404 | 0.585657 | -0.211155 | -0.466135 | NaN | NaN | -0.045643 | 0.585657 | 
| 4 | 2014-03-01 | S1F0CSW2 | 0 | 0.135678 | -0.759563 | -0.211155 | -0.298805 | -0.338645 | 1.000000 | 1.000000 | -0.908714 | -0.211155 | 
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | 
| 14769333 | 2023-03-30 | 2AGMNB7Y | 1 | -0.005025 | -0.672131 | -0.211155 | -0.211155 | -0.243028 | NaN | NaN | 0.078838 | -0.211155 | 
| 14769335 | 2023-03-30 | 8HH0KRGH | 1 | -0.246231 | -0.672131 | -0.211155 | -0.211155 | -0.235060 | NaN | NaN | 0.278008 | -0.211155 | 
| 14769341 | 2023-03-30 | ZLW16KEQ | 1 | -0.226131 | -0.781421 | -0.211155 | -0.362550 | -0.402390 | 0.979798 | NaN | -0.809129 | -0.211155 | 
| 14769343 | 2023-03-30 | X0GE5KSC | 1 | -0.005025 | -0.737705 | -0.211155 | -0.211155 | -0.235060 | NaN | NaN | -0.551867 | -0.211155 | 
| 14769346 | 2023-03-30 | 61B0A03NF97G | 1 | -0.005025 | -0.672131 | -0.211155 | -0.211155 | -0.498008 | NaN | NaN | -0.269710 | -0.211155 | 
14442321 rows × 12 columns
dfHour = df.groupby(['serial_number']).apply(computeDay)
dfHour = dfHour[dfHour.DayToFailure <= 120]
dfHour = dfHour.drop(columns = ['date'])
dfHour= dfHour.drop(columns= ['failure','serial_number'], axis=1)
dfHour=dfHour.reset_index()
dfHour= dfHour.drop(columns= ['level_1'], axis=1)
window=90
print('Creating the sequence')
dfHourSequence =  dfHour.groupby(['serial_number']).apply(series_to_supervised, n_in=window, n_out=1, dropnan=True)
dfHourSequence
Creating the sequence
| | | smart_1_normalized(t-89) | smart_3_normalized(t-89) | smart_5_normalized(t-89) | smart_7_normalized(t-89) | smart_9_normalized(t-89) | smart_187_normalized(t-89) | smart_189_normalized(t-89) | smart_194_normalized(t-89) | smart_197_normalized(t-89) | smart_1_normalized(t-88) | ... | smart_1_normalized(t) | smart_3_normalized(t) | smart_5_normalized(t) | smart_7_normalized(t) | smart_9_normalized(t) | smart_187_normalized(t) | smart_189_normalized(t) | smart_194_normalized(t) | smart_197_normalized(t) | DayToFailure |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| serial_number | ||||||||||||||||||||||
| 5VML01P0 | 107293 | 0.175879 | -0.704918 | -0.211155 | -0.314741 | -0.577689 | 1.0 | 1.0 | -0.883817 | -0.211155 | 0.135678 | ... | 0.165829 | -0.704918 | -0.211155 | -0.314741 | -0.601594 | 1.000000 | 1.0 | -0.875519 | -0.211155 | 31 | 
| 107294 | 0.135678 | -0.704918 | -0.211155 | -0.314741 | -0.577689 | 1.0 | 1.0 | -0.883817 | -0.211155 | 0.185930 | ... | 0.105528 | -0.704918 | -0.211155 | -0.314741 | -0.601594 | 1.000000 | 1.0 | -0.875519 | -0.211155 | 30 | |
| 107295 | 0.185930 | -0.704918 | -0.211155 | -0.314741 | -0.577689 | 1.0 | 1.0 | -0.883817 | -0.211155 | 0.155779 | ... | 0.175879 | -0.704918 | -0.211155 | -0.314741 | -0.601594 | 1.000000 | 1.0 | -0.875519 | -0.211155 | 29 | |
| 107296 | 0.155779 | -0.704918 | -0.211155 | -0.314741 | -0.577689 | 1.0 | 1.0 | -0.883817 | -0.211155 | 0.195980 | ... | 0.135678 | -0.704918 | -0.211155 | -0.314741 | -0.601594 | 1.000000 | 1.0 | -0.875519 | -0.211155 | 28 | |
| 107297 | 0.195980 | -0.704918 | -0.211155 | -0.314741 | -0.577689 | 1.0 | 1.0 | -0.875519 | -0.211155 | 0.165829 | ... | 0.175879 | -0.704918 | -0.211155 | -0.314741 | -0.601594 | 1.000000 | 1.0 | -0.875519 | -0.211155 | 27 | |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | 
| ZTT3STWF | 1987895 | -0.206030 | -0.693989 | -0.211155 | -0.338645 | -0.258964 | 1.0 | 1.0 | -0.941909 | -0.211155 | -0.206030 | ... | -0.507538 | -0.693989 | -0.211155 | -0.330677 | -0.274900 | 0.414141 | 1.0 | -0.950207 | -0.211155 | 4 | 
| 1987896 | -0.206030 | -0.693989 | -0.211155 | -0.338645 | -0.258964 | 1.0 | 1.0 | -0.941909 | -0.211155 | -0.236181 | ... | -0.507538 | -0.693989 | -0.211155 | -0.330677 | -0.274900 | 0.414141 | 1.0 | -0.950207 | -0.211155 | 3 | |
| 1987897 | -0.236181 | -0.693989 | -0.211155 | -0.338645 | -0.258964 | 1.0 | 1.0 | -0.933610 | -0.211155 | -0.216080 | ... | -0.507538 | -0.693989 | -0.211155 | -0.330677 | -0.274900 | 0.414141 | 1.0 | -0.950207 | -0.211155 | 2 | |
| 1987898 | -0.216080 | -0.693989 | -0.211155 | -0.338645 | -0.258964 | 1.0 | 1.0 | -0.941909 | -0.211155 | -0.206030 | ... | -0.507538 | -0.693989 | -0.211155 | -0.330677 | -0.274900 | 0.414141 | 1.0 | -0.950207 | -0.211155 | 1 | |
| 1987899 | -0.206030 | -0.693989 | -0.211155 | -0.338645 | -0.258964 | 1.0 | 1.0 | -0.941909 | -0.211155 | -0.185930 | ... | -0.507538 | -0.693989 | -0.211155 | -0.330677 | -0.274900 | 0.414141 | 1.0 | -0.950207 | -0.211155 | 0 | 
277551 rows × 811 columns
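The lagged frame has 811 columns and roughly 278k rows, so memory can become a concern; one mitigation (a sketch, not applied in the run recorded here) is to downcast the feature columns to float32 before splitting:
#optional sketch: downcast the lagged feature columns to float32 to roughly halve the memory footprint
feature_cols = dfHourSequence.columns[:-1]
dfHourSequence[feature_cols] = dfHourSequence[feature_cols].astype(np.float32)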
print('Dividing into train test')
X_train, X_rim, y_train, y_rim = train_test_split(dfHourSequence[dfHourSequence.columns[:-1]], 
                                                  dfHourSequence[dfHourSequence.columns[-1:]] ,
                                                  stratify=dfHourSequence[dfHourSequence.columns[-1:]], 
                                                  test_size=0.30)
Dividing into train test
print(y_train)
print(y_train.columns)
                       DayToFailure
serial_number
W300T3EK 687360                   6
Z305DHV4 1231919                 11
ZA17ZFEW 1446046                 11
Z302SWXT 1021026                  5
S301KQQX 554027                  26
...                             ...
Z304TK95 1166742                  6
Z305FNVM 1240279                  0
Z303N1QB 1052406                 10
W300R8BD 683889                   3
Z302B11W 999139                  29

[194285 rows x 1 columns]
Index(['DayToFailure'], dtype='object')
X_val, X_test, y_val, y_test = train_test_split(X_rim, y_rim ,stratify=y_rim, test_size=0.50)
X_train = pd.concat([X_train, pd.DataFrame(columns = ['DayToFailure'])], sort = True)
X_val = pd.concat([X_val,  pd.DataFrame(columns = ['DayToFailure'])], sort = True)
X_test = pd.concat([X_test, pd.DataFrame(columns = ['DayToFailure'])], sort = True)
X_train[['DayToFailure']] = y_train.values
X_val[['DayToFailure']] = y_val.values
X_test[['DayToFailure']] = y_test.values
X_train
#X_val
#X_test
| | | DayToFailure | smart_187_normalized(t) | smart_187_normalized(t-1) | smart_187_normalized(t-10) | smart_187_normalized(t-11) | smart_187_normalized(t-12) | smart_187_normalized(t-13) | smart_187_normalized(t-14) | smart_187_normalized(t-15) | smart_187_normalized(t-16) | ... | smart_9_normalized(t-81) | smart_9_normalized(t-82) | smart_9_normalized(t-83) | smart_9_normalized(t-84) | smart_9_normalized(t-85) | smart_9_normalized(t-86) | smart_9_normalized(t-87) | smart_9_normalized(t-88) | smart_9_normalized(t-89) | smart_9_normalized(t-9) |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| W300T3EK | 687360 | 6 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | ... | -0.219124 | -0.219124 | -0.219124 | -0.219124 | -0.219124 | -0.219124 | -0.219124 | -0.219124 | -0.219124 | -0.235060 | 
| Z305DHV4 | 1231919 | 11 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | ... | -0.458167 | -0.458167 | -0.458167 | -0.458167 | -0.458167 | -0.458167 | -0.458167 | -0.458167 | -0.458167 | -0.474104 | 
| ZA17ZFEW | 1446046 | 11 | -0.878788 | -0.878788 | -0.878788 | -0.878788 | -0.878788 | -0.878788 | -0.878788 | -0.878788 | -0.878788 | ... | -0.617530 | -0.617530 | -0.617530 | -0.617530 | -0.617530 | -0.617530 | -0.617530 | -0.617530 | -0.617530 | -0.633466 | 
| Z302SWXT | 1021026 | 5 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | ... | -0.314741 | -0.314741 | -0.314741 | -0.314741 | -0.314741 | -0.314741 | -0.314741 | -0.314741 | -0.314741 | -0.330677 | 
| S301KQQX | 554027 | 26 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | ... | -0.314741 | -0.314741 | -0.314741 | -0.314741 | -0.314741 | -0.314741 | -0.314741 | -0.314741 | -0.314741 | -0.330677 | 
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | 
| Z304TK95 | 1166742 | 6 | 0.252525 | 0.252525 | 0.252525 | 0.252525 | 0.252525 | 0.252525 | 0.252525 | 0.252525 | 0.272727 | ... | -0.442231 | -0.442231 | -0.442231 | -0.442231 | -0.442231 | -0.442231 | -0.442231 | -0.442231 | -0.442231 | -0.458167 | 
| Z305FNVM | 1240279 | 0 | -0.131313 | 0.272727 | 0.454545 | 0.575758 | 0.575758 | 0.575758 | 0.575758 | 0.575758 | 0.575758 | ... | -0.593625 | -0.593625 | -0.593625 | -0.593625 | -0.593625 | -0.593625 | -0.585657 | -0.585657 | -0.585657 | -0.609562 | 
| Z303N1QB | 1052406 | 10 | 0.797980 | 0.797980 | 0.797980 | 0.797980 | 0.797980 | 0.797980 | 0.797980 | 0.797980 | 0.797980 | ... | -0.466135 | -0.466135 | -0.466135 | -0.466135 | -0.466135 | -0.466135 | -0.466135 | -0.466135 | -0.466135 | -0.482072 | 
| W300R8BD | 683889 | 3 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | ... | -0.426295 | -0.426295 | -0.426295 | -0.426295 | -0.426295 | -0.426295 | -0.426295 | -0.418327 | -0.418327 | -0.442231 | 
| Z302B11W | 999139 | 29 | 0.878788 | 0.878788 | 0.878788 | 0.878788 | 0.878788 | 0.878788 | 0.878788 | 0.878788 | 0.878788 | ... | -0.458167 | -0.458167 | -0.458167 | -0.458167 | -0.458167 | -0.458167 | -0.458167 | -0.458167 | -0.458167 | -0.474104 | 
194285 rows × 811 columns
X_train.shape
(194285, 811)
X_val.shape
(41633, 811)
Xtrain = X_train.copy()
Xtrain = Xtrain.drop(columns=['DayToFailure'], axis=1 )
Xtrain.shape
#Xtrain
(194285, 810)
Xval = X_val.copy()
Xval = Xval.drop(columns=['DayToFailure'], axis=1 )
Xval.shape
(41633, 810)
yTest = X_test[['DayToFailure']].values
#yTest
Xtest = X_test.drop(columns=['DayToFailure'], axis=1 )
#Xtest
#reshape with window
Xtrain = Xtrain.values.reshape(Xtrain.shape[0], window, int(Xtrain.shape[1]/window))
Xval = Xval.values.reshape(Xval.shape[0], window, int(Xval.shape[1]/window))
Xtest= Xtest.values.reshape(Xtest.shape[0], window, int(Xtest.shape[1]/window))
ytrain = X_train[['DayToFailure']].values
yVal = X_val[['DayToFailure']].values
print(Xtrain.shape)
print(Xval.shape)
print(Xtest.shape)
print(ytrain.shape)
print(yVal.shape)
print(yTest.shape)
(194285, 90, 9)
(41633, 90, 9)
(41633, 90, 9)
(194285, 1)
(41633, 1)
(41633, 1)
from tensorflow.keras.layers import Bidirectional
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
import keras
#same as experiment6 - BiLSTM
def build_model():
  dp_lvl = 0.1
  model = Sequential()
  model.add(Bidirectional(LSTM(128, input_shape=(window, 9), return_sequences =  True, activation = "tanh" )))
  model.add(Bidirectional(LSTM(64, return_sequences =  True, activation = "tanh" )))
  model.add(Bidirectional(LSTM(32, activation="tanh")))
  model.add(Dense(96, activation='relu'))
  model.add(Dense(128, activation='relu'))
  model.add(Dense(1))
  return model
#same as experiment6 - LSTM
epochs = 150
historyvet =[]
model = build_model()    
best_acc= 0.0
#adam = optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0,amsgrad=False)
adam = tf.keras.optimizers.legacy.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0,amsgrad=False)
#adam=tf.keras.optimizers.Adam(learning_rate=0.001)
model.compile(loss='mse', optimizer=adam)
for epoch in range(0, epochs):
  print('Epoch {%d}' %(epoch))
  #model.fit(Xtrain, ytrain, epochs=1, batch_size=500, validation_data= (Xval,yVal), shuffle=True)
  history = model.fit(Xtrain, ytrain, epochs=1, batch_size=500, validation_data= (Xval,yVal))
  historyvet.append(history.history)
    
model.save('bilstm_predict_rul_experiment8_extended_full_take3.h5')
model.summary()
/usr/local/lib/python3.10/dist-packages/keras/src/optimizers/legacy/adam.py:118: UserWarning: The `lr` argument is deprecated, use `learning_rate` instead.
  super().__init__(name, **kwargs)
Epoch {0}
389/389 [==============================] - 340s 841ms/step - loss: 92.4284 - val_loss: 85.2481
Epoch {1}
389/389 [==============================] - 321s 825ms/step - loss: 85.1641 - val_loss: 85.7237
Epoch {2}
389/389 [==============================] - 323s 829ms/step - loss: 84.6595 - val_loss: 83.9631
Epoch {3}
194/389 [=============>................] - ETA: 2:26 - loss: 84.2638
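The loop above calls fit() one epoch at a time so the accumulated history survives interruptions; an alternative with the same intent (a sketch only, not what produced the logs above, filename is made up) is a single fit() call with checkpointing and early-stopping callbacks:
#alternative sketch: one fit() call, saving the best weights by validation loss
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
callbacks = [ModelCheckpoint('bilstm_rul_best.h5', monitor='val_loss', save_best_only=True),   #hypothetical filename
             EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True)]
#history = model.fit(Xtrain, ytrain, epochs=150, batch_size=500,
#                    validation_data=(Xval, yVal), callbacks=callbacks)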
lossTrain=[]
lossval=[]
accTrain = []
accVal =[]
for element in historyvet:
   lossTrain.append(element['loss'][0])
   lossval.append(element['val_loss'][0])
   #accTrain.append(element['accuracy'][0])
   #accVal.append(element['val_accuracy'][0])
  
plt.plot(lossTrain, color='g')
plt.plot(lossval, color='r')
plt.title('model loss')
plt.xlabel('epoch')
plt.ylabel('Loss')
plt.legend(['train', 'validation'], loc='upper right')
plt.show()
#skip this cell if the previous one finished and you are continuing in the same session; otherwise (e.g. if the notebook was disconnected during an overnight run) run it to reload the saved model
model = load_model( 'bilstm_predict_rul_experiment8_extended_full_take3.h5')
print(model.evaluate(Xtest,yTest))
1302/1302 [==============================] - 143s 108ms/step - loss: 67.4307
67.43070220947266
pred = model.predict(Xtest)
1302/1302 [==============================] - 138s 104ms/step
y = yTest.copy()
yhat = pred.copy()
# calculate manually
d = y - yhat
mse_f = np.mean(d**2)
mae_f = np.mean(abs(d))
rmse_f = np.sqrt(mse_f)
r2_f = 1-(sum(d**2)/sum((y-np.mean(y))**2))
print("Results by manual calculation:")
print("MAE:",mae_f) #mean absolute error - difference between the original and predicted extracted by avg the abs diff over dataset
print("MSE:", mse_f) #mean squared error - diff btw orig and pred extracted by squared the avg diff over the dataset
print("RMSE:", rmse_f) #root mean squared error - is the error rate by the square root of MSE
print("R-Squared:", r2_f) #coefficient of determination - the higher the better 
Results by manual calculation:
MAE: 6.714504805846707
MSE: 67.43068458432414
RMSE: 8.211618877196148
R-Squared: [0.20853765]
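The manual figures can be cross-checked with the sklearn helpers already imported at the top; note that R-Squared prints as a one-element array only because y has shape (n, 1), while r2_score returns a plain scalar:
#cross-check of the manual metrics with scikit-learn (same y and yhat as above)
print("MAE:", mean_absolute_error(y, yhat))
print("MSE:", mean_squared_error(y, yhat))
print("RMSE:", np.sqrt(mean_squared_error(y, yhat)))
print("R-Squared:", r2_score(y, yhat))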
More training, since the loss curve suggests there is still room for improvement
#another 150 epochs to train
historyvet =[]
model = load_model( 'bilstm_predict_rul_experiment8_extended_full_take3.h5')
epochs = 150
for epoch in range(0, epochs):
  print('Epoch {%d}' %(epoch))
  history = model.fit(Xtrain, ytrain, epochs=1, batch_size=500, validation_data= (Xval,yVal), shuffle=True)
  historyvet.append(history.history)
model.save('bilstm_predict_rul_experiment8_extended_full_take3_part2.h5')
model.summary()
Epoch {0}
389/389 [==============================] - 343s 862ms/step - loss: 64.2593 - val_loss: 68.3754
Epoch {1}
389/389 [==============================] - 332s 852ms/step - loss: 63.3964 - val_loss: 67.4172
Epoch {2}
389/389 [==============================] - 329s 847ms/step - loss: 63.3546 - val_loss: 67.3026
Epoch {3}
389/389 [==============================] - 332s 854ms/step - loss: 62.9733 - val_loss: 67.0207
Epoch {4}
389/389 [==============================] - 333s 856ms/step - loss: 62.8247 - val_loss: 67.2608
Epoch {5}
389/389 [==============================] - 329s 846ms/step - loss: 62.2602 - val_loss: 67.5695
Epoch {6}
389/389 [==============================] - 336s 862ms/step - loss: 62.5081 - val_loss: 66.4262
Epoch {7}
389/389 [==============================] - 332s 855ms/step - loss: 62.3149 - val_loss: 66.7445
Epoch {8}
389/389 [==============================] - 337s 866ms/step - loss: 61.9950 - val_loss: 66.2608
Epoch {9}
389/389 [==============================] - 334s 859ms/step - loss: 61.4956 - val_loss: 65.5214
Epoch {10}
389/389 [==============================] - 334s 859ms/step - loss: 61.2620 - val_loss: 65.0429
Epoch {11}
389/389 [==============================] - 334s 859ms/step - loss: 61.1446 - val_loss: 65.6227
Epoch {12}
389/389 [==============================] - 327s 841ms/step - loss: 60.8006 - val_loss: 65.3122
Epoch {13}
389/389 [==============================] - 331s 851ms/step - loss: 61.2606 - val_loss: 64.9975
Epoch {14}
389/389 [==============================] - 328s 842ms/step - loss: 60.2370 - val_loss: 64.4013
Epoch {15}
389/389 [==============================] - 329s 847ms/step - loss: 60.1839 - val_loss: 64.5551
Epoch {16}
389/389 [==============================] - 330s 849ms/step - loss: 59.7568 - val_loss: 64.6951
Epoch {17}
389/389 [==============================] - 332s 854ms/step - loss: 59.8941 - val_loss: 63.5398
Epoch {18}
389/389 [==============================] - 335s 862ms/step - loss: 59.6660 - val_loss: 64.0032
Epoch {19}
389/389 [==============================] - 332s 853ms/step - loss: 59.0889 - val_loss: 64.5364
Epoch {20}
389/389 [==============================] - 333s 857ms/step - loss: 58.7660 - val_loss: 63.8183
Epoch {21}
389/389 [==============================] - 333s 855ms/step - loss: 59.8411 - val_loss: 64.7533
Epoch {22}
389/389 [==============================] - 331s 852ms/step - loss: 58.6592 - val_loss: 63.8076
Epoch {23}
389/389 [==============================] - 332s 855ms/step - loss: 58.1071 - val_loss: 63.0844
Epoch {24}
389/389 [==============================] - 331s 852ms/step - loss: 57.9922 - val_loss: 63.0138
Epoch {25}
389/389 [==============================] - 334s 860ms/step - loss: 58.1064 - val_loss: 63.5760
Epoch {26}
389/389 [==============================] - 333s 856ms/step - loss: 57.3484 - val_loss: 62.2100
Epoch {27}
389/389 [==============================] - 332s 853ms/step - loss: 57.0489 - val_loss: 61.7560
Epoch {28}
389/389 [==============================] - 333s 856ms/step - loss: 56.7409 - val_loss: 61.9766
Epoch {29}
389/389 [==============================] - 329s 846ms/step - loss: 56.1090 - val_loss: 60.9149
Epoch {30}
389/389 [==============================] - 336s 864ms/step - loss: 56.4711 - val_loss: 60.8571
Epoch {31}
389/389 [==============================] - 331s 851ms/step - loss: 56.0427 - val_loss: 61.6544
Epoch {32}
389/389 [==============================] - 330s 849ms/step - loss: 57.5427 - val_loss: 66.4305
Epoch {33}
389/389 [==============================] - 330s 848ms/step - loss: 56.3025 - val_loss: 59.7977
Epoch {34}
389/389 [==============================] - 328s 842ms/step - loss: 55.1212 - val_loss: 61.3966
Epoch {35}
389/389 [==============================] - 328s 843ms/step - loss: 54.9564 - val_loss: 59.9022
Epoch {36}
389/389 [==============================] - 327s 841ms/step - loss: 54.4106 - val_loss: 60.0068
Epoch {37}
389/389 [==============================] - 329s 845ms/step - loss: 54.4488 - val_loss: 59.5027
Epoch {38}
389/389 [==============================] - 330s 847ms/step - loss: 53.9621 - val_loss: 58.7557
Epoch {39}
389/389 [==============================] - 326s 839ms/step - loss: 53.7698 - val_loss: 58.6915
Epoch {40}
389/389 [==============================] - 327s 841ms/step - loss: 53.8168 - val_loss: 61.1053
Epoch {41}
389/389 [==============================] - 326s 838ms/step - loss: 54.2586 - val_loss: 61.5899
Epoch {42}
389/389 [==============================] - 329s 847ms/step - loss: 53.1145 - val_loss: 58.5002
Epoch {43}
389/389 [==============================] - 327s 841ms/step - loss: 52.4294 - val_loss: 58.4195
Epoch {44}
389/389 [==============================] - 327s 841ms/step - loss: 52.4108 - val_loss: 57.0662
Epoch {45}
389/389 [==============================] - 327s 840ms/step - loss: 52.5267 - val_loss: 57.3894
Epoch {46}
389/389 [==============================] - 327s 840ms/step - loss: 51.9211 - val_loss: 57.6161
Epoch {47}
389/389 [==============================] - 328s 844ms/step - loss: 52.1283 - val_loss: 56.9461
Epoch {48}
389/389 [==============================] - 327s 840ms/step - loss: 51.1085 - val_loss: 56.3701
Epoch {49}
389/389 [==============================] - 330s 848ms/step - loss: 50.8462 - val_loss: 57.8374
Epoch {50}
389/389 [==============================] - 326s 839ms/step - loss: 51.0079 - val_loss: 56.1585
Epoch {51}
389/389 [==============================] - 328s 842ms/step - loss: 50.0825 - val_loss: 56.3187
Epoch {52}
389/389 [==============================] - 325s 837ms/step - loss: 50.1658 - val_loss: 56.2422
Epoch {53}
389/389 [==============================] - 327s 841ms/step - loss: 49.6024 - val_loss: 55.4322
Epoch {54}
389/389 [==============================] - 329s 846ms/step - loss: 49.7728 - val_loss: 54.7449
Epoch {55}
389/389 [==============================] - 326s 838ms/step - loss: 49.6413 - val_loss: 54.7127
Epoch {56}
389/389 [==============================] - 329s 845ms/step - loss: 48.9819 - val_loss: 55.4102
Epoch {57}
389/389 [==============================] - 325s 836ms/step - loss: 48.8032 - val_loss: 55.7070
Epoch {58}
389/389 [==============================] - 326s 839ms/step - loss: 48.5199 - val_loss: 56.3114
Epoch {59}
389/389 [==============================] - 328s 844ms/step - loss: 47.7044 - val_loss: 54.0407
Epoch {60}
389/389 [==============================] - 327s 841ms/step - loss: 47.6278 - val_loss: 52.7045
Epoch {61}
389/389 [==============================] - 333s 855ms/step - loss: 47.4723 - val_loss: 53.4739
Epoch {62}
389/389 [==============================] - 326s 837ms/step - loss: 47.1001 - val_loss: 56.5136
Epoch {63}
389/389 [==============================] - 328s 842ms/step - loss: 48.0536 - val_loss: 55.6724
Epoch {64}
389/389 [==============================] - 329s 845ms/step - loss: 46.8253 - val_loss: 53.2679
Epoch {65}
389/389 [==============================] - 327s 841ms/step - loss: 46.7869 - val_loss: 52.4258
Epoch {66}
389/389 [==============================] - 328s 844ms/step - loss: 46.4162 - val_loss: 54.8050
Epoch {67}
389/389 [==============================] - 325s 836ms/step - loss: 45.9856 - val_loss: 52.2177
Epoch {68}
389/389 [==============================] - 326s 838ms/step - loss: 44.9784 - val_loss: 52.2360
Epoch {69}
389/389 [==============================] - ETA: 0s - loss: 44.7546
lossTrain=[]
lossval=[]
accTrain = []
accVal =[]
for element in historyvet:
   lossTrain.append(element['loss'][0])
   lossval.append(element['val_loss'][0])
   #accTrain.append(element['accuracy'][0])
   #accVal.append(element['val_accuracy'][0])
  
plt.plot(lossTrain, color='g')
plt.plot(lossval, color='r')
plt.title('model loss')
plt.xlabel('epoch')
plt.ylabel('Loss')
plt.legend(['train', 'validation'], loc='upper right')
plt.show()
pred = model.predict(Xtest)
1302/1302 [==============================] - 125s 94ms/step
print(model.evaluate(Xtest,yTest))
1302/1302 [==============================] - 126s 96ms/step - loss: 33.3588
33.35878372192383
y = yTest.copy()
yhat = pred.copy()
# calculate manually
d = y - yhat
mse_f = np.mean(d**2)
mae_f = np.mean(abs(d))
rmse_f = np.sqrt(mse_f)
r2_f = 1-(sum(d**2)/sum((y-np.mean(y))**2))
print("Results by manual calculation:")
print("MAE:",mae_f) #mean absolute error - difference between the original and predicted extracted by avg the abs diff over dataset
print("MSE:", mse_f) #mean squared error - diff btw orig and pred extracted by squared the avg diff over the dataset
print("RMSE:", rmse_f) #root mean squared error - is the error rate by the square root of MSE
print("R-Squared:", r2_f) #coefficient of determination - the higher the better 
Results by manual calculation:
MAE: 4.252981828318384
MSE: 33.35878218176548
RMSE: 5.775706206323646
R-Squared: [0.60845392]
# Plot true and predicted RUL values
plt.plot(yTest, label = "True RUL", color = "red")
plt.plot(pred, label = "Pred RUL", color = "blue")
plt.legend()
plt.show()
x = list(range(len(yTest)))
plt.scatter(x, yTest, color="blue", label="original")
plt.plot(x, pred, color="red", label="predicted")
plt.legend()
plt.show() 
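With roughly 41k test points the overlaid line plots above are hard to read; a predicted-vs-true scatter with the identity line (a sketch, not part of the recorded run) can be a clearer diagnostic:
#alternative diagnostic sketch: predicted vs. true RUL, perfect predictions fall on the y = x line
plt.scatter(yTest.ravel(), pred.ravel(), s=2, alpha=0.3)
lims = [0, max(yTest.max(), pred.max())]
plt.plot(lims, lims, color='black')
plt.xlabel('True RUL (days)')
plt.ylabel('Predicted RUL (days)')
plt.show()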
y = yTest.copy()
yhat = pred.copy()
# calculate manually
d = y - yhat
mse_f = np.mean(d**2)
mae_f = np.mean(abs(d))
rmse_f = np.sqrt(mse_f)
r2_f = 1-(sum(d**2)/sum((y-np.mean(y))**2))
print("Results by manual calculation:")
print("MAE:",mae_f) #mean absolute error - difference between the original and predicted extracted by avg the abs diff over dataset
print("MSE:", mse_f) #mean squared error - diff btw orig and pred extracted by squared the avg diff over the dataset
print("RMSE:", rmse_f) #root mean squared error - is the error rate by the square root of MSE
print("R-Squared:", r2_f) #coefficient of determination - the higher the better 
Results by manual calculation:
MAE: 4.252981828318384
MSE: 33.35878218176548
RMSE: 5.775706206323646
R-Squared: [0.60845392]
#another 150 epochs to train
historyvet =[]
model = load_model( 'bilstm_predict_rul_experiment8_extended_full_take3_part2.h5')
epochs = 150
for epoch in range(0, epochs):
  print('Epoch {%d}' %(epoch))
  history = model.fit(Xtrain, ytrain, epochs=1, batch_size=500, validation_data= (Xval,yVal), shuffle=True)
  historyvet.append(history.history)
model.save('bilstm_predict_rul_experiment8_extended_full_take3_part3.h5')
model.summary()
Epoch {0}
389/389 [==============================] - 338s 853ms/step - loss: 25.8730 - val_loss: 36.7638
Epoch {1}
389/389 [==============================] - 330s 847ms/step - loss: 28.3172 - val_loss: 33.4057
Epoch {2}
389/389 [==============================] - 328s 842ms/step - loss: 27.1991 - val_loss: 35.4056
Epoch {3}
389/389 [==============================] - 328s 844ms/step - loss: 27.9181 - val_loss: 34.8644
Epoch {4}
389/389 [==============================] - 328s 843ms/step - loss: 26.4577 - val_loss: 37.4612
Epoch {5}
389/389 [==============================] - 328s 843ms/step - loss: 26.3098 - val_loss: 35.6996
Epoch {6}
389/389 [==============================] - 331s 851ms/step - loss: 24.5580 - val_loss: 30.9044
Epoch {7}
389/389 [==============================] - 328s 843ms/step - loss: 23.9100 - val_loss: 36.0782
Epoch {8}
389/389 [==============================] - 331s 851ms/step - loss: 40.0271 - val_loss: 38.7793
Epoch {9}
389/389 [==============================] - 326s 838ms/step - loss: 27.4546 - val_loss: 36.4517
Epoch {10}
389/389 [==============================] - 328s 843ms/step - loss: 27.2428 - val_loss: 33.3448
Epoch {11}
389/389 [==============================] - 328s 843ms/step - loss: 37.9628 - val_loss: 64.6191
Epoch {12}
389/389 [==============================] - 327s 840ms/step - loss: 45.6755 - val_loss: 42.1760
Epoch {13}
389/389 [==============================] - 330s 849ms/step - loss: 29.6218 - val_loss: 33.5977
Epoch {14}
389/389 [==============================] - 326s 838ms/step - loss: 25.4958 - val_loss: 33.2421
Epoch {15}
389/389 [==============================] - 329s 845ms/step - loss: 30.3199 - val_loss: 45.8646
Epoch {16}
389/389 [==============================] - 327s 840ms/step - loss: 34.3555 - val_loss: 38.6681
Epoch {17}
389/389 [==============================] - 328s 842ms/step - loss: 29.1423 - val_loss: 53.5323
Epoch {18}
389/389 [==============================] - 328s 843ms/step - loss: 27.6896 - val_loss: 32.0927
Epoch {19}
389/389 [==============================] - 328s 844ms/step - loss: 23.5051 - val_loss: 33.8288
Epoch {20}
389/389 [==============================] - 329s 845ms/step - loss: 24.4657 - val_loss: 33.2159
Epoch {21}
389/389 [==============================] - 328s 842ms/step - loss: 25.3128 - val_loss: 33.6188
Epoch {22}
389/389 [==============================] - 329s 845ms/step - loss: 24.3259 - val_loss: 32.3556
Epoch {23}
389/389 [==============================] - 326s 839ms/step - loss: 22.8256 - val_loss: 49.4982
Epoch {24}
389/389 [==============================] - 329s 846ms/step - loss: 24.0169 - val_loss: 28.7486
Epoch {25}
389/389 [==============================] - 328s 844ms/step - loss: 48.0929 - val_loss: 59.0893
Epoch {26}
389/389 [==============================] - 325s 836ms/step - loss: 29.6776 - val_loss: 30.8032
Epoch {27}
389/389 [==============================] - 328s 843ms/step - loss: 23.2279 - val_loss: 30.0722
Epoch {28}
389/389 [==============================] - 327s 842ms/step - loss: 21.6707 - val_loss: 31.5854
Epoch {29}
389/389 [==============================] - 328s 842ms/step - loss: 21.3462 - val_loss: 31.1337
Epoch {30}
389/389 [==============================] - 328s 842ms/step - loss: 22.2737 - val_loss: 30.3173
Epoch {31}
389/389 [==============================] - 327s 841ms/step - loss: 23.2598 - val_loss: 30.5546
Epoch {32}
389/389 [==============================] - 330s 849ms/step - loss: 21.1547 - val_loss: 30.9380
Epoch {33}
389/389 [==============================] - 328s 843ms/step - loss: 22.6806 - val_loss: 29.1014
Epoch {34}
389/389 [==============================] - 327s 840ms/step - loss: 20.5276 - val_loss: 28.6941
Epoch {35}
389/389 [==============================] - 328s 842ms/step - loss: 20.4221 - val_loss: 27.7424
Epoch {36}
389/389 [==============================] - 325s 836ms/step - loss: 25.7414 - val_loss: 32.6389
Epoch {37}
389/389 [==============================] - 328s 842ms/step - loss: 20.8044 - val_loss: 39.1481
Epoch {38}
389/389 [==============================] - 329s 845ms/step - loss: 21.4919 - val_loss: 28.2595
Epoch {39}
389/389 [==============================] - 326s 838ms/step - loss: 22.0989 - val_loss: 29.1410
Epoch {40}
389/389 [==============================] - 330s 847ms/step - loss: 20.5530 - val_loss: 27.6230
Epoch {41}
389/389 [==============================] - 326s 838ms/step - loss: 21.6128 - val_loss: 30.0240
Epoch {42}
389/389 [==============================] - 326s 838ms/step - loss: 21.1913 - val_loss: 28.2491
Epoch {43}
124/389 [========>.....................] - ETA: 3:22 - loss: 18.5470
IOPub message rate exceeded. The notebook server will temporarily stop sending output to the client in order to avoid crashing it. To change this limit, set the config variable `--NotebookApp.iopub_msg_rate_limit`. Current values: NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec) NotebookApp.rate_limit_window=3.0 (secs)
389/389 [==============================] - 331s 850ms/step - loss: 15.3658 - val_loss: 22.2062
Epoch {90}
389/389 [==============================] - 329s 846ms/step - loss: 14.1163 - val_loss: 23.0011
Epoch {91}
389/389 [==============================] - 330s 848ms/step - loss: 14.3330 - val_loss: 22.2124
Epoch {92}
100/389 [======>.......................] - ETA: 3:49 - loss: 13.7512
lossTrain=[]
lossval=[]
accTrain = []
accVal =[]
for element in historyvet:
   lossTrain.append(element['loss'][0])
   lossval.append(element['val_loss'][0])
   #accTrain.append(element['accuracy'][0])
   #accVal.append(element['val_accuracy'][0])
  
plt.plot(lossTrain, color='g')
plt.plot(lossval, color='r')
plt.title('model loss')
plt.xlabel('epoch')
plt.ylabel('Loss')
plt.legend(['train', 'validation'], loc='upper right')
plt.show()
pred = model.predict(Xtest)
1302/1302 [==============================] - 135s 101ms/step
print(model.evaluate(Xtest,yTest))
1302/1302 [==============================] - 133s 102ms/step - loss: 17.9300
17.929988861083984
y = yTest.copy()
yhat = pred.copy()
# calculate manually
d = y - yhat
mse_f = np.mean(d**2)
mae_f = np.mean(abs(d))
rmse_f = np.sqrt(mse_f)
r2_f = 1-(sum(d**2)/sum((y-np.mean(y))**2))
print("Results by manual calculation:")
print("MAE:",mae_f) #mean absolute error - difference between the original and predicted extracted by avg the abs diff over dataset
print("MSE:", mse_f) #mean squared error - diff btw orig and pred extracted by squared the avg diff over the dataset
print("RMSE:", rmse_f) #root mean squared error - is the error rate by the square root of MSE
print("R-Squared:", r2_f) #coefficient of determination - the higher the better 
Results by manual calculation:
MAE: 3.026280620487356
MSE: 17.929977768579597
RMSE: 4.234380446839844
R-Squared: [0.7895483]
# Plot true and predicted RUL values
plt.plot(yTest, label = "True RUL", color = "red")
plt.plot(pred, label = "Pred RUL", color = "blue")
plt.legend()
plt.show()
x = list(range(len(yTest)))
plt.scatter(x, yTest, color="blue", label="original")
plt.plot(x, pred, color="red", label="predicted")
plt.legend()
plt.show() 
#another 50 epochs to train
historyvet =[]
model = load_model( 'bilstm_predict_rul_experiment8_extended_full_take3_part3.h5')
epochs = 50
for epoch in range(0, epochs):
  print('Epoch {%d}' %(epoch))
  history = model.fit(Xtrain, ytrain, epochs=1, batch_size=500, validation_data= (Xval,yVal), shuffle=True)
  historyvet.append(history.history)
model.save('bilstm_predict_rul_experiment8_extended_full_take3_part4.h5')
model.summary()
Epoch {0}
389/389 [==============================] - 341s 860ms/step - loss: 9.9645 - val_loss: 18.2545
Epoch {1}
389/389 [==============================] - 330s 849ms/step - loss: 10.0845 - val_loss: 19.1041
Epoch {2}
389/389 [==============================] - 332s 852ms/step - loss: 10.3525 - val_loss: 19.0241
Epoch {3}
389/389 [==============================] - 330s 847ms/step - loss: 11.2895 - val_loss: 16.3651
Epoch {4}
389/389 [==============================] - 332s 853ms/step - loss: 12.9427 - val_loss: 19.6775
Epoch {5}
389/389 [==============================] - 334s 858ms/step - loss: 10.2216 - val_loss: 18.1022
Epoch {6}
389/389 [==============================] - 330s 849ms/step - loss: 9.6090 - val_loss: 18.8817
Epoch {7}
389/389 [==============================] - 333s 855ms/step - loss: 9.6592 - val_loss: 19.6096
Epoch {8}
389/389 [==============================] - 330s 847ms/step - loss: 15.4943 - val_loss: 18.0857
Epoch {9}
389/389 [==============================] - 344s 886ms/step - loss: 10.0890 - val_loss: 18.4702
Epoch {10}
389/389 [==============================] - 333s 855ms/step - loss: 10.9185 - val_loss: 18.3299
Epoch {11}
389/389 [==============================] - 332s 855ms/step - loss: 9.8375 - val_loss: 17.2769
Epoch {12}
389/389 [==============================] - 334s 858ms/step - loss: 9.0572 - val_loss: 18.4719
Epoch {13}
389/389 [==============================] - 330s 848ms/step - loss: 9.1006 - val_loss: 16.8966
Epoch {14}
389/389 [==============================] - 333s 855ms/step - loss: 9.3132 - val_loss: 19.2194
Epoch {15}
389/389 [==============================] - 330s 849ms/step - loss: 10.1103 - val_loss: 17.0467
Epoch {16}
389/389 [==============================] - 332s 854ms/step - loss: 10.6557 - val_loss: 18.4849
Epoch {17}
389/389 [==============================] - 332s 853ms/step - loss: 9.1632 - val_loss: 17.1429
Epoch {18}
389/389 [==============================] - 331s 850ms/step - loss: 8.9634 - val_loss: 16.3619
Epoch {19}
389/389 [==============================] - 331s 852ms/step - loss: 15.5038 - val_loss: 19.0220
Epoch {20}
389/389 [==============================] - 330s 850ms/step - loss: 9.1171 - val_loss: 16.2541
Epoch {21}
389/389 [==============================] - 334s 859ms/step - loss: 8.4189 - val_loss: 16.1693
Epoch {22}
389/389 [==============================] - 333s 857ms/step - loss: 8.5636 - val_loss: 20.3592
Epoch {23}
389/389 [==============================] - 334s 859ms/step - loss: 9.1997 - val_loss: 17.8628
Epoch {24}
389/389 [==============================] - 333s 856ms/step - loss: 9.2838 - val_loss: 17.8284
Epoch {25}
389/389 [==============================] - 334s 859ms/step - loss: 8.8097 - val_loss: 18.5538
Epoch {26}
389/389 [==============================] - 333s 856ms/step - loss: 15.5939 - val_loss: 18.6431
Epoch {27}
389/389 [==============================] - 336s 863ms/step - loss: 12.9848 - val_loss: 26.0683
Epoch {28}
389/389 [==============================] - 341s 876ms/step - loss: 18.6866 - val_loss: 17.9619
Epoch {29}
389/389 [==============================] - 336s 865ms/step - loss: 8.7301 - val_loss: 15.2111
Epoch {30}
389/389 [==============================] - 332s 854ms/step - loss: 8.1547 - val_loss: 16.6626
Epoch {31}
389/389 [==============================] - 332s 852ms/step - loss: 8.0667 - val_loss: 16.3300
Epoch {32}
389/389 [==============================] - 335s 860ms/step - loss: 9.6312 - val_loss: 18.5143
Epoch {33}
389/389 [==============================] - 338s 868ms/step - loss: 8.8509 - val_loss: 16.0280
Epoch {34}
389/389 [==============================] - 333s 856ms/step - loss: 9.1759 - val_loss: 19.9139
Epoch {35}
389/389 [==============================] - 335s 860ms/step - loss: 9.0047 - val_loss: 16.0639
Epoch {36}
389/389 [==============================] - 333s 855ms/step - loss: 7.9623 - val_loss: 17.3039
Epoch {37}
389/389 [==============================] - 335s 861ms/step - loss: 8.1814 - val_loss: 17.1028
Epoch {38}
389/389 [==============================] - 335s 861ms/step - loss: 8.6306 - val_loss: 18.1676
Epoch {39}
389/389 [==============================] - 335s 861ms/step - loss: 8.6266 - val_loss: 16.3168
Epoch {40}
389/389 [==============================] - 339s 869ms/step - loss: 8.3986 - val_loss: 16.0175
Epoch {41}
389/389 [==============================] - 335s 860ms/step - loss: 8.4103 - val_loss: 17.0997
Epoch {42}
389/389 [==============================] - 336s 864ms/step - loss: 9.0861 - val_loss: 16.6194
Epoch {43}
389/389 [==============================] - 334s 860ms/step - loss: 8.8549 - val_loss: 16.9267
Epoch {44}
389/389 [==============================] - 333s 857ms/step - loss: 8.1499 - val_loss: 15.8692
Epoch {45}
389/389 [==============================] - 337s 866ms/step - loss: 8.9060 - val_loss: 17.0316
Epoch {46}
389/389 [==============================] - 334s 857ms/step - loss: 8.2583 - val_loss: 17.9719
Epoch {47}
389/389 [==============================] - 335s 861ms/step - loss: 8.8649 - val_loss: 15.9925
Epoch {48}
389/389 [==============================] - 335s 861ms/step - loss: 8.3989 - val_loss: 16.3942
Epoch {49}
389/389 [==============================] - 336s 863ms/step - loss: 8.0296 - val_loss: 15.1184
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 bidirectional (Bidirection  (None, 90, 256)           141312    
 al)                                                             
                                                                 
 bidirectional_1 (Bidirecti  (None, 90, 128)           164352    
 onal)                                                           
                                                                 
 bidirectional_2 (Bidirecti  (None, 64)                41216     
 onal)                                                           
                                                                 
 dense (Dense)               (None, 96)                6240      
                                                                 
 dense_1 (Dense)             (None, 128)               12416     
                                                                 
 dense_2 (Dense)             (None, 1)                 129       
                                                                 
=================================================================
Total params: 365665 (1.39 MB)
Trainable params: 365665 (1.39 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
lossTrain=[]
lossval=[]
accTrain = []
accVal =[]
for element in historyvet:
   lossTrain.append(element['loss'][0])
   lossval.append(element['val_loss'][0])
   #accTrain.append(element['accuracy'][0])
   #accVal.append(element['val_accuracy'][0])
  
plt.plot(lossTrain, color='g')
plt.plot(lossval, color='r')
plt.title('model loss')
plt.xlabel('epoch')
plt.ylabel('Loss')
plt.legend(['train', 'validation'], loc='upper right')
plt.show()
pred = model.predict(Xtest)
1302/1302 [==============================] - 138s 104ms/step
print(model.evaluate(Xtest,yTest))
1302/1302 [==============================] - 136s 104ms/step - loss: 14.4658
14.465767860412598
y = yTest.copy()
yhat = pred.copy()
# calculate manually
d = y - yhat
mse_f = np.mean(d**2)
mae_f = np.mean(abs(d))
rmse_f = np.sqrt(mse_f)
r2_f = 1-(sum(d**2)/sum((y-np.mean(y))**2))
print("Results by manual calculation:")
print("MAE:",mae_f) #mean absolute error - difference between the original and predicted extracted by avg the abs diff over dataset
print("MSE:", mse_f) #mean squared error - diff btw orig and pred extracted by squared the avg diff over the dataset
print("RMSE:", rmse_f) #root mean squared error - is the error rate by the square root of MSE
print("R-Squared:", r2_f) #coefficient of determination - the higher the better 
Results by manual calculation:
MAE: 2.66739492077735
MSE: 14.465755477077103
RMSE: 3.8033873687907604
R-Squared: [0.83020934]
# Plot true and predicted RUL values
plt.plot(yTest, label = "True RUL", color = "red")
plt.plot(pred, label = "Pred RUL", color = "blue")
plt.legend()
plt.show()
x = list(range(len(yTest)))
plt.scatter(x, yTest, color="blue", label="original")
plt.plot(x, pred, color="red", label="predicted")
plt.legend()
plt.show() 
 
Fine-tune: continue training the saved model on the validation split
newModel = load_model( 'bilstm_predict_rul_experiment8_extended_full_take3_part4.h5')
#adam = optimizers.Adam(lr=0.00001, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=False)
adam=tf.keras.optimizers.Adam(learning_rate=0.001)
newModel.compile(loss='mse', optimizer=adam)
newModel.summary()
epochs = 25
newModel.fit(Xval,yVal, epochs=epochs, batch_size=500)
newModel.save('bilstm_predict_rul_experiment8_extended_full_take3_part4_best.h5')
newModel.summary()
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 bidirectional (Bidirection  (None, 90, 256)           141312    
 al)                                                             
                                                                 
 bidirectional_1 (Bidirecti  (None, 90, 128)           164352    
 onal)                                                           
                                                                 
 bidirectional_2 (Bidirecti  (None, 64)                41216     
 onal)                                                           
                                                                 
 dense (Dense)               (None, 96)                6240      
                                                                 
 dense_1 (Dense)             (None, 128)               12416     
                                                                 
 dense_2 (Dense)             (None, 1)                 129       
                                                                 
=================================================================
Total params: 365665 (1.39 MB)
Trainable params: 365665 (1.39 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
Epoch 1/25
84/84 [==============================] - 78s 773ms/step - loss: 18.8138
Epoch 2/25
84/84 [==============================] - 65s 775ms/step - loss: 15.1091
Epoch 3/25
84/84 [==============================] - 66s 780ms/step - loss: 23.6433
Epoch 4/25
84/84 [==============================] - 65s 779ms/step - loss: 15.5591
Epoch 5/25
84/84 [==============================] - 65s 772ms/step - loss: 12.9244
Epoch 6/25
84/84 [==============================] - 66s 783ms/step - loss: 11.1757
Epoch 7/25
84/84 [==============================] - 65s 775ms/step - loss: 10.3805
Epoch 8/25
84/84 [==============================] - 65s 778ms/step - loss: 9.4869
Epoch 9/25
84/84 [==============================] - 64s 768ms/step - loss: 8.9791
Epoch 10/25
84/84 [==============================] - 65s 776ms/step - loss: 8.6154
Epoch 11/25
84/84 [==============================] - 65s 777ms/step - loss: 8.4584
Epoch 12/25
84/84 [==============================] - 65s 775ms/step - loss: 10.6611
Epoch 13/25
84/84 [==============================] - 65s 767ms/step - loss: 8.3051
Epoch 14/25
84/84 [==============================] - 65s 769ms/step - loss: 9.2540
Epoch 15/25
84/84 [==============================] - 65s 770ms/step - loss: 8.8023
Epoch 16/25
84/84 [==============================] - 65s 775ms/step - loss: 7.9576
Epoch 17/25
84/84 [==============================] - 64s 767ms/step - loss: 6.7822
Epoch 18/25
84/84 [==============================] - 65s 771ms/step - loss: 6.8865
Epoch 19/25
84/84 [==============================] - 64s 767ms/step - loss: 7.3095
Epoch 20/25
84/84 [==============================] - 65s 772ms/step - loss: 7.3847
Epoch 21/25
84/84 [==============================] - 65s 779ms/step - loss: 7.3081
Epoch 22/25
84/84 [==============================] - 65s 775ms/step - loss: 16.4596
Epoch 23/25
84/84 [==============================] - 65s 771ms/step - loss: 14.7284
Epoch 24/25
84/84 [==============================] - 66s 780ms/step - loss: 9.3644
Epoch 25/25
84/84 [==============================] - 65s 778ms/step - loss: 6.8536
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 bidirectional (Bidirection  (None, 90, 256)           141312    
 al)                                                             
                                                                 
 bidirectional_1 (Bidirecti  (None, 90, 128)           164352    
 onal)                                                           
                                                                 
 bidirectional_2 (Bidirecti  (None, 64)                41216     
 onal)                                                           
                                                                 
 dense (Dense)               (None, 96)                6240      
                                                                 
 dense_1 (Dense)             (None, 128)               12416     
                                                                 
 dense_2 (Dense)             (None, 1)                 129       
                                                                 
=================================================================
Total params: 365665 (1.39 MB)
Trainable params: 365665 (1.39 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
print(newModel.evaluate(Xtest,yTest))
1302/1302 [==============================] - 135s 102ms/step - loss: 16.8626
16.862579345703125
pred = newModel.predict(Xtest)
1302/1302 [==============================] - 132s 99ms/step
# Plot true and predicted RUL values
plt.plot(yTest, label = "True RUL", color = "red")
plt.plot(pred, label = "Pred RUL", color = "blue")
plt.legend()
plt.show()
x = list(range(len(yTest)))
plt.scatter(x, yTest, color="blue", label="original")
plt.plot(x, pred, color="red", label="predicted")
plt.legend()
plt.show() 
y = yTest.copy()
yhat = pred.copy()
# calculate manually
d = y - yhat
mse_f = np.mean(d**2)
mae_f = np.mean(abs(d))
rmse_f = np.sqrt(mse_f)
r2_f = 1-(sum(d**2)/sum((y-np.mean(y))**2))
print("Results by manual calculation:")
print("MAE:",mae_f) #mean absolute error - difference between the original and predicted extracted by avg the abs diff over dataset
print("MSE:", mse_f) #mean squared error - diff btw orig and pred extracted by squared the avg diff over the dataset
print("RMSE:", rmse_f) #root mean squared error - is the error rate by the square root of MSE
print("R-Squared:", r2_f) #coefficient of determination - the higher the better - in my case >85% after training on the val dataset
Results by manual calculation:
MAE: 2.947035403738186
MSE: 16.86256980900072
RMSE: 4.106405947906358
R-Squared: [0.80207692]
results = pd.DataFrame({'Predicted':pred.flatten()})
results['Actual'] = yTest.flatten()
results
| | Predicted | Actual |
|---|---|---|
| 0 | 14.199033 | 15 | 
| 1 | 13.143090 | 13 | 
| 2 | 4.791924 | 1 | 
| 3 | 14.558222 | 19 | 
| 4 | 13.416628 | 15 | 
| ... | ... | ... | 
| 41628 | 26.579113 | 23 | 
| 41629 | 19.725632 | 12 | 
| 41630 | 21.202875 | 27 | 
| 41631 | 4.445514 | 3 | 
| 41632 | 12.522565 | 15 | 
41633 rows × 2 columns
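The results frame also allows a quick breakdown of error by true RUL (a sketch, adding a hypothetical abs_err column), which makes it easy to see how the error varies across the remaining-life range:
#sketch: mean absolute error per true RUL value, using the results frame above
results['abs_err'] = (results['Predicted'] - results['Actual']).abs()
print(results.groupby('Actual')['abs_err'].mean())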